Source code for kdiagram.plot.feature_based

# License: Apache 2.0
# Author: LKouadio <etanoyau@gmail.com>

import warnings
from typing import Any, Optional, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..compat.matplotlib import get_cmap
from ..decorators import check_non_emptiness
from ..utils.handlers import columns_manager
from ..utils.validator import ensure_2d

__all__ = ["plot_feature_fingerprint"]


def _normalize_rows_by_max(matrix, eps=1e-9):
    """Scale every row of ``matrix`` by that row's maximum.

    Parameters
    ----------
    matrix : array-like of shape (n_layers, n_features)
        Raw importance values.
    eps : float, default=1e-9
        Rows whose maximum is not strictly greater than ``eps`` (all
        zeros, all negative, or a ``NaN`` maximum) are returned as zeros
        instead of being divided.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape as ``matrix``.
    """
    matrix = np.asarray(matrix, dtype=float)
    # keepdims=True keeps shape (n_layers, 1) so it broadcasts per row.
    max_per_row = matrix.max(axis=1, keepdims=True)
    # ``where`` skips the division for unsafe rows; those cells keep the
    # zeros pre-filled in ``out``.
    return np.divide(
        matrix,
        max_per_row,
        out=np.zeros_like(matrix, dtype=float),
        where=max_per_row > eps,
    )


@check_non_emptiness(params=["importances"])
def plot_feature_fingerprint(
    importances,
    features: Optional[list[str]] = None,
    labels: Optional[list[str]] = None,
    normalize: bool = True,
    fill: bool = True,
    cmap: Union[str, list[Any]] = "tab10",
    title: str = "Feature Impact Fingerprint",
    figsize: Optional[tuple[float, float]] = None,
    show_grid: bool = True,
    savefig: Optional[str] = None,
):
    # NOTE: the user-facing docstring is attached through
    # ``plot_feature_fingerprint.__doc__`` right after this definition.

    # --- Input validation and preparation ---
    importance_matrix = ensure_2d(importances)
    # Convert to a plain float array once, for every code path. The matrix
    # may still be a DataFrame here; iterating a DataFrame yields column
    # labels rather than rows, which broke the ``normalize=False`` path.
    importance_matrix = np.asarray(importance_matrix, dtype=float)
    n_layers, n_features_data = importance_matrix.shape

    # Feature names: generate, pad or truncate to the number of columns.
    if features is None:
        features_list = [f"feature {i+1}" for i in range(n_features_data)]
    else:
        features_list = columns_manager(features, empty_as_none=False)
        if len(features_list) < n_features_data:
            # Too few names: append generic ones for the remaining columns.
            features_list.extend(
                [
                    f"feature {ix + 1}"
                    for ix in range(len(features_list), n_features_data)
                ]
            )
        elif len(features_list) > n_features_data:
            warnings.warn(
                f"More feature names ({len(features_list)}) provided "
                f"than data columns ({n_features_data}). "
                "Extra names ignored.",
                UserWarning,
                stacklevel=2,
            )
            features_list = features_list[:n_features_data]
    n_features = len(features_list)  # final number of angular axes

    # Layer labels: generate, pad or truncate to the number of rows.
    if labels is None:
        labels_list = [f"Layer {idx+1}" for idx in range(n_layers)]
    else:
        labels_list = list(labels)
        if len(labels_list) < n_layers:
            warnings.warn(
                f"Fewer labels ({len(labels_list)}) provided than "
                f"layers ({n_layers}). Using generic names for the rest.",
                UserWarning,
                stacklevel=2,
            )
            labels_list.extend(
                [
                    f"Layer {ix + 1}"
                    for ix in range(len(labels_list), n_layers)
                ]
            )
        elif len(labels_list) > n_layers:
            warnings.warn(
                f"More labels ({len(labels_list)}) provided than "
                f"layers ({n_layers}). Extra labels ignored.",
                UserWarning,
                stacklevel=2,
            )
            labels_list = labels_list[:n_layers]

    # --- Normalization (if requested) ---
    if normalize:
        importance_matrix = _normalize_rows_by_max(importance_matrix)

    # --- Angle calculation for radar axes ---
    # One evenly spaced angle per feature; repeat the first one so each
    # polygon closes on itself.
    angles = np.linspace(0, 2 * np.pi, n_features, endpoint=False).tolist()
    angles_closed = angles + angles[:1]

    # --- Plotting setup ---
    fig, ax = plt.subplots(figsize=figsize, subplot_kw=dict(polar=True))

    # --- Colors ---
    if isinstance(cmap, (list, tuple)) and len(cmap) > 0:
        # An explicit color sequence is used as-is (cycled further below),
        # independent of how ``get_cmap`` reacts to non-string input.
        colors = list(cmap)
        if len(colors) < n_layers:
            warnings.warn(
                f"Provided color list has fewer colors "
                f"({len(colors)}) than layers ({n_layers}). "
                f"Colors will repeat.",
                UserWarning,
                stacklevel=2,
            )
    else:
        # Colormap name (an empty list also lands here, which avoids a
        # modulo-by-zero when cycling colors).
        try:
            cmap_obj = get_cmap(cmap, default="tab10", failsafe="discrete")
        except ValueError:
            warnings.warn(
                f"Invalid cmap '{cmap}'. Falling back to 'tab10'.",
                UserWarning,
                stacklevel=2,
            )
            cmap_obj = get_cmap(
                "tab10", default="tab10", failsafe="discrete"
            )
        colors = [cmap_obj(i / n_layers) for i in range(n_layers)]

    # --- Plot each layer ---
    for idx, row in enumerate(importance_matrix):
        values = row.tolist()
        values_closed = values + values[:1]  # close the polygon
        color = colors[idx % len(colors)]  # cycle when colors run out

        ax.plot(
            angles_closed,
            values_closed,
            label=labels_list[idx],
            color=color,
            linewidth=2,
        )
        if fill:
            ax.fill(angles_closed, values_closed, color=color, alpha=0.25)

    # --- Customize plot appearance ---
    ax.set_title(title, size=16, y=1.1)  # lift the title above the axes

    # Feature names on the angular axis.
    ax.set_xticks(angles)
    ax.set_xticklabels(features_list, fontsize=11)

    # Radial axis: hide tick labels by default and start the radius at 0.
    ax.set_yticklabels([])
    ax.set_ylim(bottom=0)

    if normalize:
        # Normalized radii are capped at 1, so fixed reference rings fit.
        ax.set_yticks([0.25, 0.5, 0.75, 1.0])
        ax.set_yticklabels(
            ["0.25", "0.50", "0.75", "1.00"], fontsize=9, color="gray"
        )

    if show_grid:
        ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.6)
    else:
        ax.grid(False)

    # Legend anchored outside the plot area.
    ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1), fontsize=10)

    plt.tight_layout(pad=2.0)

    # --- Save or show ---
    if savefig:
        # Best effort: a failed save is reported, not raised, so the axes
        # are still returned to the caller.
        try:
            plt.savefig(savefig, bbox_inches="tight", dpi=300)
            print(f"Plot saved to {savefig}")
        except Exception as e:
            print(f"Error saving plot to {savefig}: {e}")
    else:
        plt.show()

    return ax
# Public docstring of ``plot_feature_fingerprint``, attached after the
# definition. Kept as a raw string so the LaTeX backslashes survive.
plot_feature_fingerprint.__doc__ = r"""
Create a radar chart visualizing feature-importance profiles.

This function draws a polar (radar) chart that compares how the
importance of a common set of features varies across multiple
groups/layers (e.g., different models, years, or spatial zones).
Each group is drawn as a closed polygon, producing an interpretable
"fingerprint" of relative influence across features (see also the
dataset helper :func:`~kdiagram.datasets.make_fingerprint_data`;
concept introduced in :footcite:t:`kouadiob2025`).

The angular position encodes the feature index, and the radius
encodes its (optionally normalized) importance value. Normalization
allows shape-only comparison across layers, independent of absolute
scale.

Parameters
----------
importances : array-like of shape (n_layers, n_features)
    The importance matrix. Each row corresponds to one layer/group
    and each column to a feature. Accepts a list of lists, a NumPy
    array, or a pandas DataFrame.

features : list of str, optional
    Names of the features, ideally one per column of
    ``importances``. If fewer names than columns are given, generic
    names are appended for the remaining columns; surplus names are
    dropped with a warning. If ``None``, generic names
    ``['feature 1', ..., 'feature N']`` are generated.

labels : list of str, optional
    Display names for layers (length should match ``n_layers``).
    If ``None``, generic names ``['Layer 1', ..., 'Layer M']`` are
    generated. When counts mismatch, the function pads/truncates
    and issues a warning.

normalize : bool, default=True
    If ``True``, normalize each row to the unit interval via
    :math:`r'_{ij} = r_{ij}/\max_k r_{ik}` (safe-dividing by zero
    yields zeros). This highlights *shape* differences across
    layers. If ``False``, raw magnitudes are plotted.

fill : bool, default=True
    If ``True``, fill each polygon with a translucent color;
    otherwise draw outlines only.

cmap : str or list, default='tab10'
    Either a Matplotlib colormap name (e.g., ``'viridis'``,
    ``'plasma'``, ``'tab10'``) or an explicit list of colors.
    Lists shorter than the number of layers will cycle with a
    warning.

title : str, default='Feature Impact Fingerprint'
    Figure title.

figsize : tuple of (float, float), optional
    Figure size in inches. If ``None``, a sensible default is used.

show_grid : bool, default=True
    Whether to show polar grid lines.

savefig : str, optional
    Path to save the figure (e.g., ``'fingerprint.png'``). If
    omitted, the plot is shown interactively.

Returns
-------
ax : matplotlib.axes.Axes
    The polar axes containing the radar chart (useful for further
    customization).

Notes
-----
**Angular encoding.** With :math:`N` features, angular positions
are equally spaced:

.. math::

   \theta_j \;=\; \frac{2\pi j}{N}, \qquad j = 0, \dots, N-1.

**Closing polygons.** To draw closed fingerprints, the first vertex
:math:`(\theta_0, r_{i0})` is appended again at :math:`2\pi` for
each layer :math:`i`.

**Row-wise normalization (default).** If ``normalize=True``, each
row :math:`\mathbf r_i=(r_{i0},\dots,r_{i,N-1})` is scaled to its
maximum:

.. math::

   r'_{ij} \;=\;
   \begin{cases}
   \dfrac{r_{ij}}{\max_k r_{ik}}, & \max_k r_{ik} > 0,\\[6pt]
   0, & \text{otherwise,}
   \end{cases}

which emphasizes *shape* differences between layers but removes
absolute magnitude information. Set ``normalize=False`` to compare
magnitudes.

**Alternative min–max scaling (pre-processing).** If you prefer
values distributed over :math:`[0,1]` using the local range, apply
this transformation per row before calling the function:

.. math::

   r''_{ij} \;=\; \frac{r_{ij} - \min_k r_{ik}}
   {\max_k r_{ik} - \min_k r_{ik} + \varepsilon},

with a small :math:`\varepsilon>0` to avoid division by zero.

**Data assumptions.** Importance values are expected to be
non-negative. Rows with a non-positive maximum (all zeros or all
negative) become zeros under the default normalization. If your
data can be negative, either: (1) set ``normalize=False`` and
choose appropriate radial limits, or (2) shift/scale to
non-negative values (e.g., min–max per row).

**Missing/invalid values.** ``NaN`` or ``inf`` entries propagate to
the plot and may render gaps. Clean data beforehand, e.g.:

.. code-block:: python

   import numpy as np
   X = np.asarray(importances, float)
   X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)

**Radial limits and ticks.** The plot enforces a non-negative
radius (``ax.set_ylim(bottom=0)``). For unnormalized data, you may
set a custom maximum:

.. code-block:: python

   ax.set_rmax(np.nanmax(importances))

Optionally add/readjust radial ticks for readability:

.. code-block:: python

   ax.set_yticks([0.25, 0.5, 0.75, 1.0])
   ax.set_yticklabels(["0.25", "0.50", "0.75", "1.00"])

**Feature order matters.** The perceived shape depends on feature
ordering around the circle. Keep a consistent, meaningful order
across comparisons (e.g., domain grouping or sorted by average
importance).

**Many features or layers.** With large :math:`N`, tick labels can
overlap. Consider thinning labels or rotating them:

.. code-block:: python

   angles = ax.get_xticks()
   ax.set_xticks(angles[::2])
   ax.set_xticklabels([lbl for i, lbl in enumerate(features)
                       if i % 2 == 0], rotation=25, ha="right")

For many layers, prefer a discrete colormap and a multi-column
legend or move it outside:

.. code-block:: python

   ax.legend(loc="upper left", bbox_to_anchor=(1.02, 1.0), ncol=2)

**Color and accessibility.** Use colorblind-friendly palettes
(e.g., ``'tab10'``, ``'tab20'``) or pass an explicit color list.
Avoid relying on color alone when printing in grayscale—consider
distinct linestyles.

**Complexity.** Runtime and memory scale as :math:`\mathcal O(MN)`
for :math:`M` layers and :math:`N` features. For very large inputs,
down-select features or layers for clarity.

**Utilities.** Inputs are coerced to a numeric 2D array and feature
names managed via lightweight helpers (e.g., ``ensure_2d``,
``columns_manager``). Name-count mismatches are padded/truncated
rather than raising; a warning is issued except when missing
feature names are padded.

See Also
--------
kdiagram.datasets.make_fingerprint_data :
    Generate a synthetic importance matrix suitable for this plot.
kdiagram.plot.relationship.plot_relationship :
    Polar scatter for true–predicted relationships.
matplotlib.pyplot.polar :
    Underlying polar plotting primitives.

Examples
--------
Generate random importances and plot with normalization and fills.

>>> import numpy as np
>>> from kdiagram.plot.feature_based import plot_feature_fingerprint
>>> rng = np.random.default_rng(42)
>>> imp = rng.random((3, 6))  # 3 layers, 6 features
>>> feats = [f'Feature {i+1}' for i in range(6)]
>>> labels = ['Model A', 'Model B', 'Model C']
>>> ax = plot_feature_fingerprint(
...     importances=imp,
...     features=feats,
...     labels=labels,
...     title='Random Feature Importance Comparison',
...     cmap='Set3',
...     normalize=True,
...     fill=True
... )

Year-over-year weights without normalization.

>>> features = ['rainfall', 'GWL', 'seismic', 'density', 'geo']
>>> weights = [
...     [0.2, 0.4, 0.1, 0.6, 0.3],  # 2023
...     [0.3, 0.5, 0.2, 0.4, 0.4],  # 2024
...     [0.1, 0.6, 0.2, 0.5, 0.3],  # 2025
... ]
>>> years = ['2023', '2024', '2025']
>>> ax = plot_feature_fingerprint(
...     importances=weights,
...     features=features,
...     labels=years,
...     title='Feature Influence Over Years',
...     cmap='tab10',
...     normalize=False
... )

References
----------
.. footbibliography::
"""