Source code for kdiagram.plot.comparison

# License: Apache 2.0
# Author: LKouadio <etanoyau@gmail.com>

"""Model comparison plots."""

from __future__ import annotations

import warnings
from numbers import Real
from typing import Any, Callable, Literal

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import cm
from matplotlib.axes import Axes
from matplotlib.collections import LineCollection
from matplotlib.colors import Normalize

from ..api.typing import Acov
from ..compat.matplotlib import get_cmap
from ..compat.sklearn import StrOptions, type_of_target, validate_params
from ..decorators import check_non_emptiness, isdf
from ..utils.generic_utils import drop_nan_in
from ..utils.handlers import columns_manager
from ..utils.metric_utils import get_scorer
from ..utils.plot import (
    _setup_axes_for_reliability,
    set_axis_grid,
    setup_polar_axes,
)
from ..utils.validator import _assert_all_types, is_iterable, validate_yy

__all__ = [
    "plot_reliability_diagram",
    "plot_model_comparison",
    "plot_horizon_metrics",
    "plot_polar_reliability",
]


[docs] @validate_params( { "y_true": ["array-like"], "strategy": [StrOptions({"uniform", "quantile"})], "error_bars": [StrOptions({"wilson", "normal", "none"})], "counts_panel": [StrOptions({"none", "bottom"})], "counts_norm": [StrOptions({"fraction", "count"})], } ) def plot_reliability_diagram( y_true, *y_preds, names: list[str] | None = None, sample_weight: list[float] | np.ndarray | None = None, n_bins: int = 10, strategy: str = "uniform", positive_label: int | float | str = 1, class_index: int | None = None, clip_probs: tuple[float, float] = (0.0, 1.0), normalize_probs: bool = True, error_bars: str = "wilson", conf_level: float = 0.95, show_diagonal: bool = True, diagonal_kwargs: dict[str, Any] | None = None, show_ece: bool = True, show_brier: bool = True, counts_panel: str = "bottom", counts_norm: Literal["fraction", "count"] = "fraction", counts_alpha: float = 0.35, figsize: tuple[float, float] | None = (9, 7), title: str | None = None, xlabel: str | None = "Predicted probability", ylabel: str | None = "Observed frequency", cmap: str = "tab10", color_palette: list[Any] | None = None, marker: str = "o", s: int = 40, linewidth: float = 2.0, alpha: float = 0.9, connect: bool = True, legend: bool = True, legend_loc: str = "best", show_grid: bool = True, grid_props: dict | None = None, xlim: tuple[float, float] = (0.0, 1.0), ylim: tuple[float, float] = (0.0, 1.0), savefig: str | None = None, return_data: bool = False, ax: Axes | None = None, **kw, ): # -------------- input handling -------------- # if len(y_preds) == 0: raise ValueError( "Provide at least one prediction array via *y_preds." ) names = columns_manager(names, to_string=True) or [] if len(names) < len(y_preds): names.extend( [f"Model_{i + 1}" for i in range(len(names), len(y_preds))] ) if len(names) > len(y_preds): warnings.warn( ( f"Received {len(names)} names for {len(y_preds)} models. " "Extra names ignored." ), UserWarning, stacklevel=2, ) names = names[: len(y_preds)] y_true = np.asarray(y_true) if type_of_target(y_true) not in ("binary", "multiclass"): raise ValueError( "y_true must be a classification target. " "Binary reliability is expected." ) y_bin = (y_true == positive_label).astype(int) prob_list: list[np.ndarray] = [] for arr in y_preds: arr = np.asarray(arr) prob_list.append(_to_prob_vector(arr, class_index)) if sample_weight is None: y_bin, *prob_list = drop_nan_in(y_bin, *prob_list, error="raise") w = np.ones_like(y_bin, dtype=float) else: w = np.asarray(sample_weight, dtype=float) y_bin, *prob_list, w = drop_nan_in( y_bin, *prob_list, w, error="raise" ) clip_lo, clip_hi = clip_probs clipped_flag = False new_probs = [] for p in prob_list: p0 = p.copy() p1 = _prep_probs(p0, clip_lo, clip_hi, normalize_probs) if not np.allclose(p0, p1): clipped_flag = True new_probs.append(p1) prob_list = new_probs if clipped_flag: warnings.warn( ( "Some predicted probabilities were normalized/clipped " f"to [{clip_lo}, {clip_hi}]." ), UserWarning, stacklevel=2, ) edges, centers = _build_bins( prob_list, n_bins, strategy, clip_lo, clip_hi ) z = _z_from_conf(conf_level) # -------------- colors & layout -------------- # colors = _colors(cmap, color_palette, len(prob_list)) if ax is not None and figsize is not None: warnings.warn( "`figsize` ignored because `ax` was provided.", stacklevel=2 ) fig, ax, axb = _setup_axes_for_reliability( ax=ax, counts_panel=counts_panel, figsize=figsize ) # if counts_panel == "bottom": # fig = plt.figure(figsize=figsize) # gs = fig.add_gridspec(2, 1, height_ratios=(3.0, 1.0), hspace=0.12) # ax = fig.add_subplot(gs[0, 0]) # axb = fig.add_subplot(gs[1, 0], sharex=ax) # else: # fig, ax = plt.subplots(figsize=figsize) # axb = None # -------------- compute & plot -------------- # per_model: dict[str, pd.DataFrame] = {} for i, (name, p, col) in enumerate(zip(names, prob_list, colors)): stats = _bin_stats(p, y_bin, w, edges, error_bars, z) ece = float(np.nansum(stats["ece"])) br = _brier(p, y_bin, w) df = pd.DataFrame( { "bin_left": edges[:-1], "bin_right": edges[1:], "bin_center": centers, "n": stats["n"], "w_sum": stats["wsum"], "p_mean": stats["pmean"], "y_rate": stats["yrate"], "y_low": stats["ylo"], "y_high": stats["yhi"], "ece_contrib": stats["ece"], } ) per_model[name] = df valid = df["w_sum"].to_numpy() > 0 x = df.loc[valid, "p_mean"].to_numpy() y = df.loc[valid, "y_rate"].to_numpy() ylo = df.loc[valid, "y_low"].to_numpy() yhi = df.loc[valid, "y_high"].to_numpy() if error_bars.lower() != "none": yerr = np.vstack([y - ylo, yhi - y]) ax.errorbar( x, y, yerr=yerr, fmt="none", ecolor=col, elinewidth=1.0, capsize=2, alpha=alpha * 0.85, ) ax.scatter(x, y, c=[col], s=s, marker=marker, alpha=alpha) if connect and len(x) > 1: ax.plot(x, y, color=col, linewidth=linewidth, alpha=alpha) label = name pieces = [] if show_ece: pieces.append(f"ECE={ece:.3f}") if show_brier: pieces.append(f"Brier={br:.3f}") if pieces: label = f"{label} ({', '.join(pieces)})" ax.plot( [], [], color=col, marker=marker, linestyle="-" if connect else "None", linewidth=linewidth, label=label, ) if axb is not None: bw = edges[1:] - edges[:-1] slot = bw * 0.8 / max(1, len(prob_list)) left = edges[:-1] + i * slot vals = df["w_sum"].to_numpy() if counts_norm == "fraction": denom = vals.sum() if vals.sum() > 0 else 1.0 vals = vals / denom axb.bar( left, vals, width=slot, align="edge", color=col, alpha=counts_alpha, label=name, ) for lab in ax.get_xticklabels(): lab.set_visible(False) # -------------- format axes -------------- # ax.set_xlim(*xlim) ax.set_ylim(*ylim) if xlabel: ax.set_xlabel(xlabel) if ylabel: ax.set_ylabel(ylabel) if title: ax.set_title(title) if show_diagonal: diag_kw = { "color": "gray", "linestyle": "--", "linewidth": 1.2, "alpha": 0.9, } if diagonal_kwargs: _assert_all_types( diagonal_kwargs, dict, objname="'diagonal_kwargs'" ) diag_kw.update(diagonal_kwargs) ax.plot((0.0, 1.0), (0.0, 1.0), **diag_kw) set_axis_grid(ax, show_grid=show_grid, grid_props=grid_props) if legend: ax.legend(loc=legend_loc) if axb is not None: axb.set_xlim(*xlim) axb.axhline(0, color="gray", lw=0.8) axb.set_xlabel(xlabel or "Predicted probability") axb.set_ylabel("Frac." if counts_norm == "fraction" else "Count") set_axis_grid(axb, show_grid=True, grid_props={"alpha": 0.25}) handles, labels = axb.get_legend_handles_labels() if handles and labels: axb.legend(loc="upper right", fontsize=8) plt.tight_layout() if savefig: try: fig.savefig(savefig, bbox_inches="tight", dpi=300) print(f"Plot saved to {savefig}") except Exception as e: print(f"Error saving plot to {savefig}: {e}") else: plt.show() if return_data: return ax, per_model return ax
# ------------------ helpers ------------------ # def _z_from_conf(cf: float) -> float: table = { 0.80: 1.2815515655, 0.90: 1.6448536269, 0.95: 1.9599639845, 0.975: 2.241402728, 0.99: 2.5758293035, } return table.get(round(cf, 3), 1.9599639845) def _to_prob_vector(arr: np.ndarray, ci: int | None) -> np.ndarray: if arr.ndim == 1: return arr.astype(float, copy=False) if arr.ndim == 2: idx = arr.shape[1] - 1 if ci is None else ci if idx < 0 or idx >= arr.shape[1]: raise ValueError( "class_index out of bounds for 2D predictions: " f"{idx} not in [0, {arr.shape[1] - 1}]" ) return arr[:, idx].astype(float, copy=False) raise ValueError( "Predictions must be 1D probabilities or " "(n_samples, n_classes) arrays." ) def _prep_probs( p: np.ndarray, clip_lo: float, clip_hi: float, do_norm: bool ) -> np.ndarray: p = np.asarray(p, dtype=float) if do_norm: pmin, pmax = np.nanmin(p), np.nanmax(p) if (pmin < -1e-9) or (pmax > 1.0 + 1e-9): rng = pmax - pmin if rng > 1e-12: p = (p - pmin) / rng p = np.clip(p, clip_lo, clip_hi) return p def _build_bins( probs_list: list[np.ndarray], nb: int, strat: str, low: float, high: float ) -> tuple[np.ndarray, np.ndarray]: if strat == "uniform": edges = np.linspace(low, high, nb + 1) else: allp = np.concatenate(probs_list) q = np.linspace(0.0, 1.0, nb + 1) edges = np.quantile(allp, q) edges = np.unique(edges) if len(edges) - 1 < nb: warnings.warn( ( "Not enough unique quantile edges; " "falling back to uniform bins." ), UserWarning, stacklevel=2, ) edges = np.linspace(low, high, nb + 1) centers = 0.5 * (edges[:-1] + edges[1:]) return edges, centers def _bin_stats( p: np.ndarray, y: np.ndarray, w: np.ndarray, edges: np.ndarray, ebars: str, zval: float, ) -> dict[str, np.ndarray]: nb = len(edges) - 1 eps = 1e-12 idx = np.digitize(p, edges, right=False) - 1 idx[idx < 0] = 0 idx[idx >= nb] = nb - 1 n = np.zeros(nb, dtype=float) wsum = np.zeros(nb, dtype=float) pmean = np.zeros(nb, dtype=float) yr = np.zeros(nb, dtype=float) for b in range(nb): m = idx == b if not np.any(m): continue ww = w[m] pp = p[m] yy = y[m] wsum[b] = ww.sum() n[b] = float(m.sum()) denom = max(wsum[b], eps) pmean[b] = float(np.dot(ww, pp) / denom) yr[b] = float(np.dot(ww, yy) / denom) if ebars.lower() == "none": ylo = np.full_like(yr, np.nan) yhi = np.full_like(yr, np.nan) elif ebars.lower() == "normal": neff = np.maximum(wsum, eps) se = np.sqrt(np.clip(yr * (1.0 - yr) / neff, 0.0, 1.0)) ylo = np.clip(yr - zval * se, 0.0, 1.0) yhi = np.clip(yr + zval * se, 0.0, 1.0) else: neff = np.maximum(wsum, eps) ylo = np.empty_like(yr) yhi = np.empty_like(yr) for i in range(nb): ph = yr[i] n_ = neff[i] if n_ <= eps: ylo[i] = np.nan yhi[i] = np.nan continue denom = 1.0 + (zval**2) / n_ center = (ph + (zval**2) / (2.0 * n_)) / denom rad = ( zval * np.sqrt((ph * (1.0 - ph) + (zval**2) / (4.0 * n_)) / n_) ) / denom ylo[i] = np.clip(center - rad, 0.0, 1.0) yhi[i] = np.clip(center + rad, 0.0, 1.0) totw = max(w.sum(), eps) wbin = wsum / totw ece_contrib = wbin * np.abs(yr - pmean) return { "n": n, "wsum": wsum, "pmean": pmean, "yrate": yr, "ylo": ylo, "yhi": yhi, "wbin": wbin, "ece": ece_contrib, } def _brier(p: np.ndarray, y: np.ndarray, w: np.ndarray) -> float: return float(np.average((p - y) ** 2, weights=w)) def _colors(cmap_name: str, palette: list[Any] | None, k: int) -> list[Any]: if palette is not None: return [palette[i % len(palette)] for i in range(k)] try: cmo = get_cmap(cmap_name, default="tab10", failsafe="discrete") except ValueError: warnings.warn( f"Invalid cmap '{cmap_name}'. Using 'tab10' instead.", UserWarning, stacklevel=2, ) cmo = get_cmap("tab10", default="tab10", failsafe="discrete") if hasattr(cmo, "colors") and len(cmo.colors) >= k: return list(cmo.colors[:k]) if k == 1: return [cmo(0.5)] return [cmo(i / (k - 1)) for i in range(k)] plot_reliability_diagram.__doc__ = r""" Plot a reliability diagram (calibration plot) for one or more classification models. This compares **predicted probabilities** to **observed frequencies** across bins of predicted probability. Perfect calibration lies on the diagonal :math:`y=x`. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth labels. For binary calibration, values are compared to ``positive_label`` after validation and flattening. *y_preds : array-like(s) One or more model predictions. Each item may be: - 1D array of positive-class probabilities in ``[0, 1]``. - 2D array of shape ``(n_samples, n_classes)``; use ``class_index`` to select a column. If omitted, the last column is used. names : list of str, optional Labels for each model curve. If fewer names are provided than models, placeholders like ``'Model_1'`` are appended. sample_weight : array-like of shape (n_samples,), optional Per-sample weights used for observed frequencies, ECE, and Brier score. If ``None``, equal weights are used. n_bins : int, default=10 Number of probability bins. strategy : {'uniform', 'quantile'}, default='uniform' Binning strategy. - ``'uniform'``: equally spaced edges in ``[0, 1]``. - ``'quantile'``: edges are empirical quantiles of the pooled predictions. If edges are not unique, the method falls back to uniform binning with a warning. positive_label : int or float or str, default=1 Label in ``y_true`` treated as the positive class when constructing the binary target. class_index : int, optional Column index to pick from 2D probability arrays. If omitted, the last column is used. clip_probs : tuple of (float, float), default=(0.0, 1.0) Inclusive clipping range applied to predictions. A warning is issued if clipping occurs. normalize_probs : bool, default=True If ``True``, attempts to linearly rescale predictions into ``[0, 1]`` when minor out-of-range values are detected, then applies clipping. error_bars : {'wilson', 'normal', 'none'}, default='wilson' Per-bin uncertainty for observed frequencies. - ``'wilson'``: Wilson interval using ``conf_level``. - ``'normal'``: normal approximation. - ``'none'``: no error bars. conf_level : float, default=0.95 Confidence level used for error bars when applicable. show_diagonal : bool, default=True Draw the reference diagonal :math:`y=x`. diagonal_kwargs : dict, optional Matplotlib keyword arguments for the diagonal reference line (e.g., ``linestyle``, ``color``). show_ece : bool, default=True Compute Expected Calibration Error (ECE) and append a summary to each model label. show_brier : bool, default=True Compute (weighted) Brier score and append a summary to each model label. counts_panel : {'none', 'bottom'}, default='bottom' If not ``'none'``, draw a compact histogram below the main panel that shows per-bin totals for each model. counts_norm : {'fraction', 'count'}, default='fraction' Normalization for the counts panel. ``'fraction'`` divides by the total weight; ``'count'`` shows raw weighted sums. counts_alpha : float, default=0.35 Alpha for bars in the counts panel. figsize : tuple of (float, float), default=(9, 7) Figure size for the layout. When ``counts_panel='bottom'``, a two-row gridspec is used. title : str, optional Title for the plot. If ``None``, no title is set. xlabel : str, optional Label for the x-axis. Defaults to ``'Predicted probability'``. ylabel : str, optional Label for the y-axis. Defaults to ``'Observed frequency'``. cmap : str, default='tab10' Matplotlib colormap name used to generate model colors. color_palette : list, optional Explicit list of colors. When provided, colors are cycled from this list instead of the colormap. marker : str, default='o' Marker used for the bin points. s : int, default=40 Marker size for the bin points. linewidth : float, default=2.0 Line width used when connecting bin points. alpha : float, default=0.9 Alpha for points and lines in the main panel. connect : bool, default=True Connect bin points with a line for each model. legend : bool, default=True Display a legend. Summary metrics (ECE, Brier) are shown next to model names when enabled. legend_loc : str, default='best' Legend location passed to Matplotlib. show_grid : bool, default=True Toggle gridlines via the package helper ``set_axis_grid``. grid_props : dict, optional Keyword arguments passed to ``set_axis_grid`` for grid customization (e.g., ``linestyle``, ``alpha``). xlim : tuple of (float, float), default=(0.0, 1.0) X-axis limits. ylim : tuple of (float, float), default=(0.0, 1.0) Y-axis limits. savefig : str, optional If provided, save the figure to this path; otherwise the plot is shown interactively. return_data : bool, default=False If ``True``, return ``(ax, data_dict)`` where values are per-model ``pandas.DataFrame`` objects with per-bin stats: ``['bin_left', 'bin_right', 'bin_center', 'n', 'w_sum', 'p_mean', 'y_rate', 'y_low', 'y_high', 'ece_contrib']``. Otherwise, return only the Matplotlib axes. Returns ------- ax : matplotlib.axes.Axes Axes of the main calibration plot. When ``counts_panel='bottom'``, the second axes (counts panel) is not returned. Notes ----- Calibration compares *confidence* to *accuracy* within bins. For bin :math:`b`, let :math:`\hat{p}_i` be predictions and :math:`y_i\in\{0,1\}` be binary targets with weights :math:`w_i\ge 0`. Define the weighted bin mean probability and accuracy as .. math:: \bar{p}_b \;=\; \frac{\sum_{i\in b} w_i \hat{p}_i} {\sum_{i\in b} w_i}, \qquad \bar{y}_b \;=\; \frac{\sum_{i\in b} w_i y_i} {\sum_{i\in b} w_i}. The Expected Calibration Error (ECE) is .. math:: \mathrm{ECE} \;=\; \sum_b \left( \frac{\sum_{i\in b} w_i}{\sum_i w_i} \right) \left| \bar{y}_b - \bar{p}_b \right|. The (weighted) Brier score is .. math:: \mathrm{Brier} \;=\; \frac{\sum_i w_i \left(\hat{p}_i - y_i\right)^2} {\sum_i w_i}. Wilson confidence intervals for :math:`\bar{y}_b` use :math:`z = \Phi^{-1}\!\left(\tfrac{1+\alpha}{2}\right)` and effective count :math:`n_b=\sum_{i\in b} w_i`: .. math:: \mathrm{center} \;=\; \frac{\bar{y}_b + \frac{z^2}{2 n_b}} {1 + \frac{z^2}{n_b}}, \qquad \mathrm{radius} \;=\; \frac{z}{1 + \frac{z^2}{n_b}} \sqrt{\frac{\bar{y}_b(1-\bar{y}_b)}{n_b} + \frac{z^2}{4 n_b^2}}. The interval is :math:`[\mathrm{center}-\mathrm{radius}, \mathrm{center}+\mathrm{radius}]`, clipped to ``[0, 1]``. The normal interval replaces the term with the usual standard error :math:`\sqrt{\bar{y}_b(1-\bar{y}_b)/n_b}`. When ``strategy='quantile'``, bin edges are the empirical quantiles of the pooled predictions. If many identical values exist, edges can collapse; in that case, the function falls back to uniform edges with a warning. Examples -------- Binary example with quantile bins and Wilson intervals. >>> import numpy as np >>> from kdiagram.plot.comparison import \ ... plot_reliability_diagram >>> rng = np.random.default_rng(0) >>> y = (rng.random(1000) < 0.4).astype(int) >>> p1 = 0.4 * np.ones_like(y) + 0.15 * rng.random(len(y)) >>> p2 = 0.4 * np.ones_like(y) + 0.05 * rng.random(len(y)) >>> ax = plot_reliability_diagram( ... y, p1, p2, ... names=['Wide', 'Tight'], ... n_bins=12, ... strategy='quantile', ... error_bars='wilson', ... counts_panel='bottom', ... show_ece=True, ... show_brier=True, ... title=('Reliability Diagram ' ... '(Quantile bins + Wilson CIs)'), ... ) """
[docs] @check_non_emptiness(params=["y_true", "y_preds"]) def plot_polar_reliability( y_true: np.ndarray, *y_preds: np.ndarray, names: list[str] | None = None, n_bins: int = 10, strategy: str = "uniform", title: str = "Polar Reliability Diagram", figsize: tuple[float, float] = (8.0, 8.0), cmap: str = "coolwarm", acov: Acov = "half_circle", show_grid: bool = True, grid_props: dict[str, Any] | None = None, show_cbar: bool = True, mask_radius: bool = False, savefig: str | None = None, dpi: int = 300, ax: Axes | None = None, ) -> Axes: if not y_preds: raise ValueError("At least one prediction array must be provided.") if not names: names = [f"Model {i + 1}" for i in range(len(y_preds))] y_true = np.asarray(y_true) prob_list = [_to_prob_vector(p, ci=None) for p in y_preds] weights = np.ones_like(y_true, dtype=float) edges, _ = _build_bins(prob_list, n_bins, strategy, 0.0, 1.0) # consistent palette colors = _colors(cmap, palette=None, k=len(y_preds)) # axes + angular span in radians fig, ax, span = setup_polar_axes( ax, acov=acov, figsize=figsize, ) # perfect calibration spiral over [0, span] perfect_theta = np.linspace(0.0, float(span), 100) perfect_radius = np.linspace(0.0, 1.0, 100) ax.plot( perfect_theta, perfect_radius, color="black", linestyle="--", lw=1.5, label="Perfect Calibration", ) line_collection_for_cbar = None # model spirals with diagnostic coloring for i, (name, p) in enumerate(zip(names, prob_list)): stats = _bin_stats(p, y_true, weights, edges, ebars="none", zval=0) df = pd.DataFrame( { "p_mean": stats["pmean"], "y_rate": stats["yrate"], } ).dropna() model_theta = df["p_mean"].to_numpy() * float(span) model_radius = df["y_rate"].to_numpy() # deviation from perfect calibration calibration_error = model_radius - df["p_mean"].to_numpy() # build colored segments pts = np.array([model_theta, model_radius]).T pts = pts.reshape(-1, 1, 2) segs = np.concatenate([pts[:-1], pts[1:]], axis=1) norm = Normalize(vmin=-0.5, vmax=0.5) lc = LineCollection( segs, cmap=get_cmap(cmap), norm=norm, ) lc.set_array(calibration_error[:-1]) lc.set_linewidth(3.0) line = ax.add_collection(lc) if i == 0: line_collection_for_cbar = line # legend handle ax.plot( [], [], color=get_cmap(cmap)(0.5), lw=3.0, label=name, ) # light fill between model and perfect spiral interp = np.interp( perfect_theta, model_theta, model_radius, left=0.0, right=1.0, ) ax.fill_between( perfect_theta, interp, perfect_radius, color=colors[i], alpha=0.15, ) # formatting ax.set_title(title, fontsize=16, y=1.1) ax.set_ylim(0.0, 1.05) # ticks: show predicted prob in [0,1] along angle xt = np.linspace(0.0, float(span), 6) xl = [f"{v:.1f}" for v in np.linspace(0.0, 1.0, 6)] ax.set_xticks(xt) ax.set_xticklabels(xl) ax.set_xlabel("Predicted Probability", labelpad=15) ax.set_ylabel("Observed Frequency", labelpad=25) ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.15)) set_axis_grid(ax, show_grid=show_grid, grid_props=grid_props) # colorbar (horizontal) if show_cbar and line_collection_for_cbar is not None: cbar = fig.colorbar( line_collection_for_cbar, ax=ax, orientation="horizontal", shrink=0.75, pad=0.08, ) cbar.set_label( "Calibration Error (Observed - Predicted)", fontsize=10, ) if mask_radius: ax.set_yticklabels([]) fig.tight_layout() if savefig: fig.savefig(savefig, dpi=dpi, bbox_inches="tight") plt.close(fig) else: plt.show() return ax
plot_polar_reliability.__doc__ = r""" Plot a Polar Reliability Diagram (Calibration Spiral). This function provides a novel visualization of model calibration by mapping the traditional reliability diagram onto a polar coordinate system :footcite:p:`kouadiob2025`. It compares **predicted probabilities** (mapped to the angle) to **observed frequencies** (mapped to the radius). Perfect calibration is represented by a perfect Archimedean spiral. The plot uses a diverging colormap to diagnostically color the model's spiral, immediately revealing regions of over- or under-confidence. Parameters ---------- y_true : np.ndarray 1D array of true binary labels (0 or 1). *y_preds : np.ndarray One or more 1D arrays of predicted probabilities for each model. names : list of str, optional Display names for each of the models. If not provided, generic names like ``'Model 1'`` will be generated. n_bins : int, default=10 Number of bins to group predicted probabilities into for analysis. strategy : {'uniform', 'quantile'}, default='uniform' The strategy for creating bins: - ``'uniform'``: Bins are of equal width across the [0, 1] range. - ``'quantile'``: Bins are created based on the quantiles of the predicted probabilities, ensuring each bin has a similar number of samples. title : str, default="Polar Reliability Diagram" The title for the plot. figsize : tuple of (float, float), default=(8, 8) The figure size in inches. cmap : str, default='coolwarm' A diverging colormap used to color the model's spiral. The center of the colormap represents perfect calibration, with one color for over-confidence and another for under-confidence. acov : {'default', 'half_circle', 'quarter_circle', 'eighth_circle'}, default='half_circle' Angular coverage of the polar sector. - ``'default'`` : full circle, :math:`2\pi` (360°) - ``'half_circle'`` : :math:`\pi` (180°) - ``'quarter_circle'`` : :math:`\pi/2` (90°) - ``'eighth_circle'`` : :math:`\pi/4` (45°) show_cbar : bool, default=True If ``True``, display a color bar that explains the diagnostic coloring of the calibration error. show_grid : bool, default=True Toggle the visibility of the polar grid lines. grid_props : dict, optional Custom keyword arguments passed to the grid for styling (e.g., ``linestyle``, ``alpha``). mask_radius : bool, default=False If ``True``, hide the radial tick labels. savefig : str, optional The file path to save the plot. If ``None``, the plot is displayed interactively. dpi : int, default=300 The resolution (dots per inch) for the saved figure. Returns ------- ax : matplotlib.axes.Axes The Matplotlib Axes object containing the polar reliability plot. Notes ----- This plot is a polar adaptation of the standard reliability diagram, a key tool in forecast verification :footcite:p:`Jolliffe2012`. 1. **Binning**: Predicted probabilities :math:`p_i` are first partitioned into :math:`K` bins. For each bin :math:`k`, the mean predicted probability (:math:`\bar{p}_k`) and the mean observed frequency (:math:`\bar{y}_k`) are calculated. 2. **Polar Mapping**: These values are then mapped to polar coordinates: .. math:: \theta_k &= \bar{p}_k \cdot \frac{\pi}{2} \\ r_k &= \bar{y}_k The plot is constrained to a 90-degree quadrant where the angle :math:`\theta` represents the predicted probability from 0 to 1, and the radius :math:`r` represents the observed frequency from 0 to 1. 3. **Perfect Calibration**: A perfectly calibrated model, where :math:`\bar{p}_k = \bar{y}_k` for all bins, will form a perfect Archimedean spiral defined by :math:`r = \frac{2\theta}{\pi}`. This is drawn as a dashed black reference line. 4. **Diagnostic Coloring**: The calibration error for each bin is calculated as :math:`e_k = \bar{y}_k - \bar{p}_k`. The line segments of the model's spiral are colored based on this error: - :math:`e_k < 0`: The model is **over-confident** (observed frequency is lower than predicted probability). - :math:`e_k > 0`: The model is **under-confident** (observed frequency is higher than predicted probability). Examples -------- >>> import numpy as np >>> from kdiagram.plot.comparison import plot_polar_reliability >>> >>> # Generate synthetic data for two models >>> np.random.seed(0) >>> n_samples = 2000 >>> y_true = (np.random.rand(n_samples) < 0.4).astype(int) >>> # A well-calibrated model >>> calibrated_preds = np.clip(0.4 + np.random.normal(0, 0.15, n_samples), 0, 1) >>> # An over-confident model >>> overconfident_preds = np.clip(0.4 + np.random.normal(0, 0.3, n_samples), 0, 1) >>> >>> # Generate the plot >>> ax = plot_polar_reliability( ... y_true, ... calibrated_preds, ... overconfident_preds, ... names=["Well-Calibrated", "Over-Confident"], ... n_bins=15, ... cmap='coolwarm' ... ) References ---------- .. footbibliography:: """
[docs] @validate_params( { "train_times": ["array-like", None], "metrics": [str, "array-like", callable, None], "scale": [ StrOptions( { "norm", "min-max", "std", "standard", } ), None, ], "lower_bound": [Real], } ) def plot_model_comparison( y_true, *y_preds, train_times: float | list[float] | None = None, metrics: str | Callable | list[str | Callable] | None = None, names: list[str] | None = None, title: str | None = None, figsize: tuple[float, float] | None = None, colors: list[Any] | None = None, alpha: float = 0.7, legend: bool = True, show_grid: bool = True, grid_props: dict | None = None, scale: str | None = "norm", lower_bound: float = 0.0, savefig: str | None = None, loc: str = "upper right", verbose: int = 0, acov: Acov = "default", ax: Axes | None = None, ): # --- input clean/validate try: y_true, *y_preds = drop_nan_in(y_true, *y_preds, error="raise") tmp = [] for pred in y_preds: pred_ok = validate_yy( y_true, pred, expected_type=None, flatten=True, )[1] tmp.append(pred_ok) y_preds = tmp except Exception as e: raise TypeError(f"Input validation failed: {e}") from e n_models = len(y_preds) if n_models == 0: warnings.warn( "No prediction arrays (*y_preds) provided.", stacklevel=2, ) return None if acov != "default": warnings.warn( "Non-default 'acov' for radar comparison. " "Nice plot prefers full 360°; proceeding as " "requested.", UserWarning, stacklevel=2, ) # --- names if names is None: names = [f"Model_{i + 1}" for i in range(n_models)] else: names = columns_manager(names, empty_as_none=False) if len(names) < n_models: names += [f"Model_{i + 1}" for i in range(len(names), n_models)] elif len(names) > n_models: warnings.warn( f"Received {len(names)} names for {n_models} " "models. Extra names ignored.", UserWarning, stacklevel=2, ) names = names[:n_models] # --- metrics defaulting if metrics is None: ttype = type_of_target(y_true) if ttype in ["continuous", "continuous-multioutput"]: metrics = ["r2", "mae", "mape", "rmse"] else: metrics = ["accuracy", "precision", "recall", "f1"] if verbose >= 1: print(f"[INFO] Auto metrics for '{ttype}': {metrics}") metrics = is_iterable( metrics, exclude_string=True, transform=True, ) metric_funcs = [] metric_names = [] error_metrics = [] for m in metrics: try: if isinstance(m, str): f = get_scorer(m) metric_funcs.append(f) metric_names.append(m) if m in ["mae", "mape", "rmse", "mse"]: error_metrics.append(m) elif callable(m): metric_funcs.append(m) mname = getattr(m, "__name__", "metric") metric_names.append(mname) else: warnings.warn( f"Ignoring invalid metric type: {type(m)}", stacklevel=2, ) except Exception as e: warnings.warn( f"Could not retrieve scorer for metric '{m}': {e}", stacklevel=2, ) if not metric_funcs: raise ValueError("No valid metrics found or specified.") # --- optional train time axis tvals = None if train_times is not None: if isinstance(train_times, (int, float, np.number)): tvals = np.array([float(train_times)] * n_models) else: tvals = np.asarray(train_times, dtype=float) if tvals.ndim != 1 or len(tvals) != n_models: raise ValueError( f"train_times must be a single float or a list/array " f"of length n_models ({n_models}). " f"Got shape {tvals.shape}." ) metric_names.append("Train Time (s)") # Add a placeholder for calculation loop, will substitute later metric_funcs.append("train_time_placeholder") # --- compute results [n_models, n_metrics] results = np.zeros((n_models, len(metric_names)), dtype=float) for i, y_pred in enumerate(y_preds): for j, mfunc in enumerate(metric_funcs): if mfunc == "train_time_placeholder": results[i, j] = tvals[i] elif mfunc is not None: try: results[i, j] = mfunc(y_true, y_pred) except Exception as e: warnings.warn( f"Could not compute metric " f"'{metric_names[j]}' for model " f"'{names[i]}': {e}. Setting to NaN.", stacklevel=2, ) results[i, j] = np.nan else: results[i, j] = np.nan # --- scale results R = results.copy() if np.isnan(R).any(): warnings.warn( "NaN values found in metric results. Scaling might " "be affected or rows/cols dropped depending on method.", stacklevel=2, ) # Note: Some metrics are better when *lower* (MAE, RMSE, MAPE, train_time). # For visualization where larger radius is better, we might invert these # before scaling, or adjust the interpretation. Let's scale first. if scale in ["norm", "min-max"]: if verbose >= 1: print("[INFO] Scaling metrics using Min-Max.") mn = np.nanmin(R, axis=0) mx = np.nanmax(R, axis=0) rg = mx - mn rg[rg < 1e-9] = 1.0 R = (R - mn) / rg # Now, for error metrics, higher value (closer to 1) is WORSE. # Invert them so higher value (closer to 1) is BETTER. for j, name in enumerate(metric_names): if name in error_metrics or name == "Train Time (s)": R[:, j] = 1.0 - R[:, j] # Scaled results are now in [0, 1], higher is better. elif scale in ["std", "standard"]: if verbose >= 1: print("[INFO] Standard scaling.") mu = np.nanmean(R, axis=0) sd = np.nanstd(R, axis=0) sd[sd < 1e-9] = 1.0 # Avoid division by zero R = (R - mu) / sd for j, name in enumerate(metric_names): if name in error_metrics or name == "Train Time (s)": R[:, j] = -R[:, j] # Now higher value means better performance (higher score or lower error) # but range is not [0, 1]. We need to handle lower_bound. # Replace any potential NaNs resulting from scaling (e.g., if all NaNs) R = np.nan_to_num(R, nan=lower_bound) # --- figure/axes with acov span fig, ax, span = setup_polar_axes( ax, acov=acov, figsize=figsize or (8.0, 8.0), ) # metric angles inside requested span m = len(metric_names) angles = np.linspace(0.0, float(span), m, endpoint=False) angles_closed = list(angles) + [angles[0]] # --- colors if colors is None: try: cmap_obj = get_cmap("tab10", default="tab10", failsafe="discrete") plot_colors = [cmap_obj(i % 10) for i in range(n_models)] except Exception: cmap_obj = get_cmap("viridis") plot_colors = [cmap_obj(i / n_models) for i in range(n_models)] else: plot_colors = colors # --- draw polygons for i in range(n_models): vals = np.concatenate((R[i], [R[i, 0]])) ax.plot( angles_closed, vals, label=names[i], color=plot_colors[i % len(plot_colors)], # Cycle colors linewidth=1.5, alpha=alpha, ) ax.fill( angles_closed, vals, color=plot_colors[i % len(plot_colors)], alpha=0.10, ) # --- ticks/labels ax.set_xticks(angles) ax.set_xticklabels(metric_names) if scale in ["norm", "min-max"]: ax.set_ylim(bottom=lower_bound, top=1.05) ax.set_yticks(np.linspace(lower_bound, 1.0, 5)) else: ax.set_ylim(bottom=lower_bound) ax.tick_params(axis="y", labelsize=8) ax.tick_params(axis="x", pad=10) set_axis_grid(ax, show_grid=show_grid, grid_props=grid_props) if legend: ax.legend(loc=loc, bbox_to_anchor=(1.25, 1.05)) ax.set_title( title or "Model Performance Comparison", y=1.15, fontsize=14, ) fig.tight_layout(pad=2.0) if savefig: try: fig.savefig(savefig, bbox_inches="tight", dpi=300) print(f"Plot saved to {savefig}") except Exception as e: print(f"Error saving plot to {savefig}: {e}") else: try: plt.show() except Exception as e: warnings.warn( f"Could not display plot ({e}). Use 'savefig'.", UserWarning, stacklevel=2, ) return ax
plot_model_comparison.__doc__ = r""" Plot multi-metric model performance comparison on a radar chart. Generates a radar chart (spider chart) visualizing multiple performance metrics for one or more models simultaneously. Each axis corresponds to a metric (e.g., R2, MAE, accuracy, precision), and each polygon represents a model, allowing for a holistic comparison of their strengths and weaknesses across different evaluation criteria [1]_. This function is highly valuable for model selection, providing a compact overview that goes beyond single-score comparisons. Use it when you need to balance trade-offs between various metrics (like accuracy vs. training time) or understand how different models perform relative to each other across a spectrum of relevant performance indicators. Internally relies on helpers to handle potential NaN values and determine data types [2]_. Parameters ---------- y_true : array-like of shape (n_samples,) The ground truth (correct) target values. *y_preds : array-like of shape (n_samples,) Variable number of prediction arrays, one for each model to be compared. Each array must have the same length as `y_true`. train_times : float or list of float, optional Training time in seconds for each model corresponding to `*y_preds`. If provided: - A single float assumes the same time for all models. - A list must match the number of models. It will be added as an additional axis/metric on the chart. Default is ``None``. metrics : str, callable, list of these, optional The performance metrics to calculate and plot. Default is ``None``, which triggers automatic metric selection based on the target type inferred from `y_true`: - **Regression:** Defaults to ``["r2", "mae", "mape", "rmse"]``. - **Classification:** Defaults to ``["accuracy", "precision", "recall"]``. Can be provided as: - A list of strings: Names of metrics known by scikit-learn or gofast's `get_scorer` (e.g., ``['r2', 'rmse']``). - A list of callables: Functions with the signature `metric(y_true, y_pred)`. - A mix of strings and callables. names : list of str, optional Names for each model corresponding to `*y_preds`. Used for the legend. If ``None`` or too short, defaults like "Model_1", "Model_2" are generated. Default is ``None``. title : str, optional Title displayed above the radar chart. If ``None``, a generic title may be used internally or omitted. Default is ``None``. figsize : tuple of (float, float), optional Figure size ``(width, height)`` in inches. If ``None``, uses Matplotlib's default (often similar to ``(8, 8)`` for this type of plot). colors : list of str or None, optional List of Matplotlib color specifications for each model's polygon. If ``None``, colors are automatically assigned from the default palette ('tab10'). If provided, the list length should ideally match `n_models`. alpha : float, optional Transparency level (between 0 and 1) for the plotted lines and filled areas. Default is ``0.7``. (Note: Fill alpha is often hardcoded lower, e.g., 0.1, in implementation). legend : bool, optional If ``True``, display a legend mapping colors/lines to model names. Default is ``True``. show_grid : bool, optional If ``True``, display the radial grid lines on the chart. Default is ``True``. scale : {'norm', 'min-max', 'std', 'standard'}, optional Method for scaling metric values before plotting. Scaling is applied independently to each metric (axis) across models. Default is ``'norm'``. - ``'norm'`` or ``'min-max'``: Min-max scaling. Transforms values to the range [0, 1] using :math:`(X - min) / (max - min)`. Useful for comparing relative performance when metrics have different scales. - ``'std'`` or ``'standard'``: Standard scaling (Z-score). Transforms values to have zero mean and unit variance using :math:`(X - mean) / std`. Preserves relative spacing better than min-max but results can be negative. - ``None``: Plot raw metric values without scaling. Use only if metrics naturally share a comparable, non-negative range. lower_bound : float, optional Sets the minimum value for the radial axis (innermost circle). Useful when using standard scaling ('std') which can produce negative values, or to adjust the plot's center. Default is ``0``. savefig : str, optional If provided, the file path (e.g., 'radar_comparison.svg') where the figure will be saved. If ``None``, the plot is displayed interactively. Default is ``None``. loc : str, optional Location argument passed to `matplotlib.pyplot.legend()` to position the legend (e.g., 'upper right', 'lower left', 'center right'). Default is ``'upper right'``. verbose : int, optional Controls the verbosity level. ``0`` is silent. Higher values may print debugging information during metric calculation or scaling. Default is ``0``. acov : {'default', 'half_circle', 'quarter_circle', 'eighth_circle'}, default='default' Angular coverage of the polar sector. - ``'default'`` : full circle, :math:`2\pi` (360°) - ``'half_circle'`` : :math:`\pi` (180°) - ``'quarter_circle'`` : :math:`\pi/2` (90°) - ``'eighth_circle'`` : :math:`\pi/4` (45°) Returns ------- ax : matplotlib.axes.Axes The Matplotlib Axes object containing the radar chart. Allows for further customization after the function call. Raises ------ ValueError If lengths of `y_preds`, `names` (if provided), and `train_times` (if provided) do not match. If an invalid string is provided for `scale`. If a metric string name is not recognized by the internal scorer. TypeError If `y_true` or `y_preds` contain non-numeric data. See Also -------- kdiagram.utils.metric_utils.get_scorer : Function likely used internally to fetch metric callables (verify path). sklearn.metrics : Scikit-learn metrics module. matplotlib.pyplot.polar : Function for creating polar plots. Notes ----- This function provides a multi-dimensional view of model performance. **Metric Calculation:** For each model :math:`k` with predictions :math:`\hat{y}_k` and each metric :math:`m` (from the `metrics` list), the score :math:`S_{m,k}` is calculated: .. math:: S_{m,k} = \text{Metric}_m(y_{true}, \hat{y}_k) If `train_times` are provided, they are treated as an additional metric axis. **Scaling:** If `scale` is specified, scaling is applied column-wise (per metric) across all models before plotting: - Min-Max ('norm'): .. math:: S'_{m,k} = \frac{S_{m,k} - \min_j(S_{m,j})}{\max_j(S_{m,j}) - \min_j(S_{m,j})} - Standard ('std'): .. math:: S'_{m,k} = \frac{S_{m,k} - \text{mean}_j(S_{m,j})}{\text{std}_j(S_{m,j})} **Plotting:** The (scaled) scores :math:`S'_{m,k}` for each model :math:`k` determine the radial distance along the axis corresponding to metric :math:`m`. Points are connected to form a polygon for each model. References ---------- .. [1] Wikipedia contributors. (2024). Radar chart. In Wikipedia, The Free Encyclopedia. Retrieved April 14, 2025, from https://en.wikipedia.org/wiki/Radar_chart *(General reference for radar charts)* .. [2] Kenny-Denecke, J. F., Hernandez-Amaro, A., Martin-Gorriz, M. L., & Castejon-Limos, P. (2024). Lead-Time Prediction in Wind Tower Manufacturing: A Machine Learning-Based Approach. *Mathematics*, 12(15), 2347. https://doi.org/10.3390/math12152347 *(Example application using radar charts for ML comparison)* Examples -------- >>> from kdiagram.plot.comparison import plot_model_comparison >>> import numpy as np >>> >>> # Example 1: Regression task >>> y_true_reg = np.array([3, -0.5, 2, 7, 5]) >>> y_pred_r1 = np.array([2.5, 0.0, 2.1, 7.8, 5.2]) >>> y_pred_r2 = np.array([3.2, 0.2, 1.8, 6.5, 4.8]) >>> times = [0.1, 0.5] # Training times in seconds >>> names = ['ModelLin', 'ModelTree'] >>> ax1 = plot_model_comparison(y_true_reg, y_pred_r1, y_pred_r2, ... train_times=times, names=names, ... metrics=['r2', 'mae', 'rmse'], # Specify metrics ... title="Regression Model Comparison", ... scale='norm') # Normalize for comparison >>> >>> # Example 2: Classification task (requires appropriate y_true/y_pred) >>> y_true_clf = np.array([0, 1, 0, 1, 1, 0]) >>> y_pred_c1 = np.array([0, 1, 0, 1, 0, 0]) # Model 1 preds >>> y_pred_c2 = np.array([0, 1, 1, 1, 1, 0]) # Model 2 preds >>> ax2 = plot_model_comparison(y_true_clf, y_pred_c1, y_pred_c2, ... names=["LogReg", "SVM"], ... # Uses default classification metrics ... title="Classification Model Comparison", ... scale='norm') """
[docs] @check_non_emptiness @isdf def plot_horizon_metrics( df: pd.DataFrame, qlow_cols: list[str], qup_cols: list[str], *, q50_cols: list[str] | None = None, xtick_labels: list[str] | None = None, normalize_radius: bool = False, show_value_labels: bool = True, cbar_label: str | None = None, r_label: str | None = None, cmap: str = "coolwarm", acov: Acov = "default", title: str | None = None, figsize: tuple[float, float] = (8.0, 8.0), alpha: float = 0.85, show_grid: bool = True, grid_props: dict | None = None, mask_angle: bool = False, savefig: str | None = None, dpi: int = 300, cbar: bool = True, ax: Axes | None = None, ): # --- validate lengths if len(qlow_cols) != len(qup_cols): raise ValueError( "Mismatch in length between `qlow_cols` " f"({len(qlow_cols)}) and `qup_cols` ({len(qup_cols)})." ) if q50_cols and len(qlow_cols) != len(q50_cols): raise ValueError( "Mismatch in length: `q50_cols` must match other " "quantile column lists." ) # --- data qlow_data = df[qlow_cols].to_numpy() qup_data = df[qup_cols].to_numpy() widths = qup_data - qlow_data radial_vals = np.mean(widths, axis=1) if q50_cols: color_vals = np.mean(df[q50_cols].to_numpy(), axis=1) else: color_vals = radial_vals if normalize_radius: rmin, rmax = radial_vals.min(), radial_vals.max() if (rmax - rmin) > 1e-9: radial_vals = (radial_vals - rmin) / (rmax - rmin) # --- axes via utility (sets offset/dir/thetamax) fig, ax, span = setup_polar_axes( ax, acov=acov, figsize=figsize, ) # --- bars n = len(df) theta = np.linspace(0.0, float(span), n, endpoint=False) norm = Normalize( vmin=float(np.min(color_vals)), vmax=float(np.max(color_vals)), ) cmap_obj = get_cmap(cmap, default="coolwarm") colors = cmap_obj(norm(color_vals)) bar_width = (float(span) / max(1, n)) * 0.9 ax.bar( theta, radial_vals, width=bar_width, color=colors, edgecolor="k", alpha=alpha, linewidth=0.5, ) # --- annotations if show_value_labels: rpad = 0.03 * float(np.max(radial_vals)) if n else 0.0 for ang, rad in zip(theta, radial_vals): ax.text( float(ang), float(rad) + rpad, f"{rad:.2f}", ha="center", va="bottom", fontsize=8, ) if xtick_labels: ax.set_xticks(theta) ax.set_xticklabels(xtick_labels) elif mask_angle: ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_title(title or "Polar Bar Comparison", fontsize=14) if r_label: ax.set_ylabel(r_label, fontsize=12, labelpad=20) set_axis_grid(ax, show_grid, grid_props=grid_props) if cbar: sm = cm.ScalarMappable(cmap=cmap_obj, norm=norm) sm.set_array([]) # mpl<3.8 compat cax = fig.colorbar(sm, ax=ax, pad=0.1, shrink=0.7) cax.set_label(cbar_label or "Color Metric", fontsize=10) # --- output fig.tight_layout() if savefig: fig.savefig(savefig, dpi=dpi, bbox_inches="tight") plt.close(fig) else: plt.show() return ax
plot_horizon_metrics.__doc__ = r""" Plot a polar bar chart comparing metrics across different horizons. This function visualizes a primary metric (typically **mean interval width**) as the height of bars arranged in a circle. Each bar represents a distinct category or forecast horizon. A secondary metric (typically the **mean Q50 value**) can be encoded as the color of the bars, providing a multi-faceted comparison. Parameters ---------- df : pd.DataFrame Input DataFrame where each **row** represents a distinct horizon or category to be compared. qlow_cols : list of str List of column names containing lower quantile samples (e.g., Q10) for each horizon. qup_cols : list of str List of column names containing upper quantile samples (e.g., Q90). Must have the same length as ``qlow_cols``. q50_cols : list of str, optional List of column names for the median quantile (Q50). If provided, the mean of these values determines the bar color. If ``None``, bar color is determined by the bar height (the mean interval width). xtick_labels : list of str, optional Custom labels for each bar on the angular axis. The length must match the number of rows in ``df``. If ``None``, no angular labels are shown. normalize_radius : bool, default=False If ``True``, the radial values (bar heights) are min-max scaled to the range ``[0, 1]``. show_value_labels : bool, default=True If ``True``, display the numeric value of the radial metric on top of each bar. cbar_label : str, optional Custom label for the color bar. If ``None``, a default label is generated. r_label : str, optional Custom label for the radial axis. cmap : str, default='coolwarm' Matplotlib colormap name for coloring the bars. acov : {'default', 'half_circle', 'quarter_circle', \ 'eighth_circle'}, default='default' Specifies the angular coverage of the plot: ``'default'`` (360°), ``'half_circle'`` (180°), etc. title : str, optional Title for the plot. If ``None``, a default title is used. figsize : tuple of (float, float), default=(8, 8) Figure size in inches. alpha : float, default=0.85 Transparency level for the bars. show_grid : bool, default=True Toggle gridlines via the package helper ``set_axis_grid``. grid_props : dict, optional Keyword arguments passed to ``set_axis_grid`` for grid customization. mask_angle : bool, default=False If ``True`` and ``xtick_labels`` is not provided, this will hide any default angular tick labels. savefig : str, optional If provided, save the figure to this path; otherwise the plot is shown interactively. dpi : int, default=300 Resolution for the saved figure. cbar : bool, default=True If ``True``, display a color bar. Returns ------- ax : matplotlib.axes.Axes The Matplotlib Axes object containing the polar bar plot. Notes ----- The plot summarizes metrics for :math:`N` horizons (rows) using data from :math:`M` samples (columns). Let :math:`\mathbf{L}`, :math:`\mathbf{U}`, and :math:`\mathbf{Q50}` be data matrices of shape :math:`(N, M)` extracted from the corresponding columns. 1. **Interval Width Calculation**: For each horizon :math:`j` and sample :math:`i`, the interval width is: .. math:: W_{j,i} = U_{j,i} - L_{j,i} 2. **Radial Value (Bar Height)**: The radial value :math:`r_j` for horizon :math:`j` is the mean interval width across all :math:`M` samples. .. math:: r_j = \frac{1}{M} \sum_{i=0}^{M-1} W_{j,i} 3. **Color Value**: The color value :math:`c_j` for horizon :math:`j` is determined by the mean of the ``q50_cols`` values. .. math:: c_j = \frac{1}{M} \sum_{i=0}^{M-1} Q50_{j,i} If ``q50_cols`` is not provided, the color defaults to the radial value, :math:`c_j = r_j`. 4. **Angular Position**: Horizons are spaced evenly around the circle. For horizon :math:`j`, the angle is: .. math:: \theta_j = \frac{j}{N} \times S where :math:`S` is the angular span from ``acov``. The plot starts at the top (12 o'clock) and proceeds clockwise. Examples -------- >>> import numpy as np >>> import pandas as pd >>> from kdiagram.plot import plot_horizon_metrics >>> >>> # Create synthetic data for 6 horizons with 2 samples each >>> horizons = ["H+1", "H+2", "H+3", "H+4", "H+5", "H+6"] >>> df = pd.DataFrame({ ... 'q10_s1': [1, 2, 3, 4, 5, 6], ... 'q10_s2': [1.2, 2.3, 3.4, 4.5, 5.6, 6.7], ... 'q90_s1': [3, 4, 5.5, 7, 8, 9.5], ... 'q90_s2': [3.1, 4.2, 5.7, 7.3, 8.4, 9.9], ... 'q50_s1': [2, 3, 4.2, 5.7, 6.5, 8.2], ... 'q50_s2': [2.1, 3.2, 4.4, 5.9, 6.9, 8.8], ... }) >>> >>> q10_cols = ['q10_s1', 'q10_s2'] >>> q90_cols = ['q90_s1', 'q90_s2'] >>> q50_cols = ['q50_s1', 'q50_s2'] >>> >>> ax = plot_horizon_metrics( ... df=df, ... qlow_cols=q10_cols, ... qup_cols=q90_cols, ... q50_cols=q50_cols, ... title="Mean Interval Width Across Horizons", ... xtick_labels=horizons, ... show_value_labels=True, ... r_label="Mean Interval Width (Q90-Q10)", ... cbar_label="Mean Q50 Value", ... acov="default" ... ) """