# License: Apache 2.0
# Author: LKouadio <etanoyau@gmail.com>
from __future__ import annotations
import warnings
from typing import Any, Literal
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.axes import Axes
from matplotlib.colors import Normalize
from scipy.stats import gaussian_kde, skew
from ..api.typing import Acov
from ..compat.matplotlib import get_cmap, get_colors
from ..compat.numpy import trapz
from ..core._io_utils import _get_valid_kwargs
from ..decorators import check_non_emptiness, isdf
from ..utils.fs import savefig as safe_savefig
from ..utils.plot import (
map_theta_to_span,
set_axis_grid,
setup_polar_axes,
)
from ..utils.validator import exist_features
__all__ = ["plot_error_ellipses", "plot_error_bands", "plot_error_violins"]
def _get_mode(mode):
"""Find mode. Fallback to optimized if mode is "cbueth",
This will be deprecated in future version.
"""
mode = str(mode).lower()
if mode in {"optimized", "cbueth"}:
return "cbueth"
return mode
def _plot_error_violins(
df: pd.DataFrame,
*error_cols: str,
names: list[str] | None = None,
title: str | None = None,
figsize: tuple[float, float] = (9.0, 9.0),
cmap: str = "viridis",
colors: list[str] = None,
show_grid: bool = True,
grid_props: dict[str, Any] | None = None,
savefig: str | None = None,
dpi: int = 300,
acov: Acov = "default",
ax: Axes | None = None,
**violin_kws,
):
# --- validate
if not error_cols:
raise ValueError("At least one error column must be provided.")
exist_features(df, features=list(error_cols))
if names and len(names) != len(error_cols):
warnings.warn(
"Names length does not match error columns. Using defaults.",
UserWarning,
stacklevel=2,
)
names = None
if not names:
names = [f"Model {i + 1}" for i in range(len(error_cols))]
# --- gather all errors & common grid
arrays = [df[c].dropna().to_numpy() for c in error_cols]
all_err = np.concatenate(arrays) if arrays else np.array([0.0])
r_min, r_max = float(np.min(all_err)), float(np.max(all_err))
grid = np.linspace(r_min, r_max, 200)
# KDE per series, normalized
violin_data: list[np.ndarray | None] = []
for arr in arrays:
if arr.size < 2:
violin_data.append(None)
continue
kde = gaussian_kde(arr)
dens = kde(grid)
m = float(np.max(dens)) if dens.size else 1.0
dens = dens / (m if m > 0 else 1.0)
violin_data.append(dens)
# --- axes via utility (handles offset/dir/thetamax)
fig, ax, span = setup_polar_axes(
ax,
acov=acov,
figsize=figsize,
)
# angular slots across requested span
k = len(error_cols)
angles = np.linspace(0.0, float(span), k, endpoint=False)
# colors
colors = get_colors(
k,
colors=colors,
cmap=cmap,
default="tab10",
failsafe="discrete",
)
# width of each violin in radians, scaled by span
width = (float(span) / max(1, k)) * 0.8
# draw violins (two-lobed polygon around the angle line)
violin_kws = _get_valid_kwargs(ax.fill, violin_kws, error="ignore")
for i, (ang, dens) in enumerate(zip(angles, violin_data)):
if dens is None:
continue
# symmetric angular deviation around `ang`
x = np.concatenate(
[-dens * (width / 2.0), np.flip(dens * (width / 2.0))]
)
y = np.concatenate([grid, np.flip(grid)])
theta = x + float(ang)
r = y
ax.fill(
theta,
r,
color=colors[i],
label=names[i],
alpha=violin_kws.pop("alpha", 0.6),
**violin_kws,
)
# zero-error reference (thin circle at r=0 over current span)
ax.plot(
np.linspace(0.0, float(span), 100),
np.zeros(100),
color="black",
linestyle="--",
lw=1.2,
label="Zero Error",
)
# labels / grid / title
ax.set_title(
title or "Comparison of Error Distributions",
fontsize=14,
)
ax.set_yticklabels([])
ax.set_xticks(angles)
ax.set_xticklabels(names)
ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
set_axis_grid(ax, show_grid=show_grid, grid_props=grid_props)
# saving
final = safe_savefig(
savefig,
fig,
dpi=dpi,
bbox_inches="tight",
)
if final is not None:
plt.close(fig)
else:
fig.tight_layout()
plt.show()
return ax
[docs]
@check_non_emptiness
@isdf
def plot_error_violins(
df: pd.DataFrame,
*error_cols: str,
names: list[str] | None = None,
title: str | None = None,
figsize: tuple[float, float] = (9.0, 9.0),
cmap: str = "viridis",
colors: list[str] = None,
show_grid: bool = True,
grid_props: dict[str, Any] | None = None,
savefig: str | None = None,
dpi: int = 300,
acov: Acov = "default",
ax: Axes | None = None,
mode: Literal["basic", "cbueth", "optimized"] = "optimized",
bw_method: float | str | None = None,
overlay: bool = False,
overlay_angle: float | None = None,
show_stats: bool = False,
**violin_kws,
):
mode = str(mode).lower()
mode = _get_mode(mode)
if mode == "basic":
return _plot_error_violins(
df,
*error_cols,
names=names,
title=title,
figsize=figsize,
cmap=cmap,
show_grid=show_grid,
grid_props=grid_props,
savefig=savefig,
dpi=dpi,
acov=acov,
ax=ax,
**violin_kws,
)
# --- validate
if not error_cols:
raise ValueError("At least one error column must be provided.")
exist_features(df, features=list(error_cols))
if names and len(names) != len(error_cols):
warnings.warn(
"Names length does not match error columns. Using defaults.",
UserWarning,
stacklevel=2,
)
names = None
if not names:
names = [f"Model {i + 1}" for i in range(len(error_cols))]
# --- gather arrays
arrays = [df[c].dropna().to_numpy() for c in error_cols]
if not arrays:
warnings.warn("No data after NaN removal.", stacklevel=2)
return None
# Build a nonnegative radial grid based on absolute error magnitude
all_abs = np.abs(np.concatenate(arrays))
r_max = float(np.max(all_abs)) if all_abs.size else 1.0
r_grid = np.linspace(0.0, r_max, 200)
# KDE for positive/negative sides (mapped to |error|); normalize to [0, 1]
violins = []
for arr in arrays:
pos = arr[arr >= 0.0]
neg = -arr[arr < 0.0] # reflect
dens_pos = None
dens_neg = None
if pos.size > 1:
dp = gaussian_kde(pos, bw_method=bw_method)(r_grid)
dp = dp / (np.max(dp) if np.max(dp) > 0 else 1.0)
dens_pos = dp
if neg.size > 1:
dn = gaussian_kde(neg, bw_method=bw_method)(r_grid)
dn = dn / (np.max(dn) if np.max(dn) > 0 else 1.0)
dens_neg = dn
violins.append((dens_pos, dens_neg))
fig, ax, span = setup_polar_axes(
ax,
acov=acov,
figsize=figsize,
)
k = len(error_cols)
# Auto-overlay: with 1–2 models, overlay improves visual comparison
# (you can change default to "auto" in the signature later if you like).
if overlay is None or (
isinstance(overlay, str) and overlay.lower() == "auto"
):
overlay = k <= 2
# Headroom so outside text has space beyond the largest radius
headroom = 1.26 if (not overlay and k <= 2) else 1.20
ax.set_ylim(0, r_max * headroom)
if overlay:
base = np.pi / 2 if overlay_angle is None else float(overlay_angle)
angles = np.full(k, base, dtype=float)
width = float(span) * 0.32 # tighter for overlays
else:
angles = np.linspace(0.0, float(span), k, endpoint=False)
width = (float(span) / max(1, k)) * 0.78 # slightly slimmer
# colors
colors = get_colors(
k,
colors=colors,
cmap=cmap,
default="tab10",
failsafe="discrete",
)
# ---- draw two-lobe polygon per model ----
alpha_default = float(violin_kws.pop("alpha", 0.45))
edgecolor = violin_kws.pop("edgecolor", "black")
lw = float(violin_kws.pop("linewidth", 0.8))
violin_kws = _get_valid_kwargs(ax.fill, violin_kws, error="ignore")
# order small → large so big shapes don’t hide small ones
areas = []
for dpos, dneg in violins:
a = (trapz(dpos, x=r_grid) if dpos is not None else 0.0) + (
trapz(dneg, x=r_grid) if dneg is not None else 0.0
)
areas.append(float(a))
order = np.argsort(areas)
for rank, i in enumerate(order):
(dpos, dneg), ang, col = violins[i], float(angles[i]), colors[i]
# fallback to zeros array if one side is missing
z = np.zeros_like(r_grid)
dpos = dpos if dpos is not None else z
dneg = dneg if dneg is not None else z
# left lobe = negatives, right lobe = positives
left = ang - (0.5 * width) * dneg
right = ang + (0.5 * width) * dpos
theta = np.concatenate([left, right[::-1]])
r = np.concatenate([r_grid, r_grid[::-1]])
# legend label (stats only in legend)
med = float(np.median(arrays[i]))
skv = float(skew(arrays[i], bias=False))
label_i = (
names[i]
if not show_stats
else f"{names[i]} (med={med:.2f}; skew={skv:.2f})"
)
ax.fill(
theta,
r,
color=col,
label=label_i,
alpha=alpha_default,
edgecolor=edgecolor,
linewidth=lw,
zorder=2 + rank,
**violin_kws,
)
# ----- zero-error reference for |error| radial scale -----
# r=0 collapses to the center; draw a tiny marker instead.
zero_sz = max(12, 10000.0 / max(1.0, ax.figure.bbox.width)) # scale a bit
ax.scatter(
[0.0],
[0.0],
s=zero_sz,
facecolors="none",
edgecolors="black",
linewidths=1.0,
marker="o",
label="Zero Error (center)",
)
# ----- labels / grid / title -----
ax.set_title(title or "Comparison of Error Distributions", fontsize=14)
ax.set_yticklabels([])
# ----- labels / grid / title -----
ax.set_title(title or "Comparison of Error Distributions", fontsize=14)
ax.set_yticklabels([])
if overlay:
ax.set_xticks([]) # legend only; no outside labels in overlay mode
else:
ax.set_xticks([])
r_out = r_max * (headroom + 0.04) # just beyond the rim
for name, ang in zip(names, angles):
_label_outside(
ax, ang, r_out, name
) # clip_on=False inside helper
# single, clean legend (stats here, never on the plot)
handles, labels = ax.get_legend_handles_labels()
ax.legend(
handles,
labels,
loc="upper right",
bbox_to_anchor=(1.33, 1.06),
frameon=False,
)
set_axis_grid(ax, show_grid=show_grid, grid_props=grid_props)
# saving
final = safe_savefig(
savefig,
fig,
dpi=dpi,
bbox_inches="tight",
)
if final is not None:
plt.close(fig)
else:
fig.tight_layout()
plt.show()
return ax
def _label_outside(ax, angle, r_out, text, *, fontsize=11):
rot = np.degrees(angle) - 90
# keep upright text
if -90 <= rot <= 90:
ha = "left"
else:
rot += 180
ha = "right"
ax.text(
angle,
r_out,
text,
rotation=rot,
rotation_mode="anchor",
ha=ha,
va="center",
clip_on=False,
fontsize=fontsize,
)
plot_error_violins.__doc__ = r"""
Plot polar violin plots to compare multiple error distributions.
This function creates a polar plot where each angular sector
contains a violin plot representing the error distribution of a
different model or dataset. It is a powerful tool for visually
comparing bias, variance, and the overall shape of error
distributions [1]_.
Parameters
----------
df : pd.DataFrame
The input DataFrame containing the error data.
*error_cols : str
One or more column names from ``df``, each containing the error
values (e.g., ``actual - predicted``) for a model to be plotted.
names : list of str, optional
Display names for each of the models corresponding to
``error_cols``. If not provided, generic names like
``'Model 1'`` will be generated. The list length must match
the number of error columns.
title : str, optional
The title for the plot. If ``None``, a default is generated.
figsize : tuple of (float, float), default=(9, 9)
Figure size in inches.
cmap : str, default='viridis'
Matplotlib colormap used to assign a unique color to each
violin plot.
colors : list of str, optional
An explicit list of colors to use for the violins. If
provided, this overrides ``cmap``. The list will cycle if
it is shorter than the number of error columns.
acov : {'default', 'half_circle', 'quarter_circle', 'eighth_circle'},
default='default'
Angular coverage (span) of the plot:
- ``'default'``: :math:`2\pi` (full circle)
- ``'half_circle'``: :math:`\pi`
- ``'quarter_circle'``: :math:`\tfrac{\pi}{2}`
- ``'eighth_circle'``: :math:`\tfrac{\pi}{4}`
mode : {'optimized', 'basic'}, default='cbueth'
The plotting mode to use.
- ``'optimized'`` or ``'cbueth'`` (Default) A mode inspired by reviewer feedback
(see Notes). It maps error magnitude to the radius, splits
violins into positive/negative lobes, and uses a central
dot for the zero reference. This mode is optimized for
detecting bias and skew.
- ``'basic'``: The original implementation where the radial
axis directly represents the error value (positive and
negative), and violins are centered on their assigned angle.
The zero reference is a dashed circle.
bw_method : float, str, or None, default=None
The method used to calculate the estimator bandwidth for the
KDE. This is passed directly to ``scipy.stats.gaussian_kde``.
If ``None``, it uses the default "scott".
overlay : bool or 'auto', default='auto'
*Applies to 'cbueth' mode only.*
If ``True``, all violins are overlaid on a single shared spoke
for direct comparison (best for k=1 or k=2).
If ``False``, each violin gets its own spoke.
If ``'auto'`` (default), overlay is enabled if k <= 2 and
disabled otherwise.
overlay_angle : float, optional
*Applies to 'cbueth' mode when ``overlay=True``.*
The angle (in radians) for the shared spoke. If ``None``,
defaults to :math:`\pi/2` (vertical North).
show_stats : bool, default=False
*Applies to 'cbueth' mode only.*
If ``True``, appends the median and skew of each distribution
to its legend entry (e.g., "Model A (med=0.45; skew=-0.12)").
show_grid : bool, default=True
Toggle gridlines via the package helper ``set_axis_grid``.
grid_props : dict, optional
Keyword arguments passed to ``set_axis_grid`` for grid
customization.
savefig : str, optional
If provided, save the figure to this path; otherwise the
plot is shown interactively.
dpi : int, default=300
Resolution for the saved figure.
**violin_kws : dict, optional
Additional keyword arguments passed to the ``ax.fill`` call
for each violin (e.g., ``alpha``, ``edgecolor``).
Returns
-------
ax : matplotlib.axes.Axes or None
The Matplotlib Axes object containing the plot, or ``None``
if the plot could not be generated.
Notes
-----
The plot visualizes and compares several one-dimensional error
distributions. It adapts the standard violin plot [1]_ to a polar
coordinate system for multi-model comparison.
1. **Kernel Density Estimation (KDE)**: For each model's error
data :math:`\mathbf{x} = \{x_1, x_2, ..., x_n\}`, the
probability density function (PDF), :math:`\hat{f}_h(x)`, is
estimated using a Gaussian kernel. This creates a smooth curve
representing the distribution's shape.
.. math::
\hat{f}_h(x) = \frac{1}{nh} \sum_{i=1}^{n} K\left(\frac{x - x_i}{h}\right)
where :math:`K` is the Gaussian kernel and :math:`h` is the
bandwidth, a smoothing parameter.
2. **Violin Construction**: The violin shape is created by plotting
the density curve :math:`\hat{f}_h(x)` symmetrically around a
central axis. The width of the violin at any given error value
:math:`x` is proportional to its estimated density.
3. **Polar Arrangement**: Each model's violin is assigned a unique
angular sector on the polar plot. The radial axis represents
the error value, with a reference circle at :math:`r=0`
indicating a perfect forecast. The violin is drawn radially
within its assigned sector.
**The 'cbueth' Mode (Default)**
The default ``mode='cbueth'`` was developed in response to
insightful feedback during the JOSS paper review process. A
reviewer noted that the original "basic" mode could sometimes
make skewed distributions difficult to interpret, especially when
comparing only two models.
> *"...wouldn't it be easier to just plot them on top of each other
> with transparency? ... Maybe this is just a problem when just
> comparing two and not more models; with three it is already
> prettier."*
To honor this contribution, the new mode was named after the
reviewer's GitHub handle. It addresses this feedback with key
design changes:
1. **Radial Axis:** The radius maps to **absolute error**
:math:`|E|`, so all data starts from the center. The zero-error
reference is a single point at the origin.
2. **Two-Lobe Design:** Each violin is split into two lobes around
its central spoke:
* **Right Lobe:** Distribution of positive errors (:math:`E > 0`).
* **Left Lobe:** Distribution of negative errors (:math:`E < 0`).
3. **Interpretation:** This design makes key metrics instantly visible:
* **Bias:** An imbalance in the *size* of the two lobes
(e.g., a larger right lobe means a positive bias).
* **Skew:** Asymmetry *within* a single lobe.
* **Variance:** The overall radial extent of the lobes.
4. **Auto-Overlay:** As suggested, when only two models are
plotted (``overlay='auto'``), they are drawn on top of
each other with transparency for a direct, "face-off"
style comparison. For 3+ models, they are given separate spokes.
Examples
--------
>>> import numpy as np
>>> import pandas as pd
>>> from kdiagram.plot.errors import plot_polar_error_violins
>>>
>>> # Simulate errors from three different models
>>> np.random.seed(0)
>>> n_points = 1000
>>> df_errors = pd.DataFrame({
... 'Model A (Good)': np.random.normal(
... loc=0.5, scale=1.5, size=n_points),
... 'Model B (Biased)': np.random.normal(
... loc=-4.0, scale=1.5, size=n_points),
... 'Model C (Inconsistent)': np.random.normal(
... loc=0, scale=4.0, size=n_points),
... })
>>>
>>> # Generate the polar violin plot
>>> ax = plot_polar_error_violins(
... df_errors,
... 'Model A (Good)',
... 'Model B (Biased)',
... 'Model C (Inconsistent)',
... title='Comparison of Model Error Distributions',
... cmap='plasma',
... alpha=0.7
... )
References
----------
.. [1] Hintze, J. L., & Nelson, R. D. (1998). Violin Plots: A Box
Plot-Density Trace Synergism. The American Statistician, 52(2),
181-184.
"""
[docs]
@check_non_emptiness
@isdf
def plot_error_bands(
df: pd.DataFrame,
error_col: str,
theta_col: str,
*,
theta_period: float | None = None,
theta_bins: int = 24,
n_std: float = 1.0,
title: str | None = None,
figsize: tuple[float, float] = (8.0, 8.0),
cmap: str = "viridis",
show_grid: bool = True,
grid_props: dict[str, Any] | None = None,
mask_angle: bool = False,
savefig: str | None = None,
dpi: int = 300,
acov: Acov = "default",
ax: Axes | None = None,
**fill_kws,
):
# --- validate
exist_features(df, features=[error_col, theta_col])
data = df[[error_col, theta_col]].dropna()
if data.empty:
warnings.warn(
"DataFrame is empty after dropping NaNs in required columns.",
UserWarning,
stacklevel=2,
)
return None
# --- axes via utility
fig, ax, span = setup_polar_axes(
ax,
acov=acov,
figsize=figsize,
)
# map theta data into [0, span]
t_raw = data[theta_col].to_numpy()
if theta_period is not None:
data["theta_map"] = map_theta_to_span(
t_raw,
span=span,
theta_period=float(theta_period),
)
else:
tmin = float(np.min(t_raw))
tmax = float(np.max(t_raw))
if tmax > tmin + 1e-12:
data["theta_map"] = map_theta_to_span(
t_raw,
span=span,
data_min=tmin,
data_max=tmax,
)
else:
data["theta_map"] = 0.0
# --- binning over span
theta_edges = np.linspace(0.0, float(span), theta_bins + 1)
theta_labels = (theta_edges[:-1] + theta_edges[1:]) / 2.0
data["theta_bin"] = pd.cut(
data["theta_map"],
bins=theta_edges,
labels=theta_labels,
include_lowest=True,
)
# stats per bin (mean ± n_std*std)
stats = (
data.groupby("theta_bin", observed=False)[error_col]
.agg(["mean", "std"])
.reset_index()
)
stats["std"] = stats["std"].fillna(0.0)
# --- draw mean + band
ang = stats["theta_bin"].astype(float).to_numpy()
mu = stats["mean"].to_numpy()
sd = stats["std"].to_numpy()
ax.plot(
ang,
mu,
color="black",
lw=2.0,
label="Mean Error",
)
ax.fill_between(
ang,
mu - float(n_std) * sd,
mu + float(n_std) * sd,
alpha=fill_kws.pop("alpha", 0.3),
label=f"{n_std} Std. Dev. Band",
**fill_kws,
)
# zero-error reference as a thin circle over the current span
ax.plot(
np.linspace(0.0, float(span), 180),
np.zeros(180),
color="red",
linestyle="--",
lw=1.2,
label="Zero Error",
)
# titles / labels / grid
ax.set_title(
title or f"Error Distribution vs. {theta_col}",
fontsize=14,
)
ax.set_ylabel(f"Forecast Error ({error_col})")
if mask_angle:
ax.set_xticklabels([])
ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
set_axis_grid(ax, show_grid=show_grid, grid_props=grid_props)
fig.tight_layout()
if savefig:
fig.savefig(savefig, dpi=dpi, bbox_inches="tight")
plt.close(fig)
else:
plt.show()
return ax
plot_error_bands.__doc__ = r"""
Plot polar error bands to visualize systemic vs random error.
This function aggregates forecast errors across bins of a cyclical
or ordered feature (like month or hour) and plots the mean error
and its standard deviation. It is a powerful diagnostic tool for
identifying systemic biases and variations in model performance.
Parameters
----------
df : pd.DataFrame
The input DataFrame containing the error and feature data.
error_col : str
Name of the column containing the forecast error values,
typically calculated as ``actual - predicted``.
theta_col : str
Name of the column representing the feature to bin against,
which will be mapped to the angular axis.
theta_period : float, optional
The period of the cyclical data in ``theta_col``. For example,
if ``theta_col`` is the month of the year, the period is 12.
This ensures the data wraps around the circle correctly.
theta_bins : int, default=24
The number of angular bins to group the data into for
calculating statistics.
n_std : float, default=1.0
The number of standard deviations to display in the shaded
error band around the mean error line.
title : str, optional
The title for the plot. If ``None``, a default is generated.
figsize : tuple of (float, float), default=(8, 8)
Figure size in inches.
cmap : str, default='viridis'
*Note: This parameter is currently not used in this function
as colors are fixed for clarity (black, red, and a fill color).*
show_grid : bool, default=True
Toggle gridlines via the package helper ``set_axis_grid``.
grid_props : dict, optional
Keyword arguments passed to ``set_axis_grid`` for grid
customization.
mask_angle : bool, default=False
If ``True``, hide the angular tick labels.
savefig : str, optional
If provided, save the figure to this path; otherwise the
plot is shown interactively.
dpi : int, default=300
Resolution for the saved figure.
**fill_kws : dict, optional
Additional keyword arguments passed to the ``ax.fill_between``
call for the shaded error band (e.g., ``color``, ``alpha``).
Returns
-------
ax : matplotlib.axes.Axes or None
The Matplotlib Axes object containing the plot, or ``None``
if the plot could not be generated.
Notes
-----
The plot visualizes the first two moments (mean and standard
deviation) of the error distribution conditioned on the angular
variable :math:`\theta`.
1. **Binning**: The data is first partitioned into :math:`K` bins
based on the values in ``theta_col``. Let :math:`B_k` be the set
of indices of data points belonging to the :math:`k`-th bin.
2. **Mean Error Calculation**: For each bin :math:`B_k`, the mean
error :math:`\mu_{e,k}` is calculated. This value is plotted as a
point on the central black line.
.. math::
\mu_{e,k} = \frac{1}{|B_k|} \sum_{i \in B_k} e_i
where :math:`e_i` is the error for data point :math:`i`. A
consistent deviation of this line from the zero-error circle
indicates a **systemic bias**.
3. **Error Variance Calculation**: For each bin, the standard
deviation of the error, :math:`\sigma_{e,k}`, is also calculated.
.. math::
\sigma_{e,k} = \sqrt{\frac{1}{|B_k|-1}\\
\sum_{i \in B_k} (e_i - \mu_{e,k})^2}
4. **Band Construction**: A shaded band is drawn between the lower
and upper bounds, defined by the mean plus or minus a multiple
of the standard deviation.
.. math::
\text{Upper Bound}_k &= \mu_{e,k} + n_{std} \cdot \sigma_{e,k} \\
\text{Lower Bound}_k &= \mu_{e,k} - n_{std} \cdot \sigma_{e,k}
The width of this band indicates the **random error** or
inconsistency of the model within that bin.
Examples
--------
>>> import numpy as np
>>> import pandas as pd
>>> from kdiagram.plot.errors import plot_error_bands
>>>
>>> # Simulate a model with seasonal error patterns
>>> np.random.seed(42)
>>> n_points = 2000
>>> day_of_year = np.arange(n_points) % 365
>>> month = (day_of_year // 30) + 1
>>>
>>> # Create a bias (positive error) in summer and more noise in winter
>>> seasonal_bias = np.sin((day_of_year - 90) * np.pi / 180) * 5
>>> seasonal_noise = 2 + 2 * np.cos(day_of_year * np.pi / 180)**2
>>> errors = seasonal_bias + np.random.normal(0, seasonal_noise, n_points)
>>>
>>> df_seasonal = pd.DataFrame({'month': month, 'forecast_error': errors})
>>>
>>> # Generate the plot
>>> ax = plot_error_bands(
... df=df_seasonal,
... error_col='forecast_error',
... theta_col='month',
... theta_period=12,
... theta_bins=12,
... n_std=1.5,
... title='Seasonal Forecast Error Analysis',
... color='#2980B9',
... alpha=0.3
... )
"""
[docs]
@check_non_emptiness
@isdf
def plot_error_ellipses(
df: pd.DataFrame,
r_col: str,
theta_col: str,
r_std_col: str,
theta_std_col: str,
*,
color_col: str | None = None,
n_std: float = 2.0,
title: str | None = None,
figsize: tuple[float, float] = (8.0, 8.0),
cmap: str = "viridis",
mask_angle: bool = False,
mask_radius: bool = False,
show_grid: bool = True,
grid_props: dict[str, Any] | None = None,
savefig: str | None = None,
dpi: int = 300,
acov: Acov = "default",
ax: Axes | None = None,
**ellipse_kws,
):
required = [r_col, theta_col, r_std_col, theta_std_col]
if color_col:
required.append(color_col)
exist_features(df, features=required)
data = df[required].dropna()
if data.empty:
warnings.warn(
"DataFrame is empty after dropping NaNs in required "
"columns. Cannot plot.",
UserWarning,
stacklevel=2,
)
return None
# Color metric (defaults to radial uncertainty)
if color_col:
color_vals = data[color_col].to_numpy()
cbar_label = color_col
else:
color_vals = data[r_std_col].to_numpy()
cbar_label = f"Uncertainty ({r_std_col})"
# Normalize colors
vmin = float(np.min(color_vals))
vmax = float(np.max(color_vals))
if np.isclose(vmin, vmax):
vmax = vmin + 1e-9
norm = Normalize(vmin=vmin, vmax=vmax)
cmap_obj = get_cmap(cmap, default="viridis")
colors = cmap_obj(norm(color_vals))
# Axes & angular coverage (sets offset/dir/thetamax)
fig, ax, span = setup_polar_axes(
ax,
acov=acov,
figsize=figsize,
)
# Map theta (degrees) into [0, span] radians
# Use a 360° period to preserve circularity
theta_deg = data[theta_col].to_numpy(dtype=float)
theta_map = map_theta_to_span(
theta_deg,
span=span,
theta_period=360.0,
)
# Scale angular std to the chosen span (assumed radians input)
# If your theta_std is in degrees, convert before scaling.
theta_std_raw = data[theta_std_col].to_numpy(dtype=float)
angle_scale = float(span) / (2.0 * np.pi)
theta_std_eff = theta_std_raw * angle_scale
# Draw each ellipse as a filled path in polar coordinates
r_mean = data[r_col].to_numpy(dtype=float)
r_std = data[r_std_col].to_numpy(dtype=float)
for i in range(len(data)):
th_path, r_path = _get_ellipse_path(
r_mean=r_mean[i],
theta_mean=theta_map[i], # already in radians
r_std=r_std[i],
theta_std=theta_std_eff[i], # scaled for acov
n_std=n_std,
)
ax.fill(
th_path,
r_path,
color=colors[i],
**ellipse_kws,
)
# Colorbar
sm = plt.cm.ScalarMappable(norm=norm, cmap=cmap_obj)
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax, pad=0.1, shrink=0.75)
cbar.set_label(cbar_label, fontsize=10)
# Formatting
ax.set_title(
title or f"Error Ellipses ({n_std:.1f} std. dev.)",
fontsize=14,
)
set_axis_grid(ax, show_grid=show_grid, grid_props=grid_props)
if mask_angle:
ax.set_xticklabels([])
if mask_radius:
ax.set_yticklabels([])
fig.tight_layout()
if savefig:
fig.savefig(savefig, dpi=dpi, bbox_inches="tight")
plt.close(fig)
else:
plt.show()
return ax
def _get_ellipse_path(
r_mean: float,
theta_mean: float,
r_std: float,
theta_std: float,
n_std: float = 2.0,
):
"""
Helper to calculate the path of an ellipse in polar coordinates.
The ellipse is defined in a local Cartesian frame and then
transformed.
"""
# width along radial, height along tangential direction
width = n_std * r_std
height = n_std * (r_mean * np.sin(theta_std))
# ellipse center in Cartesian
x_c = r_mean * np.cos(theta_mean)
y_c = r_mean * np.sin(theta_mean)
# parametric ellipse
t = np.linspace(0.0, 2.0 * np.pi, 100)
x_loc = (width / 2.0) * np.cos(t)
y_loc = (height / 2.0) * np.sin(t)
# rotate ellipse to align with radial direction
R = np.array(
[
[np.cos(theta_mean), -np.sin(theta_mean)],
[np.sin(theta_mean), np.cos(theta_mean)],
]
)
x_rot, y_rot = np.dot(R, [x_loc, y_loc])
x_fin = x_rot + x_c
y_fin = y_rot + y_c
# back to polar
r_path = np.sqrt(x_fin**2 + y_fin**2)
theta_path = np.arctan2(y_fin, x_fin)
return theta_path, r_path
plot_error_ellipses.__doc__ = r"""
Plot polar error ellipses to visualize two-dimensional uncertainty.
This function draws ellipses on a polar plot to represent the
uncertainty of data points where both the radial and angular
components have associated errors (standard deviations).
Parameters
----------
df : pd.DataFrame
Input DataFrame containing the data for the plot.
r_col : str
Name of the column for the mean radial position (e.g., distance).
theta_col : str
Name of the column for the mean angular position. **Must be in
degrees.**
r_std_col : str
Name of the column for the standard deviation of the radial
position.
theta_std_col : str
Name of the column for the standard deviation of the angular
position. **Must be in degrees.**
color_col : str, optional
Name of a column to use for coloring the ellipses. If ``None``,
ellipses are colored by their radial uncertainty (``r_std_col``).
n_std : float, default=2.0
The number of standard deviations to use for the ellipse size.
For example, ``n_std=2.0`` corresponds to approximately a 95%
confidence region for a normal distribution.
title : str, optional
The title for the plot. If ``None``, a default is generated.
figsize : tuple of (float, float), default=(8, 8)
Figure size in inches.
cmap : str, default='viridis'
Matplotlib colormap for coloring the ellipses.
show_grid : bool, default=True
Toggle gridlines via the package helper ``set_axis_grid``.
grid_props : dict, optional
Keyword arguments passed to ``set_axis_grid`` for grid
customization.
mask_angle : bool, default=False
If ``True``, hide the angular tick labels (degrees).
mask_radius : bool, default=False
If ``True``, hide the radial tick labels.
savefig : str, optional
If provided, save the figure to this path; otherwise the
plot is shown interactively.
dpi : int, default=300
Resolution for the saved figure.
**ellipse_kws : dict, optional
Additional keyword arguments passed to the ``ax.fill`` call
for each ellipse (e.g., ``alpha``, ``edgecolor``).
Returns
-------
ax : matplotlib.axes.Axes or None
The Matplotlib Axes object containing the plot, or ``None``
if the plot could not be generated.
Notes
-----
The visualization for each data point :math:`i` is constructed
from its mean radial position :math:`\mu_{r,i}`, mean angular
position :math:`\mu_{\theta,i}`, and their respective standard
deviations :math:`\sigma_{r,i}` and :math:`\sigma_{\theta,i}`.
1. **Ellipse Dimensions**: The ellipse is first defined in a local
Cartesian coordinate system at the origin. Its half-width (along
the radial direction) and half-height (along the tangential
direction) are determined by the standard deviations:
.. math::
\text{width} &= n_{std} \cdot \sigma_{r,i} \\
\text{height} &= n_{std} \cdot (\mu_{r,i} \cdot \sin(\sigma_{\theta,i}))
Note that the tangential height depends on the radial distance
:math:`\mu_{r,i}`.
2. **Transformation**: This local ellipse is then transformed to the
correct position on the polar plot. This involves two steps:
a. **Rotation**: The ellipse is rotated by the mean angle
:math:`\mu_{\theta,i}` to align its primary axis with the
radial direction from the origin.
b. **Translation**: The rotated ellipse is translated to the
mean position, which in Cartesian coordinates is
:math:`(x_c, y_c) = (\mu_{r,i} \cos(\mu_{\theta,i}), \mu_{r,i} \sin(\mu_{\theta,i}))`.
3. **Plotting**: The final transformed ellipse is drawn as a filled
path on the polar axes.
Examples
--------
>>> import numpy as np
>>> import pandas as pd
>>> from kdiagram.plot.errors import plot_polar_error_ellipses
>>>
>>> # Simulate tracking data for 15 objects
>>> np.random.seed(1)
>>> n_points = 15
>>> df_tracking = pd.DataFrame({
... 'angle_deg': np.linspace(0, 360, n_points, endpoint=False),
... 'distance_km': np.random.uniform(20, 80, n_points),
... 'distance_std': np.random.uniform(2, 7, n_points),
... 'angle_std_deg': np.random.uniform(3, 10, n_points),
... 'object_priority': np.random.randint(1, 5, n_points)
... })
>>>
>>> # Generate the plot
>>> ax = plot_polar_error_ellipses(
... df=df_tracking,
... r_col='distance_km',
... theta_col='angle_deg',
... r_std_col='distance_std',
... theta_std_col='angle_std_deg',
... color_col='object_priority',
... n_std=1.5,
... title='1.5-Sigma Positional Uncertainty',
... cmap='cividis',
... alpha=0.7,
... edgecolor='black',
... linewidth=0.5
... )
"""