Source code for gammapy.visualization.violin

# Licensed under a 3-clause BSD style license - see LICENSE.rst
import matplotlib.pyplot as plt
import numpy as np
import astropy.units as u
from scipy.stats import gaussian_kde, norm
from astropy.visualization import quantity_support
from gammapy.maps.axes import UNIT_STRING_FORMAT



[docs]
def plot_samples_violin_vs_energy(
    energy_edges,
    samples_per_band,
    weights_per_band=None,
    energy_power=0,
    bw_method="scott",
    grid_size=200,
    ax=None,
    errorbar_kwargs=None,
    errorbar_ul_kwargs=None,
    violin_kwargs=None,
    violin_clip=None,
    y_label="dN/dE",
):
    """Plot sample violin distributions per energy bin on log–log axes.

    Unlike standard error bars which only shows summary statistics,
    a violin plot shows the full probability density for each energy bin.

    This function draws one violin per energy interval, using weighted
    kernel-density estimation (KDE) in log-space. The median and
    1σ-equivalent quantiles are overlaid using Gammapy-style flux-point
    error bars. Users can optionally apply an ``E_center**p`` scaling to the
    flux values and clip violin tails to a specified containment interval.
    Samples of other energy-dependent parameters can also be plotted,
    in that case the ``y_label`` should be changed accordingly.

    Parameters
    ----------
    energy_edges : `~numpy.ndarray` or `~astropy.units.Quantity`
        Monotonically increasing energy bin edges of length ``nbins + 1``.
        Must be positive and finite for log scaling.
    samples_per_band : list of `~astropy.units.Quantity`
        List of sample arrays, one per energy bin. Each array contains
        draws from the flux posterior (or other distribution) in that bin.
    weights_per_band : list of `~numpy.ndarray`, optional
        Per-sample weights for each bin, same length as ``samples_per_band``.
        If omitted, uniform weights are assumed.
    energy_power : float, optional
        Power of energy to multiply y-axis with. Default is 0.
    bw_method : str or float, optional
        Bandwidth selection passed to `~scipy.stats.gaussian_kde`.
        Default is "scott".
    grid_size : int, optional
        Number of evaluation points in log-flux space for the KDE grid.
        Default is 200.
    ax : `~matplotlib.axes.Axes`, optional
        Matplotlib axes to draw the violins and error bars. Default is None.
    errorbar_kwargs : dict, optional
        Keyword arguments forwarded to `~matplotlib.pyplot.errorbar` for the quantile bars.
    errorbar_ul_kwargs : dict, optional
        Keyword arguments forwarded to `~matplotlib.pyplot.errorbar` for the upper limit bars.
    violin_kwargs : dict, optional
        Keyword arguments forwarded to `~matplotlib.axes.Axes.fill` for violin plot.
    violin_clip : (float, float), optional
        Lower and upper containment fractions (in [0, 1]) used to clip
        the violin tails in log-space. If omitted, defaults to
        ``(norm.cdf(-4), norm.cdf(4))``.
    y_label : str, optional
        Base label for the y-axis, before unit and scaling prefixes.
        Default is "dN/dE".

    Returns
    -------
    ax : `~matplotlib.axes.Axes`
        Matplotlib axes.

    Notes
    -----
    - KDE is performed on the transformed variable ``log10(E^p * F)``,
      where ``p = energy_power``. The violin polygon is constructed by
      mirroring the normalized log-density around the bin center in
      log-energy.
    - Violin clipping is applied only to the KDE grid, not to the
      quantile-based error bars.
    - Negative flux samples are replaced by a small positive surrogate
      (10% of the smallest positive sample) to allow log-space evaluation.
    - The plotted error bars correspond to the 16%, 50%, and 84%
      weighted percentiles of the *unclipped* samples, unless the median
      is non-positive, in which case an upper limit marker is drawn
      at the 98% percentile. For a gaussian distribution these values corresponds to
      1σ errors and 2σ upper limit plotted by default by the `~gammapy.estimators.FluxPoints.plot` method.
    """
    with quantity_support():
        if ax is None:
            ax = plt.gca()

        ax.set_xscale("log")
        ax.set_yscale("log")

        # Energy edges
        energy_edges = u.Quantity(energy_edges)
        unit_x = f"{energy_edges.unit}"
        edges = np.asarray(energy_edges.to_value())
        _validate_energy_edges(edges)
        nbins = len(edges) - 1

        # Units
        unit_y_power = f"{samples_per_band[0].unit * energy_edges.unit**energy_power}"
        if ax.yaxis.units is None:
            unit_y_final = unit_y_power
        else:
            unit_y_final = ax.yaxis.units

        if ax.xaxis.units is None:
            unit_x_final = unit_x
        else:
            unit_x_final = ax.xaxis.units

        unit_y = f"{samples_per_band[0].unit}"

        samples_per_band, weights_per_band = _validate_inputs(
            samples_per_band, weights_per_band, nbins
        )

        # clip defaults
        if violin_clip is None:
            violin_clip = (norm.cdf(-4), norm.cdf(4))
        lc, uc = violin_clip
        if not (0 <= lc < uc <= 1):
            raise ValueError("violin_clip must satisfy 0 <= low < high <= 1.")

        # Errorbar styles
        errorbar_kwargs = errorbar_kwargs or {}
        errorbar_kwargs_defaults = dict(
            marker="o",
            ms=4.5,
            mec="black",
            mfc="white",
            color="black",
            capsize=2.5,
            elinewidth=1.2,
            lw=1.2,
        )
        for key in errorbar_kwargs_defaults.keys():
            errorbar_kwargs.setdefault(key, errorbar_kwargs_defaults[key])

        errorbar_ul_kwargs = errorbar_ul_kwargs or {}
        errorbar_ul_kwargs_defaults = dict(
            marker="v",
            ms=7,
            mec="black",
            mfc="white",
            color="black",
            capsize=2.5,
            elinewidth=1.2,
            lw=1.2,
        )
        for key in errorbar_ul_kwargs_defaults.keys():
            errorbar_ul_kwargs.setdefault(key, errorbar_ul_kwargs_defaults[key])

        if violin_kwargs is None:
            violin_kwargs = dict()
        violin_kwargs_defaults = dict(
            alpha=0.45,
            color="C0",
            edgecolor="black",
            lw=0.8,
        )
        for key in violin_kwargs_defaults.keys():
            violin_kwargs.setdefault(key, violin_kwargs_defaults[key])

        quantiles = [100 * norm.cdf(-1), 50, 100 * norm.cdf(1), 100 * norm.cdf(2)]

        # Precompute bin geometry
        emins = edges[:-1]
        emaxs = edges[1:]
        xlog_min = np.log10(emins)
        xlog_max = np.log10(emaxs)

        centers = 0.5 * (xlog_min + xlog_max)
        halfwidths = 0.5 * (xlog_max - xlog_min) * 0.99

        for samples, weights, xlog_c, hwlog, emin, emax in zip(
            samples_per_band, weights_per_band, centers, halfwidths, emins, emaxs
        ):
            samples = samples.to_value(unit_y)
            s, smin_pos = _sanitize_samples(samples)
            w = _sanitize_weights(weights, len(s))
            if w is None or s.size == 0:
                continue

            # transform
            ylog, scale = _compute_log_transformed(s, xlog_c, energy_power)

            # KDE
            grid_full, dens_full = _kde_evaluate(ylog, w, grid_size, bw_method)

            # clipping interval
            y_high = (
                np.percentile(s, uc * 100, weights=w, method="inverted_cdf") * scale
            )
            y_low = np.percentile(s, lc * 100, weights=w, method="inverted_cdf") * scale
            y_low = max(smin_pos * scale, y_low)

            if y_high > y_low > 0:
                ymin = max(grid_full.min(), np.log10(y_low))
                ymax = min(grid_full.max(), np.log10(y_high))
                mask = (grid_full >= ymin) & (grid_full <= ymax)
                ygrid_log, dens = grid_full[mask], dens_full[mask]
            else:
                ygrid_log, dens = grid_full, dens_full

            # violin polygon
            _draw_violin(
                ax, xlog_c, hwlog, ygrid_log, dens, violin_kwargs, unit_y_power, unit_x
            )

            # quantile bars
            _draw_errorbar(
                ax,
                xlog_c,
                emin,
                emax,
                samples,
                weights,
                quantiles,
                scale,
                errorbar_kwargs,
                errorbar_ul_kwargs,
                unit_y_power,
                unit_x,
            )

        # Labels
        ax.set_xlabel(f"Energy [{u.Unit(unit_x_final).to_string(UNIT_STRING_FORMAT)}]")
        unit_str = rf"[{u.Unit(unit_y_final).to_string(UNIT_STRING_FORMAT)}]"

        if energy_power:
            p = f"{energy_power:g}"
            ax.set_ylabel(rf"$E^{p}\,\times$ {y_label} {unit_str}")
        else:
            ax.set_ylabel(f"{y_label} {unit_str}")

        return ax



def _validate_inputs(samples_per_band, weights_per_band, nbins):
    """Validate that samples and weights."""
    if len(samples_per_band) != nbins:
        raise ValueError("samples_per_band must match number of bins.")

    if weights_per_band is None:
        weights_per_band = [None] * nbins
    elif len(weights_per_band) != nbins:
        raise ValueError("weights_per_band must match number of bins.")
    return samples_per_band, weights_per_band


def _validate_energy_edges(edges):
    """Validate that energy_edges is 1D, positive, finite, and strictly increasing."""
    if edges.ndim != 1 or edges.size < 2:
        raise ValueError("energy_edges must be 1D and contain at least two values.")
    if (
        not np.all(np.isfinite(edges))
        or np.any(edges <= 0)
        or not np.all(np.diff(edges) > 0)
    ):
        raise ValueError("energy_edges must be strictly increasing, finite, and > 0.")


def _sanitize_samples(samples):
    """Return finite samples with negative values adjusted for log-scale use."""
    s = samples.copy()
    mask = np.isfinite(s)
    s = s[mask]

    s_pos = s[s > 0]
    if np.any(s < 0):
        if s_pos.size > 0:
            s[s < 0] = 0.1 * s_pos.min()

    return s, s_pos.min()


def _sanitize_weights(weights, size):
    """Return normalised finite non-negative weights or None if empty."""
    if weights is None:
        w = np.ones(size, float)
    else:
        w = np.asarray(weights.copy(), float)
        w = w[np.isfinite(w) & (w >= 0)]

    total = w.sum()
    return w / total if total > 0 else None


def _compute_log_transformed(s, xlog_center, energy_power):
    """Transform samples to log10(E^p × F) and return log-values and linear scale."""
    if energy_power:
        shift = energy_power * xlog_center
        scale = (10**xlog_center) ** energy_power
    else:
        shift = 0.0
        scale = 1.0

    return np.log10(s) + shift, scale


def _kde_evaluate(ylog, w, grid_size, bw_method):
    """Evaluate weighted KDE on a fixed log-grid and normalise density."""
    grid = np.linspace(ylog.min(), ylog.max(), int(grid_size))
    kde = gaussian_kde(ylog, weights=w, bw_method=bw_method)
    dens = kde(grid)
    peak = dens.max()
    return grid, dens / peak if peak > 0 else dens


def _draw_violin(
    ax, x_center, hwlog, ygrid_log, dens, violin_kwargs, unit_y_power, unit_x
):
    """Draw a symmetric violin polygon in log-space."""
    unit_y = u.Unit(unit_y_power)
    unit_x = u.Unit(unit_x)
    xlog_left = x_center - hwlog * dens
    xlog_right = x_center + hwlog * dens

    x_left = 10**xlog_left
    x_right = 10**xlog_right
    y_lin = 10**ygrid_log

    x_poly = np.concatenate([x_left, x_right[::-1]])
    y_poly = np.concatenate([y_lin, y_lin[::-1]])

    return ax.fill(x_poly * unit_x, y_poly * unit_y, zorder=2, **violin_kwargs)


def _draw_errorbar(
    ax,
    xlog_c,
    emin,
    emax,
    s,
    w,
    quantiles,
    scale,
    err_kw,
    err_ul_kw,
    unit_y_power,
    unit_x,
):
    """Draw Gammapy-style flux-point error bars or upper limits."""
    unit_y = u.Unit(unit_y_power)
    unit_x = u.Unit(unit_x)

    y_qs = [
        np.percentile(s, q, weights=w, method="inverted_cdf") * scale for q in quantiles
    ]

    y_lo, y_med, y_hi, y_ul = y_qs[0], y_qs[1], y_qs[2], y_qs[3]
    if y_med > 0:
        yerr = np.array([[max(0.0, y_med - y_lo)], [max(0.0, y_hi - y_med)]]) * unit_y
        kwargs = err_kw
    else:
        yerr = None
        y_med = y_ul
        kwargs = err_ul_kw
    x_c = 10 ** (xlog_c)
    xerr = np.array([[x_c - emin], [emax - x_c]])
    return ax.errorbar(
        x_c * unit_x,
        y_med * unit_y,
        xerr=xerr * unit_x,
        yerr=yerr,
        **(kwargs or {}),
        zorder=5,
    )