Source code for gammapy.stats.poisson

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Poisson significance computations for these two cases.

* known background level ``mu_bkg``
* background estimated from ``n_off`
"""
import numpy as np
import scipy.optimize
import scipy.special
import scipy.stats
from .normal import significance_to_probability_normal

__all__ = [
    "background",
    "background_error",
    "excess",
    "excess_error",
    "significance",
    "significance_on_off",
    "excess_matching_significance",
    "excess_matching_significance_on_off",
    "excess_ul_helene",
]

__doctest_skip__ = ["*"]


[docs]def background(n_off, alpha):
    r"""Estimate background in the on-region from an off-region observation.

    .. math::
        \mu_{background} = \alpha \times n_{off}

    Parameters
    ----------
    n_off : array_like
        Observed number of counts in the off region
    alpha : array_like
        On / off region exposure ratio for background events

    Returns
    -------
    background : `numpy.ndarray`
        Background estimate for the on region

    Examples
    --------
    >>> background(n_off=4, alpha=0.1)
    0.4
    >>> background(n_off=9, alpha=0.2)
    1.8
    """
    n_off = np.asanyarray(n_off, dtype=np.float64)
    alpha = np.asanyarray(alpha, dtype=np.float64)

    return alpha * n_off


[docs]def background_error(n_off, alpha):
    r"""Estimate standard error on background in the on region from an off-region observation.

    .. math::
          \Delta\mu_{bkg} = \alpha \times \sqrt{n_{off}}

    Parameters
    ----------
    n_off : array_like
        Observed number of counts in the off region
    alpha : array_like
        On / off region exposure ratio for background events

    Returns
    -------
    background : `numpy.ndarray`
        Background estimate for the on region

    Examples
    --------
    >>> background_error(n_off=4, alpha=0.1)
    0.2
    >>> background_error(n_off=9, alpha=0.2)
    0.6
    """
    n_off = np.asanyarray(n_off, dtype=np.float64)
    alpha = np.asanyarray(alpha, dtype=np.float64)

    return alpha * np.sqrt(n_off)


[docs]def excess(n_on, n_off, alpha):
    r"""Estimate excess in the on region for an on-off observation.

    .. math::
          \mu_{excess} = n_{on} - \alpha \times n_{off}

    Parameters
    ----------
    n_on : array_like
        Observed number of counts in the on region
    n_off : array_like
        Observed number of counts in the off region
    alpha : array_like
        On / off region exposure ratio for background events

    Returns
    -------
    excess : `numpy.ndarray`
        Excess estimate for the on region

    Examples
    --------
    >>> excess(n_on=10, n_off=20, alpha=0.1)
    8.0
    >>> excess(n_on=4, n_off=9, alpha=0.5)
    -0.5
    """
    n_on = np.asanyarray(n_on, dtype=np.float64)
    n_off = np.asanyarray(n_off, dtype=np.float64)
    alpha = np.asanyarray(alpha, dtype=np.float64)

    return n_on - alpha * n_off


[docs]def excess_error(n_on, n_off, alpha):
    r"""Estimate error on excess for an on-off measurement.

    .. math::
        \Delta\mu_{excess} = \sqrt{n_{on} + \alpha ^ 2 \times n_{off}}

    TODO: Implement better error and limit estimates (Li & Ma, Rolke)!

    Parameters
    ----------
    n_on : array_like
        Observed number of counts in the on region
    n_off : array_like
        Observed number of counts in the off region
    alpha : array_like
        On / off region exposure ratio for background events

    Returns
    -------
    excess_error : `numpy.ndarray`
        Excess error estimate

    Examples
    --------
    >>> excess_error(n_on=10, n_off=20, alpha=0.1)
    3.1937438845342623...
    >>> excess_error(n_on=4, n_off=9, alpha=0.5)
    2.5
    """
    n_on = np.asanyarray(n_on, dtype=np.float64)
    n_off = np.asanyarray(n_off, dtype=np.float64)
    alpha = np.asanyarray(alpha, dtype=np.float64)

    variance = n_on + (alpha ** 2) * n_off
    return np.sqrt(variance)


[docs]def significance(n_on, mu_bkg, method="lima", n_on_min=1):
    r"""Compute significance for an observed number of counts and known background.

    The default ``method="lima"`` gives the significance estimate corresponding
    to equation (17) from the Li & Ma paper [1]_ in the limiting of known background
    :math:`\mu_{bkg} = \alpha \times n_{off}` with :math:`\alpha \to 0`.

    It is given by the following formula:

    .. math::
        S_{lima} = \left[
          2 n_{on} \log \left( \frac{n_{on}}{\mu_{bkg}} \right) - n_{on} + \mu_{bkg}
        \right] ^ {1/2}

    For ``method="simple"``, the significance estimate is given by:

    .. math::
        S_{simple} = (n_{on} - \mu_{bkg}) / \sqrt{\mu_{bkg}}


    Parameters
    ----------
    n_on : array_like
        Observed number of counts
    mu_bkg : array_like
        Known background level
    method : {"lima", "simple"}
        Method for significance estimation
    n_on_min : float
        Minimum ``n_on`` (return ``NaN`` for smaller values)

    Returns
    -------
    significance : `~numpy.ndarray`
        Significance estimate

    References
    ----------
    .. [1] Li and Ma, "Analysis methods for results in gamma-ray astronomy",
       `Link <https://ui.adsabs.harvard.edu/abs/1983ApJ...272..317L>`_

    See Also
    --------
    excess, significance_on_off

    Examples
    --------
    >>> significance(n_on=10, mu_bkg=2, method='lima')
    4.0235256
    >>> significance(n_on=10, mu_bkg=2, method='simple')
    5.65685425
    """
    n_on = np.asanyarray(n_on, dtype=np.float64)
    mu_bkg = np.asanyarray(mu_bkg, dtype=np.float64)

    if method == "simple":
        func = _significance_simple
    elif method == "lima":
        func = _significance_lima
    elif method == "direct":
        func = _significance_direct
    else:
        raise ValueError(f"Invalid method: {method}")

    # For low `n_on` values, don't try to compute a significance and return `NaN`.
    n_on = np.atleast_1d(n_on)
    mu_bkg = np.atleast_1d(mu_bkg)
    mask = n_on >= n_on_min
    s = np.ones_like(n_on) * np.nan
    s[mask] = func(n_on[mask], mu_bkg[mask])

    return s


def _significance_simple(n_on, mu_bkg):
    # TODO: check this formula against ???
    # Is this the limit from the on/off case for alpha -> 0?
    excess = n_on - mu_bkg
    bkg_err = np.sqrt(mu_bkg)
    return excess / bkg_err


def _significance_lima(n_on, mu_bkg):
    sign = np.sign(n_on - mu_bkg)
    val = np.sqrt(2) * np.sqrt(n_on * np.log(n_on / mu_bkg) - n_on + mu_bkg)
    return sign * val


[docs]def significance_on_off(n_on, n_off, alpha, method="lima"):
    r"""Compute significance of an on-off observation.

    The default ``method="lima"`` gives the significance estimate corresponding
    to equation (17) from the Li & Ma paper [1]_.

    For ``method="simple"``, the significance estimate is given by Equation (5)
    from the Li & Ma paper [1]_. It is somewhat biased, but has the advantage
    of being analytically invertible, which is useful for sensitivity computations.

    .. math::
        S_{simple} = \mu_{excess} / \Delta\mu_{excess}

        \mu_{excess} = n_{on} - \alpha n_{off}

        \Delta\mu_{excess} = \sqrt{n_{on} + \alpha ^ 2 n_{off}}

    Parameters
    ----------
    n_on : array_like
        Observed number of counts in the on region
    n_off : array_like
        Observed number of counts in the off region
    alpha : array_like
        On / off region exposure ratio for background events
    method : {"lima", "simple"}
        Method for significance estimation

    Returns
    -------
    significance : `~numpy.ndarray`
        Significance estimate

    References
    ----------
    .. [1] Li and Ma, "Analysis methods for results in gamma-ray astronomy",
       `Link <https://ui.adsabs.harvard.edu/abs/1983ApJ...272..317L>`_

    See Also
    --------
    significance, excess_matching_significance_on_off

    Examples
    --------
    >>> significance_on_off(n_on=10, n_off=20, alpha=0.1, method='lima')
    3.6850322319420274
    >>> significance_on_off(n_on=10, n_off=20, alpha=0.1, method='simple')
    2.5048971643405982
    """
    n_on = np.asanyarray(n_on, dtype=np.float64)
    n_off = np.asanyarray(n_off, dtype=np.float64)
    alpha = np.asanyarray(alpha, dtype=np.float64)

    with np.errstate(invalid="ignore", divide="ignore"):
        if method == "simple":
            return _significance_simple_on_off(n_on, n_off, alpha)
        elif method == "lima":
            return _significance_lima_on_off(n_on, n_off, alpha)
        elif method == "direct":
            return _significance_direct_on_off(n_on, n_off, alpha)
        else:
            raise ValueError(f"Invalid method: {method}")


def _significance_simple_on_off(n_on, n_off, alpha):
    """Compute significance with the simple Equation (5) from Li & Ma."""
    excess_ = excess(n_on, n_off, alpha)
    excess_error_ = excess_error(n_on, n_off, alpha)

    return excess_ / excess_error_


def _significance_lima_on_off(n_on, n_off, alpha):
    r"""Compute significance with the Li & Ma formula (17)."""
    sign = np.sign(excess(n_on, n_off, alpha))

    tt = (alpha + 1) / (n_on + n_off)
    ll = n_on * np.log(n_on * tt / alpha)
    mm = n_off * np.log(n_off * tt)
    val = np.sqrt(np.abs(2 * (ll + mm)))

    return sign * val


def _significance_direct(n_on, mu_bkg):
    """Compute significance directly via Poisson probability.

    Reference: TODO (is this ever used?)
    """
    # Compute tail probability to see n_on or more counts
    # Note that we're using ``k = n_on - 1`` to get the probability
    # for n_on included or more, because `poisson.sf(k)` returns the
    # probability for more than k, with k excluded
    # For `n_on = 0` this returns `
    probability = scipy.stats.poisson.sf(n_on - 1, mu_bkg)

    # Convert probability to a significance
    return scipy.stats.norm.isf(probability)


def _significance_direct_on_off(n_on, n_off, alpha):
    """Compute significance directly via Poisson probability.

    Reference: https://ui.adsabs.harvard.edu/abs/1993NIMPA.328..570A

    You can use this method for small n_on < 10.
    In this case the Li & Ma formula isn't correct any more.
    """
    f = np.math.factorial

    # Not vectorised, only works for scalar, integer inputs
    n_on = int(n_on)
    n_off = int(n_off)

    # Compute tail probability to see n_on or more counts
    probability = 1
    for n in range(0, n_on):
        term_1 = alpha ** n / (1 + alpha) ** (n_off + n + 1)
        term_2 = f(n_off + n) / (f(n) * f(n_off))
        probability -= term_1 * term_2

    # Convert probability to a significance
    return scipy.stats.norm.isf(probability)


[docs]def excess_ul_helene(excess, excess_error, significance):
    """Compute excess upper limit using the Helene method.

    Reference: https://ui.adsabs.harvard.edu/abs/1984NIMPA.228..120H

    Parameters
    ----------
    excess : float
        Signal excess
    excess_error : float
        Gaussian excess error
        For on / off measurement, use this function to compute it:
        `~gammapy.stats.excess_error`.
    significance : float
        Confidence level significance for the excess upper limit.

    Returns
    -------
    excess_ul : float
        Upper limit for the excess
    """
    conf_level1 = significance_to_probability_normal(significance)

    if excess_error <= 0:
        raise ValueError(f"Non-positive excess_error: {excess_error}")

    if excess >= 0.0:
        zeta = excess / excess_error
        value = zeta / np.sqrt(2.0)
        integral = (1.0 + scipy.special.erf(value)) / 2.0
        integral2 = 1.0 - conf_level1 * integral
        value_old = value
        value_new = value_old + 0.01
        if integral > integral2:
            value_new = 0.0
        integral = (1.0 + scipy.special.erf(value_new)) / 2.0
    else:
        zeta = -excess / excess_error
        value = zeta / np.sqrt(2.0)
        integral = 1 - (1.0 + scipy.special.erf(value)) / 2.0
        integral2 = 1.0 - conf_level1 * integral
        value_old = value
        value_new = value_old + 0.01
        integral = (1.0 + scipy.special.erf(value_new)) / 2.0

    # The 1st Loop is for Speed & 2nd For Precision
    while integral < integral2:
        value_old = value_new
        value_new = value_new + 0.01
        integral = (1.0 + scipy.special.erf(value_new)) / 2.0
    value_new = value_old + 0.0000001
    integral = (1.0 + scipy.special.erf(value_new)) / 2.0

    while integral < integral2:
        value_new = value_new + 0.0000001
        integral = (1.0 + scipy.special.erf(value_new)) / 2.0
    value_new = value_new * np.sqrt(2.0)

    if excess >= 0.0:
        conf_limit = (value_new + zeta) * excess_error
    else:
        conf_limit = (value_new - zeta) * excess_error

    return conf_limit


[docs]def excess_matching_significance(mu_bkg, significance, method="lima"):
    r"""Compute excess matching a given significance.

    This function is the inverse of `significance`.

    Parameters
    ----------
    mu_bkg : array_like
        Known background level
    significance : array_like
        Significance
    method : {'lima', 'simple'}
        Select method

    Returns
    -------
    excess : `numpy.ndarray`
        Excess

    See Also
    --------
    significance, excess_matching_significance_on_off

    Examples
    --------
    >>> excess_matching_significance(mu_bkg=0.2, significance=5, method='lima')
    TODO
    >>> excess_matching_significance(mu_bkg=0.2, significance=5, method='simple')
    TODO
    """
    mu_bkg = np.asanyarray(mu_bkg, dtype=np.float64)
    significance = np.asanyarray(significance, dtype=np.float64)

    if method == "simple":
        return _excess_matching_significance_simple(mu_bkg, significance)
    elif method == "lima":
        return _excess_matching_significance_lima(mu_bkg, significance)
    else:
        raise ValueError(f"Invalid method: {method}")


[docs]def excess_matching_significance_on_off(n_off, alpha, significance, method="lima"):
    r"""Compute sensitivity of an on-off observation.

    This function is the inverse of `significance_on_off`.

    Parameters
    ----------
    n_off : array_like
        Observed number of counts in the off region
    alpha : array_like
        On / off region exposure ratio for background events
    significance : array_like
        Desired significance level
    method : {'lima', 'simple'}
        Which method?

    Returns
    -------
    excess : `numpy.ndarray`
        Excess

    See Also
    --------
    significance_on_off, excess_matching_significance

    Examples
    --------
    >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=5,method='lima')
    12.038
    >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=5,method='simple')
    27.034
    >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=0,method='lima')
    2.307301461e-09
    >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=0,method='simple')
    0.0
    >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=-10,method='lima')
    nan
    >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=-10,method='simple')
    nan
    """
    n_off = np.asanyarray(n_off, dtype=np.float64)
    alpha = np.asanyarray(alpha, dtype=np.float64)
    significance = np.asanyarray(significance, dtype=np.float64)

    if method == "simple":
        return _excess_matching_significance_on_off_simple(n_off, alpha, significance)
    elif method == "lima":
        return _excess_matching_significance_on_off_lima(n_off, alpha, significance)
    else:
        raise ValueError(f"Invalid method: {method}")


def _excess_matching_significance_simple(mu_bkg, significance):
    return significance * np.sqrt(mu_bkg)


def _excess_matching_significance_on_off_simple(n_off, alpha, significance):
    # TODO: can these equations be simplified?
    significance2 = significance ** 2
    determinant = significance2 + 4 * n_off * alpha * (1 + alpha)
    temp = significance2 + 2 * n_off * alpha
    n_on = 0.5 * (temp + significance * np.sqrt(np.abs(determinant)))
    return n_on - background(n_off, alpha)


# This is mostly a copy & paste from _excess_matching_significance_on_off_lima
# TODO: simplify this, or avoid code duplication?
# Looking at the formula for significance_lima_on_off, I don't think
# it can be analytically inverted because the n_on appears inside and outside the log
# So probably root finding is still needed here.
def _excess_matching_significance_lima(mu_bkg, significance):
    # Significance not well-defined for n_on < 0
    # Return Nan if given significance can't be reached
    s0 = _significance_lima(n_on=1e-5, mu_bkg=mu_bkg)
    if s0 >= significance:
        return np.nan

    def target_significance(n_on):
        if n_on >= 0:
            return _significance_lima(n_on, mu_bkg) - significance
        else:
            # This high value is to tell the optimizer to stay n_on >= 0
            return 1e10

    excess_guess = _excess_matching_significance_simple(mu_bkg, significance)
    n_on_guess = excess_guess + mu_bkg

    # solver options to control robustness / accuracy / speed
    opts = dict(factor=0.1)
    n_on = scipy.optimize.fsolve(target_significance, n_on_guess, **opts)
    return n_on - mu_bkg


def _excess_matching_significance_on_off_lima(n_off, alpha, significance):
    # Significance not well-defined for n_on < 0
    # Return Nan if given significance can't be reached
    s0 = _significance_lima_on_off(n_on=1e-5, n_off=n_off, alpha=alpha)
    if s0 >= significance:
        return np.nan

    def target_significance(n_on):
        if n_on >= 0:
            return _significance_lima_on_off(n_on, n_off, alpha) - significance
        else:
            # This high value is to tell the optimizer to stay n_on >= 0
            return 1e10

    excess_guess = _excess_matching_significance_on_off_simple(
        n_off, alpha, significance
    )
    n_on_guess = excess_guess + background(n_off, alpha)

    # solver options to control robustness / accuracy / speed
    opts = dict(factor=0.1)
    n_on = scipy.optimize.fsolve(target_significance, n_on_guess, **opts)
    return n_on - background(n_off, alpha)


_excess_matching_significance_lima = np.vectorize(_excess_matching_significance_lima)
_excess_matching_significance_on_off_lima = np.vectorize(
    _excess_matching_significance_on_off_lima
)