Source code for gammapy.stats.poisson

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Poisson significance computations for these two cases.

* known background level ``mu_bkg``
* background estimated from ``n_off`
"""
import numpy as np
import scipy.optimize
import scipy.special
import scipy.stats
from .normal import significance_to_probability_normal

__all__ = [
    "background",
    "background_error",
    "excess",
    "excess_error",
    "significance",
    "significance_on_off",
    "excess_matching_significance",
    "excess_matching_significance_on_off",
    "excess_ul_helene",
]

__doctest_skip__ = ["*"]


[docs]def background(n_off, alpha): r"""Estimate background in the on-region from an off-region observation. .. math:: \mu_{background} = \alpha \times n_{off} Parameters ---------- n_off : array_like Observed number of counts in the off region alpha : array_like On / off region exposure ratio for background events Returns ------- background : `numpy.ndarray` Background estimate for the on region Examples -------- >>> background(n_off=4, alpha=0.1) 0.4 >>> background(n_off=9, alpha=0.2) 1.8 """ n_off = np.asanyarray(n_off, dtype=np.float64) alpha = np.asanyarray(alpha, dtype=np.float64) return alpha * n_off
[docs]def background_error(n_off, alpha): r"""Estimate standard error on background in the on region from an off-region observation. .. math:: \Delta\mu_{bkg} = \alpha \times \sqrt{n_{off}} Parameters ---------- n_off : array_like Observed number of counts in the off region alpha : array_like On / off region exposure ratio for background events Returns ------- background : `numpy.ndarray` Background estimate for the on region Examples -------- >>> background_error(n_off=4, alpha=0.1) 0.2 >>> background_error(n_off=9, alpha=0.2) 0.6 """ n_off = np.asanyarray(n_off, dtype=np.float64) alpha = np.asanyarray(alpha, dtype=np.float64) return alpha * np.sqrt(n_off)
[docs]def excess(n_on, n_off, alpha): r"""Estimate excess in the on region for an on-off observation. .. math:: \mu_{excess} = n_{on} - \alpha \times n_{off} Parameters ---------- n_on : array_like Observed number of counts in the on region n_off : array_like Observed number of counts in the off region alpha : array_like On / off region exposure ratio for background events Returns ------- excess : `numpy.ndarray` Excess estimate for the on region Examples -------- >>> excess(n_on=10, n_off=20, alpha=0.1) 8.0 >>> excess(n_on=4, n_off=9, alpha=0.5) -0.5 """ n_on = np.asanyarray(n_on, dtype=np.float64) n_off = np.asanyarray(n_off, dtype=np.float64) alpha = np.asanyarray(alpha, dtype=np.float64) return n_on - alpha * n_off
[docs]def excess_error(n_on, n_off, alpha): r"""Estimate error on excess for an on-off measurement. .. math:: \Delta\mu_{excess} = \sqrt{n_{on} + \alpha ^ 2 \times n_{off}} TODO: Implement better error and limit estimates (Li & Ma, Rolke)! Parameters ---------- n_on : array_like Observed number of counts in the on region n_off : array_like Observed number of counts in the off region alpha : array_like On / off region exposure ratio for background events Returns ------- excess_error : `numpy.ndarray` Excess error estimate Examples -------- >>> excess_error(n_on=10, n_off=20, alpha=0.1) 3.1937438845342623... >>> excess_error(n_on=4, n_off=9, alpha=0.5) 2.5 """ n_on = np.asanyarray(n_on, dtype=np.float64) n_off = np.asanyarray(n_off, dtype=np.float64) alpha = np.asanyarray(alpha, dtype=np.float64) variance = n_on + (alpha ** 2) * n_off return np.sqrt(variance)
[docs]def significance(n_on, mu_bkg, method="lima", n_on_min=1): r"""Compute significance for an observed number of counts and known background. The default ``method="lima"`` gives the significance estimate corresponding to equation (17) from the Li & Ma paper [1]_ in the limiting of known background :math:`\mu_{bkg} = \alpha \times n_{off}` with :math:`\alpha \to 0`. It is given by the following formula: .. math:: S_{lima} = \left[ 2 n_{on} \log \left( \frac{n_{on}}{\mu_{bkg}} \right) - n_{on} + \mu_{bkg} \right] ^ {1/2} For ``method="simple"``, the significance estimate is given by: .. math:: S_{simple} = (n_{on} - \mu_{bkg}) / \sqrt{\mu_{bkg}} Parameters ---------- n_on : array_like Observed number of counts mu_bkg : array_like Known background level method : {"lima", "simple"} Method for significance estimation n_on_min : float Minimum ``n_on`` (return ``NaN`` for smaller values) Returns ------- significance : `~numpy.ndarray` Significance estimate References ---------- .. [1] Li and Ma, "Analysis methods for results in gamma-ray astronomy", `Link <https://ui.adsabs.harvard.edu/abs/1983ApJ...272..317L>`_ See Also -------- excess, significance_on_off Examples -------- >>> significance(n_on=10, mu_bkg=2, method='lima') 4.0235256 >>> significance(n_on=10, mu_bkg=2, method='simple') 5.65685425 """ n_on = np.asanyarray(n_on, dtype=np.float64) mu_bkg = np.asanyarray(mu_bkg, dtype=np.float64) if method == "simple": func = _significance_simple elif method == "lima": func = _significance_lima elif method == "direct": func = _significance_direct else: raise ValueError(f"Invalid method: {method}") # For low `n_on` values, don't try to compute a significance and return `NaN`. n_on = np.atleast_1d(n_on) mu_bkg = np.atleast_1d(mu_bkg) mask = n_on >= n_on_min s = np.ones_like(n_on) * np.nan s[mask] = func(n_on[mask], mu_bkg[mask]) return s
def _significance_simple(n_on, mu_bkg): # TODO: check this formula against ??? # Is this the limit from the on/off case for alpha -> 0? excess = n_on - mu_bkg bkg_err = np.sqrt(mu_bkg) return excess / bkg_err def _significance_lima(n_on, mu_bkg): sign = np.sign(n_on - mu_bkg) val = np.sqrt(2) * np.sqrt(n_on * np.log(n_on / mu_bkg) - n_on + mu_bkg) return sign * val
[docs]def significance_on_off(n_on, n_off, alpha, method="lima"): r"""Compute significance of an on-off observation. The default ``method="lima"`` gives the significance estimate corresponding to equation (17) from the Li & Ma paper [1]_. For ``method="simple"``, the significance estimate is given by Equation (5) from the Li & Ma paper [1]_. It is somewhat biased, but has the advantage of being analytically invertible, which is useful for sensitivity computations. .. math:: S_{simple} = \mu_{excess} / \Delta\mu_{excess} \mu_{excess} = n_{on} - \alpha n_{off} \Delta\mu_{excess} = \sqrt{n_{on} + \alpha ^ 2 n_{off}} Parameters ---------- n_on : array_like Observed number of counts in the on region n_off : array_like Observed number of counts in the off region alpha : array_like On / off region exposure ratio for background events method : {"lima", "simple"} Method for significance estimation Returns ------- significance : `~numpy.ndarray` Significance estimate References ---------- .. [1] Li and Ma, "Analysis methods for results in gamma-ray astronomy", `Link <https://ui.adsabs.harvard.edu/abs/1983ApJ...272..317L>`_ See Also -------- significance, excess_matching_significance_on_off Examples -------- >>> significance_on_off(n_on=10, n_off=20, alpha=0.1, method='lima') 3.6850322319420274 >>> significance_on_off(n_on=10, n_off=20, alpha=0.1, method='simple') 2.5048971643405982 """ n_on = np.asanyarray(n_on, dtype=np.float64) n_off = np.asanyarray(n_off, dtype=np.float64) alpha = np.asanyarray(alpha, dtype=np.float64) with np.errstate(invalid="ignore", divide="ignore"): if method == "simple": return _significance_simple_on_off(n_on, n_off, alpha) elif method == "lima": return _significance_lima_on_off(n_on, n_off, alpha) elif method == "direct": return _significance_direct_on_off(n_on, n_off, alpha) else: raise ValueError(f"Invalid method: {method}")
def _significance_simple_on_off(n_on, n_off, alpha): """Compute significance with the simple Equation (5) from Li & Ma.""" excess_ = excess(n_on, n_off, alpha) excess_error_ = excess_error(n_on, n_off, alpha) return excess_ / excess_error_ def _significance_lima_on_off(n_on, n_off, alpha): r"""Compute significance with the Li & Ma formula (17).""" sign = np.sign(excess(n_on, n_off, alpha)) tt = (alpha + 1) / (n_on + n_off) ll = n_on * np.log(n_on * tt / alpha) mm = n_off * np.log(n_off * tt) val = np.sqrt(np.abs(2 * (ll + mm))) return sign * val def _significance_direct(n_on, mu_bkg): """Compute significance directly via Poisson probability. Reference: TODO (is this ever used?) """ # Compute tail probability to see n_on or more counts # Note that we're using ``k = n_on - 1`` to get the probability # for n_on included or more, because `poisson.sf(k)` returns the # probability for more than k, with k excluded # For `n_on = 0` this returns ` probability = scipy.stats.poisson.sf(n_on - 1, mu_bkg) # Convert probability to a significance return scipy.stats.norm.isf(probability) def _significance_direct_on_off(n_on, n_off, alpha): """Compute significance directly via Poisson probability. Reference: https://ui.adsabs.harvard.edu/abs/1993NIMPA.328..570A You can use this method for small n_on < 10. In this case the Li & Ma formula isn't correct any more. """ f = np.math.factorial # Not vectorised, only works for scalar, integer inputs n_on = int(n_on) n_off = int(n_off) # Compute tail probability to see n_on or more counts probability = 1 for n in range(0, n_on): term_1 = alpha ** n / (1 + alpha) ** (n_off + n + 1) term_2 = f(n_off + n) / (f(n) * f(n_off)) probability -= term_1 * term_2 # Convert probability to a significance return scipy.stats.norm.isf(probability)
[docs]def excess_ul_helene(excess, excess_error, significance): """Compute excess upper limit using the Helene method. Reference: https://ui.adsabs.harvard.edu/abs/1984NIMPA.228..120H Parameters ---------- excess : float Signal excess excess_error : float Gaussian excess error For on / off measurement, use this function to compute it: `~gammapy.stats.excess_error`. significance : float Confidence level significance for the excess upper limit. Returns ------- excess_ul : float Upper limit for the excess """ conf_level1 = significance_to_probability_normal(significance) if excess_error <= 0: raise ValueError(f"Non-positive excess_error: {excess_error}") if excess >= 0.0: zeta = excess / excess_error value = zeta / np.sqrt(2.0) integral = (1.0 + scipy.special.erf(value)) / 2.0 integral2 = 1.0 - conf_level1 * integral value_old = value value_new = value_old + 0.01 if integral > integral2: value_new = 0.0 integral = (1.0 + scipy.special.erf(value_new)) / 2.0 else: zeta = -excess / excess_error value = zeta / np.sqrt(2.0) integral = 1 - (1.0 + scipy.special.erf(value)) / 2.0 integral2 = 1.0 - conf_level1 * integral value_old = value value_new = value_old + 0.01 integral = (1.0 + scipy.special.erf(value_new)) / 2.0 # The 1st Loop is for Speed & 2nd For Precision while integral < integral2: value_old = value_new value_new = value_new + 0.01 integral = (1.0 + scipy.special.erf(value_new)) / 2.0 value_new = value_old + 0.0000001 integral = (1.0 + scipy.special.erf(value_new)) / 2.0 while integral < integral2: value_new = value_new + 0.0000001 integral = (1.0 + scipy.special.erf(value_new)) / 2.0 value_new = value_new * np.sqrt(2.0) if excess >= 0.0: conf_limit = (value_new + zeta) * excess_error else: conf_limit = (value_new - zeta) * excess_error return conf_limit
[docs]def excess_matching_significance(mu_bkg, significance, method="lima"): r"""Compute excess matching a given significance. This function is the inverse of `significance`. Parameters ---------- mu_bkg : array_like Known background level significance : array_like Significance method : {'lima', 'simple'} Select method Returns ------- excess : `numpy.ndarray` Excess See Also -------- significance, excess_matching_significance_on_off Examples -------- >>> excess_matching_significance(mu_bkg=0.2, significance=5, method='lima') TODO >>> excess_matching_significance(mu_bkg=0.2, significance=5, method='simple') TODO """ mu_bkg = np.asanyarray(mu_bkg, dtype=np.float64) significance = np.asanyarray(significance, dtype=np.float64) if method == "simple": return _excess_matching_significance_simple(mu_bkg, significance) elif method == "lima": return _excess_matching_significance_lima(mu_bkg, significance) else: raise ValueError(f"Invalid method: {method}")
[docs]def excess_matching_significance_on_off(n_off, alpha, significance, method="lima"): r"""Compute sensitivity of an on-off observation. This function is the inverse of `significance_on_off`. Parameters ---------- n_off : array_like Observed number of counts in the off region alpha : array_like On / off region exposure ratio for background events significance : array_like Desired significance level method : {'lima', 'simple'} Which method? Returns ------- excess : `numpy.ndarray` Excess See Also -------- significance_on_off, excess_matching_significance Examples -------- >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=5,method='lima') 12.038 >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=5,method='simple') 27.034 >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=0,method='lima') 2.307301461e-09 >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=0,method='simple') 0.0 >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=-10,method='lima') nan >>> excess_matching_significance_on_off(n_off=20,alpha=0.1,significance=-10,method='simple') nan """ n_off = np.asanyarray(n_off, dtype=np.float64) alpha = np.asanyarray(alpha, dtype=np.float64) significance = np.asanyarray(significance, dtype=np.float64) if method == "simple": return _excess_matching_significance_on_off_simple(n_off, alpha, significance) elif method == "lima": return _excess_matching_significance_on_off_lima(n_off, alpha, significance) else: raise ValueError(f"Invalid method: {method}")
def _excess_matching_significance_simple(mu_bkg, significance): return significance * np.sqrt(mu_bkg) def _excess_matching_significance_on_off_simple(n_off, alpha, significance): # TODO: can these equations be simplified? significance2 = significance ** 2 determinant = significance2 + 4 * n_off * alpha * (1 + alpha) temp = significance2 + 2 * n_off * alpha n_on = 0.5 * (temp + significance * np.sqrt(np.abs(determinant))) return n_on - background(n_off, alpha) # This is mostly a copy & paste from _excess_matching_significance_on_off_lima # TODO: simplify this, or avoid code duplication? # Looking at the formula for significance_lima_on_off, I don't think # it can be analytically inverted because the n_on appears inside and outside the log # So probably root finding is still needed here. def _excess_matching_significance_lima(mu_bkg, significance): # Significance not well-defined for n_on < 0 # Return Nan if given significance can't be reached s0 = _significance_lima(n_on=1e-5, mu_bkg=mu_bkg) if s0 >= significance: return np.nan def target_significance(n_on): if n_on >= 0: return _significance_lima(n_on, mu_bkg) - significance else: # This high value is to tell the optimizer to stay n_on >= 0 return 1e10 excess_guess = _excess_matching_significance_simple(mu_bkg, significance) n_on_guess = excess_guess + mu_bkg # solver options to control robustness / accuracy / speed opts = dict(factor=0.1) n_on = scipy.optimize.fsolve(target_significance, n_on_guess, **opts) return n_on - mu_bkg def _excess_matching_significance_on_off_lima(n_off, alpha, significance): # Significance not well-defined for n_on < 0 # Return Nan if given significance can't be reached s0 = _significance_lima_on_off(n_on=1e-5, n_off=n_off, alpha=alpha) if s0 >= significance: return np.nan def target_significance(n_on): if n_on >= 0: return _significance_lima_on_off(n_on, n_off, alpha) - significance else: # This high value is to tell the optimizer to stay n_on >= 0 return 1e10 excess_guess = _excess_matching_significance_on_off_simple( n_off, alpha, significance ) n_on_guess = excess_guess + background(n_off, alpha) # solver options to control robustness / accuracy / speed opts = dict(factor=0.1) n_on = scipy.optimize.fsolve(target_significance, n_on_guess, **opts) return n_on - background(n_off, alpha) _excess_matching_significance_lima = np.vectorize(_excess_matching_significance_lima) _excess_matching_significance_on_off_lima = np.vectorize( _excess_matching_significance_on_off_lima )