# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Conversion functions for test statistic <-> significance <-> probability.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
# TODO: make all the other methods private?
# need to transfer the info from their docstrings to `convert_likelihood` first!
# TODO: check with MC study if there's a factor 2 error in the p-values
# because half of the TS values are exactly zero when fitting e.g. source extension.
# Do we need to introduce a bool "one_sided" or "hard_limit"?
__all__ = ['convert_likelihood',
'significance_to_probability_normal',
'probability_to_significance_normal',
'probability_to_significance_normal_limit',
'significance_to_probability_normal_limit',
]
[docs]def convert_likelihood(to, probability=None, significance=None,
ts=None, chi2=None, df=None):
"""Convert between various equivalent likelihood measures.
TODO: don't use ``chi2`` with this function at the moment ...
I forgot that one also needs the number of data points to
compute ``ts``:
http://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test#Calculating_the_test-statistic
Probably it's best to split this out into a separate function
or just document how users should compute ``ts`` before calling this
function if they have ``chi2``.
This function uses the ``sf`` and ``isf`` methods of the
`~scipy.stats.norm` and `~scipy.stats.chi2` distributions
to convert between various equivalent ways to quote a likelihood.
- ``sf`` means "survival function", which is the "tail probability"
of the distribution and is defined as ``1 - cdf``, where ``cdf``
is the "cumulative distribution function".
- ``isf`` is the inverse survival function.
The relation between the quantities can be summarised as:
- significance <-- normal distribution ---> probability
- probability <--- chi2 distribution with df ---> ts
- ts = chi2 / df
So supporting both ``ts`` and ``chi2`` in this function is redundant,
it's kept as a convenience for users that have a ``ts`` value from
a Poisson likelihood fit and users that have a ``chi2`` value from
a chi-square fit.
Parameters
----------
to : {'probability', 'ts', 'significance', 'chi2'}
Which quantity you want to compute.
probability, significance, ts, chi2 : array_like
Input quantity value ... mutually exclusive, pass exactly one!
df : array_like
Difference in number of degrees of freedom between
the alternative and the null hypothesis model.
Returns
-------
value : `numpy.ndarray`
Output value as requested by the input ``to`` parameter.
Notes
-----
**TS computation**
Under certain assumptions Wilk's theorem say that the likelihood ratio
``TS = 2 (L_alt - L_null)`` has a chi-square distribution with ``ndf``
degrees of freedom in the null hypothesis case, where
``L_alt`` and ``L_null`` are the log-likelihoods in the null and alternative
hypothesis and ``ndf`` is the difference in the number of freedom in those models.
Note that the `~gammapy.stats.cash` statistic already contains the factor 2,
i.e. you should compute ``TS`` as ``TS = cash_alt - cash_null``.
- http://en.wikipedia.org/wiki/Chi-squared_distribution
- http://docs.scipy.org/doc/scipy-dev/reference/generated/scipy.stats.chi2.html
- http://en.wikipedia.org/wiki/Likelihood-ratio_test#Wilks.27s_theorem
- http://adsabs.harvard.edu/abs/1979ApJ...228..939C
- http://adsabs.harvard.edu/abs/2009A%26A...495..989S
**Physical limits**
``probability`` is the one-sided `p-value`, e.g. `significance=3`
corresponds to `probability=0.00135`.
TODO: check if this gives correct coverage for cases with hard physical limits,
e.g. when fitting TS of extended sources vs. point source and in half of the
cases ``TS=0`` ... I suspect coverage might not be OK and we need to add an
option to this function to handle those cases!
Examples
--------
Here's some examples how to compute the ``probability`` or ``significance``
for a given observed ``ts`` or ``chi2``:
>>> from gammapy.stats import convert_likelihood
>>> convert_likelihood(to='probability', ts=10, df=2)
0.0067379469990854679
>>> convert_likelihood(to='significance', chi2=19, df=7)
2.4004554920435521
Here's how to do the reverse, compute the ``ts`` or ``chi2`` that would
result in a given ``probability`` or ``significance``.
>>> convert_likelihood(to='ts', probability=0.01, df=1)
6.6348966010212171
>>> convert_likelihood(to='chi2', significance=3, df=10)
28.78498865156606
"""
from scipy.stats import norm as norm_distribution
from scipy.stats import chi2 as chi2_distribution
# ---> Check inputs are OK!
# ---> This is a function that will be used interactively by end-users,
# ---> so we want good error messages if they use it correctly.
# Check that the output `to` parameter is valid
valid_quantities = ['probability', 'ts', 'significance', 'chi2']
if to not in valid_quantities:
msg = 'Invalid parameter `to`: {}\n'.format(to)
msg += 'Valid options are: {}'.format(valid_quantities)
raise ValueError(msg)
# Check that the input is valid
_locals = locals().copy()
input_values = [_ for _ in valid_quantities
if _locals[_] is not None]
if len(input_values) != 1:
msg = 'You have to pass exactly one of the valid input quantities: '
msg += ', '.join(valid_quantities)
msg += '\nYou passed: '
if len(input_values) == 0:
msg += 'none'
else:
msg += ', '.join(input_values)
raise ValueError(msg)
input_type = input_values[0]
input_value = locals()[input_type]
# Check that `df` is given if it's required for the computation
if any(_ in ['ts', 'chi2'] for _ in [input_type, to]) and df is None:
msg = 'You have to specify the number of degrees of freedom '
msg += 'via the `df` parameter.'
raise ValueError(msg)
# ---> Compute the requested quantity
# ---> By now we know the inputs are OK.
# Compute equivalent `ts` for `chi2` ... after this
# the code will only handle the `ts` input case,
# i.e. conversions: significance <-> probability <-> ts
if chi2 is not None:
ts = chi2 / df
# A note that might help you understand the nested if-else-statement:
# The quantities `probability`, `significance`, `ts` and `chi2`
# form a graph with `probability` at the center.
# There might be functions directly relating the other quantities
# in general or in certain limits, but the computation here
# always proceeds via `probability` as a one- or two-step process.
if to == 'significance':
if ts is not None:
probability = chi2_distribution.sf(ts, df)
return norm_distribution.isf(probability)
elif to == 'probability':
if significance is not None:
return norm_distribution.sf(significance)
else:
return chi2_distribution.sf(ts, df)
elif to == 'ts':
# Compute a probability if needed
if significance is not None:
probability = norm_distribution.sf(significance)
return chi2_distribution.isf(probability, df)
elif to == 'chi2':
if ts is not None:
return df * ts
# Compute a probability if needed
if significance is not None:
probability = norm_distribution.sf(significance)
return chi2_distribution.isf(probability, df)
[docs]def significance_to_probability_normal(significance):
"""Convert significance to one-sided tail probability.
Parameters
----------
significance : array_like
Significance
Returns
-------
probability : ndarray
One-sided tail probability
See Also
--------
probability_to_significance_normal,
significance_to_probability_normal_limit
Examples
--------
>>> significance_to_probability_normal(0)
0.5
>>> significance_to_probability_normal(1)
0.15865525393145707
>>> significance_to_probability_normal(3)
0.0013498980316300933
>>> significance_to_probability_normal(5)
2.8665157187919328e-07
>>> significance_to_probability_normal(10)
7.6198530241604696e-24
"""
from scipy.stats import norm
return norm.sf(significance)
[docs]def probability_to_significance_normal(probability):
"""Convert one-sided tail probability to significance.
Parameters
----------
probability : array_like
One-sided tail probability
Returns
-------
significance : ndarray
Significance
See Also
--------
significance_to_probability_normal,
probability_to_significance_normal_limit
Examples
--------
>>> probability_to_significance_normal(1e-10)
6.3613409024040557
"""
from scipy.stats import norm
return norm.isf(probability)
def _p_to_s_direct(probability, one_sided=True):
"""Direct implementation of p_to_s for checking.
Reference: RooStats User Guide Equations (6,7).
"""
from scipy.special import erfinv
probability = 1 - probability # We want p to be the tail probability
temp = np.where(one_sided, 2 * probability - 1, probability)
return np.sqrt(2) * erfinv(temp)
def _s_to_p_direct(significance, one_sided=True):
"""Direct implementation of s_to_p for checking.
Note: _p_to_s_direct was solved for p.
"""
from scipy.special import erf
temp = erf(significance / np.sqrt(2))
probability = np.where(one_sided, (temp + 1) / 2., temp)
return 1 - probability # We want p to be the tail probability
[docs]def probability_to_significance_normal_limit(probability):
"""Convert tail probability to significance
in the limit of small p and large s.
Reference: Equation (4) of
http://adsabs.harvard.edu/abs/2007physics...2156C
They say it is better than 1% for s > 1.6.
Asymptotically: s ~ sqrt(-log(p))
"""
u = -2 * np.log(probability * np.sqrt(2 * np.pi))
return np.sqrt(u - np.log(u))
[docs]def significance_to_probability_normal_limit(significance, guess=1e-100):
"""Convert significance to tail probability
in the limit of small p and large s.
See p_to_s_limit docstring
Note: s^2 = u - log(u) can't be solved analytically.
"""
from scipy.optimize import fsolve
def f(probability):
if probability > 0:
return probability_to_significance_normal_limit(probability) - significance
else:
return 1e100
return fsolve(f, guess)