Source code for gammapy.time.period

# Licensed under a 3-clause BSD style license - see LICENSE.rst
from collections import OrderedDict
import numpy as np
from astropy.stats import LombScargle

__all__ = [
    'lomb_scargle',
]


[docs]def lomb_scargle(time, flux, flux_err, dt, max_period=None, criteria='all', n_bootstraps=100):
    """
    Compute period and its false alarm probability of a light curve using Lomb-Scargle PSD.

    To compute the Lomb-Scargle power spectral density, `astropy.stats.LombScargle` is called.
    For eyesight inspection, the spectral window function is also returned
    to evaluate the impact of sampling on the periodogram.
    The criteria for the false alarm probability are both parametric and non-parametric.

    For an introduction to the Lomb-Scargle periodogram, see Lomb (1976) and Scargle (1982).
    For an introduction to the false alarm probability of thr Lomb-Scargle periodogram, see the astropy docs.

    The function returns a results dictionary with the following content:

    - ``pgrid`` (`~numpy.ndarray`) -- Period grid in units of ``t``
    - ``psd`` (`~numpy.ndarray`) -- PSD of Lomb-Scargle at frequencies of ``fgrid``
    - ``period`` (`float`) -- Location of the highest periodogram peak
    - ``fap`` (`float`) or (`~numpy.ndarray`) -- False alarm probability of ``period``
      under the null hypothesis of only-noise data for the specified criteria.
      If criteria is not defined, the false alarm probability of all criteria is returned.
    - ``swf`` (`~numpy.ndarray`) -- Spectral window function

    Parameters
    ----------
    time : `~numpy.ndarray`
        Time array of the light curve
    flux : `~numpy.ndarray`
        Flux array of the light curve
    flux_err : `~numpy.ndarray`
        Flux error array of the light curve
    dt : float
        Desired resolution of the periodogram and the window function
    max_period : float
        Maximum period to analyse
    criteria : list of str
        Select which significance methods you'd like to run (by default all are running)
        Available: `{'pre', 'cvm', 'nll', 'boot'}`

        - ``pre`` for pre-defined beta distribution (see Schwarzenberg-Czerny (1998))
        - ``cvm`` for Cramer-von-Mises distance minimisation (see Thieler et at. (2016))
        - ``nll`` for negative logarithmic likelihood minimisation
        - ``boot`` for bootstrap-resampling (see Sueveges (2012)
           and ``astroML.time_series.lomb_scargle_bootstrap``.
    n_bootstraps : int
        Number of bootstraps resampling

    Returns
    -------
    results : `~collections.OrderedDict`
        Results dictionary (see description above).

    References
    ----------
    .. [1] Lomb (1976), "Least-squares frequency analysis of unequally spaced data",
       `Link <https://link.springer.com/article/10.1007%2FBF00648343?LI=true>`__
    .. [2] Scargle (1982), "Studies in astronomical time series analysis. II -
       Statistical aspects of spectral analysis of unevenly spaced data",
       `Link <http://articles.adsabs.harvard.edu/full/1982ApJ...263..835S>`__
    .. [3] Schwarzenberg-Czerny (1998), "The distribution of empirical periodograms: Lomb-Scargle and PDM spectra",
       `Link <https://academic.oup.com/mnras/article/301/3/831/1038387/The-distribution-of-empirical-periodograms-Lomb>`__
    .. [4] Thieler et at. (2016), "RobPer: An R Package to Calculate Periodograms for Light Curves Based on Robust Regression",
       `Link <https://www.jstatsoft.org/article/view/v069i09>`__
    .. [5] Sueveges (2012), "False Alarm Probability based on bootstrap and extreme-value methods for periodogram peaks",
       `Link <http://ada7.cosmostat.org/ADA7_proceeding_MSuveges2.pdf>`__
    .. [6] Astropy docs, Lomb-Scargle Periodograms,
       `Link <http://docs.astropy.org/en/latest/stats/lombscargle.html>`_
    """
    # set up lomb-scargle-algorithm
    freq, periods = _freq_grid(time, dt, max_period)
    psd_data = LombScargle(time, flux, flux_err).power(freq)

    # find period with highest periodogram peak
    psd_best_period = np.max(psd_data)
    best_period = periods[np.argmax(psd_data)]

    # define significance for best period
    if criteria == 'all':
        criteria = ['pre', 'cvm', 'nll', 'boot']

    fap = OrderedDict()

    if 'pre' in criteria:
        fap['pre'] = _fap_pre(time, freq, psd_best_period)
    if 'cvm' in criteria:
        fap['cvm'] = _fap_cvm(freq, psd_data, psd_best_period)
    if 'nll' in criteria:
        fap['nll'] = _fap_nll(time, freq, psd_data, psd_best_period)
    if 'boot' in criteria:
        fap['boot'] = _fap_boot(time, flux, flux_err, freq, psd_best_period, n_bootstraps)

    # spectral window function
    time_win, window = _window_function(time, dt)
    psd_win = LombScargle(time_win, window, 1).power(freq)

    return OrderedDict([
        ('pgrid', periods),
        ('psd', psd_data),
        ('period', best_period),
        ('fap', fap),
        ('swf', psd_win),
    ])


def _window_function(time, dt):
    """
    Generates window function with desired resolution dt
    """
    time_win = np.rint(time / dt) * dt
    t_max = np.max(time_win)
    t_min = np.min(time_win)
    window_grid = np.arange(t_min, t_max + dt, dt)
    window_grid = np.rint(window_grid / dt) * dt  # round again since np.arange is not robust
    window = np.zeros(len(window_grid))
    window[np.searchsorted(window_grid, time_win, side='right') - 1] = 1

    return window_grid, window


def _freq_grid(time, dt, max_period):
    """
    Generates the frequency grid for the periodogram
    """
    if max_period is None:
        max_period = np.rint((np.max(time) - np.min(time)) / dt) * dt
    else:
        max_period = np.rint(max_period / dt) * dt

    min_period = dt
    periods = np.arange(min_period, max_period + dt, dt)
    grid = 1. / periods

    return grid, periods


def _cvm(param, data):
    """
    Cramer-von-Mises distance for beta distribution
    """
    from scipy.stats import beta
    a, b = param
    ordered_data = np.sort(data, axis=None)
    sumbeta = 0
    for n in range(len(data)):
        cdf = beta.cdf(ordered_data[n], a, b)
        sumbeta += (cdf - (n - 0.5) / len(data)) ** 2.0

    cvm_dist = (1. / len(data)) * sumbeta + 1. / (12 * (len(data) ** 2.))
    mask = np.isfinite(cvm_dist)
    cvm = cvm_dist[mask]

    return cvm


def _nll(param, data):
    """
    Negative log likelihood function for beta distribution
    """
    from scipy.stats import beta
    a, b = param
    pdf = beta.pdf(data, a, b)
    lg = np.log(pdf)
    mask = np.isfinite(lg)
    nll = -lg[mask].sum()

    return nll


def _bootstrap(time, flux, flux_error, freq, n_bootstraps):
    """
    Returns value of maximum periodogram peak for every bootstrap resampling
    """
    rand = np.random.RandomState(42)
    max_periods = np.empty(n_bootstraps)

    for idx_run in range(n_bootstraps):
        ind = rand.randint(0, len(flux), len(flux))
        psd_boot = LombScargle(time, flux[ind], flux_error[ind]).power(freq)
        max_periods[idx_run] = np.max(psd_boot)

    return max_periods


def _fap_pre(time, freq, psd_best_period):
    """
    Computes false alarm probability for the pre-defined beta distribution
    """
    from scipy.stats import beta
    a = (3 - 1) / 2
    b = (len(time) - 3) / 2
    fap = 1 - beta.cdf(psd_best_period, a, b) ** len(freq)

    return fap


def _fap_cvm(freq, psd, psd_best_period):
    """
    Computes false alarm probability for the cvm-distance-minimised beta distribution
    """
    from scipy import optimize
    from scipy.stats import beta
    clip = 0.00001

    temp = np.mean(psd) * (-np.mean(psd) + np.mean(psd) ** 2 + np.var(psd))
    theta_1 = - temp / np.var(psd)
    if theta_1 < 0:
        theta_1 = clip

    theta_2 = (theta_1 - theta_1 * np.mean(psd)) / np.mean(psd)
    if theta_2 < 0:
        theta_2 = clip

    cvm_minimize = optimize.minimize(
        _cvm,
        [theta_1, theta_2],
        args=(psd,),
        bounds=((clip, None), (clip, None)),
    )
    fap = 1 - beta.cdf(psd_best_period, cvm_minimize.x[0], cvm_minimize.x[1]) ** len(freq)

    return fap


def _fap_nll(time, freq, psd, psd_best_period):
    """
    Computes false alarm probability for the negative logarithmic likelihood-minimised beta distribution
    """
    from scipy import optimize
    from scipy.stats import beta
    a = (3 - 1) / 2
    b = (len(time) - 3) / 2
    clip = 0.00001
    nll_minimize = optimize.minimize(
        _nll,
        [a, b],
        args=(psd,),
        bounds=((clip, None), (clip, None)),
    )
    fap = 1 - beta.cdf(psd_best_period, nll_minimize.x[0], nll_minimize.x[1]) ** len(freq)

    return fap


def _fap_boot(time, flux, flux_error, freq, psd_best_period, n_bootstraps):
    """
    Computes significance for the bootstrap-resampling
    """
    from scipy import stats
    max_periods = _bootstrap(time, flux, flux_error, freq, n_bootstraps)
    fap = 1 - (stats.percentileofscore(max_periods, psd_best_period) / 100)

    return fap
Navigation

Source code for gammapy.time.period