Source code for gammapy.time.period

# Licensed under a 3-clause BSD style license - see LICENSE.rst
from collections import OrderedDict
import numpy as np
from astropy.stats import LombScargle

__all__ = [
    'lomb_scargle',
]


[docs]def lomb_scargle(time, flux, flux_err, dt, max_period=None, criteria='all', n_bootstraps=100): """ Compute period and its false alarm probability of a light curve using Lomb-Scargle PSD. To compute the Lomb-Scargle power spectral density, `astropy.stats.LombScargle` is called. For eyesight inspection, the spectral window function is also returned to evaluate the impact of sampling on the periodogram. The criteria for the false alarm probability are both parametric and non-parametric. For an introduction to the Lomb-Scargle periodogram, see Lomb (1976) and Scargle (1982). For an introduction to the false alarm probability of thr Lomb-Scargle periodogram, see the astropy docs. The function returns a results dictionary with the following content: - ``pgrid`` (`~numpy.ndarray`) -- Period grid in units of ``t`` - ``psd`` (`~numpy.ndarray`) -- PSD of Lomb-Scargle at frequencies of ``fgrid`` - ``period`` (`float`) -- Location of the highest periodogram peak - ``fap`` (`float`) or (`~numpy.ndarray`) -- False alarm probability of ``period`` under the null hypothesis of only-noise data for the specified criteria. If criteria is not defined, the false alarm probability of all criteria is returned. - ``swf`` (`~numpy.ndarray`) -- Spectral window function Parameters ---------- time : `~numpy.ndarray` Time array of the light curve flux : `~numpy.ndarray` Flux array of the light curve flux_err : `~numpy.ndarray` Flux error array of the light curve dt : float Desired resolution of the periodogram and the window function max_period : float Maximum period to analyse criteria : list of str Select which significance methods you'd like to run (by default all are running) Available: `{'pre', 'cvm', 'nll', 'boot'}` - ``pre`` for pre-defined beta distribution (see Schwarzenberg-Czerny (1998)) - ``cvm`` for Cramer-von-Mises distance minimisation (see Thieler et at. (2016)) - ``nll`` for negative logarithmic likelihood minimisation - ``boot`` for bootstrap-resampling (see Sueveges (2012) and ``astroML.time_series.lomb_scargle_bootstrap``. n_bootstraps : int Number of bootstraps resampling Returns ------- results : `~collections.OrderedDict` Results dictionary (see description above). References ---------- .. [1] Lomb (1976), "Least-squares frequency analysis of unequally spaced data", `Link <https://link.springer.com/article/10.1007%2FBF00648343?LI=true>`__ .. [2] Scargle (1982), "Studies in astronomical time series analysis. II - Statistical aspects of spectral analysis of unevenly spaced data", `Link <http://articles.adsabs.harvard.edu/full/1982ApJ...263..835S>`__ .. [3] Schwarzenberg-Czerny (1998), "The distribution of empirical periodograms: Lomb-Scargle and PDM spectra", `Link <https://academic.oup.com/mnras/article/301/3/831/1038387/The-distribution-of-empirical-periodograms-Lomb>`__ .. [4] Thieler et at. (2016), "RobPer: An R Package to Calculate Periodograms for Light Curves Based on Robust Regression", `Link <https://www.jstatsoft.org/article/view/v069i09>`__ .. [5] Sueveges (2012), "False Alarm Probability based on bootstrap and extreme-value methods for periodogram peaks", `Link <http://ada7.cosmostat.org/ADA7_proceeding_MSuveges2.pdf>`__ .. [6] Astropy docs, Lomb-Scargle Periodograms, `Link <http://docs.astropy.org/en/latest/stats/lombscargle.html>`_ """ # set up lomb-scargle-algorithm freq, periods = _freq_grid(time, dt, max_period) psd_data = LombScargle(time, flux, flux_err).power(freq) # find period with highest periodogram peak psd_best_period = np.max(psd_data) best_period = periods[np.argmax(psd_data)] # define significance for best period if criteria == 'all': criteria = ['pre', 'cvm', 'nll', 'boot'] fap = OrderedDict() if 'pre' in criteria: fap['pre'] = _fap_pre(time, freq, psd_best_period) if 'cvm' in criteria: fap['cvm'] = _fap_cvm(freq, psd_data, psd_best_period) if 'nll' in criteria: fap['nll'] = _fap_nll(time, freq, psd_data, psd_best_period) if 'boot' in criteria: fap['boot'] = _fap_boot(time, flux, flux_err, freq, psd_best_period, n_bootstraps) # spectral window function time_win, window = _window_function(time, dt) psd_win = LombScargle(time_win, window, 1).power(freq) return OrderedDict([ ('pgrid', periods), ('psd', psd_data), ('period', best_period), ('fap', fap), ('swf', psd_win), ])
def _window_function(time, dt): """ Generates window function with desired resolution dt """ time_win = np.rint(time / dt) * dt t_max = np.max(time_win) t_min = np.min(time_win) window_grid = np.arange(t_min, t_max + dt, dt) window_grid = np.rint(window_grid / dt) * dt # round again since np.arange is not robust window = np.zeros(len(window_grid)) window[np.searchsorted(window_grid, time_win, side='right') - 1] = 1 return window_grid, window def _freq_grid(time, dt, max_period): """ Generates the frequency grid for the periodogram """ if max_period is None: max_period = np.rint((np.max(time) - np.min(time)) / dt) * dt else: max_period = np.rint(max_period / dt) * dt min_period = dt periods = np.arange(min_period, max_period + dt, dt) grid = 1. / periods return grid, periods def _cvm(param, data): """ Cramer-von-Mises distance for beta distribution """ from scipy.stats import beta a, b = param ordered_data = np.sort(data, axis=None) sumbeta = 0 for n in range(len(data)): cdf = beta.cdf(ordered_data[n], a, b) sumbeta += (cdf - (n - 0.5) / len(data)) ** 2.0 cvm_dist = (1. / len(data)) * sumbeta + 1. / (12 * (len(data) ** 2.)) mask = np.isfinite(cvm_dist) cvm = cvm_dist[mask] return cvm def _nll(param, data): """ Negative log likelihood function for beta distribution """ from scipy.stats import beta a, b = param pdf = beta.pdf(data, a, b) lg = np.log(pdf) mask = np.isfinite(lg) nll = -lg[mask].sum() return nll def _bootstrap(time, flux, flux_error, freq, n_bootstraps): """ Returns value of maximum periodogram peak for every bootstrap resampling """ rand = np.random.RandomState(42) max_periods = np.empty(n_bootstraps) for idx_run in range(n_bootstraps): ind = rand.randint(0, len(flux), len(flux)) psd_boot = LombScargle(time, flux[ind], flux_error[ind]).power(freq) max_periods[idx_run] = np.max(psd_boot) return max_periods def _fap_pre(time, freq, psd_best_period): """ Computes false alarm probability for the pre-defined beta distribution """ from scipy.stats import beta a = (3 - 1) / 2 b = (len(time) - 3) / 2 fap = 1 - beta.cdf(psd_best_period, a, b) ** len(freq) return fap def _fap_cvm(freq, psd, psd_best_period): """ Computes false alarm probability for the cvm-distance-minimised beta distribution """ from scipy import optimize from scipy.stats import beta clip = 0.00001 temp = np.mean(psd) * (-np.mean(psd) + np.mean(psd) ** 2 + np.var(psd)) theta_1 = - temp / np.var(psd) if theta_1 < 0: theta_1 = clip theta_2 = (theta_1 - theta_1 * np.mean(psd)) / np.mean(psd) if theta_2 < 0: theta_2 = clip cvm_minimize = optimize.minimize( _cvm, [theta_1, theta_2], args=(psd,), bounds=((clip, None), (clip, None)), ) fap = 1 - beta.cdf(psd_best_period, cvm_minimize.x[0], cvm_minimize.x[1]) ** len(freq) return fap def _fap_nll(time, freq, psd, psd_best_period): """ Computes false alarm probability for the negative logarithmic likelihood-minimised beta distribution """ from scipy import optimize from scipy.stats import beta a = (3 - 1) / 2 b = (len(time) - 3) / 2 clip = 0.00001 nll_minimize = optimize.minimize( _nll, [a, b], args=(psd,), bounds=((clip, None), (clip, None)), ) fap = 1 - beta.cdf(psd_best_period, nll_minimize.x[0], nll_minimize.x[1]) ** len(freq) return fap def _fap_boot(time, flux, flux_error, freq, psd_best_period, n_bootstraps): """ Computes significance for the bootstrap-resampling """ from scipy import stats max_periods = _bootstrap(time, flux, flux_error, freq, n_bootstraps) fap = 1 - (stats.percentileofscore(max_periods, psd_best_period) / 100) return fap