Source code for astrobase.periodbase.falsealarm

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# falsealarm.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Apr 2019
# License: MIT - see the LICENSE file for the full text.

'''This contains functions useful for false-alarm probability calculation.

- :py:func:`.bootstrap_falsealarmprob`: calculates the false alarm probability
  for a period using bootstrap resampling.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

import numpy as np
import numpy.random as npr
npr.seed(0xdecaff)

from ..lcmath import sigclip_magseries


#############################################################
## FUNCTIONS FOR TESTING SIGNIFICANCE OF PERIODOGRAM PEAKS ##
#############################################################

[docs]def bootstrap_falsealarmprob(lspinfo, times, mags, errs, nbootstrap=250, magsarefluxes=False, sigclip=10.0, npeaks=None): '''Calculates the false alarm probabilities of periodogram peaks using bootstrap resampling of the magnitude time series. The false alarm probability here is defined as:: (1.0 + sum(trialbestpeaks[i] > peak[j]))/(ntrialbestpeaks + 1) for each best periodogram peak j. The index i is for each bootstrap trial. This effectively gives us a significance for the peak. Smaller FAP means a better chance that the peak is real. The basic idea is to get the number of trial best peaks that are larger than the current best peak and divide this by the total number of trials. The distribution of these trial best peaks is obtained after scrambling the mag values and rerunning the specified periodogram method for a bunch of trials. `lspinfo` is the output dict from a periodbase periodogram function and MUST contain a 'method' key that corresponds to one of the keys in the LSPMETHODS dict above. This will let this function know which periodogram function to run to generate the bootstrap samples. The lspinfo SHOULD also have a 'kwargs' key that corresponds to the input keyword arguments for the periodogram function as it was run originally, to keep everything the same during the bootstrap runs. If this is missing, default values will be used. FIXME: this may not be strictly correct; must look more into bootstrap significance testing. Also look into if we're doing resampling correctly for time series because the samples are not iid. Look into moving block bootstrap. Parameters ---------- lspinfo : dict A dict of period-finder results from one of the period-finders in periodbase, or your own functions, provided it's of the form and contains at least the keys listed below:: {'periods': np.array of all periods searched by the period-finder, 'lspvals': np.array of periodogram power value for each period, 'bestperiod': a float value that is the period with the highest peak in the periodogram, i.e. the most-likely actual period, 'method': a three-letter code naming the period-finder used; must be one of the keys in the `astrobase.periodbase.METHODLABELS` dict, 'nbestperiods': a list of the periods corresponding to periodogram peaks (`nbestlspvals` below) to annotate on the periodogram plot so they can be called out visually, 'nbestlspvals': a list of the power values associated with periodogram peaks to annotate on the periodogram plot so they can be called out visually; should be the same length as `nbestperiods` above, 'kwargs': dict of kwargs passed to your own period-finder function} If you provide your own function's period-finder results, you should add a corresponding key for it to the LSPMETHODS dict above so the bootstrap function can use it correctly. Your period-finder function should take `times`, `mags`, errs and any extra parameters as kwargs and return a dict of the form described above. A small worked example:: from your_module import your_periodfinder_func from astrobase import periodbase periodbase.LSPMETHODS['your-finder'] = your_periodfinder_func # run a period-finder session your_pfresults = your_periodfinder_func(times, mags, errs, **extra_kwargs) # run bootstrap to find FAP falsealarm_info = periodbase.bootstrap_falsealarmprob( your_pfresults, times, mags, errs, nbootstrap=250, magsarefluxes=False, ) times,mags,errs : np.arrays The magnitude/flux time-series to process along with their associated measurement errors. nbootstrap : int The total number of bootstrap trials to run. This is set to 250 by default, but should probably be around 1000 for realistic results. magsarefluxes : bool If True, indicates the input time-series is fluxes and not mags. sigclip : float or int or sequence of two floats/ints or None If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. npeaks : int or None The number of peaks from the list of 'nbestlspvals' in the period-finder result dict to run the bootstrap for. If None, all of the peaks in this list will have their FAP calculated. Returns ------- dict Returns a dict of the form:: {'peaks':allpeaks, 'periods':allperiods, 'probabilities':allfaps, 'alltrialbestpeaks':alltrialbestpeaks} ''' from astrobase.periodbase import LSPMETHODS # figure out how many periods to work on if (npeaks and (0 < npeaks < len(lspinfo['nbestperiods']))): nperiods = npeaks else: LOGWARNING('npeaks not specified or invalid, ' 'getting FAP for all %s periodogram peaks' % len(lspinfo['nbestperiods'])) nperiods = len(lspinfo['nbestperiods']) nbestperiods = lspinfo['nbestperiods'][:nperiods] nbestpeaks = lspinfo['nbestlspvals'][:nperiods] # get rid of nans first and sigclip stimes, smags, serrs = sigclip_magseries(times, mags, errs, magsarefluxes=magsarefluxes, sigclip=sigclip) allpeaks = [] allperiods = [] allfaps = [] alltrialbestpeaks = [] # make sure there are enough points to calculate a spectrum if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9: for ind, period, peak in zip(range(len(nbestperiods)), nbestperiods, nbestpeaks): LOGINFO('peak %s: running %s trials...' % (ind+1, nbootstrap)) trialbestpeaks = [] for _trial in range(nbootstrap): # get a scrambled index tindex = npr.randint(0, high=mags.size, size=mags.size) # get the kwargs dict out of the lspinfo if 'kwargs' in lspinfo: kwargs = lspinfo['kwargs'] # update the kwargs with some local stuff kwargs.update({'magsarefluxes':magsarefluxes, 'sigclip':sigclip, 'verbose':False}) else: kwargs = {'magsarefluxes':magsarefluxes, 'sigclip':sigclip, 'verbose':False} # run the periodogram with scrambled mags and errs # and the appropriate keyword arguments lspres = LSPMETHODS[lspinfo['method']]( times, mags[tindex], errs[tindex], **kwargs ) trialbestpeaks.append(lspres['bestlspval']) trialbestpeaks = np.array(trialbestpeaks) alltrialbestpeaks.append(trialbestpeaks) # calculate the FAP for a trial peak j = FAP[j] = # (1.0 + sum(trialbestpeaks[i] > peak[j]))/(ntrialbestpeaks + 1) if lspinfo['method'] != 'pdm': falsealarmprob = ( (1.0 + trialbestpeaks[trialbestpeaks > peak].size) / (trialbestpeaks.size + 1.0) ) # for PDM, we're looking for a peak smaller than the best peak # because values closer to 0.0 are more significant else: falsealarmprob = ( (1.0 + trialbestpeaks[trialbestpeaks < peak].size) / (trialbestpeaks.size + 1.0) ) LOGINFO('FAP for peak %s, period: %.6f = %.3g' % (ind+1, period, falsealarmprob)) allpeaks.append(peak) allperiods.append(period) allfaps.append(falsealarmprob) return {'peaks':allpeaks, 'periods':allperiods, 'probabilities':allfaps, 'alltrialbestpeaks':alltrialbestpeaks} else: LOGERROR('not enough mag series points to calculate periodogram') return None