Source code for astrobase.periodbase.macf

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# macf.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Oct 2017

'''
This contains the ACF period-finding algorithm from McQuillan+ 2013a and
McQuillan+ 2014.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

from numpy import (
    nan as npnan, arange as nparange, array as nparray,
    isfinite as npisfinite, sqrt as npsqrt, concatenate as npconcatenate,
    zeros as npzeros, int64 as npint64, all as npall, polyfit as nppolyfit
)

from scipy.signal import argrelmax, argrelmin, savgol_filter
from astropy.convolution import convolve, Gaussian1DKernel


###################
## LOCAL IMPORTS ##
###################

from ..varbase.autocorr import autocorr_magseries


######################
## HELPER FUNCTIONS ##
######################


def _smooth_acf(acf, windowfwhm=7, windowsize=21):
    '''This returns a smoothed version of the ACF.

    Convolves the ACF with a Gaussian of given `windowsize` and `windowfwhm`.

    Parameters
    ----------

    acf : np.array
        The auto-correlation function array to smooth.

    windowfwhm : int
        The smoothing window Gaussian kernel's FWHM .

    windowsize : int
        The number of input points to apply the smoothing over.

    Returns
    -------

    np.array
        Smoothed version of the input ACF array.

    '''

    convkernel = Gaussian1DKernel(windowfwhm, x_size=windowsize)
    smoothed = convolve(acf, convkernel, boundary='extend')

    return smoothed


def _smooth_acf_savgol(acf, windowsize=21, polyorder=2):
    '''
    This returns a smoothed version of the ACF.

    This version uses the Savitsky-Golay smoothing filter.

    Parameters
    ----------

    acf : np.array
        The auto-correlation function array to smooth.

    windowsize : int
        The number of input points to apply the smoothing over.

    polyorder : int
        The order of the polynomial to use in the Savitsky-Golay filter.

    Returns
    -------

    np.array
        Smoothed version of the input ACF array.

    '''

    smoothed = savgol_filter(acf, windowsize, polyorder)

    return smoothed


def _get_acf_peakheights(lags, acf, npeaks=20, searchinterval=1):
    '''This calculates the relative peak heights for first npeaks in ACF.

    Usually, the first peak or the second peak (if its peak height > first peak)
    corresponds to the correct lag. When we know the correct lag, the period is
    then::

        bestperiod = time[lags == bestlag] - time[0]

    Parameters
    ----------

    lags : np.array
        An array of lags that the ACF is calculated at.

    acf : np.array
        The array containing the ACF values.

    npeaks : int
        THe maximum number of peaks to consider when finding peak heights.

    searchinterval : int
        From `scipy.signal.argrelmax`: "How many points on each side to use for
        the comparison to consider comparator(n, n+x) to be True." This
        effectively sets how many points on each of the current peak will be
        used to check if the current peak is the local maximum.

    Returns
    -------

    dict
        This returns a dict of the following form::

            {'maxinds':the indices of the lag array where maxes are,
             'maxacfs':the ACF values at each max,
             'maxlags':the lag values at each max,
             'mininds':the indices of the lag array where mins are,
             'minacfs':the ACF values at each min,
             'minlags':the lag values at each min,
             'relpeakheights':the relative peak heights of each rel. ACF peak,
             'relpeaklags':the lags at each rel. ACF peak found,
             'peakindices':the indices of arrays where each rel. ACF peak is,
             'bestlag':the lag value with the largest rel. ACF peak height,
             'bestpeakheight':the largest rel. ACF peak height,
             'bestpeakindex':the largest rel. ACF peak's number in all peaks}

    '''

    maxinds = argrelmax(acf, order=searchinterval)[0]
    maxacfs = acf[maxinds]
    maxlags = lags[maxinds]
    mininds = argrelmin(acf, order=searchinterval)[0]
    minacfs = acf[mininds]
    minlags = lags[mininds]

    relpeakheights = npzeros(npeaks)
    relpeaklags = npzeros(npeaks,dtype=npint64)
    peakindices = npzeros(npeaks,dtype=npint64)

    for peakind, mxi in enumerate(maxinds[:npeaks]):

        # check if there are no mins to the left
        # throw away this peak because it's probably spurious
        # (FIXME: is this OK?)
        if npall(mxi < mininds):
            continue

        leftminind = mininds[mininds < mxi][-1]  # the last index to the left
        rightminind = mininds[mininds > mxi][0]  # the first index to the right
        relpeakheights[peakind] = (
            acf[mxi] - (acf[leftminind] + acf[rightminind])/2.0
        )
        relpeaklags[peakind] = lags[mxi]
        peakindices[peakind] = peakind

    # figure out the bestperiod if possible
    if relpeakheights[0] > relpeakheights[1]:
        bestlag = relpeaklags[0]
        bestpeakheight = relpeakheights[0]
        bestpeakindex = peakindices[0]
    else:
        bestlag = relpeaklags[1]
        bestpeakheight = relpeakheights[1]
        bestpeakindex = peakindices[1]

    return {'maxinds':maxinds,
            'maxacfs':maxacfs,
            'maxlags':maxlags,
            'mininds':mininds,
            'minacfs':minacfs,
            'minlags':minlags,
            'relpeakheights':relpeakheights,
            'relpeaklags':relpeaklags,
            'peakindices':peakindices,
            'bestlag':bestlag,
            'bestpeakheight':bestpeakheight,
            'bestpeakindex':bestpeakindex}


[docs]def plot_acf_results(acfp, outfile, maxlags=5000, yrange=(-0.4,0.4)): ''' This plots the unsmoothed/smoothed ACF vs lag. Parameters ---------- acfp : dict This is the dict returned from `macf_period_find` below. outfile : str The output file the plot will be written to. maxlags: int The maximum number of lags to include in the plot. yrange : sequence of two floats The y-range of the ACF vs. lag plot to use. ''' import matplotlib.pyplot as plt lags = acfp['acfresults']['lags'][:maxlags] smoothedacf = acfp['acf'][:maxlags] unsmoothedacf = acfp['acfresults']['acf'][:maxlags] acfparams = acfp['kwargs']['smoothfunckwargs'].copy() acfparams.update({'peakinterval': int(acfp['kwargs']['smoothacf']/2.0)}) # plot the ACFs fig, ax1 = plt.subplots() # this is lags vs acf ax1.plot(lags, unsmoothedacf, label='unsmoothed ACF',color='#1f77b4') ax1.plot(lags, smoothedacf, label='smoothed ACF', color='#ff7f0e') ax1.set_xlim((0,maxlags)) ax1.set_xlabel('lags') # overplot the identified peaks acfmaxinds = acfp['acfpeaks']['maxinds'] for i, maxind in enumerate(acfmaxinds): if i == 0: ax1.axvline(maxind, linewidth=2.0, color='red', ymin=0.2, ymax=0.3, label='identified ACF peaks') else: ax1.axvline(maxind, linewidth=2.0, color='red', ymin=0.2, ymax=0.3) plt.ylabel('ACF') plt.ylim(yrange) ax1.legend() plt.title('%s' % repr(acfparams)) plt.tight_layout() plt.savefig(outfile) plt.close('all') return outfile
############################ ## PERIOD FINDER FUNCTION ## ############################
[docs]def macf_period_find( times, mags, errs, fillgaps=0.0, filterwindow=11, forcetimebin=None, maxlags=None, maxacfpeaks=10, smoothacf=21, # set for Kepler-type LCs, see details below smoothfunc=_smooth_acf_savgol, smoothfunckwargs=None, magsarefluxes=False, sigclip=3.0, verbose=True, periodepsilon=0.1, # doesn't do anything, for consistent external API nworkers=None, # doesn't do anything, for consistent external API startp=None, # doesn't do anything, for consistent external API endp=None, # doesn't do anything, for consistent external API autofreq=None, # doesn't do anything, for consistent external API stepsize=None, # doesn't do anything, for consistent external API ): '''This finds periods using the McQuillan+ (2013a, 2014) ACF method. The kwargs from `periodepsilon` to `stepsize` don't do anything but are used to present a consistent API for all periodbase period-finders to an outside driver (e.g. the one in the checkplotserver). Parameters ---------- times,mags,errs : np.array The input magnitude/flux time-series to run the period-finding for. fillgaps : 'noiselevel' or float This sets what to use to fill in gaps in the time series. If this is 'noiselevel', will smooth the light curve using a point window size of `filterwindow` (this should be an odd integer), subtract the smoothed LC from the actual LC and estimate the RMS. This RMS will be used to fill in the gaps. Other useful values here are 0.0, and npnan. filterwindow : int The light curve's smoothing filter window size to use if `fillgaps='noiselevel`'. forcetimebin : None or float This is used to force a particular cadence in the light curve other than the automatically determined cadence. This effectively rebins the light curve to this cadence. This should be in the same time units as `times`. maxlags : None or int This is the maximum number of lags to calculate. If None, will calculate all lags. maxacfpeaks : int This is the maximum number of ACF peaks to use when finding the highest peak and obtaining a fit period. smoothacf : int This is the number of points to use as the window size when smoothing the ACF with the `smoothfunc`. This should be an odd integer value. If this is None, will not smooth the ACF, but this will probably lead to finding spurious peaks in a generally noisy ACF. For Kepler, a value between 21 and 51 seems to work fine. For ground based data, much larger values may be necessary: between 1001 and 2001 seem to work best for the HAT surveys. This is dependent on cadence, RMS of the light curve, the periods of the objects you're looking for, and finally, any correlated noise in the light curve. Make a plot of the smoothed/unsmoothed ACF vs. lag using the result dict of this function and the `plot_acf_results` function above to see the identified ACF peaks and what kind of smoothing might be needed. The value of `smoothacf` will also be used to figure out the interval to use when searching for local peaks in the ACF: this interval is 1/2 of the `smoothacf` value. smoothfunc : Python function This is the function that will be used to smooth the ACF. This should take at least one kwarg: 'windowsize'. Other kwargs can be passed in using a dict provided in `smoothfunckwargs`. By default, this uses a Savitsky-Golay filter, a Gaussian filter is also provided but not used. Another good option would be an actual low-pass filter (generated using scipy.signal?) to remove all high frequency noise from the ACF. smoothfunckwargs : dict or None The dict of optional kwargs to pass in to the `smoothfunc`. magsarefluxes : bool If your input measurements in `mags` are actually fluxes instead of mags, set this is True. sigclip : float or int or sequence of two floats/ints or None If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. verbose : bool If True, will indicate progress and report errors. Returns ------- dict Returns a dict with results. dict['bestperiod'] is the estimated best period and dict['fitperiodrms'] is its estimated error. Other interesting things in the output include: - dict['acfresults']: all results from calculating the ACF. in particular, the unsmoothed ACF might be of interest: dict['acfresults']['acf'] and dict['acfresults']['lags']. - dict['lags'] and dict['acf'] contain the ACF after smoothing was applied. - dict['periods'] and dict['lspvals'] can be used to construct a pseudo-periodogram. - dict['naivebestperiod'] is obtained by multiplying the lag at the highest ACF peak with the cadence. This is usually close to the fit period (dict['fitbestperiod']), which is calculated by doing a fit to the lags vs. peak index relation as in McQuillan+ 2014. ''' # get the ACF acfres = autocorr_magseries( times, mags, errs, maxlags=maxlags, fillgaps=fillgaps, forcetimebin=forcetimebin, sigclip=sigclip, magsarefluxes=magsarefluxes, filterwindow=filterwindow, verbose=verbose ) xlags = acfres['lags'] # smooth the ACF if requested if smoothacf and isinstance(smoothacf, int) and smoothacf > 0: if smoothfunckwargs is None: sfkwargs = {'windowsize':smoothacf} else: sfkwargs = smoothfunckwargs.copy() sfkwargs.update({'windowsize':smoothacf}) xacf = smoothfunc(acfres['acf'], **sfkwargs) else: xacf = acfres['acf'] # get the relative peak heights and fit best lag peakres = _get_acf_peakheights(xlags, xacf, npeaks=maxacfpeaks, searchinterval=int(smoothacf/2)) # this is the best period's best ACF peak height bestlspval = peakres['bestpeakheight'] try: # get the fit best lag from a linear fit to the peak index vs time(peak # lag) function as in McQillian+ (2014) fity = npconcatenate(( [0.0, peakres['bestlag']], peakres['relpeaklags'][peakres['relpeaklags'] > peakres['bestlag']] )) fity = fity*acfres['cadence'] fitx = nparange(fity.size) fitcoeffs, fitcovar = nppolyfit(fitx, fity, 1, cov=True) # fit best period is the gradient of fit fitbestperiod = fitcoeffs[0] bestperiodrms = npsqrt(fitcovar[0,0]) # from the covariance matrix except Exception: LOGWARNING('linear fit to time at each peak lag ' 'value vs. peak number failed, ' 'naively calculated ACF period may not be accurate') fitcoeffs = nparray([npnan, npnan]) fitcovar = nparray([[npnan, npnan], [npnan, npnan]]) fitbestperiod = npnan bestperiodrms = npnan raise # calculate the naive best period using delta_tau = lag * cadence naivebestperiod = peakres['bestlag']*acfres['cadence'] if fitbestperiod < naivebestperiod: LOGWARNING('fit bestperiod = %.5f may be an alias, ' 'naively calculated bestperiod is = %.5f' % (fitbestperiod, naivebestperiod)) if npisfinite(fitbestperiod): bestperiod = fitbestperiod else: bestperiod = naivebestperiod return {'bestperiod':bestperiod, 'bestlspval':bestlspval, 'nbestpeaks':maxacfpeaks, # for compliance with the common pfmethod API 'nbestperiods':npconcatenate([ [fitbestperiod], peakres['relpeaklags'][1:maxacfpeaks]*acfres['cadence'] ]), 'nbestlspvals':peakres['maxacfs'][:maxacfpeaks], 'lspvals':xacf, 'periods':xlags*acfres['cadence'], 'acf':xacf, 'lags':xlags, 'method':'acf', 'naivebestperiod':naivebestperiod, 'fitbestperiod':fitbestperiod, 'fitperiodrms':bestperiodrms, 'periodfitcoeffs':fitcoeffs, 'periodfitcovar':fitcovar, 'kwargs':{'maxlags':maxlags, 'maxacfpeaks':maxacfpeaks, 'fillgaps':fillgaps, 'filterwindow':filterwindow, 'smoothacf':smoothacf, 'smoothfunckwargs':sfkwargs, 'magsarefluxes':magsarefluxes, 'sigclip':sigclip}, 'acfresults':acfres, 'acfpeaks':peakres}