Source code for astrobase.varbase.signals

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# signals.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Jan 2017

'''
Contains functions to deal with masking and removing periodic signals in light
curves.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

import os.path
import os
from io import BytesIO as Strio


import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

###################
## LOCAL IMPORTS ##
###################

from ..periodbase.zgls import pgen_lsp
from ..lcfit.sinusoidal import _fourier_func, fourier_fit_magseries
from ..lcmath import sigclip_magseries, phase_magseries


################################################
## REMOVING SIGNALS FROM MAGNITUDE TIMESERIES ##
################################################

[docs]def prewhiten_magseries(times, mags, errs,
                        whitenperiod,
                        whitenparams,
                        sigclip=3.0,
                        magsarefluxes=False,
                        plotfit=None,
                        plotfitphasedlconly=True,
                        rescaletomedian=True):
    '''Removes a periodic sinusoidal signal generated using whitenparams from
    the input magnitude time series.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to prewhiten.

    whitenperiod : float
        The period of the sinusoidal signal to remove.

    whitenparams : list of floats
        This contains the Fourier amplitude and phase coefficients of the
        sinusoidal signal to remove::

            [ampl_1, ampl_2, ampl_3, ..., ampl_X,
             pha_1, pha_2, pha_3, ..., pha_X]

        where `X` is the Fourier order. These are usually the output of a
        previous Fourier fit to the light curve (from
        :py:func:`astrobase.lcfit.sinusoidal.fourier_fit_magseries` for
        example).

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If True, will treat the input values of `mags` as fluxes for purposes of
        plotting the fit and sig-clipping.

    plotfit : str or False
        If this is a string, this function will make a plot showing the effect
        of the pre-whitening on the mag/flux time-series and write the plot to
        the path specified here.

    plotfitphasedlconly : bool
        If True, will plot only the phased LC for showing the effect of
        pre-whitening, and skip plotting the unphased LC.

    rescaletomedian : bool
        If this is True, then we add back the constant median term of the
        magnitudes to the final pre-whitened mag series.

    Returns
    -------

    dict
        Returns a dict of the form::

            {'wtimes':times array after pre-whitening,
             'wphase':phase array after pre-whitening,
             'wmags':mags array after pre-whitening,
             'werrs':errs array after pre-whitening,
             'whitenparams':the input pre-whitening params used,
             'whitenperiod':the input pre-whitening period used,
             'fitplotfile':the output plot file if plotfit was set}

    '''

    stimes, smags, serrs = sigclip_magseries(times, mags, errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    median_mag = np.median(smags)

    # phase the mag series using the given period and epoch = min(stimes)
    mintime = np.min(stimes)

    # calculate the unsorted phase, then sort it
    iphase = (
        (stimes - mintime)/whitenperiod -
        np.floor((stimes - mintime)/whitenperiod)
    )
    phasesortind = np.argsort(iphase)

    # these are the final quantities to use for the Fourier fits
    phase = iphase[phasesortind]
    pmags = smags[phasesortind]
    perrs = serrs[phasesortind]

    # get the times sorted in phase order (useful to get the fit mag minimum
    # with respect to phase -- the light curve minimum)
    ptimes = stimes[phasesortind]

    # now subtract the harmonic series from the phased LC
    # these are still in phase order
    wmags = pmags - _fourier_func(whitenparams, phase, pmags)

    # resort everything by time order
    wtimeorder = np.argsort(ptimes)
    wtimes = ptimes[wtimeorder]
    wphase = phase[wtimeorder]
    wmags = wmags[wtimeorder]
    werrs = perrs[wtimeorder]

    if rescaletomedian:
        wmags = wmags + median_mag

    # prepare the returndict
    returndict = {'wtimes':wtimes,  # these are in the new time order
                  'wphase':wphase,
                  'wmags':wmags,
                  'werrs':werrs,
                  'whitenparams':whitenparams,
                  'whitenperiod':whitenperiod}

    # make the fit plot if required
    if plotfit and (isinstance(plotfit, str) or isinstance(plotfit, Strio)):

        if plotfitphasedlconly:
            plt.figure(figsize=(10,4.8))
        else:
            plt.figure(figsize=(16,9.6))

        if plotfitphasedlconly:

            # phased series before whitening
            plt.subplot(121)
            plt.plot(phase,pmags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC before pre-whitening')

            # phased series after whitening
            plt.subplot(122)
            plt.plot(wphase,wmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC after pre-whitening')

        else:

            # time series before whitening
            plt.subplot(221)
            plt.plot(stimes,smags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('JD')
            plt.title('LC before pre-whitening')

            # time series after whitening
            plt.subplot(222)
            plt.plot(wtimes,wmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('JD')
            plt.title('LC after pre-whitening with period: %.6f' % whitenperiod)

            # phased series before whitening
            plt.subplot(223)
            plt.plot(phase,pmags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC before pre-whitening')

            # phased series after whitening
            plt.subplot(224)
            plt.plot(wphase,wmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC after pre-whitening')

        plt.tight_layout()
        plt.savefig(plotfit, format='png', pad_inches=0.0)
        plt.close()

        if isinstance(plotfit, str) or isinstance(plotfit, Strio):
            returndict['fitplotfile'] = plotfit

    return returndict


[docs]def gls_prewhiten(times, mags, errs,
                  fourierorder=3,  # 3rd order series to start with
                  initfparams=None,
                  startp_gls=None,
                  endp_gls=None,
                  stepsize=1.0e-4,
                  autofreq=True,
                  sigclip=30.0,
                  magsarefluxes=False,
                  nbestpeaks=5,
                  nworkers=4,
                  plotfits=None):
    '''Iterative pre-whitening of a magnitude series using the L-S periodogram.

    This finds the best period, fits a fourier series with the best period, then
    whitens the time series with the best period, and repeats until `nbestpeaks`
    are done.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to iteratively pre-whiten.

    fourierorder : int
        The Fourier order of the sinusoidal signal to fit to the time-series and
        iteratively remove.

    initfparams : list or None
        If this is provided, should be a list of Fourier amplitudes and phases
        in the following format::

            [ampl_1, ampl_2, ampl_3, ..., ampl_X,
             pha_1, pha_2, pha_3, ..., pha_X]

        where `X` is the Fourier order. These are usually the output of a
        previous Fourier fit to the light curve (from
        :py:func:`astrobase.lcfit.sinusoidal.fourier_fit_magseries` for
        example). You MUST provide ONE of `fourierorder` and `initfparams`, but
        not both. If both are provided or both are None, a sinusoidal signal of
        Fourier order 3 will be used by default.

    startp_gls, endp_gls : float or None
        If these are provided, will serve as input to the Generalized
        Lomb-Scargle function that will attempt to find the best `nbestpeaks`
        periods in the time-series. These set the minimum and maximum period to
        search for in the time-series.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid for
        the period search.

    autofreq : bool
        If this is True, the value of `stepsize` will be ignored and the
        :py:func:`astrobase.periodbase.get_frequency_grid` function will be used
        to generate a frequency grid based on `startp`, and `endp`. If these are
        None as well, `startp` will be set to 0.1 and `endp` will be set to
        `times.max() - times.min()`.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes, set
        this to True.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    nworkers : int
        The number of parallel workers to use when calculating the periodogram.

    plotfits : None or str
        If this is a str, should indicate the file to which a plot of the
        successive iterations of pre-whitening will be written to. This will
        contain a row of plots indicating the before/after states of the light
        curves for each round of pre-whitening.

    Returns
    -------

    (bestperiods, plotfile) : tuple
        This returns a list of the best periods (with the "highest" peak in the
        periodogram) after each round of pre-whitening is done. If plotfit is a
        str, will also return the path to the generated plot file.

    '''

    stimes, smags, serrs = sigclip_magseries(times, mags, errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # now start the cycle by doing an GLS on the initial timeseries
    gls = pgen_lsp(stimes, smags, serrs,
                   magsarefluxes=magsarefluxes,
                   startp=startp_gls,
                   endp=endp_gls,
                   autofreq=autofreq,
                   sigclip=sigclip,
                   stepsize=stepsize,
                   nworkers=nworkers)

    LOGINFO('round %s: period = %.6f' % (0, gls['bestperiod']))

    if plotfits and isinstance(plotfits, str):

        plt.figure(figsize=(20,6*nbestpeaks))

        nplots = nbestpeaks + 1

        # periodogram
        plt.subplot(nplots,3,1)
        plt.plot(gls['periods'],gls['lspvals'])
        plt.xlabel('period [days]')
        plt.ylabel('GLS power')
        plt.xscale('log')
        plt.title('round 0, best period = %.6f' % gls['bestperiod'])

        # unphased LC
        plt.subplot(nplots,3,2)
        plt.plot(stimes, smags,
                 linestyle='none', marker='o',ms=1.0,rasterized=True)
        if not magsarefluxes:
            plt.gca().invert_yaxis()
            plt.ylabel('magnitude')
        else:
            plt.ylabel('flux')
        plt.xlabel('JD')
        plt.title('unphased LC before whitening')

        # phased LC
        plt.subplot(nplots,3,3)
        phased = phase_magseries(stimes, smags,
                                 gls['bestperiod'], stimes.min())

        plt.plot(phased['phase'], phased['mags'],
                 linestyle='none', marker='o',ms=1.0,rasterized=True)
        if not magsarefluxes:
            plt.ylabel('magnitude')
            plt.gca().invert_yaxis()
        else:
            plt.ylabel('flux')
        plt.xlabel('phase')
        plt.title('phased LC before whitening: P = %.6f' % gls['bestperiod'])

    # set up the initial times, mags, errs, period
    wtimes, wmags, werrs = stimes, smags, serrs
    wperiod = gls['bestperiod']

    # start the best periods list
    bestperiods = []

    # now go through the rest of the cycles
    for fitind in range(nbestpeaks):

        wfseries = fourier_fit_magseries(wtimes, wmags, werrs, wperiod,
                                         fourierorder=fourierorder,
                                         fourierparams=initfparams,
                                         magsarefluxes=magsarefluxes,
                                         sigclip=sigclip)

        wffitparams = wfseries['fitinfo']['finalparams']

        wseries = prewhiten_magseries(wtimes, wmags, werrs,
                                      wperiod,
                                      wffitparams,
                                      magsarefluxes=magsarefluxes,
                                      sigclip=sigclip)

        LOGINFO('round %s: period = %.6f' % (fitind+1, wperiod))
        bestperiods.append(wperiod)

        # update the mag series with whitened version
        wtimes, wmags, werrs = (
            wseries['wtimes'], wseries['wmags'], wseries['werrs']
        )

        # redo the periodogram
        wgls = pgen_lsp(wtimes, wmags, werrs,
                        magsarefluxes=magsarefluxes,
                        startp=startp_gls,
                        endp=endp_gls,
                        autofreq=autofreq,
                        sigclip=sigclip,
                        stepsize=stepsize,
                        nworkers=nworkers)
        wperiod = wgls['bestperiod']
        bestperiods.append(wperiod)

        # make plots if requested
        if plotfits and isinstance(plotfits, str):

            # periodogram
            plt.subplot(nplots,3,4+fitind*3)
            plt.plot(wgls['periods'],wgls['lspvals'])
            plt.xlabel('period [days]')
            plt.ylabel('LSP power')
            plt.xscale('log')
            plt.title('round %s, best period = %.6f' % (fitind+1,
                                                        wgls['bestperiod']))

            # unphased LC
            plt.subplot(nplots,3,5+fitind*3)
            plt.plot(wtimes, wmags,
                     linestyle='none', marker='o',ms=1.0,rasterized=True)
            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('flux')
            plt.xlabel('JD')
            plt.title('unphased LC after whitening')

            # phased LC
            plt.subplot(nplots,3,6+fitind*3)
            wphased = phase_magseries(wtimes, wmags,
                                      wperiod, stimes.min())

            plt.plot(wphased['phase'], wphased['mags'],
                     linestyle='none', marker='o',ms=1.0,rasterized=True)
            if not magsarefluxes:
                plt.ylabel('magnitude')
                plt.gca().invert_yaxis()
            else:
                plt.ylabel('flux')
            plt.xlabel('phase')
            plt.title('phased LC after whitening: P = %.6f' % wperiod)

    # in the end, write out the plot
    if plotfits and isinstance(plotfits, str):

        plt.subplots_adjust(hspace=0.2,wspace=0.4)
        plt.savefig(plotfits, bbox_inches='tight')
        plt.close('all')
        return bestperiods, os.path.abspath(plotfits)

    else:

        return bestperiods


[docs]def mask_signal(times, mags, errs,
                signalperiod,
                signalepoch,
                magsarefluxes=False,
                maskphases=(0,0,0.5,1.0),
                maskphaselength=0.1,
                plotfit=None,
                plotfitphasedlconly=True,
                sigclip=30.0):
    '''This removes repeating signals in the magnitude time series.

    Useful for masking planetary transit signals in light curves to search for
    other variability.

    A small worked example of using this and `prewhiten_magseries` above:

    https://github.com/waqasbhatti/astrobase/issues/77#issuecomment-463803558

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to run the masking on.

    signalperiod : float
        The period of the signal to mask.

    signalepoch : float
        The epoch of the signal to mask.

    magsarefluxes : bool
        Set to True if `mags` is actually an array of fluxes.

    maskphases : sequence of floats
        This defines which phase values will be masked. For each item in this
        sequence, this function will mask a length of phase given by
        `maskphaselength` centered on each `maskphases` value, and remove all LC
        points in these regions from the light curve.

    maskphaselength : float
        The length in phase to mask for each phase value provided in
        `maskphases`.

    plotfit : str or None
        If provided as a str, indicates the output plot file.

    plotfitphasedlconly : bool
        If True, will only plot the effect of masking the signal as requested on
        the phased LC. If False, will also plot the unphased LC.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    '''

    stimes, smags, serrs = sigclip_magseries(times, mags, errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # now phase the light curve using the period and epoch provided
    phases = (
        (stimes - signalepoch)/signalperiod -
        np.floor((stimes - signalepoch)/signalperiod)
    )

    # mask the requested phases using the mask length (in phase units)
    # this gets all the masks into one array
    masks = np.array([(np.abs(phases - x) > maskphaselength)
                      for x in maskphases])
    # this flattens the masks to a single array for all combinations
    masks = np.all(masks,axis=0)

    # apply the mask to the times, mags, and errs
    mphases = phases[masks]
    mtimes = stimes[masks]
    mmags = smags[masks]
    merrs = serrs[masks]

    returndict = {'mphases':mphases,
                  'mtimes':mtimes,
                  'mmags':mmags,
                  'merrs':merrs}

    # make the fit plot if required
    if plotfit and isinstance(plotfit, str) or isinstance(plotfit, Strio):

        if plotfitphasedlconly:
            plt.figure(figsize=(10,4.8))
        else:
            plt.figure(figsize=(16,9.6))

        if plotfitphasedlconly:

            # phased series before whitening
            plt.subplot(121)
            plt.plot(phases,smags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC before signal masking')

            # phased series after whitening
            plt.subplot(122)
            plt.plot(mphases,mmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC after signal masking')

        else:

            # time series before whitening
            plt.subplot(221)
            plt.plot(stimes,smags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('JD')
            plt.title('LC before signal masking')

            # time series after whitening
            plt.subplot(222)
            plt.plot(mtimes,mmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('JD')
            plt.title('LC after signal masking')

            # phased series before whitening
            plt.subplot(223)
            plt.plot(phases,smags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC before signal masking')

            # phased series after whitening
            plt.subplot(224)
            plt.plot(mphases,mmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC after signal masking')

        plt.tight_layout()
        plt.savefig(plotfit, format='png', pad_inches=0.0)
        plt.close()

        if isinstance(plotfit, str) or isinstance(plotfit, Strio):
            returndict['fitplotfile'] = plotfit

    return returndict