
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# varfeatures.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Jan 2017
# License: MIT. See the LICENSE file for more details.

'''
Calculates light curve features for variability classification.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

from numpy import nan as npnan, sum as npsum, abs as npabs, \
    roll as nproll, isfinite as npisfinite, std as npstd, \
    sign as npsign, sqrt as npsqrt, mean as npmean, median as npmedian, \
    array as nparray, percentile as nppercentile, \
    polyfit as nppolyfit, var as npvar, max as npmax, min as npmin, \
    log10 as nplog10, arange as nparange, pi as MPI, floor as npfloor, \
    argsort as npargsort, cos as npcos, sin as npsin, tan as nptan, \
    where as npwhere, linspace as nplinspace, \
    zeros_like as npzeros_like, full_like as npfull_like, all as npall, \
    correlate as npcorrelate, nonzero as npnonzero, diff as npdiff, exp as npexp

from scipy.stats import skew as spskew, kurtosis as spkurtosis
from scipy.signal import savgol_filter


###################
## LOCAL IMPORTS ##
###################

from ..lcmath import sigclip_magseries, time_bin_magseries_with_errs


##########################################
## BASE VARIABILITY FEATURE COMPUTATION ##
##########################################

def stetson_jindex(ftimes, fmags, ferrs, weightbytimediff=False):
    '''This calculates the Stetson J index for the magseries, based on
    consecutive pairs of observations.

    Based on Nicole Loncke's work for her Planets and Life certificate at
    Princeton in 2014.

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    weightbytimediff : bool
        If this is True, the Stetson index for any pair of mags will be
        reweighted by the difference in times between them using the scheme in
        Fruth+ 2012 and Zhang+ 2003 (as seen in Sokolovsky+ 2017)::

            w_i = exp(- (t_i+1 - t_i)/ delta_t )

    Returns
    -------

    float
        The calculated Stetson J variability index.

    '''

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet/(ndet - 1))
        sigma_i = delta_prefactor*(fmags - medmag)/ferrs

        # Nicole's clever trick to advance indices by 1 and do x_i*x_(i+1)
        sigma_j = nproll(sigma_i,1)

        if weightbytimediff:

            difft = npdiff(ftimes)
            deltat = npmedian(difft)

            weights_i = npexp(- difft/deltat )
            products = (weights_i*sigma_i[1:]*sigma_j[1:])

        else:
            # ignore first elem since it's actually x_0*x_n
            products = (sigma_i*sigma_j)[1:]

        stetsonj = (
            npsum(npsign(products) * npsqrt(npabs(products)))
        ) / ndet

        return stetsonj

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson J index')
        return npnan


def stetson_kindex(fmags, ferrs):
    '''This calculates the Stetson K index (a robust measure of the kurtosis).

    Parameters
    ----------

    fmags,ferrs : np.array
        The input mag/flux time-series to process. Must have all non-finite
        elements removed.

    Returns
    -------

    float
        The Stetson K variability index.

    '''

    # use a fill-in value for the errors if they're None
    if ferrs is None:
        ferrs = npfull_like(fmags, 0.005)

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet/(ndet - 1))
        sigma_i = delta_prefactor*(fmags - medmag)/ferrs

        stetsonk = (
            npsum(npabs(sigma_i))/(npsqrt(npsum(sigma_i*sigma_i))) *
            (ndet**(-0.5))
        )

        return stetsonk

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson K index')
        return npnan


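# The function below is an illustrative usage sketch, not part of the
# original module: it evaluates the Stetson J and K indices on a synthetic
# sinusoidal mag series. The helper name `_demo_stetson_indices` and all
# numbers (period, amplitude, noise level) are assumptions for this demo.
def _demo_stetson_indices():
    import numpy as np
    rng = np.random.default_rng(42)
    # 500 unevenly sampled observations over 30 days
    t = np.sort(rng.uniform(0.0, 30.0, 500))
    # a 0.05 mag sinusoid with a 3.1 day period plus 0.01 mag Gaussian noise
    mags = 12.0 + 0.05*np.sin(2.0*np.pi*t/3.1) + rng.normal(0.0, 0.01, t.size)
    errs = np.full_like(mags, 0.01)
    # a variable star should give |J| well above 0; pure noise gives J near 0
    stetj = stetson_jindex(t, mags, errs, weightbytimediff=True)
    stetk = stetson_kindex(mags, errs)
    return stetj, stetk

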
def lightcurve_moments(ftimes, fmags, ferrs):
    '''This calculates the weighted mean, stdev, median, MAD, percentiles,
    skew, kurtosis, fraction of LC beyond 1-stdev, and IQR.

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    Returns
    -------

    dict
        A dict with all of the light curve moments calculated.

    '''

    ndet = len(fmags)

    if ndet > 9:

        # now calculate the various things we need
        series_median = npmedian(fmags)
        series_wmean = (
            npsum(fmags*(1.0/(ferrs*ferrs)))/npsum(1.0/(ferrs*ferrs))
        )
        series_mad = npmedian(npabs(fmags - series_median))
        series_stdev = 1.483*series_mad
        series_skew = spskew(fmags)
        series_kurtosis = spkurtosis(fmags)

        # get the beyond1std fraction
        series_above1std = len(fmags[fmags > (series_median + series_stdev)])
        series_below1std = len(fmags[fmags < (series_median - series_stdev)])

        # this is the fraction beyond 1 stdev
        series_beyond1std = (series_above1std + series_below1std)/float(ndet)

        # get the magnitude percentiles
        series_mag_percentiles = nppercentile(
            fmags,
            [5.0,10,17.5,25,32.5,40,60,67.5,75,82.5,90,95]
        )

        return {
            'median':series_median,
            'wmean':series_wmean,
            'mad':series_mad,
            'stdev':series_stdev,
            'skew':series_skew,
            'kurtosis':series_kurtosis,
            'beyond1std':series_beyond1std,
            'mag_percentiles':series_mag_percentiles,
            'mag_iqr': series_mag_percentiles[8] - series_mag_percentiles[3],
        }

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate light curve moments')
        return None


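# Illustrative sketch, not part of the original module: cross-checks the
# 'mag_iqr' output of `lightcurve_moments` against a direct 25th/75th
# percentile computation. The helper name and the synthetic data are
# assumptions for this demo only.
def _demo_lightcurve_moments():
    import numpy as np
    rng = np.random.default_rng(7)
    t = np.sort(rng.uniform(0.0, 10.0, 300))
    mags = rng.normal(14.0, 0.02, t.size)
    errs = np.full_like(mags, 0.02)
    moments = lightcurve_moments(t, mags, errs)
    # 'mag_iqr' is percentile(75) - percentile(25) of the mags
    direct_iqr = np.percentile(mags, 75) - np.percentile(mags, 25)
    assert np.isclose(moments['mag_iqr'], direct_iqr)
    return moments

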
def lightcurve_flux_measures(ftimes, fmags, ferrs, magsarefluxes=False):
    '''This calculates percentiles and percentile ratios of the flux.

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    magsarefluxes : bool
        If True, the `fmags` array is assumed to already contain fluxes, so
        no mag-to-flux conversion is done before calculating the percentiles.

    Returns
    -------

    dict
        A dict with all of the light curve flux percentiles and percentile
        ratios calculated.

    '''

    ndet = len(fmags)

    if ndet > 9:

        # get the fluxes
        if magsarefluxes:
            series_fluxes = fmags
        else:
            series_fluxes = 10.0**(-0.4*fmags)

        series_flux_median = npmedian(series_fluxes)

        # get the percent_amplitude for the fluxes
        series_flux_percent_amplitude = (
            npmax(npabs(series_fluxes))/series_flux_median
        )

        # get the flux percentiles
        series_flux_percentiles = nppercentile(
            series_fluxes,
            [5.0,10,17.5,25,32.5,40,60,67.5,75,82.5,90,95]
        )
        series_frat_595 = (
            series_flux_percentiles[-1] - series_flux_percentiles[0]
        )
        series_frat_1090 = (
            series_flux_percentiles[-2] - series_flux_percentiles[1]
        )
        series_frat_175825 = (
            series_flux_percentiles[-3] - series_flux_percentiles[2]
        )
        series_frat_2575 = (
            series_flux_percentiles[-4] - series_flux_percentiles[3]
        )
        series_frat_325675 = (
            series_flux_percentiles[-5] - series_flux_percentiles[4]
        )
        series_frat_4060 = (
            series_flux_percentiles[-6] - series_flux_percentiles[5]
        )

        # calculate the flux percentile ratios
        series_flux_percentile_ratio_mid20 = series_frat_4060/series_frat_595
        series_flux_percentile_ratio_mid35 = series_frat_325675/series_frat_595
        series_flux_percentile_ratio_mid50 = series_frat_2575/series_frat_595
        series_flux_percentile_ratio_mid65 = series_frat_175825/series_frat_595
        series_flux_percentile_ratio_mid80 = series_frat_1090/series_frat_595

        # calculate the ratio of F595/median flux
        series_percent_difference_flux_percentile = (
            series_frat_595/series_flux_median
        )
        series_percentile_magdiff = -2.5*nplog10(
            series_percent_difference_flux_percentile
        )

        return {
            'flux_median':series_flux_median,
            'flux_percent_amplitude':series_flux_percent_amplitude,
            'flux_percentiles':series_flux_percentiles,
            'flux_percentile_ratio_mid20':series_flux_percentile_ratio_mid20,
            'flux_percentile_ratio_mid35':series_flux_percentile_ratio_mid35,
            'flux_percentile_ratio_mid50':series_flux_percentile_ratio_mid50,
            'flux_percentile_ratio_mid65':series_flux_percentile_ratio_mid65,
            'flux_percentile_ratio_mid80':series_flux_percentile_ratio_mid80,
            'percent_difference_flux_percentile':series_percentile_magdiff,
        }

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate flux measures')
        return None


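# Illustrative sketch, not part of the original module: after converting
# mags to fluxes, 'flux_percentile_ratio_mid50' should equal
# (F_75 - F_25) / (F_95 - F_5) of the flux distribution, and this demo
# verifies that directly. Names and synthetic data are demo assumptions.
def _demo_flux_measures():
    import numpy as np
    rng = np.random.default_rng(11)
    t = np.sort(rng.uniform(0.0, 10.0, 400))
    mags = 15.0 + rng.normal(0.0, 0.05, t.size)
    errs = np.full_like(mags, 0.05)
    fm = lightcurve_flux_measures(t, mags, errs, magsarefluxes=False)
    fluxes = 10.0**(-0.4*mags)
    f5, f25, f75, f95 = np.percentile(fluxes, [5, 25, 75, 95])
    assert np.isclose(fm['flux_percentile_ratio_mid50'],
                      (f75 - f25)/(f95 - f5))
    return fm

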
def lightcurve_ptp_measures(ftimes, fmags, ferrs):
    '''This calculates various point-to-point measures (`eta` in Kim+ 2014).

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    Returns
    -------

    dict
        A dict with values of the point-to-point measures, including the
        `eta` variability index (often used as its inverse `inveta` to have
        the same sense as increasing variability index -> more likely a
        variable star).

    '''

    ndet = len(fmags)

    if ndet > 9:

        timediffs = npdiff(ftimes)

        # get rid of stuff with time diff = 0.0
        nzind = npnonzero(timediffs)
        ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

        # recalculate ndet and diffs
        ndet = ftimes.size
        timediffs = npdiff(ftimes)

        # calculate the point to point measures
        p2p_abs_magdiffs = npabs(npdiff(fmags))
        p2p_squared_magdiffs = npdiff(fmags)*npdiff(fmags)

        robstd = npmedian(npabs(fmags - npmedian(fmags)))*1.483
        robvar = robstd*robstd

        # these are eta from the Kim+ 2014 paper - ratio of point-to-point
        # difference to the variance of the entire series

        # this is the robust version
        eta_robust = npmedian(p2p_abs_magdiffs)/robvar
        eta_robust = eta_robust/(ndet - 1.0)

        # this is the usual version
        eta_normal = npsum(p2p_squared_magdiffs)/npvar(fmags)
        eta_normal = eta_normal/(ndet - 1.0)

        timeweights = 1.0/(timediffs*timediffs)

        # this is eta_e modified for uneven sampling from the Kim+ 2014 paper
        eta_uneven_normal = (
            (npsum(timeweights*p2p_squared_magdiffs) /
             (npvar(fmags) * npsum(timeweights))) *
            npmean(timeweights) *
            (ftimes.max() - ftimes.min())*(ftimes.max() - ftimes.min())
        )

        # this is robust eta_e modified for uneven sampling from the Kim+
        # 2014 paper
        eta_uneven_robust = (
            (npsum(timeweights*p2p_abs_magdiffs) /
             (robvar * npsum(timeweights))) *
            npmedian(timeweights) *
            (ftimes[-1] - ftimes[0])*(ftimes[-1] - ftimes[0])
        )

        return {
            'eta_normal':eta_normal,
            'eta_robust':eta_robust,
            'eta_uneven_normal':eta_uneven_normal,
            'eta_uneven_robust':eta_uneven_robust
        }

    else:

        return None


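# Illustrative sketch, not part of the original module: for pure Gaussian
# white noise the `eta` statistic ('eta_normal') should come out close to
# 2.0, since E[(m_{i+1} - m_i)^2] = 2*sigma^2 for uncorrelated points. This
# is why a small eta (large inveta) flags correlated, i.e. likely
# astrophysical, variability. Names and data are demo assumptions.
def _demo_ptp_measures():
    import numpy as np
    rng = np.random.default_rng(3)
    t = np.sort(rng.uniform(0.0, 100.0, 5000))
    mags = rng.normal(13.0, 0.01, t.size)
    errs = np.full_like(mags, 0.01)
    ptp = lightcurve_ptp_measures(t, mags, errs)
    # expect roughly 2.0 for uncorrelated Gaussian noise
    return ptp['eta_normal']

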
def nonperiodic_lightcurve_features(times, mags, errs, magsarefluxes=False):
    '''This calculates the following nonperiodic features of the light curve
    (listed in Richards et al. 2011):

    - amplitude
    - beyond1std
    - flux_percentile_ratio_mid20
    - flux_percentile_ratio_mid35
    - flux_percentile_ratio_mid50
    - flux_percentile_ratio_mid65
    - flux_percentile_ratio_mid80
    - linear_trend
    - max_slope
    - median_absolute_deviation
    - median_buffer_range_percentage
    - pair_slope_trend
    - percent_amplitude
    - percent_difference_flux_percentile
    - skew
    - stdev
    - timelength
    - mintime
    - maxtime

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to process.

    magsarefluxes : bool
        If True, will treat values in `mags` as fluxes instead of magnitudes.

    Returns
    -------

    dict
        A dict containing all of the features listed above.

    '''

    # remove nans first
    finiteind = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[finiteind], mags[finiteind], errs[finiteind]

    # remove zero errors
    nzind = npnonzero(ferrs)
    ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

    ndet = len(fmags)

    if ndet > 9:

        # calculate the moments
        moments = lightcurve_moments(ftimes, fmags, ferrs)

        # calculate the flux measures
        fluxmeasures = lightcurve_flux_measures(ftimes, fmags, ferrs,
                                                magsarefluxes=magsarefluxes)

        # calculate the point-to-point measures
        ptpmeasures = lightcurve_ptp_measures(ftimes, fmags, ferrs)

        # get the length in time
        mintime, maxtime = npmin(ftimes), npmax(ftimes)
        timelength = maxtime - mintime

        # get the amplitude
        series_amplitude = 0.5*(npmax(fmags) - npmin(fmags))

        # calculate the linear fit to the entire mag series
        fitcoeffs = nppolyfit(ftimes, fmags, 1, w=1.0/(ferrs*ferrs))
        # nppolyfit returns the highest-degree coefficient first, so the
        # slope of the degree-1 fit is the first element
        series_linear_slope = fitcoeffs[0]

        # calculate the magnitude ratio (from the WISE paper)
        series_magratio = (
            (npmax(fmags) - moments['median']) /
            (npmax(fmags) - npmin(fmags))
        )

        # this is the dictionary returned containing all the measures
        measures = {
            'ndet':fmags.size,
            'mintime':mintime,
            'maxtime':maxtime,
            'timelength':timelength,
            'amplitude':series_amplitude,
            'ndetobslength_ratio':ndet/timelength,
            'linear_fit_slope':series_linear_slope,
            'magnitude_ratio':series_magratio,
        }

        if moments:
            measures.update(moments)
        if ptpmeasures:
            measures.update(ptpmeasures)
        if fluxmeasures:
            measures.update(fluxmeasures)

        return measures

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate non-periodic features')
        return None


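# Illustrative sketch, not part of the original module: runs the full
# nonperiodic feature extraction on a synthetic transit-like flux series.
# The dip depth, duration, and all names here are demo assumptions.
def _demo_nonperiodic_features():
    import numpy as np
    rng = np.random.default_rng(5)
    t = np.sort(rng.uniform(0.0, 27.0, 2000))
    fluxes = 1.0 + rng.normal(0.0, 0.001, t.size)
    # inject a 1% deep box-shaped dip between t = 10.0 and t = 10.2
    fluxes[(t > 10.0) & (t < 10.2)] -= 0.01
    errs = np.full_like(fluxes, 0.001)
    feats = nonperiodic_lightcurve_features(t, fluxes, errs,
                                            magsarefluxes=True)
    return feats['amplitude'], feats['beyond1std'], feats['stdev']

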
###############################################################
## KEPLER COMBINED DIFFERENTIAL PHOTOMETRIC PRECISION (CDPP) ##
###############################################################

def gilliland_cdpp(times, mags, errs,
                   windowlength=97,
                   polyorder=2,
                   binsize=23400,  # in seconds: 6.5 hours for classic CDPP
                   sigclip=5.0,
                   magsarefluxes=False,
                   **kwargs):
    '''This calculates the CDPP of a timeseries using the method in the paper:

    Gilliland, R. L., Chaplin, W. J., Dunham, E. W., et al. 2011, ApJS, 197, 6
    (http://adsabs.harvard.edu/abs/2011ApJS..197....6G)

    The steps are:

    - pass the time-series through a Savitzky-Golay filter.

      - we use `scipy.signal.savgol_filter`, `**kwargs` are passed to this.

      - also see: http://scipy.github.io/old-wiki/pages/Cookbook/SavitzkyGolay.

      - the `windowlength` is the number of LC points to use (Kepler uses 2
        days = (1440 minutes/day / 30 minutes/LC point) x 2 days = 96 -> 97 LC
        points).

      - the `polyorder` is a quadratic by default.

    - subtract the smoothed time-series from the actual light curve.

    - sigma clip the remaining LC.

    - get the binned mag series by averaging over 6.5 hour bins, only
      retaining bins with at least 7 points.

    - the standard deviation of the binned averages is the CDPP.

    - multiply this by 1.168 to correct for over-subtraction of white-noise.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to calculate CDPP for.

    windowlength : int
        The smoothing window size to use.

    polyorder : int
        The polynomial order to use in the Savitzky-Golay smoothing.

    binsize : int
        The bin size to use for binning the light curve.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed
        using the number provided as the sigma-multiplier to cut out from the
        input time-series.

        If a list of two ints/floats is provided, the function will perform
        an 'asymmetric' sigma-clip. The first element in this list is the
        sigma value to use for fainter flux/mag values; the second element in
        this list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly
        set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If True, indicates the input time-series is fluxes and not mags.

    kwargs : additional kwargs
        These are passed directly to `scipy.signal.savgol_filter`.

    Returns
    -------

    float
        The calculated CDPP value.

    '''

    # if no errs are given, assume 0.1% errors
    if errs is None:
        errs = 0.001*mags

    # get rid of nans first
    find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes = times[find]
    fmags = mags[find]
    ferrs = errs[find]

    if ftimes.size < (3*windowlength):
        LOGERROR('not enough LC points to calculate CDPP')
        return npnan

    # now get the smoothed mag series using the filter
    # kwargs are provided to the savgol_filter function
    smoothed = savgol_filter(fmags, windowlength, polyorder, **kwargs)
    subtracted = fmags - smoothed

    # sigclip the subtracted light curve, using the sigclip kwarg
    stimes, smags, serrs = sigclip_magseries(ftimes, subtracted, ferrs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # bin over 6.5 hour bins and throw away all bins with less than 7 elements
    binned = time_bin_magseries_with_errs(stimes, smags, serrs,
                                          binsize=binsize,
                                          minbinelems=7)

    bmags = binned['binnedmags']

    # stdev of bin mags x 1.168 -> CDPP
    cdpp = npstd(bmags) * 1.168

    return cdpp


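# Illustrative sketch, not part of the original module: estimates the
# 6.5-hour CDPP of a synthetic Kepler-like long-cadence flux series (one
# point every 29.4 minutes). The cadence, noise level, and all names used
# here are assumptions for this demo only.
def _demo_gilliland_cdpp():
    import numpy as np
    rng = np.random.default_rng(19)
    # ~30 days of long-cadence points at 29.4 min = 1764 s per point
    t = np.arange(0.0, 30.0, 1764.0/86400.0)
    fluxes = 1.0 + rng.normal(0.0, 1.0e-4, t.size)
    errs = np.full_like(fluxes, 1.0e-4)
    # for pure 100 ppm white noise, expect a CDPP well below 100 ppm since
    # each 6.5 hour bin averages down ~13 points
    return gilliland_cdpp(t, fluxes, errs, magsarefluxes=True)

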
#####################
## ROLLUP FUNCTION ##
#####################

def all_nonperiodic_features(times, mags, errs,
                             magsarefluxes=False,
                             stetson_weightbytimediff=True):
    '''This rolls up the feature functions above and returns a single dict.

    NOTE: this doesn't calculate the CDPP to save time since binning and
    smoothing takes a while for dense light curves.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to calculate features for.

    magsarefluxes : bool
        If True, indicates `mags` is actually an array of flux values.

    stetson_weightbytimediff : bool
        If this is True, the Stetson index for any pair of mags will be
        reweighted by the difference in times between them using the scheme in
        Fruth+ 2012 and Zhang+ 2003 (as seen in Sokolovsky+ 2017)::

            w_i = exp(- (t_i+1 - t_i)/ delta_t )

    Returns
    -------

    dict
        Returns a dict with all of the variability features.

    '''

    # remove nans first
    finiteind = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[finiteind], mags[finiteind], errs[finiteind]

    # remove zero errors
    nzind = npnonzero(ferrs)
    ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

    xfeatures = nonperiodic_lightcurve_features(times, mags, errs,
                                                magsarefluxes=magsarefluxes)
    stetj = stetson_jindex(ftimes, fmags, ferrs,
                           weightbytimediff=stetson_weightbytimediff)
    stetk = stetson_kindex(fmags, ferrs)

    # the feature function returns None for very short mag series; fall back
    # to an empty dict so the Stetson indices are still returned
    if xfeatures is None:
        xfeatures = {}

    xfeatures.update({'stetsonj':stetj,
                      'stetsonk':stetk})

    return xfeatures


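# Illustrative sketch, not part of the original module: an end-to-end call of
# the rollup on synthetic data, showing where the Stetson indices land in the
# combined feature dict. All names and values are demo assumptions.
def _demo_all_features():
    import numpy as np
    rng = np.random.default_rng(23)
    t = np.sort(rng.uniform(0.0, 30.0, 1000))
    mags = 12.0 + 0.1*np.sin(2.0*np.pi*t/1.7) + rng.normal(0.0, 0.02, t.size)
    errs = np.full_like(mags, 0.02)
    feats = all_nonperiodic_features(t, mags, errs)
    return feats['stetsonj'], feats['stetsonk'], feats['eta_normal']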