Source code for astrobase.lcmath

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# lcmath.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Feb 2015

'''
Contains various useful tools for calculating various things related to
lightcurves (like phasing, sigma-clipping, finding and filling gaps, etc.)

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

import numpy as np
from numpy import (
    isfinite as npisfinite, median as npmedian, mean as npmean,
    abs as npabs, std as npstddev
)

from scipy.spatial import cKDTree
from scipy.signal import savgol_filter
import scipy.stats


############################
## NORMALIZING MAG SERIES ##
############################

[docs]def find_lc_timegroups(lctimes, mingap=4.0): '''Finds gaps in the provided time-series and indexes them into groups. This finds the gaps in the provided `lctimes` array, so we can figure out which times are for consecutive observations and which represent gaps between seasons or observing eras. Parameters ---------- lctimes : array-like This contains the times to analyze for gaps; assumed to be some form of Julian date. mingap : float This defines how much the difference between consecutive measurements is allowed to be to consider them as parts of different timegroups. By default it is set to 4.0 days. Returns ------- tuple A tuple of the form: `(ngroups, [slice(start_ind_1, end_ind_1), ...])` is returned. This contains the number of groups as the first element, and a list of Python `slice` objects for each time-group found. These can be used directly to index into the array of times to quickly get measurements associated with each group. ''' lc_time_diffs = np.diff(lctimes) group_start_indices = np.where(lc_time_diffs > mingap)[0] if len(group_start_indices) > 0: group_indices = [] for i, gindex in enumerate(group_start_indices): if i == 0: group_indices.append(slice(0,gindex+1)) else: group_indices.append(slice(group_start_indices[i-1]+1,gindex+1)) # at the end, add the slice for the last group to the end of the times # array group_indices.append(slice(group_start_indices[-1]+1,len(lctimes))) # if there's no large gap in the LC, then there's only one group to worry # about else: group_indices = [slice(0,len(lctimes))] return len(group_indices), group_indices
[docs]def normalize_magseries(times, mags, mingap=4.0, normto='globalmedian', magsarefluxes=False, debugmode=False): '''This normalizes the magnitude time-series to a specified value. This is used to normalize time series measurements that may have large time gaps and vertical offsets in mag/flux measurement between these 'timegroups', either due to instrument changes or different filters. NOTE: this works in-place! The mags array will be replaced with normalized mags when this function finishes. Parameters ---------- times,mags : array-like The times (assumed to be some form of JD) and mags (or flux) measurements to be normalized. mingap : float This defines how much the difference between consecutive measurements is allowed to be to consider them as parts of different timegroups. By default it is set to 4.0 days. normto : {'globalmedian', 'zero'} or a float Specifies the normalization type:: 'globalmedian' -> norms each mag to the global median of the LC column 'zero' -> norms each mag to zero a float -> norms each mag to this specified float value. magsarefluxes : bool Indicates if the input `mags` array is actually an array of flux measurements instead of magnitude measurements. If this is set to True, then: - if `normto` is 'zero', then the median flux is divided from each observation's flux value to yield normalized fluxes with 1.0 as the global median. - if `normto` is 'globalmedian', then the global median flux value across the entire time series is multiplied with each measurement. - if `norm` is set to a `float`, then this number is multiplied with the flux value for each measurement. debugmode : bool If this is True, will print out verbose info on each timegroup found. Returns ------- times,normalized_mags : np.arrays Normalized magnitude values after normalization. If normalization fails for some reason, `times` and `normalized_mags` will both be None. ''' ngroups, timegroups = find_lc_timegroups(times, mingap=mingap) # find all the non-nan indices finite_ind = np.isfinite(mags) if any(finite_ind): # find the global median global_mag_median = np.median(mags[finite_ind]) # go through the groups and normalize them to the median for # each group for tgind, tg in enumerate(timegroups): finite_ind = np.isfinite(mags[tg]) # find this timegroup's median mag and normalize the mags in # it to this median group_median = np.median((mags[tg])[finite_ind]) if magsarefluxes: mags[tg] = mags[tg]/group_median else: mags[tg] = mags[tg] - group_median if debugmode: LOGDEBUG('group %s: elems %s, ' 'finite elems %s, median mag %s' % (tgind, len(mags[tg]), len(finite_ind), group_median)) # now that everything is normalized to 0.0, add the global median # offset back to all the mags and write the result back to the dict if isinstance(normto, str) and normto == 'globalmedian': if magsarefluxes: mags = mags * global_mag_median else: mags = mags + global_mag_median # if the normto is a float, add everything to that float and return elif isinstance(normto, float): if magsarefluxes: mags = mags * normto else: mags = mags + normto # anything else just returns the normalized mags as usual return times, mags else: LOGERROR('measurements are all nan!') return None, None
#################### ## SIGMA-CLIPPING ## ####################
[docs]def sigclip_magseries(times, mags, errs, sigclip=None, iterative=False, niterations=None, meanormedian='median', magsarefluxes=False): '''Sigma-clips a magnitude or flux time-series. Selects the finite times, magnitudes (or fluxes), and errors from the passed values, and apply symmetric or asymmetric sigma clipping to them. Parameters ---------- times,mags,errs : np.array The magnitude or flux time-series arrays to sigma-clip. This doesn't assume all values are finite or if they're positive/negative. All of these arrays will have their non-finite elements removed, and then will be sigma-clipped based on the arguments to this function. `errs` is optional. Set it to None if you don't have values for these. A 'faked' `errs` array will be generated if necessary, which can be ignored in the output as well. sigclip : float or int or sequence of two floats/ints or None If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. iterative : bool If this is set to True, will perform iterative sigma-clipping. If `niterations` is not set and this is True, sigma-clipping is iterated until no more points are removed. niterations : int The maximum number of iterations to perform for sigma-clipping. If None, the `iterative` arg takes precedence, and `iterative=True` will sigma-clip until no more points are removed. If `niterations` is not None and `iterative` is False, `niterations` takes precedence and iteration will occur for the specified number of iterations. meanormedian : {'mean', 'median'} Use 'mean' for sigma-clipping based on the mean value, or 'median' for sigma-clipping based on the median value. Default is 'median'. magsareflux : bool True if your "mags" are in fact fluxes, i.e. if "fainter" corresponds to `mags` getting smaller. Returns ------- (stimes, smags, serrs) : tuple The sigma-clipped and nan-stripped time-series. ''' returnerrs = True # fake the errors if they don't exist # this is inconsequential to sigma-clipping # we don't return these dummy values if the input errs are None if errs is None: # assume 0.1% errors if not given # this should work for mags and fluxes errs = 0.001*mags returnerrs = False # filter the input times, mags, errs; do sigclipping and normalization find = npisfinite(times) & npisfinite(mags) & npisfinite(errs) ftimes, fmags, ferrs = times[find], mags[find], errs[find] # get the center value and stdev if meanormedian == 'median': # stddev = 1.483 x MAD center_mag = npmedian(fmags) stddev_mag = (npmedian(npabs(fmags - center_mag))) * 1.483 elif meanormedian == 'mean': center_mag = npmean(fmags) stddev_mag = npstddev(fmags) else: LOGWARNING("unrecognized meanormedian value given to " "sigclip_magseries: %s, defaulting to 'median'" % meanormedian) meanormedian = 'median' center_mag = npmedian(fmags) stddev_mag = (npmedian(npabs(fmags - center_mag))) * 1.483 # sigclip next for a single sigclip value if sigclip and isinstance(sigclip, (float, int)): if not iterative and niterations is None: sigind = (npabs(fmags - center_mag)) < (sigclip * stddev_mag) stimes = ftimes[sigind] smags = fmags[sigind] serrs = ferrs[sigind] else: # # iterative version adapted from scipy.stats.sigmaclip # # First, if niterations is not set, iterate until covergence if niterations is None: delta = 1 this_times = ftimes this_mags = fmags this_errs = ferrs while delta: if meanormedian == 'mean': this_center = npmean(this_mags) this_stdev = npstddev(this_mags) elif meanormedian == 'median': this_center = npmedian(this_mags) this_stdev = ( npmedian(npabs(this_mags - this_center)) ) * 1.483 this_size = this_mags.size # apply the sigclip tsi = ( (npabs(this_mags - this_center)) < (sigclip * this_stdev) ) # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # update delta and go to the top of the loop delta = this_size - this_mags.size else: # If iterating only a certain number of times this_times = ftimes this_mags = fmags this_errs = ferrs iter_num = 0 delta = 1 while iter_num < niterations and delta: if meanormedian == 'mean': this_center = npmean(this_mags) this_stdev = npstddev(this_mags) elif meanormedian == 'median': this_center = npmedian(this_mags) this_stdev = (npmedian(npabs(this_mags - this_center))) * 1.483 this_size = this_mags.size # apply the sigclip tsi = ( (npabs(this_mags - this_center)) < (sigclip * this_stdev) ) # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # update the number of iterations and delta and # go to the top of the loop delta = this_size - this_mags.size iter_num += 1 # final sigclipped versions stimes, smags, serrs = this_times, this_mags, this_errs # this handles sigclipping for asymmetric +ve and -ve clip values elif sigclip and isinstance(sigclip, (list,tuple)) and len(sigclip) == 2: # sigclip is passed as [dimmingclip, brighteningclip] dimmingclip = sigclip[0] brighteningclip = sigclip[1] if not iterative and niterations is None: if magsarefluxes: nottoodimind = ( (fmags - center_mag) > (-dimmingclip*stddev_mag) ) nottoobrightind = ( (fmags - center_mag) < (brighteningclip*stddev_mag) ) else: nottoodimind = ( (fmags - center_mag) < (dimmingclip*stddev_mag) ) nottoobrightind = ( (fmags - center_mag) > (-brighteningclip*stddev_mag) ) sigind = nottoodimind & nottoobrightind stimes = ftimes[sigind] smags = fmags[sigind] serrs = ferrs[sigind] else: # # iterative version adapted from scipy.stats.sigmaclip # if niterations is None: delta = 1 this_times = ftimes this_mags = fmags this_errs = ferrs while delta: if meanormedian == 'mean': this_center = npmean(this_mags) this_stdev = npstddev(this_mags) elif meanormedian == 'median': this_center = npmedian(this_mags) this_stdev = (npmedian(npabs(this_mags - this_center))) * 1.483 this_size = this_mags.size if magsarefluxes: nottoodimind = ( (this_mags - this_center) > (-dimmingclip*this_stdev) ) nottoobrightind = ( (this_mags - this_center) < (brighteningclip*this_stdev) ) else: nottoodimind = ( (this_mags - this_center) < (dimmingclip*this_stdev) ) nottoobrightind = ( (this_mags - this_center) > (-brighteningclip*this_stdev) ) # apply the sigclip tsi = nottoodimind & nottoobrightind # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # update delta and go to top of the loop delta = this_size - this_mags.size else: # If iterating only a certain number of times this_times = ftimes this_mags = fmags this_errs = ferrs iter_num = 0 delta = 1 while iter_num < niterations and delta: if meanormedian == 'mean': this_center = npmean(this_mags) this_stdev = npstddev(this_mags) elif meanormedian == 'median': this_center = npmedian(this_mags) this_stdev = (npmedian(npabs(this_mags - this_center))) * 1.483 this_size = this_mags.size if magsarefluxes: nottoodimind = ( (this_mags - this_center) > (-dimmingclip*this_stdev) ) nottoobrightind = ( (this_mags - this_center) < (brighteningclip*this_stdev) ) else: nottoodimind = ( (this_mags - this_center) < (dimmingclip*this_stdev) ) nottoobrightind = ( (this_mags - this_center) > (-brighteningclip*this_stdev) ) # apply the sigclip tsi = nottoodimind & nottoobrightind # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # update the number of iterations and delta # and go to top of the loop delta = this_size - this_mags.size iter_num += 1 # final sigclipped versions stimes, smags, serrs = this_times, this_mags, this_errs else: stimes = ftimes smags = fmags serrs = ferrs if returnerrs: return stimes, smags, serrs else: return stimes, smags, None
[docs]def sigclip_magseries_with_extparams(times, mags, errs, extparams, sigclip=None, iterative=False, magsarefluxes=False): '''Sigma-clips a magnitude or flux time-series and associated measurement arrays. Selects the finite times, magnitudes (or fluxes), and errors from the passed values, and apply symmetric or asymmetric sigma clipping to them. Uses the same array indices as these values to filter out the values of all arrays in the `extparams` list. This can be useful for simultaneously sigma-clipping a magnitude/flux time-series along with their associated values of external parameters, such as telescope hour angle, zenith distance, temperature, moon phase, etc. Parameters ---------- times,mags,errs : np.array The magnitude or flux time-series arrays to sigma-clip. This doesn't assume all values are finite or if they're positive/negative. All of these arrays will have their non-finite elements removed, and then will be sigma-clipped based on the arguments to this function. `errs` is optional. Set it to None if you don't have values for these. A 'faked' `errs` array will be generated if necessary, which can be ignored in the output as well. extparams : list of np.array This is a list of all external parameter arrays to simultaneously filter along with the magnitude/flux time-series. All of these arrays should have the same length as the `times`, `mags`, and `errs` arrays. sigclip : float or int or sequence of two floats/ints or None If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. iterative : bool If this is set to True, will perform iterative sigma-clipping. If `niterations` is not set and this is True, sigma-clipping is iterated until no more points are removed. magsareflux : bool True if your "mags" are in fact fluxes, i.e. if "fainter" corresponds to `mags` getting smaller. Returns ------- (stimes, smags, serrs) : tuple The sigma-clipped and nan-stripped time-series in `stimes`, `smags`, `serrs` and the associated values of the `extparams` in `sextparams`. ''' returnerrs = True # fake the errors if they don't exist # this is inconsequential to sigma-clipping # we don't return these dummy values if the input errs are None if errs is None: # assume 0.1% errors if not given # this should work for mags and fluxes errs = 0.001*mags returnerrs = False # filter the input times, mags, errs; do sigclipping and normalization find = npisfinite(times) & npisfinite(mags) & npisfinite(errs) ftimes, fmags, ferrs = times[find], mags[find], errs[find] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[find] # get the median and stdev = 1.483 x MAD median_mag = npmedian(fmags) stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483 # sigclip next for a single sigclip value if sigclip and isinstance(sigclip, (float, int)): if not iterative: sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag) stimes = ftimes[sigind] smags = fmags[sigind] serrs = ferrs[sigind] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[sigind] else: # # iterative version adapted from scipy.stats.sigmaclip # delta = 1 this_times = ftimes this_mags = fmags this_errs = ferrs while delta: this_median = npmedian(this_mags) this_stdev = (npmedian(npabs(this_mags - this_median))) * 1.483 this_size = this_mags.size # apply the sigclip tsi = (npabs(this_mags - this_median)) < (sigclip * this_stdev) # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[tsi] # update delta and go to the top of the loop delta = this_size - this_mags.size # final sigclipped versions stimes, smags, serrs = this_times, this_mags, this_errs # this handles sigclipping for asymmetric +ve and -ve clip values elif sigclip and isinstance(sigclip, (list, tuple)) and len(sigclip) == 2: # sigclip is passed as [dimmingclip, brighteningclip] dimmingclip = sigclip[0] brighteningclip = sigclip[1] if not iterative: if magsarefluxes: nottoodimind = ( (fmags - median_mag) > (-dimmingclip*stddev_mag) ) nottoobrightind = ( (fmags - median_mag) < (brighteningclip*stddev_mag) ) else: nottoodimind = ( (fmags - median_mag) < (dimmingclip*stddev_mag) ) nottoobrightind = ( (fmags - median_mag) > (-brighteningclip*stddev_mag) ) sigind = nottoodimind & nottoobrightind stimes = ftimes[sigind] smags = fmags[sigind] serrs = ferrs[sigind] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[sigind] else: # # iterative version adapted from scipy.stats.sigmaclip # delta = 1 this_times = ftimes this_mags = fmags this_errs = ferrs while delta: this_median = npmedian(this_mags) this_stdev = (npmedian(npabs(this_mags - this_median))) * 1.483 this_size = this_mags.size if magsarefluxes: nottoodimind = ( (this_mags - this_median) > (-dimmingclip*this_stdev) ) nottoobrightind = ( (this_mags - this_median) < (brighteningclip*this_stdev) ) else: nottoodimind = ( (this_mags - this_median) < (dimmingclip*this_stdev) ) nottoobrightind = ( (this_mags - this_median) > (-brighteningclip*this_stdev) ) # apply the sigclip tsi = nottoodimind & nottoobrightind # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[tsi] # update delta and go to top of the loop delta = this_size - this_mags.size # final sigclipped versions stimes, smags, serrs = this_times, this_mags, this_errs else: stimes = ftimes smags = fmags serrs = ferrs if returnerrs: return stimes, smags, serrs, extparams else: return stimes, smags, None, extparams
################# ## PHASING LCS ## #################
[docs]def phase_magseries(times, mags, period, epoch, wrap=True, sort=True): '''Phases a magnitude/flux time-series using a given period and epoch. The equation used is:: phase = (times - epoch)/period - floor((times - epoch)/period) This phases the given magnitude timeseries using the given period and epoch. If wrap is True, wraps the result around 0.0 (and returns an array that has twice the number of the original elements). If sort is True, returns the magnitude timeseries in phase sorted order. Parameters ---------- times,mags : np.array The magnitude/flux time-series values to phase using the provided `period` and `epoch`. Non-fiinite values will be removed. period : float The period to use to phase the time-series. epoch : float The epoch to phase the time-series. This is usually the time-of-minimum or time-of-maximum of some periodic light curve phenomenon. Alternatively, one can use the minimum time value in `times`. wrap : bool If this is True, the returned phased time-series will be wrapped around phase 0.0, which is useful for plotting purposes. The arrays returned will have twice the number of input elements because of this wrapping. sort : bool If this is True, the returned phased time-series will be sorted in increasing phase order. Returns ------- dict A dict of the following form is returned:: {'phase': the phase values, 'mags': the mags/flux values at each phase, 'period': the input `period` used to phase the time-series, 'epoch': the input `epoch` used to phase the time-series} ''' # find all the finite values of the magnitudes and times finiteind = np.isfinite(mags) & np.isfinite(times) finite_times = times[finiteind] finite_mags = mags[finiteind] magseries_phase = ( (finite_times - epoch)/period - np.floor(((finite_times - epoch)/period)) ) outdict = {'phase':magseries_phase, 'mags':finite_mags, 'period':period, 'epoch':epoch} if sort: sortorder = np.argsort(outdict['phase']) outdict['phase'] = outdict['phase'][sortorder] outdict['mags'] = outdict['mags'][sortorder] if wrap: outdict['phase'] = np.concatenate((outdict['phase']-1.0, outdict['phase'])) outdict['mags'] = np.concatenate((outdict['mags'], outdict['mags'])) return outdict
[docs]def phase_magseries_with_errs(times, mags, errs, period, epoch, wrap=True, sort=True): '''Phases a magnitude/flux time-series using a given period and epoch. The equation used is:: phase = (times - epoch)/period - floor((times - epoch)/period) This phases the given magnitude timeseries using the given period and epoch. If wrap is True, wraps the result around 0.0 (and returns an array that has twice the number of the original elements). If sort is True, returns the magnitude timeseries in phase sorted order. Parameters ---------- times,mags,errs : np.array The magnitude/flux time-series values and associated measurement errors to phase using the provided `period` and `epoch`. Non-fiinite values will be removed. period : float The period to use to phase the time-series. epoch : float The epoch to phase the time-series. This is usually the time-of-minimum or time-of-maximum of some periodic light curve phenomenon. Alternatively, one can use the minimum time value in `times`. wrap : bool If this is True, the returned phased time-series will be wrapped around phase 0.0, which is useful for plotting purposes. The arrays returned will have twice the number of input elements because of this wrapping. sort : bool If this is True, the returned phased time-series will be sorted in increasing phase order. Returns ------- dict A dict of the following form is returned:: {'phase': the phase values, 'mags': the mags/flux values at each phase, 'errs': the err values at each phase, 'period': the input `period` used to phase the time-series, 'epoch': the input `epoch` used to phase the time-series} ''' # find all the finite values of the magnitudes and times finiteind = np.isfinite(mags) finite_times = times[finiteind] finite_mags = mags[finiteind] finite_errs = errs[finiteind] magseries_phase = ( (finite_times - epoch)/period - np.floor(((finite_times - epoch)/period)) ) outdict = {'phase':magseries_phase, 'mags':finite_mags, 'errs':finite_errs, 'period':period, 'epoch':epoch} if sort: sortorder = np.argsort(outdict['phase']) outdict['phase'] = outdict['phase'][sortorder] outdict['mags'] = outdict['mags'][sortorder] outdict['errs'] = outdict['errs'][sortorder] if wrap: outdict['phase'] = np.concatenate((outdict['phase']-1.0, outdict['phase'])) outdict['mags'] = np.concatenate((outdict['mags'], outdict['mags'])) outdict['errs'] = np.concatenate((outdict['errs'], outdict['errs'])) return outdict
################# ## BINNING LCs ## #################
[docs]def time_bin_magseries(times, mags, binsize=540.0, minbinelems=7): '''Bins the given mag/flux time-series in time using the bin size given. Parameters ---------- times,mags : np.array The magnitude/flux time-series to bin in time. Non-finite elements will be removed from these arrays. At least 10 elements in each array are required for this function to operate. binsize : float The bin size to use to group together measurements closer than this amount in time. This is in seconds. minbinelems : int The minimum number of elements required per bin to include it in the output. Returns ------- dict A dict of the following form is returned:: {'jdbin_indices': a list of the index arrays into the nan-filtered input arrays per each bin, 'jdbins': list of bin boundaries for each bin, 'nbins': the number of bins generated, 'binnedtimes': the time values associated with each time bin; this is the median of the times in each bin, 'binnedmags': the mag/flux values associated with each time bin; this is the median of the mags/fluxes in each bin} ''' # check if the input arrays are ok if not(times.shape and mags.shape and len(times) > 9 and len(mags) > 9): LOGERROR("input time/mag arrays don't have enough elements") return # find all the finite values of the magnitudes and times finiteind = np.isfinite(mags) & np.isfinite(times) finite_times = times[finiteind] finite_mags = mags[finiteind] # convert binsize in seconds to JD units binsizejd = binsize/(86400.0) nbins = int(np.ceil((np.nanmax(finite_times) - np.nanmin(finite_times))/binsizejd) + 1) minjd = np.nanmin(finite_times) jdbins = [(minjd + x*binsizejd) for x in range(nbins)] # make a KD-tree on the JDs so we can do fast distance calculations. we # need to add a bogus y coord to make this a problem that KD-trees can # solve. time_coords = np.array([[x,1.0] for x in finite_times]) jdtree = cKDTree(time_coords) binned_finite_timeseries_indices = [] collected_binned_mags = {} for jd in jdbins: # find all bin indices close to within binsizejd of this point # using the cKDTree query. we use the p-norm = 1 (I think this # means straight-up pairwise distance? FIXME: check this) bin_indices = jdtree.query_ball_point(np.array([jd,1.0]), binsizejd/2.0, p=1.0) # if the bin_indices have already been collected, then we're # done with this bin, move to the next one. if they haven't, # then this is the start of a new bin. if (bin_indices not in binned_finite_timeseries_indices and len(bin_indices) >= minbinelems): binned_finite_timeseries_indices.append(bin_indices) # convert to ndarrays binned_finite_timeseries_indices = [np.array(x) for x in binned_finite_timeseries_indices] collected_binned_mags['jdbins_indices'] = binned_finite_timeseries_indices collected_binned_mags['jdbins'] = jdbins collected_binned_mags['nbins'] = len(binned_finite_timeseries_indices) # collect the finite_times binned_jd = np.array([np.median(finite_times[x]) for x in binned_finite_timeseries_indices]) collected_binned_mags['binnedtimes'] = binned_jd collected_binned_mags['binsize'] = binsize # median bin the magnitudes according to the calculated indices collected_binned_mags['binnedmags'] = ( np.array([np.median(finite_mags[x]) for x in binned_finite_timeseries_indices]) ) return collected_binned_mags
[docs]def time_bin_magseries_with_errs(times, mags, errs, binsize=540.0, minbinelems=7): '''Bins the given mag/flux time-series in time using the bin size given. Parameters ---------- times,mags,errs : np.array The magnitude/flux time-series and associated measurement errors to bin in time. Non-finite elements will be removed from these arrays. At least 10 elements in each array are required for this function to operate. binsize : float The bin size to use to group together measurements closer than this amount in time. This is in seconds. minbinelems : int The minimum number of elements required per bin to include it in the output. Returns ------- dict A dict of the following form is returned:: {'jdbin_indices': a list of the index arrays into the nan-filtered input arrays per each bin, 'jdbins': list of bin boundaries for each bin, 'nbins': the number of bins generated, 'binnedtimes': the time values associated with each time bin; this is the median of the times in each bin, 'binnedmags': the mag/flux values associated with each time bin; this is the median of the mags/fluxes in each bin, 'binnederrs': the err values associated with each time bin; this is the median of the errs in each bin} ''' # check if the input arrays are ok if not(times.shape and mags.shape and errs.shape and len(times) > 9 and len(mags) > 9): LOGERROR("input time/mag/err arrays don't have enough elements") return # find all the finite values of the magnitudes and times finiteind = np.isfinite(mags) & np.isfinite(times) & np.isfinite(errs) finite_times = times[finiteind] finite_mags = mags[finiteind] finite_errs = errs[finiteind] # convert binsize in seconds to JD units binsizejd = binsize/(86400.0) nbins = int(np.ceil((np.nanmax(finite_times) - np.nanmin(finite_times))/binsizejd) + 1) minjd = np.nanmin(finite_times) jdbins = [(minjd + x*binsizejd) for x in range(nbins)] # make a KD-tree on the JDs so we can do fast distance calculations. we # need to add a bogus y coord to make this a problem that KD-trees can # solve. time_coords = np.array([[x,1.0] for x in finite_times]) jdtree = cKDTree(time_coords) binned_finite_timeseries_indices = [] collected_binned_mags = {} for jd in jdbins: # find all bin indices close to within binsize of this point using the # cKDTree query. we use the p-norm = 1 for pairwise Euclidean distance. bin_indices = jdtree.query_ball_point(np.array([jd,1.0]), binsizejd/2.0, p=1.0) # if the bin_indices have already been collected, then we're # done with this bin, move to the next one. if they haven't, # then this is the start of a new bin. if (bin_indices not in binned_finite_timeseries_indices and len(bin_indices) >= minbinelems): binned_finite_timeseries_indices.append(bin_indices) # convert to ndarrays binned_finite_timeseries_indices = [np.array(x) for x in binned_finite_timeseries_indices] collected_binned_mags['jdbins_indices'] = binned_finite_timeseries_indices collected_binned_mags['jdbins'] = np.array(jdbins) collected_binned_mags['nbins'] = len(binned_finite_timeseries_indices) # collect the finite_times binned_jd = np.array([np.median(finite_times[x]) for x in binned_finite_timeseries_indices]) collected_binned_mags['binnedtimes'] = binned_jd collected_binned_mags['binsize'] = binsize # median bin the magnitudes according to the calculated indices collected_binned_mags['binnedmags'] = ( np.array([np.median(finite_mags[x]) for x in binned_finite_timeseries_indices]) ) # FIXME: calculate the error in the median-binned magnitude correctly # for now, just take the median of the errors in this bin collected_binned_mags['binnederrs'] = ( np.array([np.median(finite_errs[x]) for x in binned_finite_timeseries_indices]) ) return collected_binned_mags
[docs]def phase_bin_magseries(phases, mags, binsize=0.005, minbinelems=7): '''Bins a phased magnitude/flux time-series using the bin size provided. Parameters ---------- phases,mags : np.array The phased magnitude/flux time-series to bin in phase. Non-finite elements will be removed from these arrays. At least 10 elements in each array are required for this function to operate. binsize : float The bin size to use to group together measurements closer than this amount in phase. This is in units of phase. minbinelems : int The minimum number of elements required per bin to include it in the output. Returns ------- dict A dict of the following form is returned:: {'phasebin_indices': a list of the index arrays into the nan-filtered input arrays per each bin, 'phasebins': list of bin boundaries for each bin, 'nbins': the number of bins generated, 'binnedphases': the phase values associated with each phase bin; this is the median of the phase value in each bin, 'binnedmags': the mag/flux values associated with each phase bin; this is the median of the mags/fluxes in each bin} ''' # check if the input arrays are ok if not(phases.shape and mags.shape and len(phases) > 10 and len(mags) > 10): LOGERROR("input time/mag arrays don't have enough elements") return # find all the finite values of the magnitudes and phases finiteind = np.isfinite(mags) & np.isfinite(phases) finite_phases = phases[finiteind] finite_mags = mags[finiteind] nbins = int(np.ceil((np.nanmax(finite_phases) - np.nanmin(finite_phases))/binsize) + 1) minphase = np.nanmin(finite_phases) phasebins = [(minphase + x*binsize) for x in range(nbins)] # make a KD-tree on the PHASEs so we can do fast distance calculations. we # need to add a bogus y coord to make this a problem that KD-trees can # solve. time_coords = np.array([[x,1.0] for x in finite_phases]) phasetree = cKDTree(time_coords) binned_finite_phaseseries_indices = [] collected_binned_mags = {} for phase in phasebins: # find all bin indices close to within binsize of this point using the # cKDTree query. we use the p-norm = 1 for pairwise Euclidean distance. bin_indices = phasetree.query_ball_point(np.array([phase,1.0]), binsize/2.0, p=1.0) # if the bin_indices have already been collected, then we're # done with this bin, move to the next one. if they haven't, # then this is the start of a new bin. if (bin_indices not in binned_finite_phaseseries_indices and len(bin_indices) >= minbinelems): binned_finite_phaseseries_indices.append(bin_indices) # convert to ndarrays binned_finite_phaseseries_indices = [np.array(x) for x in binned_finite_phaseseries_indices] collected_binned_mags['phasebins_indices'] = ( binned_finite_phaseseries_indices ) collected_binned_mags['phasebins'] = phasebins collected_binned_mags['nbins'] = len(binned_finite_phaseseries_indices) # collect the finite_phases binned_phase = np.array([np.median(finite_phases[x]) for x in binned_finite_phaseseries_indices]) collected_binned_mags['binnedphases'] = binned_phase collected_binned_mags['binsize'] = binsize # median bin the magnitudes according to the calculated indices collected_binned_mags['binnedmags'] = ( np.array([np.median(finite_mags[x]) for x in binned_finite_phaseseries_indices]) ) return collected_binned_mags
[docs]def phase_bin_magseries_with_errs(phases, mags, errs, binsize=0.005, minbinelems=7, weights=None): '''Bins a phased magnitude/flux time-series using the bin size provided. Parameters ---------- phases,mags,errs : np.array The phased magnitude/flux time-series and associated errs to bin in phase. Non-finite elements will be removed from these arrays. At least 10 elements in each array are required for this function to operate. binsize : float The bin size to use to group together measurements closer than this amount in phase. This is in units of phase. minbinelems : int The minimum number of elements required per bin to include it in the output. weights : np.array or None Optional weight vector to be applied during binning. If if is passed, `np.average` is used to bin, rather than `np.median`. A good choice would be to pass ``weights=1/errs**2``, to weight by the inverse variance. Returns ------- dict A dict of the following form is returned:: {'phasebin_indices': a list of the index arrays into the nan-filtered input arrays per each bin, 'phasebins': list of bin boundaries for each bin, 'nbins': the number of bins generated, 'binnedphases': the phase values associated with each phase bin; this is the median of the phase value in each bin, 'binnedmags': the mag/flux values associated with each phase bin; this is the median of the mags/fluxes in each bin, 'binnederrs': the err values associated with each phase bin; this is the median of the errs in each bin} ''' # check if the input arrays are ok if not(phases.shape and mags.shape and len(phases) > 10 and len(mags) > 10): LOGERROR("input time/mag arrays don't have enough elements") return # find all the finite values of the magnitudes and phases finiteind = np.isfinite(mags) & np.isfinite(phases) & np.isfinite(errs) finite_phases = phases[finiteind] finite_mags = mags[finiteind] finite_errs = errs[finiteind] if weights is not None and len(weights) > 10: finite_weights = weights[finiteind] else: finite_weights = None nbins = int(np.ceil((np.nanmax(finite_phases) - np.nanmin(finite_phases))/binsize) + 1) minphase = np.nanmin(finite_phases) phasebins = [(minphase + x*binsize) for x in range(nbins)] # make a KD-tree on the PHASEs so we can do fast distance calculations. we # need to add a bogus y coord to make this a problem that KD-trees can # solve. time_coords = np.array([[x,1.0] for x in finite_phases]) phasetree = cKDTree(time_coords) binned_finite_phaseseries_indices = [] collected_binned_mags = {} for phase in phasebins: # find all bin indices close to within binsize of this point using the # cKDTree query. we use the p-norm = 1 for pairwise Euclidean distance. bin_indices = phasetree.query_ball_point(np.array([phase,1.0]), binsize/2.0, p=1.0) # if the bin_indices have already been collected, then we're # done with this bin, move to the next one. if they haven't, # then this is the start of a new bin. if (bin_indices not in binned_finite_phaseseries_indices and len(bin_indices) >= minbinelems): binned_finite_phaseseries_indices.append(bin_indices) # convert to ndarrays binned_finite_phaseseries_indices = [np.array(x) for x in binned_finite_phaseseries_indices] collected_binned_mags['phasebins_indices'] = ( binned_finite_phaseseries_indices ) collected_binned_mags['phasebins'] = phasebins collected_binned_mags['nbins'] = len(binned_finite_phaseseries_indices) # collect the finite_phases binned_phase = np.array([np.median(finite_phases[x]) for x in binned_finite_phaseseries_indices]) collected_binned_mags['binnedphases'] = binned_phase collected_binned_mags['binsize'] = binsize # median bin the magnitudes according to the calculated indices if finite_weights is None: collected_binned_mags['binnedmags'] = ( np.array([np.median(finite_mags[x]) for x in binned_finite_phaseseries_indices]) ) else: collected_binned_mags['binnedmags'] = ( np.array([np.average(finite_mags[x], weights=finite_weights[x]) for x in binned_finite_phaseseries_indices]) ) collected_binned_mags['binnederrs'] = ( np.array([np.median(finite_errs[x]) for x in binned_finite_phaseseries_indices]) ) return collected_binned_mags
############################# ## FILLING TIMESERIES GAPS ## #############################
[docs]def fill_magseries_gaps(times, mags, errs, fillgaps=0.0, sigclip=3.0, magsarefluxes=False, filterwindow=11, forcetimebin=None, verbose=True): '''This fills in gaps in a light curve. This is mainly intended for use in ACF period-finding, but maybe useful otherwise (i.e. when we figure out ARMA stuff for LCs). The main steps here are: - normalize the light curve to zero - remove giant outliers - interpolate gaps in the light curve (since ACF requires evenly spaced sampling) From McQuillan+ 2013a (https://doi.org/10.1093/mnras/stt536): "The ACF calculation requires the light curves to be regularly sampled and normalized to zero. We divided the flux in each quarter by its median and subtracted unity. Gaps in the light curve longer than the Kepler long cadence were filled using linear interpolation with added white Gaussian noise. This noise level was estimated using the variance of the residuals following subtraction of a smoothed version of the flux. To smooth the flux, we applied an iterative non-linear filter which consists of a median filter followed by a boxcar filter, both with 11-point windows, with iterative 3σ clipping of outliers." Parameters ---------- times,mags,errs : np.array The magnitude/flux time-series and associated measurement errors to operate on. Non-finite elements will be removed from these arrays. At least 10 elements in each array are required for this function to operate. fillgaps : {'noiselevel', 'nan'} or float If `fillgap='noiselevel'`, fills the gaps with the noise level obtained via the procedure above. If `fillgaps='nan'`, fills the gaps with `np.nan`. Otherwise, if `fillgaps` is a float, will use that value to fill the gaps. The default is to fill the gaps with 0.0 (as in McQuillan+ 2014) to "...prevent them contributing to the ACF". sigclip : float or int or sequence of two floats/ints or None If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. magsareflux : bool True if your "mags" are in fact fluxes, i.e. if "fainter" corresponds to `mags` getting smaller. filterwindow : int The number of time-series points to include in the Savitsky-Golay filter operation when smoothing the light curve. This should be an odd integer. forcetimebin : float or None If `forcetimebin` is a float, this value will be used to generate the interpolated time series, effectively binning the light curve to this cadence. If `forcetimebin` is None, the mode of the gaps (the forward difference between successive time values in `times`) in the provided light curve will be used as the effective cadence. NOTE: `forcetimebin` must be in the same units as `times`, e.g. if times are JD then `forcetimebin` must be in days as well verbose : bool If this is True, will indicate progress at various stages in the operation. Returns ------- dict A dict of the following form is returned:: {'itimes': the interpolated time values after gap-filling, 'imags': the interpolated mag/flux values after gap-filling, 'ierrs': the interpolated mag/flux values after gap-filling, 'cadence': the cadence of the output mag/flux time-series} ''' # remove nans finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs) ftimes, fmags, ferrs = times[finind], mags[finind], errs[finind] # remove zero errs nzind = np.nonzero(ferrs) ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind] # sigma-clip stimes, smags, serrs = sigclip_magseries(ftimes, fmags, ferrs, magsarefluxes=magsarefluxes, sigclip=sigclip) # normalize to zero if magsarefluxes: smags = smags / np.median(smags) - 1.0 else: smags = smags - np.median(smags) if isinstance(fillgaps, float): gapfiller = fillgaps elif isinstance(fillgaps, str) and fillgaps == 'noiselevel': # figure out the gaussian noise level by subtracting a Savitsky-Golay # filtered version of the light curve smoothed = smags - savgol_filter(smags, filterwindow, 2) noiselevel = 1.483 * np.median(np.abs(smoothed - np.median(smoothed))) gapfiller = noiselevel elif isinstance(fillgaps, str) and fillgaps == 'nan': gapfiller = np.nan # figure out the gap size and where to interpolate. we do this by figuring # out the most common gap (this should be the cadence). to do this, we need # to calculate the mode of the gap distribution. # get the gaps gaps = np.diff(stimes) # just use scipy.stats.mode instead of our hacked together nonsense earlier. gapmoderes = scipy.stats.mode(gaps) gapmode = gapmoderes[0].item() LOGINFO('auto-cadence for mag series: %.5f' % gapmode) # sort the gaps if forcetimebin: LOGWARNING('forcetimebin is set, forcing cadence to %.5f' % forcetimebin) gapmode = forcetimebin if gapmode == 0.0: LOGERROR('the smallest cadence of this light curve appears to be 0.0, ' 'the automatic cadence finder probably failed. ' 'try setting forcetimebin?') return None starttime, endtime = np.min(stimes), np.max(stimes) ntimes = int(np.ceil((endtime - starttime)/gapmode) + 1) if verbose: LOGINFO('generating new time series with %s measurements' % ntimes) # first, generate the full time series interpolated_times = np.linspace(starttime, endtime, ntimes) interpolated_mags = np.full_like(interpolated_times, gapfiller) interpolated_errs = np.full_like(interpolated_times, gapfiller) for ind, itime in enumerate(interpolated_times[:-1]): nextitime = itime + gapmode # find the mags between this and the next time bin itimeind = np.where((stimes > itime) & (stimes < nextitime)) # if there's more than one elem in this time bin, median them if itimeind[0].size > 1: interpolated_mags[ind] = np.median(smags[itimeind[0]]) interpolated_errs[ind] = np.median(serrs[itimeind[0]]) # otherwise, if there's only one elem in this time bin, take it elif itimeind[0].size == 1: interpolated_mags[ind] = smags[itimeind[0]] interpolated_errs[ind] = serrs[itimeind[0]] return {'itimes':interpolated_times, 'imags':interpolated_mags, 'ierrs':interpolated_errs, 'cadence':gapmode}