#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# nonphysical.py
# Waqas Bhatti and Luke Bouma - Feb 2017
# (wbhatti@astro.princeton.edu and luke@astro.princeton.edu)
'''Light curve fitting routines for 'non-physical' models:
- :py:func:`astrobase.lcfit.nonphysical.spline_fit_magseries`: fit a univariate
cubic spline to a magnitude/flux time series with a specified spline knot
fraction.
- :py:func:`astrobase.lcfit.nonphysical.savgol_fit_magseries`: apply a
Savitzky-Golay smoothing filter to a magnitude/flux time series, returning the
resulting smoothed function as a "fit".
- :py:func:`astrobase.lcfit.nonphysical.legendre_fit_magseries`: fit a Legendre
function of the specified order to the magnitude/flux time series.
'''
#############
## LOGGING ##
#############
import logging
from astrobase import log_sub, log_fmt, log_date_fmt
DEBUG = False
if DEBUG:
level = logging.DEBUG
else:
level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
level=level,
style=log_sub,
format=log_fmt,
datefmt=log_date_fmt,
)
LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception
#############
## IMPORTS ##
#############
from numpy import (
sum as npsum, array as nparray, max as npmax, min as npmin,
floor as npfloor, where as npwhere, linspace as nplinspace,
full_like as npfull_like, nonzero as npnonzero, diff as npdiff,
concatenate as npconcatenate
)
from scipy.interpolate import LSQUnivariateSpline
from scipy.signal import savgol_filter
from numpy.polynomial.legendre import Legendre
from ..lcmath import sigclip_magseries
from .utils import get_phased_quantities, make_fit_plot
#################################################################
## SPLINE FITTING TO PHASED AND UNPHASED MAGNITUDE TIME SERIES ##
#################################################################
[docs]def spline_fit_magseries(times, mags, errs, period,
knotfraction=0.01,
maxknots=30,
sigclip=30.0,
plotfit=False,
ignoreinitfail=False,
magsarefluxes=False,
verbose=True):
'''This fits a univariate cubic spline to the phased light curve.
This fit may be better than the Fourier fit for sharply variable objects,
like EBs, so can be used to distinguish them from other types of variables.
Parameters
----------
times,mags,errs : np.array
The input mag/flux time-series to fit a spline to.
period : float
The period to use for the spline fit.
knotfraction : float
The knot fraction is the number of internal knots to use for the
spline. A value of 0.01 (or 1%) of the total number of non-nan
observations appears to work quite well, without over-fitting. maxknots
controls the maximum number of knots that will be allowed.
maxknots : int
The maximum number of knots that will be used even if `knotfraction`
gives a value to use larger than `maxknots`. This helps dealing with
over-fitting to short time-scale variations.
sigclip : float or int or sequence of two floats/ints or None
If a single float or int, a symmetric sigma-clip will be performed using
the number provided as the sigma-multiplier to cut out from the input
time-series.
If a list of two ints/floats is provided, the function will perform an
'asymmetric' sigma-clip. The first element in this list is the sigma
value to use for fainter flux/mag values; the second element in this
list is the sigma value to use for brighter flux/mag values. For
example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
dimmings and greater than 3-sigma brightenings. Here the meaning of
"dimming" and "brightening" is set by *physics* (not the magnitude
system), which is why the `magsarefluxes` kwarg must be correctly set.
If `sigclip` is None, no sigma-clipping will be performed, and the
time-series (with non-finite elems removed) will be passed through to
the output.
magsarefluxes : bool
If True, will treat the input values of `mags` as fluxes for purposes of
plotting the fit and sig-clipping.
plotfit : str or False
If this is a string, this function will make a plot for the fit to the
mag/flux time-series and writes the plot to the path specified here.
ignoreinitfail : bool
If this is True, ignores the initial failure to find a set of optimized
Fourier parameters using the global optimization function and proceeds
to do a least-squares fit anyway.
verbose : bool
If True, will indicate progress and warn of any problems.
Returns
-------
dict
This function returns a dict containing the model fit parameters, the
minimized chi-sq value and the reduced chi-sq value. The form of this
dict is mostly standardized across all functions in this module::
{
'fittype':'spline',
'fitinfo':{
'nknots': the number of knots used for the fit
'fitmags': the model fit mags,
'fitepoch': the epoch of minimum light for the fit,
},
'fitchisq': the minimized value of the fit's chi-sq,
'fitredchisq':the reduced chi-sq value,
'fitplotfile': the output fit plot if fitplot is not None,
'magseries':{
'times':input times in phase order of the model,
'phase':the phases of the model mags,
'mags':input mags/fluxes in the phase order of the model,
'errs':errs in the phase order of the model,
'magsarefluxes':input value of magsarefluxes kwarg
}
}
'''
# this is required to fit the spline correctly
if errs is None:
errs = npfull_like(mags, 0.005)
# sigclip the magnitude time series
stimes, smags, serrs = sigclip_magseries(times, mags, errs,
sigclip=sigclip,
magsarefluxes=magsarefluxes)
# get rid of zero errs
nzind = npnonzero(serrs)
stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]
# phase the mag series
phase, pmags, perrs, ptimes, mintime = (
get_phased_quantities(stimes, smags, serrs, period)
)
# now figure out the number of knots up to max knots (=100)
nobs = len(phase)
nknots = int(npfloor(knotfraction*nobs))
nknots = maxknots if nknots > maxknots else nknots
splineknots = nplinspace(phase[0] + 0.01,
phase[-1] - 0.01,
num=nknots)
# NOTE: newer scipy needs x to be strictly increasing. this means we should
# filter out anything that doesn't have np.diff(phase) > 0.0
# FIXME: this needs to be tested
phase_diffs_ind = npdiff(phase) > 0.0
incphase_ind = npconcatenate((nparray([True]), phase_diffs_ind))
phase, pmags, perrs = (phase[incphase_ind],
pmags[incphase_ind],
perrs[incphase_ind])
# generate and fit the spline
spl = LSQUnivariateSpline(phase, pmags, t=splineknots, w=1.0/perrs)
# calculate the spline fit to the actual phases, the chisq and red-chisq
fitmags = spl(phase)
fitchisq = npsum(
((fitmags - pmags)*(fitmags - pmags)) / (perrs*perrs)
)
fitredchisq = fitchisq/(len(pmags) - nknots - 1)
if verbose:
LOGINFO(
'spline fit done. nknots = %s, '
'chisq = %.5f, reduced chisq = %.5f' %
(nknots, fitchisq, fitredchisq)
)
# figure out the time of light curve minimum (i.e. the fit epoch)
# this is when the fit mag is maximum (i.e. the faintest)
# or if magsarefluxes = True, then this is when fit flux is minimum
if not magsarefluxes:
fitmagminind = npwhere(fitmags == npmax(fitmags))
else:
fitmagminind = npwhere(fitmags == npmin(fitmags))
if len(fitmagminind[0]) > 1:
fitmagminind = (fitmagminind[0][0],)
magseriesepoch = ptimes[fitmagminind]
# assemble the returndict
returndict = {
'fittype':'spline',
'fitinfo':{
'nknots':nknots,
'fitmags':fitmags,
'fitepoch':magseriesepoch
},
'fitchisq':fitchisq,
'fitredchisq':fitredchisq,
'fitplotfile':None,
'magseries':{
'times':ptimes,
'phase':phase,
'mags':pmags,
'errs':perrs,
'magsarefluxes':magsarefluxes
},
}
# make the fit plot if required
if plotfit and isinstance(plotfit, str):
make_fit_plot(phase, pmags, perrs, fitmags,
period, mintime, magseriesepoch,
plotfit,
magsarefluxes=magsarefluxes)
returndict['fitplotfile'] = plotfit
return returndict
#####################################################
## SAVITZKY-GOLAY FITTING TO MAGNITUDE TIME SERIES ##
#####################################################
[docs]def savgol_fit_magseries(times, mags, errs, period,
windowlength=None,
polydeg=2,
sigclip=30.0,
plotfit=False,
magsarefluxes=False,
verbose=True):
'''Fit a Savitzky-Golay filter to the magnitude/flux time series.
SG fits successive sub-sets (windows) of adjacent data points with a
low-order polynomial via least squares. At each point (magnitude), it
returns the value of the polynomial at that magnitude's time. This is made
significantly cheaper than *actually* performing least squares for each
window through linear algebra tricks that are possible when specifying the
window size and polynomial order beforehand. Numerical Recipes Ch 14.8
gives an overview, Eq. 14.8.6 is what Scipy has implemented.
The idea behind Savitzky-Golay is to preserve higher moments (>=2) of the
input data series than would be done by a simple moving window average.
Note that the filter assumes evenly spaced data, which magnitude time series
are not. By *pretending* the data points are evenly spaced, we introduce an
additional noise source in the function values. This is a relatively small
noise source provided that the changes in the magnitude values across the
full width of the N=windowlength point window is < sqrt(N/2) times the
measurement noise on a single point.
TODO:
- Find correct dof for reduced chi squared in savgol_fit_magseries
Parameters
----------
times,mags,errs : np.array
The input mag/flux time-series to fit the Savitsky-Golay model to.
period : float
The period to use for the model fit.
windowlength : None or int
The length of the filter window (the number of coefficients). Must be
either positive and odd, or None. (The window is the number of points to
the left, and to the right, of whatever point is having a polynomial fit
to it locally). Bigger windows at fixed polynomial order risk lowering
the amplitude of sharp features. If None, this routine (arbitrarily)
sets the `windowlength` for phased LCs to be either the number of finite
data points divided by 300, or polydeg+3, whichever is bigger.
polydeg : int
This is the order of the polynomial used to fit the samples. Must be
less than `windowlength`. "Higher-order filters do better at preserving
feature heights and widths, but do less smoothing on broader features."
(Numerical Recipes).
sigclip : float or int or sequence of two floats/ints or None
If a single float or int, a symmetric sigma-clip will be performed using
the number provided as the sigma-multiplier to cut out from the input
time-series.
If a list of two ints/floats is provided, the function will perform an
'asymmetric' sigma-clip. The first element in this list is the sigma
value to use for fainter flux/mag values; the second element in this
list is the sigma value to use for brighter flux/mag values. For
example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
dimmings and greater than 3-sigma brightenings. Here the meaning of
"dimming" and "brightening" is set by *physics* (not the magnitude
system), which is why the `magsarefluxes` kwarg must be correctly set.
If `sigclip` is None, no sigma-clipping will be performed, and the
time-series (with non-finite elems removed) will be passed through to
the output.
magsarefluxes : bool
If True, will treat the input values of `mags` as fluxes for purposes of
plotting the fit and sig-clipping.
plotfit : str or False
If this is a string, this function will make a plot for the fit to the
mag/flux time-series and writes the plot to the path specified here.
ignoreinitfail : bool
If this is True, ignores the initial failure to find a set of optimized
Fourier parameters using the global optimization function and proceeds
to do a least-squares fit anyway.
verbose : bool
If True, will indicate progress and warn of any problems.
Returns
-------
dict
This function returns a dict containing the model fit parameters, the
minimized chi-sq value and the reduced chi-sq value. The form of this
dict is mostly standardized across all functions in this module::
{
'fittype':'savgol',
'fitinfo':{
'windowlength': the window length used for the fit,
'polydeg':the polynomial degree used for the fit,
'fitmags': the model fit mags,
'fitepoch': the epoch of minimum light for the fit,
},
'fitchisq': the minimized value of the fit's chi-sq,
'fitredchisq':the reduced chi-sq value,
'fitplotfile': the output fit plot if fitplot is not None,
'magseries':{
'times':input times in phase order of the model,
'phase':the phases of the model mags,
'mags':input mags/fluxes in the phase order of the model,
'errs':errs in the phase order of the model,
'magsarefluxes':input value of magsarefluxes kwarg
}
}
'''
stimes, smags, serrs = sigclip_magseries(times, mags, errs,
sigclip=sigclip,
magsarefluxes=magsarefluxes)
# get rid of zero errs
nzind = npnonzero(serrs)
stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]
phase, pmags, perrs, ptimes, mintime = (
get_phased_quantities(stimes, smags, serrs, period)
)
if not isinstance(windowlength, int):
windowlength = max(
polydeg + 3,
int(len(phase)/300)
)
if windowlength % 2 == 0:
windowlength += 1
if verbose:
LOGINFO('applying Savitzky-Golay filter with '
'window length %s and polynomial degree %s to '
'mag series with %s observations, '
'using period %.6f, folded at %.6f' % (windowlength,
polydeg,
len(pmags),
period,
mintime))
# generate the function values obtained by applying the SG filter. The
# "wrap" option is best for phase-folded LCs.
sgf = savgol_filter(pmags, windowlength, polydeg, mode='wrap')
# here the "fit" to the phases is the function produced by the
# Savitzky-Golay filter. then compute the chisq and red-chisq.
fitmags = sgf
fitchisq = npsum(
((fitmags - pmags)*(fitmags - pmags)) / (perrs*perrs)
)
# TODO: quantify dof for SG filter.
nparams = int(len(pmags)/windowlength) * polydeg
fitredchisq = fitchisq/(len(pmags) - nparams - 1)
fitredchisq = -99.
if verbose:
LOGINFO(
'SG filter applied. chisq = %.5f, reduced chisq = %.5f' %
(fitchisq, fitredchisq)
)
# figure out the time of light curve minimum (i.e. the fit epoch)
# this is when the fit mag is maximum (i.e. the faintest)
# or if magsarefluxes = True, then this is when fit flux is minimum
if not magsarefluxes:
fitmagminind = npwhere(fitmags == npmax(fitmags))
else:
fitmagminind = npwhere(fitmags == npmin(fitmags))
if len(fitmagminind[0]) > 1:
fitmagminind = (fitmagminind[0][0],)
magseriesepoch = ptimes[fitmagminind]
# assemble the returndict
returndict = {
'fittype':'savgol',
'fitinfo':{
'windowlength':windowlength,
'polydeg':polydeg,
'fitmags':fitmags,
'fitepoch':magseriesepoch
},
'fitchisq':fitchisq,
'fitredchisq':fitredchisq,
'fitplotfile':None,
'magseries':{
'times':ptimes,
'phase':phase,
'mags':pmags,
'errs':perrs,
'magsarefluxes':magsarefluxes
}
}
# make the fit plot if required
if plotfit and isinstance(plotfit, str):
make_fit_plot(phase, pmags, perrs, fitmags,
period, mintime, magseriesepoch,
plotfit,
magsarefluxes=magsarefluxes)
returndict['fitplotfile'] = plotfit
return returndict
##########################################################
## LEGENDRE-POLYNOMIAL FITTING TO MAGNITUDE TIME SERIES ##
##########################################################
[docs]def legendre_fit_magseries(times, mags, errs, period,
legendredeg=10,
sigclip=30.0,
plotfit=False,
magsarefluxes=False,
verbose=True):
'''Fit an arbitrary-order Legendre series, via least squares, to the
magnitude/flux time series.
This is a series of the form::
p(x) = c_0*L_0(x) + c_1*L_1(x) + c_2*L_2(x) + ... + c_n*L_n(x)
where L_i's are Legendre polynomials (also called "Legendre functions of the
first kind") and c_i's are the coefficients being fit.
This function is mainly just a wrapper to
`numpy.polynomial.legendre.Legendre.fit`.
Parameters
----------
times,mags,errs : np.array
The input mag/flux time-series to fit a Legendre series polynomial to.
period : float
The period to use for the Legendre fit.
legendredeg : int
This is `n` in the equation above, e.g. if you give `n=5`, you will
get 6 coefficients. This number should be much less than the number of
data points you are fitting.
sigclip : float or int or sequence of two floats/ints or None
If a single float or int, a symmetric sigma-clip will be performed using
the number provided as the sigma-multiplier to cut out from the input
time-series.
If a list of two ints/floats is provided, the function will perform an
'asymmetric' sigma-clip. The first element in this list is the sigma
value to use for fainter flux/mag values; the second element in this
list is the sigma value to use for brighter flux/mag values. For
example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
dimmings and greater than 3-sigma brightenings. Here the meaning of
"dimming" and "brightening" is set by *physics* (not the magnitude
system), which is why the `magsarefluxes` kwarg must be correctly set.
If `sigclip` is None, no sigma-clipping will be performed, and the
time-series (with non-finite elems removed) will be passed through to
the output.
magsarefluxes : bool
If True, will treat the input values of `mags` as fluxes for purposes of
plotting the fit and sig-clipping.
plotfit : str or False
If this is a string, this function will make a plot for the fit to the
mag/flux time-series and writes the plot to the path specified here.
ignoreinitfail : bool
If this is True, ignores the initial failure to find a set of optimized
Fourier parameters using the global optimization function and proceeds
to do a least-squares fit anyway.
verbose : bool
If True, will indicate progress and warn of any problems.
Returns
-------
dict
This function returns a dict containing the model fit parameters, the
minimized chi-sq value and the reduced chi-sq value. The form of this
dict is mostly standardized across all functions in this module::
{
'fittype':'legendre',
'fitinfo':{
'legendredeg': the Legendre polynomial degree used,
'fitmags': the model fit mags,
'fitepoch': the epoch of minimum light for the fit,
},
'fitchisq': the minimized value of the fit's chi-sq,
'fitredchisq':the reduced chi-sq value,
'fitplotfile': the output fit plot if fitplot is not None,
'magseries':{
'times':input times in phase order of the model,
'phase':the phases of the model mags,
'mags':input mags/fluxes in the phase order of the model,
'errs':errs in the phase order of the model,
'magsarefluxes':input value of magsarefluxes kwarg
}
}
'''
stimes, smags, serrs = sigclip_magseries(times, mags, errs,
sigclip=sigclip,
magsarefluxes=magsarefluxes)
# get rid of zero errs
nzind = npnonzero(serrs)
stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]
phase, pmags, perrs, ptimes, mintime = (
get_phased_quantities(stimes, smags, serrs, period)
)
if verbose:
LOGINFO('fitting Legendre series with '
'maximum Legendre polynomial order %s to '
'mag series with %s observations, '
'using period %.6f, folded at %.6f' % (legendredeg,
len(pmags),
period,
mintime))
# Least squares fit of Legendre polynomial series to the data. The window
# and domain (see "Using the Convenience Classes" in the numpy
# documentation) are handled automatically, scaling the times to a minimal
# domain in [-1,1], in which Legendre polynomials are a complete basis.
p = Legendre.fit(phase, pmags, legendredeg)
coeffs = p.coef
fitmags = p(phase)
# Now compute the chisq and red-chisq.
fitchisq = npsum(
((fitmags - pmags)*(fitmags - pmags)) / (perrs*perrs)
)
nparams = legendredeg + 1
fitredchisq = fitchisq/(len(pmags) - nparams - 1)
if verbose:
LOGINFO(
'Legendre fit done. chisq = %.5f, reduced chisq = %.5f' %
(fitchisq, fitredchisq)
)
# figure out the time of light curve minimum (i.e. the fit epoch)
# this is when the fit mag is maximum (i.e. the faintest)
# or if magsarefluxes = True, then this is when fit flux is minimum
if not magsarefluxes:
fitmagminind = npwhere(fitmags == npmax(fitmags))
else:
fitmagminind = npwhere(fitmags == npmin(fitmags))
if len(fitmagminind[0]) > 1:
fitmagminind = (fitmagminind[0][0],)
magseriesepoch = ptimes[fitmagminind]
# assemble the returndict
returndict = {
'fittype':'legendre',
'fitinfo':{
'legendredeg':legendredeg,
'fitmags':fitmags,
'fitepoch':magseriesepoch,
'finalparams':coeffs,
},
'fitchisq':fitchisq,
'fitredchisq':fitredchisq,
'fitplotfile':None,
'magseries':{
'times':ptimes,
'phase':phase,
'mags':pmags,
'errs':perrs,
'magsarefluxes':magsarefluxes
}
}
# make the fit plot if required
if plotfit and isinstance(plotfit, str):
make_fit_plot(phase, pmags, perrs, fitmags,
period, mintime, magseriesepoch,
plotfit,
magsarefluxes=magsarefluxes)
returndict['fitplotfile'] = plotfit
return returndict