Source code for astrobase.varbase.trends

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# trends.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Feb 2018

'''
Contains light curve trend-removal tools, such as external parameter
decorrelation (EPD) and smoothing.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

from functools import partial

import numpy as np
import numpy.random as npr
RANDSEED = 0xdecaff
npr.seed(RANDSEED)

from numpy import median as npmedian, abs as npabs, pi as pi_value
from numpy.linalg import lstsq

from scipy.optimize import leastsq, least_squares
from scipy.signal import medfilt, savgol_filter
from scipy.ndimage.filters import median_filter
from astropy.convolution import convolve, Gaussian1DKernel

# for random forest EPD
from sklearn.ensemble import RandomForestRegressor

from ..lcmath import sigclip_magseries_with_extparams


#########################
## SMOOTHING FUNCTIONS ##
#########################

[docs]def smooth_magseries_ndimage_medfilt(mags, windowsize): '''This smooths the magseries with a median filter that reflects the array at the boundary. See https://docs.scipy.org/doc/scipy/reference/tutorial/ndimage.html for details. Parameters ---------- mags : np.array The input mags/flux time-series to smooth. windowsize : int This is a odd integer containing the smoothing window size. Returns ------- np.array The smoothed mag/flux time-series array. ''' return median_filter(mags, size=windowsize, mode='reflect')
[docs]def smooth_magseries_signal_medfilt(mags, windowsize): '''This smooths the magseries with a simple median filter. This function pads with zeros near the boundary, see: https://stackoverflow.com/questions/24585706/scipy-medfilt-wrong-result Typically this is bad. Parameters ---------- mags : np.array The input mags/flux time-series to smooth. windowsize : int This is a odd integer containing the smoothing window size. Returns ------- np.array The smoothed mag/flux time-series array. ''' return medfilt(mags, windowsize)
[docs]def smooth_magseries_gaussfilt(mags, windowsize, windowfwhm=7): '''This smooths the magseries with a Gaussian kernel. Parameters ---------- mags : np.array The input mags/flux time-series to smooth. windowsize : int This is a odd integer containing the smoothing window size. windowfwhm : int This is an odd integer containing the FWHM of the applied Gaussian window function. Returns ------- np.array The smoothed mag/flux time-series array. ''' convkernel = Gaussian1DKernel(windowfwhm, x_size=windowsize) smoothed = convolve(mags, convkernel, boundary='extend') return smoothed
[docs]def smooth_magseries_savgol(mags, windowsize, polyorder=2): '''This smooths the magseries with a Savitsky-Golay filter. Parameters ---------- mags : np.array The input mags/flux time-series to smooth. windowsize : int This is a odd integer containing the smoothing window size. polyorder : int This is an integer containing the polynomial degree order to use when generating the Savitsky-Golay filter. Returns ------- np.array The smoothed mag/flux time-series array. ''' smoothed = savgol_filter(mags, windowsize, polyorder) return smoothed
######################################## ## OLD EPD FUNCTIONS (USED FOR HATPI) ## ######################################## def _old_epd_diffmags(coeff, fsv, fdv, fkv, xcc, ycc, bgv, bge, mag): ''' This calculates the difference in mags after EPD coefficients are calculated. final EPD mags = median(magseries) + epd_diffmags() ''' return -(coeff[0]*fsv**2. + coeff[1]*fsv + coeff[2]*fdv**2. + coeff[3]*fdv + coeff[4]*fkv**2. + coeff[5]*fkv + coeff[6] + coeff[7]*fsv*fdv + coeff[8]*fsv*fkv + coeff[9]*fdv*fkv + coeff[10]*np.sin(2*np.pi*xcc) + coeff[11]*np.cos(2*np.pi*xcc) + coeff[12]*np.sin(2*np.pi*ycc) + coeff[13]*np.cos(2*np.pi*ycc) + coeff[14]*np.sin(4*np.pi*xcc) + coeff[15]*np.cos(4*np.pi*xcc) + coeff[16]*np.sin(4*np.pi*ycc) + coeff[17]*np.cos(4*np.pi*ycc) + coeff[18]*bgv + coeff[19]*bge - mag) def _old_epd_magseries(times, mags, errs, fsv, fdv, fkv, xcc, ycc, bgv, bge, epdsmooth_windowsize=21, epdsmooth_sigclip=3.0, epdsmooth_func=smooth_magseries_signal_medfilt, epdsmooth_extraparams=None): ''' Detrends a magnitude series given in mag using accompanying values of S in fsv, D in fdv, K in fkv, x coords in xcc, y coords in ycc, background in bgv, and background error in bge. smooth is used to set a smoothing parameter for the fit function. Does EPD voodoo. ''' # find all the finite values of the magsnitude finiteind = np.isfinite(mags) # calculate median and stdev mags_median = np.median(mags[finiteind]) mags_stdev = np.nanstd(mags) # if we're supposed to sigma clip, do so if epdsmooth_sigclip: excludeind = abs(mags - mags_median) < epdsmooth_sigclip*mags_stdev finalind = finiteind & excludeind else: finalind = finiteind final_mags = mags[finalind] final_len = len(final_mags) # smooth the signal if isinstance(epdsmooth_extraparams, dict): smoothedmags = epdsmooth_func(final_mags, epdsmooth_windowsize, **epdsmooth_extraparams) else: smoothedmags = epdsmooth_func(final_mags, epdsmooth_windowsize) # make the linear equation matrix epdmatrix = np.c_[fsv[finalind]**2.0, fsv[finalind], fdv[finalind]**2.0, fdv[finalind], fkv[finalind]**2.0, fkv[finalind], np.ones(final_len), fsv[finalind]*fdv[finalind], fsv[finalind]*fkv[finalind], fdv[finalind]*fkv[finalind], np.sin(2*np.pi*xcc[finalind]), np.cos(2*np.pi*xcc[finalind]), np.sin(2*np.pi*ycc[finalind]), np.cos(2*np.pi*ycc[finalind]), np.sin(4*np.pi*xcc[finalind]), np.cos(4*np.pi*xcc[finalind]), np.sin(4*np.pi*ycc[finalind]), np.cos(4*np.pi*ycc[finalind]), bgv[finalind], bge[finalind]] # solve the matrix equation [epdmatrix] . [x] = [smoothedmags] # return the EPD differential magss if the solution succeeds try: coeffs, residuals, rank, singulars = lstsq(epdmatrix, smoothedmags, rcond=None) if DEBUG: print('coeffs = %s, residuals = %s' % (coeffs, residuals)) retdict = {'times':times, 'mags':(mags_median + _old_epd_diffmags(coeffs, fsv, fdv, fkv, xcc, ycc, bgv, bge, mags)), 'errs':errs, 'fitcoeffs':coeffs, 'residuals':residuals} return retdict # if the solution fails, return nothing except Exception: LOGEXCEPTION('EPD solution did not converge') retdict = {'times':times, 'mags':np.full_like(mags, np.nan), 'errs':errs, 'fitcoeffs':coeffs, 'residuals':residuals} return retdict ################################################### ## HAT-SPECIFIC EXTERNAL PARAMETER DECORRELATION ## ################################################### def _epd_function(coeffs, fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd): ''' This is the EPD function to fit using a smoothed mag-series. ''' return (coeffs[0]*fsv*fsv + coeffs[1]*fsv + coeffs[2]*fdv*fdv + coeffs[3]*fdv + coeffs[4]*fkv*fkv + coeffs[5]*fkv + coeffs[6] + coeffs[7]*fsv*fdv + coeffs[8]*fsv*fkv + coeffs[9]*fdv*fkv + coeffs[10]*np.sin(2*pi_value*xcc) + coeffs[11]*np.cos(2*pi_value*xcc) + coeffs[12]*np.sin(2*pi_value*ycc) + coeffs[13]*np.cos(2*pi_value*ycc) + coeffs[14]*np.sin(4*pi_value*xcc) + coeffs[15]*np.cos(4*pi_value*xcc) + coeffs[16]*np.sin(4*pi_value*ycc) + coeffs[17]*np.cos(4*pi_value*ycc) + coeffs[18]*bgv + coeffs[19]*bge + coeffs[20]*iha + coeffs[21]*izd) def _epd_residual(coeffs, mags, fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd): ''' This is the residual function to minimize using scipy.optimize.leastsq. ''' f = _epd_function(coeffs, fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd) residual = mags - f return residual def _epd_residual2(coeffs, times, mags, errs, fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd): '''This is the residual function to minimize using scipy.optimize.least_squares. This variant is for :py:func:`.epd_magseries_extparams`. ''' f = _epd_function(coeffs, fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd) residual = mags - f return residual
[docs]def epd_magseries(times, mags, errs, fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd, magsarefluxes=False, epdsmooth_sigclip=3.0, epdsmooth_windowsize=21, epdsmooth_func=smooth_magseries_savgol, epdsmooth_extraparams=None): '''Detrends a magnitude series using External Parameter Decorrelation. Requires a set of external parameters similar to those present in HAT light curves. At the moment, the HAT light-curve-specific external parameters are: - S: the 'fsv' column in light curves, - D: the 'fdv' column in light curves, - K: the 'fkv' column in light curves, - x coords: the 'xcc' column in light curves, - y coords: the 'ycc' column in light curves, - background value: the 'bgv' column in light curves, - background error: the 'bge' column in light curves, - hour angle: the 'iha' column in light curves, - zenith distance: the 'izd' column in light curves S, D, and K are defined as follows: - S -> measure of PSF sharpness (~1/sigma^2 sosmaller S = wider PSF) - D -> measure of PSF ellipticity in xy direction - K -> measure of PSF ellipticity in cross direction S, D, K are related to the PSF's variance and covariance, see eqn 30-33 in A. Pal's thesis: https://arxiv.org/abs/0906.3486 NOTE: The errs are completely ignored and returned unchanged (except for sigclip and finite filtering). Parameters ---------- times,mags,errs : np.array The input mag/flux time-series to detrend. fsv : np.array Array containing the external parameter `S` of the same length as times. fdv : np.array Array containing the external parameter `D` of the same length as times. fkv : np.array Array containing the external parameter `K` of the same length as times. xcc : np.array Array containing the external parameter `x-coords` of the same length as times. ycc : np.array Array containing the external parameter `y-coords` of the same length as times. bgv : np.array Array containing the external parameter `background value` of the same length as times. bge : np.array Array containing the external parameter `background error` of the same length as times. iha : np.array Array containing the external parameter `hour angle` of the same length as times. izd : np.array Array containing the external parameter `zenith distance` of the same length as times. magsarefluxes : bool Set this to True if `mags` actually contains fluxes. epdsmooth_sigclip : float or int or sequence of two floats/ints or None This specifies how to sigma-clip the input LC before fitting the EPD function to it. If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. epdsmooth_windowsize : int This is the number of LC points to smooth over to generate a smoothed light curve that will be used to fit the EPD function. epdsmooth_func : Python function This sets the smoothing filter function to use. A Savitsky-Golay filter is used to smooth the light curve by default. The functions that can be used with this kwarg are listed in `varbase.trends`. If you want to use your own function, it MUST have the following signature:: def smoothfunc(mags_array, window_size, **extraparams) and return a numpy array of the same size as `mags_array` with the smoothed time-series. Any extra params can be provided using the `extraparams` dict. epdsmooth_extraparams : dict This is a dict of any extra filter params to supply to the smoothing function. Returns ------- dict Returns a dict of the following form:: {'times':the input times after non-finite elems removed, 'mags':the EPD detrended mag values (the EPD mags), 'errs':the errs after non-finite elems removed, 'fitcoeffs':EPD fit coefficient values, 'fitinfo':the full tuple returned by scipy.leastsq, 'fitmags':the EPD fit function evaluated at times, 'mags_median': this is median of the EPD mags, 'mags_mad': this is the MAD of EPD mags} ''' finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs) ftimes, fmags, ferrs = times[::][finind], mags[::][finind], errs[::][finind] ffsv, ffdv, ffkv, fxcc, fycc, fbgv, fbge, fiha, fizd = ( fsv[::][finind], fdv[::][finind], fkv[::][finind], xcc[::][finind], ycc[::][finind], bgv[::][finind], bge[::][finind], iha[::][finind], izd[::][finind], ) stimes, smags, serrs, separams = sigclip_magseries_with_extparams( times, mags, errs, [fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd], sigclip=epdsmooth_sigclip, magsarefluxes=magsarefluxes ) sfsv, sfdv, sfkv, sxcc, sycc, sbgv, sbge, siha, sizd = separams # smooth the signal if isinstance(epdsmooth_extraparams, dict): smoothedmags = epdsmooth_func(smags, epdsmooth_windowsize, **epdsmooth_extraparams) else: smoothedmags = epdsmooth_func(smags, epdsmooth_windowsize) # initial fit coeffs initcoeffs = np.zeros(22) # fit the smoothed mags and find the EPD function coefficients leastsqfit = leastsq(_epd_residual, initcoeffs, args=(smoothedmags, sfsv, sfdv, sfkv, sxcc, sycc, sbgv, sbge, siha, sizd), full_output=True) # if the fit succeeds, then get the EPD mags if leastsqfit[-1] in (1,2,3,4): fitcoeffs = leastsqfit[0] epdfit = _epd_function(fitcoeffs, ffsv, ffdv, ffkv, fxcc, fycc, fbgv, fbge, fiha, fizd) epdmags = npmedian(fmags) + fmags - epdfit retdict = {'times':ftimes, 'mags':epdmags, 'errs':ferrs, 'fitcoeffs':fitcoeffs, 'fitinfo':leastsqfit, 'fitmags':epdfit, 'mags_median':npmedian(epdmags), 'mags_mad':npmedian(npabs(epdmags - npmedian(epdmags)))} return retdict # if the solution fails, return nothing else: LOGERROR('EPD fit did not converge') return None
######################################## ## EPD WITH ARBITRARY EXTERNAL PARAMS ## ########################################
[docs]def epd_magseries_extparams( times, mags, errs, externalparam_arrs, initial_coeff_guess, magsarefluxes=False, epdsmooth_sigclip=3.0, epdsmooth_windowsize=21, epdsmooth_func=smooth_magseries_savgol, epdsmooth_extraparams=None, objective_func=_epd_residual2, objective_kwargs=None, optimizer_func=least_squares, optimizer_kwargs=None, ): '''This does EPD on a mag-series with arbitrary external parameters. Parameters ---------- times,mags,errs : np.array The input mag/flux time-series to run EPD on. externalparam_arrs : list of np.arrays This is a list of ndarrays of external parameters to decorrelate against. These should all be the same size as `times`, `mags`, `errs`. initial_coeff_guess : np.array An array of initial fit coefficients to pass into the objective function. epdsmooth_sigclip : float or int or sequence of two floats/ints or None This specifies how to sigma-clip the input LC before smoothing it and fitting the EPD function to it. The actual LC will not be sigma-clipped. If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. epdsmooth_windowsize : int This is the number of LC points to smooth over to generate a smoothed light curve that will be used to fit the EPD function. epdsmooth_func : Python function This sets the smoothing filter function to use. A Savitsky-Golay filter is used to smooth the light curve by default. The functions that can be used with this kwarg are listed in `varbase.trends`. If you want to use your own function, it MUST have the following signature:: def smoothfunc(mags_array, window_size, **extraparams) and return a numpy array of the same size as `mags_array` with the smoothed time-series. Any extra params can be provided using the `extraparams` dict. epdsmooth_extraparams : dict This is a dict of any extra filter params to supply to the smoothing function. objective_func : Python function The function that calculates residuals between the model and the smoothed mag-series. This must have the following signature:: def objective_func(fit_coeffs, times, mags, errs, *external_params, **objective_kwargs) where `times`, `mags`, `errs` are arrays of the sigma-clipped and smoothed time-series, `fit_coeffs` is an array of EPD fit coefficients, `external_params` is a tuple of the passed in external parameter arrays, and `objective_kwargs` is a dict of any optional kwargs to pass into the objective function. This should return the value of the residual based on evaluating the model function (and any weights based on errs or times). objective_kwargs : dict or None A dict of kwargs to pass into the `objective_func` function. optimizer_func : Python function The function that minimizes the residual between the model and the smoothed mag-series using the `objective_func`. This should have a signature similar to one of the optimizer functions in `scipy.optimize <https://docs.scipy.org/doc/scipy/reference/optimize.html>`_, i.e.:: def optimizer_func(objective_func, initial_coeffs, args=(), ...) and return a `scipy.optimize.OptimizeResult <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html>`_. We'll rely on the ``.success`` attribute to determine if the EPD fit was successful, and the ``.x`` attribute to get the values of the fit coefficients. optimizer_kwargs : dict or None A dict of kwargs to pass into the `optimizer_func` function. Returns ------- dict Returns a dict of the following form:: {'times':the input times after non-finite elems removed, 'mags':the EPD detrended mag values (the EPD mags), 'errs':the errs after non-finite elems removed, 'fitcoeffs':EPD fit coefficient values, 'fitinfo':the result returned by the optimizer function, 'mags_median': this is the median of the EPD mags, 'mags_mad': this is the MAD of EPD mags} ''' # get finite times, mags, errs finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs) ftimes, fmags, ferrs = times[::][finind], mags[::][finind], errs[::][finind] finalparam_arrs = [] for ep in externalparam_arrs: finalparam_arrs.append(ep[::][finind]) # sigclip the LC to pass into the smoothing for EPD fit stimes, smags, serrs, eparams = sigclip_magseries_with_extparams( times.copy(), mags.copy(), errs.copy(), [x.copy() for x in externalparam_arrs], sigclip=epdsmooth_sigclip, magsarefluxes=magsarefluxes ) # smooth the signal before fitting the function to it if isinstance(epdsmooth_extraparams, dict): smoothedmags = epdsmooth_func(smags, epdsmooth_windowsize, **epdsmooth_extraparams) else: smoothedmags = epdsmooth_func(smags, epdsmooth_windowsize) # the initial coeffs are passed in here initial_coeffs = initial_coeff_guess # reform the objective function with any optional kwargs if objective_kwargs is not None: obj_func = partial(objective_func, **objective_kwargs) else: obj_func = objective_func # run the optimizer function by passing in the objective function, the # coeffs, and the smoothed mags and external params as part of the `args` # tuple if not optimizer_kwargs: optimizer_kwargs = {} fit_info = optimizer_func( obj_func, initial_coeffs, args=(stimes, smoothedmags, serrs, *eparams), **optimizer_kwargs ) if fit_info.success: fit_coeffs = fit_info.x epd_mags = np.median(fmags) + obj_func(fit_coeffs, ftimes, fmags, ferrs, *finalparam_arrs) retdict = {'times':ftimes, 'mags':epd_mags, 'errs':ferrs, 'fitcoeffs':fit_coeffs, 'fitinfo':fit_info, 'mags_median':npmedian(epd_mags), 'mags_mad':npmedian(npabs(epd_mags - npmedian(epd_mags)))} return retdict # if the solution fails, return nothing else: LOGERROR('EPD fit did not converge') return None
####################### ## RANDOM FOREST EPD ## #######################
[docs]def rfepd_magseries(times, mags, errs, externalparam_arrs, magsarefluxes=False, epdsmooth=True, epdsmooth_sigclip=3.0, epdsmooth_windowsize=21, epdsmooth_func=smooth_magseries_savgol, epdsmooth_extraparams=None, rf_subsample=1.0, rf_ntrees=300, rf_extraparams={'criterion':'mse', 'oob_score':False, 'n_jobs':-1}): '''This uses a `RandomForestRegressor` to de-correlate the given magseries. Parameters ---------- times,mags,errs : np.array The input mag/flux time-series to run EPD on. externalparam_arrs : list of np.arrays This is a list of ndarrays of external parameters to decorrelate against. These should all be the same size as `times`, `mags`, `errs`. epdsmooth : bool If True, sets the training LC for the RandomForestRegress to be a smoothed version of the sigma-clipped light curve provided in `times`, `mags`, `errs`. epdsmooth_sigclip : float or int or sequence of two floats/ints or None This specifies how to sigma-clip the input LC before smoothing it and fitting the EPD function to it. The actual LC will not be sigma-clipped. If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. epdsmooth_windowsize : int This is the number of LC points to smooth over to generate a smoothed light curve that will be used to fit the EPD function. epdsmooth_func : Python function This sets the smoothing filter function to use. A Savitsky-Golay filter is used to smooth the light curve by default. The functions that can be used with this kwarg are listed in `varbase.trends`. If you want to use your own function, it MUST have the following signature:: def smoothfunc(mags_array, window_size, **extraparams) and return a numpy array of the same size as `mags_array` with the smoothed time-series. Any extra params can be provided using the `extraparams` dict. epdsmooth_extraparams : dict This is a dict of any extra filter params to supply to the smoothing function. rf_subsample : float Defines the fraction of the size of the `mags` array to use for training the random forest regressor. rf_ntrees : int This is the number of trees to use for the `RandomForestRegressor`. rf_extraprams : dict This is a dict of any extra kwargs to provide to the `RandomForestRegressor` instance used. Returns ------- dict Returns a dict with decorrelated mags and the usual info from the `RandomForestRegressor`: variable importances, etc. ''' # get finite times, mags, errs finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs) ftimes, fmags, ferrs = times[::][finind], mags[::][finind], errs[::][finind] finalparam_arrs = [] for ep in externalparam_arrs: finalparam_arrs.append(ep[::][finind]) stimes, smags, serrs, eparams = sigclip_magseries_with_extparams( times, mags, errs, externalparam_arrs, sigclip=epdsmooth_sigclip, magsarefluxes=magsarefluxes ) # smoothing is optional for RFR because we train on a fraction of the mag # series and so should not require a smoothed input to fit a function to if epdsmooth: # smooth the signal if isinstance(epdsmooth_extraparams, dict): smoothedmags = epdsmooth_func(smags, epdsmooth_windowsize, **epdsmooth_extraparams) else: smoothedmags = epdsmooth_func(smags, epdsmooth_windowsize) else: smoothedmags = smags # set up the regressor if isinstance(rf_extraparams, dict): RFR = RandomForestRegressor(n_estimators=rf_ntrees, **rf_extraparams) else: RFR = RandomForestRegressor(n_estimators=rf_ntrees) # collect the features features = np.column_stack(eparams) # fit, then generate the predicted values, then get corrected values # we fit on a randomly selected subsample of all the mags if rf_subsample < 1.0: featureindices = np.arange(smoothedmags.size) # these are sorted because time-order should be important training_indices = np.sort( npr.choice(featureindices, size=int(rf_subsample*smoothedmags.size), replace=False) ) else: training_indices = np.arange(smoothedmags.size) RFR.fit(features[training_indices,:], smoothedmags[training_indices]) # predict on the full feature set flux_corrections = RFR.predict(np.column_stack(finalparam_arrs)) corrected_fmags = npmedian(fmags) + fmags - flux_corrections retdict = {'times':ftimes, 'mags':corrected_fmags, 'errs':ferrs, 'feature_importances':RFR.feature_importances_, 'regressor':RFR, 'mags_median':npmedian(corrected_fmags), 'mags_mad':npmedian(npabs(corrected_fmags - npmedian(corrected_fmags)))} return retdict