#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# astrotess.py - Luke Bouma (luke@astro.princeton.edu) - 09/2018
'''
Contains various tools for analyzing TESS light curves.
'''
#############
## LOGGING ##
#############
import logging
from astrobase import log_sub, log_fmt, log_date_fmt
# module-level logging setup: pick the level once at import time, then bind
# convenience aliases so the rest of the module can call LOGINFO(...) etc.
DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception
##################
## MAIN IMPORTS ##
##################
import pickle
import os.path
import gzip
import sys
import glob
import numpy as np
from astropy.io import fits as pyfits
#######################################
## UTILITY FUNCTIONS FOR FLUXES/MAGS ##
#######################################
def normalized_flux_to_mag(lcdict,
                           columns=('sap.sap_flux',
                                    'sap.sap_flux_err',
                                    'sap.sap_bkg',
                                    'sap.sap_bkg_err',
                                    'pdc.pdcsap_flux',
                                    'pdc.pdcsap_flux_err')):
    '''This converts the normalized fluxes in the TESS lcdicts to TESS mags.

    Uses the object's TESS mag stored in lcdict['objectinfo']['tessmag']::

        mag - object_tess_mag = -2.5 log (flux/median_flux)

    Parameters
    ----------

    lcdict : lcdict
        An `lcdict` produced by `read_tess_fitslc` or
        `consolidate_tess_fitslc`. This must have normalized fluxes in its
        measurement columns (use the `normalize` kwarg for these functions).

    columns : sequence of str
        The column keys of the normalized flux and background measurements in
        the `lcdict` to operate on and convert to magnitudes in TESS band (T).

    Returns
    -------

    lcdict
        The returned `lcdict` will contain extra columns corresponding to
        magnitudes for each input normalized flux/background column.

    '''

    tess_mag = lcdict['objectinfo']['tessmag']

    for key in columns:

        # keys are 'subdict.column', e.g. 'sap.sap_flux'
        k1, k2 = key.split('.')

        if 'err' not in k2:
            # normalized flux -> mag offset from the catalog TESS mag
            lcdict[k1][k2.replace('flux','mag')] = (
                tess_mag - 2.5*np.log10(lcdict[k1][k2])
            )
        else:
            # error columns: mag error from the normalized flux error
            lcdict[k1][k2.replace('flux','mag')] = (
                - 2.5*np.log10(1.0 - lcdict[k1][k2])
            )

    return lcdict
#########################################################
## LCDICT MAKING FUNCTIONS FOR TESS HLSP LC.FITS FILES ##
#########################################################
# these appear to be similar to Kepler LCs, so we'll copy over stuff from
# astrokep.py
# this is the list of keys to pull out of the top header of the FITS
# keys to pull out of the primary (top) FITS header: object identity,
# astrometry, stellar parameters, and processing provenance
LCTOPKEYS = [
    'DATE-OBS',
    'DATE-END',
    'PROCVER',
    'ORIGIN',
    'DATA_REL',
    'TIMVERSN',
    'OBJECT',
    'TICID',
    'SECTOR',
    'CAMERA',
    'CCD',
    'PXTABLE',
    'RADESYS',
    'RA_OBJ',
    'DEC_OBJ',
    'EQUINOX',
    'PMRA',
    'PMDEC',
    'PMTOTAL',
    'TESSMAG',
    'TEFF',
    'LOGG',
    'MH',
    'RADIUS',
    'TICVER',
    'CRMITEN',
    'CRBLKSZ',
    'CRSPOC',
]

# keys to pull out of the light curve (extension 1) header: timing,
# detector gains/read-noise, CDPP noise metrics, and PDC diagnostics
LCHEADERKEYS = [
    'EXPOSURE',
    'TIMEREF',
    'TASSIGN',
    'TIMESYS',
    'BJDREFI',
    'BJDREFF',
    'TELAPSE',
    'LIVETIME',
    'INT_TIME',
    'NUM_FRM',
    'TIMEDEL',
    'BACKAPP',
    'DEADAPP',
    'VIGNAPP',
    'GAINA',
    'GAINB',
    'GAINC',
    'GAIND',
    'READNOIA',
    'READNOIB',
    'READNOIC',
    'READNOID',
    'CDPP0_5',
    'CDPP1_0',
    'CDPP2_0',
    'PDCVAR',
    'PDCMETHD',
    'CROWDSAP',
    'FLFRCSAP',
    'NSPSDDET',
    'NSPSDCOR'
]

# columns to pull out of the light curve FITS table: time stamps, cadence
# bookkeeping, quality flags, and centroid/position measurements
LCDATAKEYS = [
    'TIME',
    'TIMECORR',
    'CADENCENO',
    'QUALITY',
    'PSF_CENTR1','PSF_CENTR1_ERR','PSF_CENTR2','PSF_CENTR2_ERR',
    'MOM_CENTR1','MOM_CENTR1_ERR','MOM_CENTR2','MOM_CENTR2_ERR',
    'POS_CORR1','POS_CORR2'
]

# columns to use for fluxes, backgrounds, errors (SAP and PDCSAP)
LCSAPKEYS = ['SAP_FLUX','SAP_FLUX_ERR','SAP_BKG','SAP_BKG_ERR']
LCPDCKEYS = ['PDCSAP_FLUX','PDCSAP_FLUX_ERR']

# keys to pull out of the aperture extension (extension 2) of the light curve
# we also pull out the whole pixel mask, which looks something like:
#
# array([[65, 69, 69, 69, 69, 69, 69, 69, 69, 65, 65],
#        [69, 69, 69, 69, 65, 65, 65, 65, 69, 69, 65],
#        [65, 65, 65, 65, 65, 65, 65, 65, 65, 69, 65],
#        [65, 65, 65, 65, 75, 75, 65, 65, 65, 69, 65],
#        [65, 65, 65, 75, 75, 75, 75, 65, 65, 65, 65],
#        [65, 65, 65, 75, 75, 75, 75, 65, 65, 69, 65],
#        [65, 65, 65, 65, 75, 75, 65, 65, 65, 69, 65],
#        [65, 65, 65, 65, 65, 65, 65, 65, 65, 69, 65],
#        [69, 69, 69, 65, 69, 65, 65, 65, 69, 69, 65],
#        [65, 69, 69, 69, 69, 69, 65, 69, 69, 65, 65],
#        [65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65]], dtype=int32)
#
# FIXME: figure out what these values mean (probably flux-collected = 75 /
# flux-available = 69 / flux-in-stamp = 65). we use CDELT1 and CDELT2 below to
# get the pixel scale in arcsec/px
LCAPERTUREKEYS = ['NPIXSAP','NPIXMISS',
                  'CDELT1','CDELT2']
def read_tess_fitslc(lcfits,
                     headerkeys=LCHEADERKEYS,
                     datakeys=LCDATAKEYS,
                     sapkeys=LCSAPKEYS,
                     pdckeys=LCPDCKEYS,
                     topkeys=LCTOPKEYS,
                     apkeys=LCAPERTUREKEYS,
                     normalize=False,
                     appendto=None,
                     filterqualityflags=False,
                     nanfilter=None,
                     timestoignore=None):
    '''This extracts the light curve from a single TESS .lc.fits file.

    This works on the light curves available at MAST.

    TODO: look at:

    https://archive.stsci.edu/missions/tess/doc/EXP-TESS-ARC-ICD-TM-0014.pdf

    for details on the column descriptions and to fill in any other info we
    need.

    Parameters
    ----------

    lcfits : str
        The filename of a MAST TESS light curve FITS file.

    headerkeys : list
        A list of FITS header keys that will be extracted from the FITS light
        curve file. These describe the observations. The default value for this
        is given in `LCHEADERKEYS` above.

    datakeys : list
        A list of FITS column names that correspond to the auxiliary
        measurements in the light curve. The default is `LCDATAKEYS` above.

    sapkeys : list
        A list of FITS column names that correspond to the SAP flux
        measurements in the light curve. The default is `LCSAPKEYS` above.

    pdckeys : list
        A list of FITS column names that correspond to the PDC flux
        measurements in the light curve. The default is `LCPDCKEYS` above.

    topkeys : list
        A list of FITS header keys that describe the object in the light
        curve. The default is `LCTOPKEYS` above.

    apkeys : list
        A list of FITS header keys that describe the flux measurement apertures
        used by the TESS pipeline. The default is `LCAPERTUREKEYS` above.

    normalize : bool
        If True, then the light curve's SAP_FLUX and PDCSAP_FLUX measurements
        will be normalized to 1.0 by dividing out the median flux for the
        component light curve.

    appendto : lcdict or None
        If appendto is an `lcdict`, will append measurements of this `lcdict` to
        that `lcdict`. This is used for consolidating light curves for the same
        object across different files (sectors/cameras/CCDs?). The appending
        does not care about the time order. To consolidate light curves in time
        order, use `consolidate_tess_fitslc` below.

    filterqualityflags : bool
        If True, will remove any measurements that have non-zero quality flags
        present. This usually indicates an issue with the instrument or
        spacecraft.

    nanfilter : {'sap,pdc,time','sap,time','pdc,time'} or None
        Indicates the flux measurement type(s) to apply the NaN filtering to.

    timestoignore : list of tuples or None
        This is of the form::

            [(time1_start, time1_end), (time2_start, time2_end), ...]

        and indicates the start and end times to mask out of the final
        lcdict. Use this to remove anything that wasn't caught by the quality
        flags.

    Returns
    -------

    lcdict
        Returns an `lcdict` (this is useable by most astrobase functions for LC
        processing).

    '''

    # read the fits file
    hdulist = pyfits.open(lcfits)
    lchdr, lcdata = hdulist[1].header, hdulist[1].data
    lctophdr, lcaperturehdr, lcaperturedata = (hdulist[0].header,
                                               hdulist[2].header,
                                               hdulist[2].data)
    hdulist.close()

    hdrinfo = {}

    # now get the values we want from the light curve header
    for key in headerkeys:
        if key in lchdr and lchdr[key] is not None:
            hdrinfo[key.lower()] = lchdr[key]
        else:
            hdrinfo[key.lower()] = None

    # get the number of detections
    ndet = lchdr['NAXIS2']

    # get the info from the topheader
    for key in topkeys:
        if key in lctophdr and lctophdr[key] is not None:
            hdrinfo[key.lower()] = lctophdr[key]
        else:
            hdrinfo[key.lower()] = None

    # get the info from the lcaperturehdr
    # FIX: iterate over the requested `apkeys` -- the original iterated over
    # `lcaperturehdr` itself, which made the `apkeys` kwarg dead code and the
    # membership test trivially true
    for key in apkeys:
        if key in lcaperturehdr and lcaperturehdr[key] is not None:
            hdrinfo[key.lower()] = lcaperturehdr[key]
        else:
            hdrinfo[key.lower()] = None

    # if we're appending to another lcdict
    if appendto and isinstance(appendto, dict):

        lcdict = appendto

        # update lcinfo
        lcdict['lcinfo']['timesys'].append(hdrinfo['timesys'])
        lcdict['lcinfo']['bjdoffset'].append(
            hdrinfo['bjdrefi'] + hdrinfo['bjdreff']
        )
        lcdict['lcinfo']['lcaperture'].append(lcaperturedata)
        lcdict['lcinfo']['aperpix_used'].append(hdrinfo['npixsap'])
        lcdict['lcinfo']['aperpix_unused'].append(hdrinfo['npixmiss'])
        # mean of the two CDELT axes, converted from deg/px to arcsec/px
        lcdict['lcinfo']['pixarcsec'].append(
            (np.abs(hdrinfo['cdelt1']) +
             np.abs(hdrinfo['cdelt2']))*3600.0/2.0
        )
        lcdict['lcinfo']['ndet'].append(ndet)
        lcdict['lcinfo']['exptime'].append(hdrinfo['exposure'])
        lcdict['lcinfo']['sector'].append(hdrinfo['sector'])
        lcdict['lcinfo']['camera'].append(hdrinfo['camera'])
        lcdict['lcinfo']['ccd'].append(hdrinfo['ccd'])

        lcdict['lcinfo']['date_obs_start'].append(hdrinfo['date-obs'])
        lcdict['lcinfo']['date_obs_end'].append(hdrinfo['date-end'])
        lcdict['lcinfo']['pixel_table_id'].append(hdrinfo['pxtable'])
        lcdict['lcinfo']['origin'].append(hdrinfo['origin'])
        lcdict['lcinfo']['datarelease'].append(hdrinfo['data_rel'])
        lcdict['lcinfo']['procversion'].append(hdrinfo['procver'])
        lcdict['lcinfo']['tic_version'].append(hdrinfo['ticver'])
        lcdict['lcinfo']['cr_mitigation'].append(hdrinfo['crmiten'])
        lcdict['lcinfo']['cr_blocksize'].append(hdrinfo['crblksz'])
        lcdict['lcinfo']['cr_spocclean'].append(hdrinfo['crspoc'])

        # update the varinfo for this light curve
        lcdict['varinfo']['cdpp0_5'].append(hdrinfo['cdpp0_5'])
        lcdict['varinfo']['cdpp1_0'].append(hdrinfo['cdpp1_0'])
        lcdict['varinfo']['cdpp2_0'].append(hdrinfo['cdpp2_0'])
        lcdict['varinfo']['pdcvar'].append(hdrinfo['pdcvar'])
        lcdict['varinfo']['pdcmethod'].append(hdrinfo['pdcmethd'])
        lcdict['varinfo']['target_flux_total_flux_ratio_in_aper'].append(
            hdrinfo['crowdsap']
        )
        lcdict['varinfo']['target_flux_fraction_in_aper'].append(
            hdrinfo['flfrcsap']
        )

        # update the light curve columns now
        for key in datakeys:
            if key.lower() in lcdict:
                lcdict[key.lower()] = (
                    np.concatenate((lcdict[key.lower()], lcdata[key]))
                )

        # all SAP columns are normalized by the same median, so compute it
        # once (hoisted out of the per-key loop below)
        if normalize:
            sapflux_median = np.nanmedian(lcdata['SAP_FLUX'])

        for key in sapkeys:

            if key.lower() in lcdict['sap']:

                # normalize the current flux measurements if needed
                if normalize and key in ('SAP_FLUX','SAP_FLUX_ERR',
                                         'SAP_BKG','SAP_BKG_ERR'):
                    thislcdata = lcdata[key] / sapflux_median
                else:
                    thislcdata = lcdata[key]

                lcdict['sap'][key.lower()] = (
                    np.concatenate((lcdict['sap'][key.lower()], thislcdata))
                )

        # same for the PDC columns: one median, hoisted
        if normalize:
            pdcsap_flux_median = np.nanmedian(lcdata['PDCSAP_FLUX'])

        for key in pdckeys:

            if key.lower() in lcdict['pdc']:

                # normalize the current flux measurements if needed
                if normalize and key in ('PDCSAP_FLUX','PDCSAP_FLUX_ERR'):
                    thislcdata = lcdata[key] / pdcsap_flux_median
                else:
                    thislcdata = lcdata[key]

                lcdict['pdc'][key.lower()] = (
                    np.concatenate((lcdict['pdc'][key.lower()], thislcdata))
                )

        # append some of the light curve information into existing numpy arrays
        # so we can sort on them later
        lcdict['exptime'] = np.concatenate(
            (lcdict['exptime'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['exposure'],
                          dtype=np.float64))
        )
        lcdict['sector'] = np.concatenate(
            (lcdict['sector'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['sector'],
                          dtype=np.int64))
        )
        lcdict['camera'] = np.concatenate(
            (lcdict['camera'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['camera'],
                          dtype=np.int64))
        )
        lcdict['ccd'] = np.concatenate(
            (lcdict['ccd'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['ccd'],
                          dtype=np.int64))
        )
        lcdict['pixel_table_id'] = np.concatenate(
            (lcdict['pixel_table_id'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['pxtable'],
                          dtype=np.int64))
        )
        lcdict['origin'] = np.concatenate(
            (lcdict['origin'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['origin'],
                          dtype='U100'))
        )
        lcdict['date_obs_start'] = np.concatenate(
            (lcdict['date_obs_start'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['date-obs'],
                          dtype='U100'))
        )
        lcdict['date_obs_end'] = np.concatenate(
            (lcdict['date_obs_end'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['date-end'],
                          dtype='U100'))
        )
        lcdict['procversion'] = np.concatenate(
            (lcdict['procversion'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['procver'],
                          dtype='U255'))
        )
        lcdict['datarelease'] = np.concatenate(
            (lcdict['datarelease'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['data_rel'],
                          dtype=np.int64))
        )

    # otherwise, this is a new lcdict
    else:

        # form the lcdict
        # the metadata is one-elem arrays because we might add on to them later
        lcdict = {
            'objectid':hdrinfo['object'],
            'lcinfo':{
                'timesys':[hdrinfo['timesys']],
                'bjdoffset':[hdrinfo['bjdrefi'] + hdrinfo['bjdreff']],
                'exptime':[hdrinfo['exposure']],
                'lcaperture':[lcaperturedata],
                'aperpix_used':[hdrinfo['npixsap']],
                'aperpix_unused':[hdrinfo['npixmiss']],
                'pixarcsec':[(np.abs(hdrinfo['cdelt1']) +
                              np.abs(hdrinfo['cdelt2']))*3600.0/2.0],
                'ndet':[ndet],
                'origin':[hdrinfo['origin']],
                'procversion':[hdrinfo['procver']],
                'datarelease':[hdrinfo['data_rel']],
                'sector':[hdrinfo['sector']],
                'camera':[hdrinfo['camera']],
                'ccd':[hdrinfo['ccd']],
                'pixel_table_id':[hdrinfo['pxtable']],
                'date_obs_start':[hdrinfo['date-obs']],
                'date_obs_end':[hdrinfo['date-end']],
                'tic_version':[hdrinfo['ticver']],
                'cr_mitigation':[hdrinfo['crmiten']],
                'cr_blocksize':[hdrinfo['crblksz']],
                'cr_spocclean':[hdrinfo['crspoc']],
            },
            'objectinfo':{
                'objectid':hdrinfo['object'],
                'ticid':hdrinfo['ticid'],
                'tessmag':hdrinfo['tessmag'],
                'ra':hdrinfo['ra_obj'],
                'decl':hdrinfo['dec_obj'],
                'pmra':hdrinfo['pmra'],
                'pmdecl':hdrinfo['pmdec'],
                'pmtotal':hdrinfo['pmtotal'],
                'star_teff':hdrinfo['teff'],
                'star_logg':hdrinfo['logg'],
                'star_mh':hdrinfo['mh'],
                'star_radius':hdrinfo['radius'],
                'observatory':'TESS',
                'telescope':'TESS photometer',
            },
            'varinfo':{
                'cdpp0_5':[hdrinfo['cdpp0_5']],
                'cdpp1_0':[hdrinfo['cdpp1_0']],
                'cdpp2_0':[hdrinfo['cdpp2_0']],
                'pdcvar':[hdrinfo['pdcvar']],
                'pdcmethod':[hdrinfo['pdcmethd']],
                'target_flux_total_flux_ratio_in_aper':[hdrinfo['crowdsap']],
                'target_flux_fraction_in_aper':[hdrinfo['flfrcsap']],
            },
            'sap':{},
            'pdc':{},
        }

        # get the LC columns
        for key in datakeys:
            lcdict[key.lower()] = lcdata[key]
        for key in sapkeys:
            lcdict['sap'][key.lower()] = lcdata[key]
        for key in pdckeys:
            lcdict['pdc'][key.lower()] = lcdata[key]

        # turn some of the light curve information into numpy arrays so we can
        # sort on them later
        lcdict['exptime'] = np.full_like(lcdict['time'],
                                         lcdict['lcinfo']['exptime'][0],
                                         dtype=np.float64)
        lcdict['sector'] = np.full_like(lcdict['time'],
                                        lcdict['lcinfo']['sector'][0],
                                        dtype=np.int64)
        lcdict['camera'] = np.full_like(lcdict['time'],
                                        lcdict['lcinfo']['camera'][0],
                                        dtype=np.int64)
        lcdict['ccd'] = np.full_like(lcdict['time'],
                                     lcdict['lcinfo']['ccd'][0],
                                     dtype=np.int64)
        lcdict['pixel_table_id'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['pixel_table_id'][0],
            dtype=np.int64,
        )
        lcdict['origin'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['origin'][0],
            dtype='U100',
        )
        lcdict['date_obs_start'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['date_obs_start'][0],
            dtype='U100',
        )
        lcdict['date_obs_end'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['date_obs_end'][0],
            dtype='U100',
        )
        lcdict['procversion'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['procversion'][0],
            dtype='U255',
        )
        lcdict['datarelease'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['datarelease'][0],
            dtype=np.int64,
        )

        # normalize the SAP and PDCSAP fluxes, errs, and backgrounds if needed
        if normalize:

            sapflux_median = np.nanmedian(lcdict['sap']['sap_flux'])
            pdcsap_flux_median = np.nanmedian(lcdict['pdc']['pdcsap_flux'])

            lcdict['sap']['sap_flux'] = (
                lcdict['sap']['sap_flux'] /
                sapflux_median
            )
            lcdict['sap']['sap_flux_err'] = (
                lcdict['sap']['sap_flux_err'] /
                sapflux_median
            )
            lcdict['sap']['sap_bkg'] = (
                lcdict['sap']['sap_bkg'] /
                sapflux_median
            )
            lcdict['sap']['sap_bkg_err'] = (
                lcdict['sap']['sap_bkg_err'] /
                sapflux_median
            )

            lcdict['pdc']['pdcsap_flux'] = (
                lcdict['pdc']['pdcsap_flux'] /
                pdcsap_flux_median
            )
            lcdict['pdc']['pdcsap_flux_err'] = (
                lcdict['pdc']['pdcsap_flux_err'] /
                pdcsap_flux_median
            )

    ## END OF LIGHT CURVE CONSTRUCTION ##

    # update the lcdict columns with the actual columns
    lcdict['columns'] = (
        [x.lower() for x in datakeys] +
        ['sap.%s' % x.lower() for x in sapkeys] +
        ['pdc.%s' % x.lower() for x in pdckeys] +
        ['exptime','sector','camera','ccd', 'pixel_table_id',
         'origin', 'date_obs_start', 'date_obs_end',
         'procversion', 'datarelease']
    )

    # update the ndet key in the objectinfo with the sum of all observations
    lcdict['objectinfo']['ndet'] = sum(lcdict['lcinfo']['ndet'])

    # filter the LC dict if requested
    if (filterqualityflags is not False or
            nanfilter is not None or
            timestoignore is not None):
        lcdict = filter_tess_lcdict(lcdict,
                                    filterqualityflags,
                                    nanfilter=nanfilter,
                                    timestoignore=timestoignore)

    # return the lcdict at the end
    return lcdict
def consolidate_tess_fitslc(lclist,
                            normalize=True,
                            filterqualityflags=False,
                            nanfilter=None,
                            timestoignore=None,
                            headerkeys=LCHEADERKEYS,
                            datakeys=LCDATAKEYS,
                            sapkeys=LCSAPKEYS,
                            pdckeys=LCPDCKEYS,
                            topkeys=LCTOPKEYS,
                            apkeys=LCAPERTUREKEYS):
    '''This consolidates a list of LCs for a single TIC object.

    NOTE: if light curve time arrays contain nans, these and their associated
    measurements will be sorted to the end of the final combined arrays.

    Parameters
    ----------

    lclist : list of str, or str
        `lclist` is either a list of actual light curve files or a string that
        is valid for glob.glob to search for and generate a light curve list
        based on the file glob. This is useful for consolidating LC FITS files
        across different TESS sectors for a single TIC ID using a glob like
        `*<TICID>*_lc.fits`.

    normalize : bool
        If True, then the light curve's SAP_FLUX and PDCSAP_FLUX measurements
        will be normalized to 1.0 by dividing out the median flux for the
        component light curve.

    filterqualityflags : bool
        If True, will remove any measurements that have non-zero quality flags
        present. This usually indicates an issue with the instrument or
        spacecraft.

    nanfilter : {'sap,pdc,time','sap,time','pdc,time'} or None
        Indicates the flux measurement type(s) to apply the NaN filtering to.

    timestoignore : list of tuples or None
        This is of the form::

            [(time1_start, time1_end), (time2_start, time2_end), ...]

        and indicates the start and end times to mask out of the final
        lcdict. Use this to remove anything that wasn't caught by the quality
        flags.

    headerkeys : list
        A list of FITS header keys that will be extracted from the FITS light
        curve file. These describe the observations. The default value for this
        is given in `LCHEADERKEYS` above.

    datakeys : list
        A list of FITS column names that correspond to the auxiliary
        measurements in the light curve. The default is `LCDATAKEYS` above.

    sapkeys : list
        A list of FITS column names that correspond to the SAP flux
        measurements in the light curve. The default is `LCSAPKEYS` above.

    pdckeys : list
        A list of FITS column names that correspond to the PDC flux
        measurements in the light curve. The default is `LCPDCKEYS` above.

    topkeys : list
        A list of FITS header keys that describe the object in the light
        curve. The default is `LCTOPKEYS` above.

    apkeys : list
        A list of FITS header keys that describe the flux measurement apertures
        used by the TESS pipeline. The default is `LCAPERTUREKEYS` above.

    Returns
    -------

    lcdict
        Returns an `lcdict` (this is useable by most astrobase functions for LC
        processing).

    '''

    # if the lclist is a string, assume that we're passing in a fileglob
    if isinstance(lclist, str):
        matching = glob.glob(lclist,
                             recursive=True)
        LOGINFO('found %s LCs: %r' % (len(matching), matching))
        if len(matching) == 0:
            LOGERROR('could not find any TESS LC files matching glob: %s' %
                     lclist)
            return None

    # if the lclist is an actual list of LCs, then use it directly
    else:
        matching = lclist

    # get the first file
    # FIX: pass the caller-provided key lists through -- the original
    # hard-coded the module-level defaults here, silently ignoring the
    # headerkeys/datakeys/sapkeys/pdckeys/topkeys/apkeys kwargs
    consolidated = read_tess_fitslc(matching[0],
                                    normalize=normalize,
                                    headerkeys=headerkeys,
                                    datakeys=datakeys,
                                    sapkeys=sapkeys,
                                    pdckeys=pdckeys,
                                    topkeys=topkeys,
                                    apkeys=apkeys)

    # append the rest of the files (no-op if there's only one)
    for lcf in matching[1:]:
        consolidated = read_tess_fitslc(lcf,
                                        appendto=consolidated,
                                        normalize=normalize,
                                        headerkeys=headerkeys,
                                        datakeys=datakeys,
                                        sapkeys=sapkeys,
                                        pdckeys=pdckeys,
                                        topkeys=topkeys,
                                        apkeys=apkeys)

    # get the sort indices. we use time for the columns and sectors for the
    # bits in lcinfo and varinfo
    LOGINFO('sorting by time...')

    # NOTE: nans in time will be sorted to the end of the array
    finiteind = np.isfinite(consolidated['time'])
    if np.sum(finiteind) < consolidated['time'].size:
        LOGWARNING('some time values are nan! '
                   'measurements at these times will be '
                   'sorted to the end of the column arrays.')

    # get the time sort index
    column_sort_ind = np.argsort(consolidated['time'])

    # sort the columns by time
    for col in consolidated['columns']:
        if '.' in col:
            key, subkey = col.split('.')
            consolidated[key][subkey] = (
                consolidated[key][subkey][column_sort_ind]
            )
        else:
            consolidated[col] = consolidated[col][column_sort_ind]

    # sort the per-file metadata lists by sector
    info_sort_ind = np.argsort(consolidated['lcinfo']['sector'])

    # sort the keys in lcinfo
    for key in consolidated['lcinfo']:
        consolidated['lcinfo'][key] = (
            np.array(consolidated['lcinfo'][key])[info_sort_ind].tolist()
        )

    # sort the keys in varinfo
    for key in consolidated['varinfo']:
        consolidated['varinfo'][key] = (
            np.array(consolidated['varinfo'][key])[info_sort_ind].tolist()
        )

    # filter the LC dict if requested
    # we do this at the end
    if (filterqualityflags is not False or
            nanfilter is not None or
            timestoignore is not None):
        consolidated = filter_tess_lcdict(consolidated,
                                          filterqualityflags,
                                          nanfilter=nanfilter,
                                          timestoignore=timestoignore)

    return consolidated
##################
## INPUT/OUTPUT ##
##################
def tess_lcdict_to_pkl(lcdict,
                       outfile=None):
    '''This writes the `lcdict` to a Python pickle.

    Parameters
    ----------

    lcdict : lcdict
        This is the input `lcdict` to write to a pickle.

    outfile : str or None
        If this is None, the object's ID will be determined from the `lcdict`
        and used to form the filename of the output pickle file. If this is a
        `str`, the provided filename will be used.

    Returns
    -------

    str
        The absolute path to the written pickle file.

    '''

    if not outfile:
        # strip spaces from the object ID so the filename has none
        outfile = '%s-tesslc.pkl' % lcdict['objectid'].replace(' ','')

    # we're using pickle.HIGHEST_PROTOCOL here, this will make Py3 pickles
    # unreadable for Python 2.7
    with open(outfile,'wb') as outfd:
        pickle.dump(lcdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

    return os.path.abspath(outfile)
def read_tess_pklc(picklefile):
    '''This turns the pickled lightcurve file back into an `lcdict`.

    Parameters
    ----------

    picklefile : str
        The path to a previously written TESS LC picklefile generated by
        `tess_lcdict_to_pkl` above. May be gzip-compressed (ends in '.gz').

    Returns
    -------

    lcdict
        Returns an `lcdict` (this is useable by most astrobase functions for LC
        processing).

    '''

    # transparently handle gzipped pickles
    if picklefile.endswith('.gz'):
        infd = gzip.open(picklefile, 'rb')
    else:
        infd = open(picklefile, 'rb')

    try:
        with infd:
            lcdict = pickle.load(infd)
    except UnicodeDecodeError:
        # fall back to latin1 for pickles written by Python 2
        with open(picklefile,'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')
        LOGWARNING('pickle %s was probably from Python 2 '
                   'and failed to load without using "latin1" encoding. '
                   'This is probably a numpy issue: '
                   'http://stackoverflow.com/q/11305790' % picklefile)

    return lcdict
################################
## TESS LIGHTCURVE PROCESSING ##
################################
def filter_tess_lcdict(lcdict,
                       filterqualityflags=True,
                       nanfilter='sap,pdc,time',
                       timestoignore=None,
                       quiet=False):
    '''This filters the provided TESS `lcdict`, removing nans and bad
    observations.

    By default, this function removes points in the TESS LC that have ANY
    quality flags set.

    Parameters
    ----------

    lcdict : lcdict
        An `lcdict` produced by `consolidate_tess_fitslc` or
        `read_tess_fitslc`.

    filterqualityflags : bool
        If True, will remove any measurements that have non-zero quality flags
        present. This usually indicates an issue with the instrument or
        spacecraft.

    nanfilter : {'sap,pdc,time','sap,time','pdc,time'} or None
        Indicates the flux measurement type(s) whose nans will drop the
        corresponding rows from all columns. If None, no nan-filtering is done.

    timestoignore : list of tuples or None
        This is of the form::

            [(time1_start, time1_end), (time2_start, time2_end), ...]

        and indicates the start and end times to mask out of the final
        lcdict. Use this to remove anything that wasn't caught by the quality
        flags.

    quiet : bool
        If True, suppresses the informational log messages about how many
        points each filtering stage removed.

    Returns
    -------

    lcdict
        Returns an `lcdict` (this is useable by most astrobase functions for LC
        processing). The `lcdict` is filtered IN PLACE!

    '''

    cols = lcdict['columns']

    # filter all bad LC points as noted by quality flags
    if filterqualityflags:

        nbefore = lcdict['time'].size
        filterind = lcdict['quality'] == 0

        for col in cols:
            if '.' in col:
                key, subkey = col.split('.')
                lcdict[key][subkey] = lcdict[key][subkey][filterind]
            else:
                lcdict[col] = lcdict[col][filterind]

        nafter = lcdict['time'].size

        if not quiet:
            LOGINFO('applied quality flag filter, '
                    'ndet before = %s, ndet after = %s'
                    % (nbefore, nafter))

    # build the finite-value mask for the requested flux types
    if nanfilter == 'sap,pdc,time':
        notnanind = (
            np.isfinite(lcdict['sap']['sap_flux']) &
            np.isfinite(lcdict['sap']['sap_flux_err']) &
            np.isfinite(lcdict['pdc']['pdcsap_flux']) &
            np.isfinite(lcdict['pdc']['pdcsap_flux_err']) &
            np.isfinite(lcdict['time'])
        )
    elif nanfilter == 'sap,time':
        notnanind = (
            np.isfinite(lcdict['sap']['sap_flux']) &
            np.isfinite(lcdict['sap']['sap_flux_err']) &
            np.isfinite(lcdict['time'])
        )
    elif nanfilter == 'pdc,time':
        notnanind = (
            np.isfinite(lcdict['pdc']['pdcsap_flux']) &
            np.isfinite(lcdict['pdc']['pdcsap_flux_err']) &
            np.isfinite(lcdict['time'])
        )
    elif nanfilter is None:
        pass
    else:
        raise NotImplementedError

    # remove nans from all columns
    if nanfilter:

        nbefore = lcdict['time'].size

        for col in cols:
            if '.' in col:
                key, subkey = col.split('.')
                lcdict[key][subkey] = lcdict[key][subkey][notnanind]
            else:
                lcdict[col] = lcdict[col][notnanind]

        nafter = lcdict['time'].size

        if not quiet:
            LOGINFO('removed nans, ndet before = %s, ndet after = %s'
                    % (nbefore, nafter))

    # exclude all times in timestoignore
    if (timestoignore and
            isinstance(timestoignore, list) and
            len(timestoignore) > 0):

        exclind = np.full_like(lcdict['time'],True).astype(bool)
        nbefore = exclind.size

        # AND together the complements of all the ignore windows
        for ignoretime in timestoignore:
            time0, time1 = ignoretime[0], ignoretime[1]
            thismask = ~((lcdict['time'] >= time0) & (lcdict['time'] <= time1))
            exclind = exclind & thismask

        # apply the masks
        for col in cols:
            if '.' in col:
                key, subkey = col.split('.')
                lcdict[key][subkey] = lcdict[key][subkey][exclind]
            else:
                lcdict[col] = lcdict[col][exclind]

        nafter = lcdict['time'].size

        if not quiet:
            LOGINFO('removed timestoignore, ndet before = %s, ndet after = %s'
                    % (nbefore, nafter))

    return lcdict