Source code for astrobase.astrotess

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# astrotess.py - Luke Bouma (luke@astro.princeton.edu) - 09/2018

'''
Contains various tools for analyzing TESS light curves.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


##################
## MAIN IMPORTS ##
##################

import pickle
import os.path
import gzip
import sys
import glob

import numpy as np
from astropy.io import fits as pyfits


#######################################
## UTILITY FUNCTIONS FOR FLUXES/MAGS ##
#######################################

[docs]def normalized_flux_to_mag(lcdict,
                           columns=('sap.sap_flux',
                                    'sap.sap_flux_err',
                                    'sap.sap_bkg',
                                    'sap.sap_bkg_err',
                                    'pdc.pdcsap_flux',
                                    'pdc.pdcsap_flux_err')):
    '''This converts the normalized fluxes in the TESS lcdicts to TESS mags.

    Uses the object's TESS mag stored in lcdict['objectinfo']['tessmag']::

        mag - object_tess_mag = -2.5 log (flux/median_flux)

    Parameters
    ----------

    lcdict : lcdict
        An `lcdict` produced by `read_tess_fitslc` or
        `consolidate_tess_fitslc`. This must have normalized fluxes in its
        measurement columns (use the `normalize` kwarg for these functions).

    columns : sequence of str
        The column keys of the normalized flux and background measurements in
        the `lcdict` to operate on and convert to magnitudes in TESS band (T).

    Returns
    -------

    lcdict
        The returned `lcdict` will contain extra columns corresponding to
        magnitudes for each input normalized flux/background column.

    '''

    tess_mag = lcdict['objectinfo']['tessmag']

    for key in columns:

        k1, k2 = key.split('.')

        if 'err' not in k2:

            lcdict[k1][k2.replace('flux','mag')] = (
                tess_mag - 2.5*np.log10(lcdict[k1][k2])
            )

        else:

            lcdict[k1][k2.replace('flux','mag')] = (
                - 2.5*np.log10(1.0 - lcdict[k1][k2])
            )

    return lcdict


#########################################################
## LCDICT MAKING FUNCTIONS FOR TESS HLSP LC.FITS FILES ##
#########################################################

# these appear to be similar to Kepler LCs, so we'll copy over stuff from
# astrokep.py

# this is the list of keys to pull out of the top header of the FITS
LCTOPKEYS = [
    'DATE-OBS',
    'DATE-END',
    'PROCVER',
    'ORIGIN',
    'DATA_REL',
    'TIMVERSN',
    'OBJECT',
    'TICID',
    'SECTOR',
    'CAMERA',
    'CCD',
    'PXTABLE',
    'RADESYS',
    'RA_OBJ',
    'DEC_OBJ',
    'EQUINOX',
    'PMRA',
    'PMDEC',
    'PMTOTAL',
    'TESSMAG',
    'TEFF',
    'LOGG',
    'MH',
    'RADIUS',
    'TICVER',
    'CRMITEN',
    'CRBLKSZ',
    'CRSPOC',
]

# this is the list of keys to pull out of the light curve header
LCHEADERKEYS = [
    'EXPOSURE',
    'TIMEREF',
    'TASSIGN',
    'TIMESYS',
    'BJDREFI',
    'BJDREFF',
    'TELAPSE',
    'LIVETIME',
    'INT_TIME',
    'NUM_FRM',
    'TIMEDEL',
    'BACKAPP',
    'DEADAPP',
    'VIGNAPP',
    'GAINA',
    'GAINB',
    'GAINC',
    'GAIND',
    'READNOIA',
    'READNOIB',
    'READNOIC',
    'READNOID',
    'CDPP0_5',
    'CDPP1_0',
    'CDPP2_0',
    'PDCVAR',
    'PDCMETHD',
    'CROWDSAP',
    'FLFRCSAP',
    'NSPSDDET',
    'NSPSDCOR'
]

# this is the list of keys to pull out of the light curve FITS table
LCDATAKEYS = [
    'TIME',
    'TIMECORR',
    'CADENCENO',
    'QUALITY',
    'PSF_CENTR1','PSF_CENTR1_ERR','PSF_CENTR2','PSF_CENTR2_ERR',
    'MOM_CENTR1','MOM_CENTR1_ERR','MOM_CENTR2','MOM_CENTR2_ERR',
    'POS_CORR1','POS_CORR2'
]

# this is the list of columns to use for fluxes, backgrounds, errors
LCSAPKEYS = ['SAP_FLUX','SAP_FLUX_ERR','SAP_BKG','SAP_BKG_ERR']
LCPDCKEYS = ['PDCSAP_FLUX','PDCSAP_FLUX_ERR']

# this is the list of keys to pull out of the aperture part of the light curve
# we also pull out the whole pixel mask, which looks something like:
#
# array([[65, 69, 69, 69, 69, 69, 69, 69, 69, 65, 65],
#        [69, 69, 69, 69, 65, 65, 65, 65, 69, 69, 65],
#        [65, 65, 65, 65, 65, 65, 65, 65, 65, 69, 65],
#        [65, 65, 65, 65, 75, 75, 65, 65, 65, 69, 65],
#        [65, 65, 65, 75, 75, 75, 75, 65, 65, 65, 65],
#        [65, 65, 65, 75, 75, 75, 75, 65, 65, 69, 65],
#        [65, 65, 65, 65, 75, 75, 65, 65, 65, 69, 65],
#        [65, 65, 65, 65, 65, 65, 65, 65, 65, 69, 65],
#        [69, 69, 69, 65, 69, 65, 65, 65, 69, 69, 65],
#        [65, 69, 69, 69, 69, 69, 65, 69, 69, 65, 65],
#        [65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65]], dtype=int32)
#
# FIXME: figure out what these values mean (probably flux-collected = 75 /
# flux-available = 69 / flux-in-stamp = 65). we use CDELT1 and CDELT2 below to
# get the pixel scale in arcsec/px
LCAPERTUREKEYS = ['NPIXSAP','NPIXMISS',
                  'CDELT1','CDELT2']


[docs]def read_tess_fitslc(lcfits,
                     headerkeys=LCHEADERKEYS,
                     datakeys=LCDATAKEYS,
                     sapkeys=LCSAPKEYS,
                     pdckeys=LCPDCKEYS,
                     topkeys=LCTOPKEYS,
                     apkeys=LCAPERTUREKEYS,
                     normalize=False,
                     appendto=None,
                     filterqualityflags=False,
                     nanfilter=None,
                     timestoignore=None):
    '''This extracts the light curve from a single TESS .lc.fits file.

    This works on the light curves available at MAST.

    TODO: look at:

    https://archive.stsci.edu/missions/tess/doc/EXP-TESS-ARC-ICD-TM-0014.pdf

    for details on the column descriptions and to fill in any other info we
    need.

    Parameters
    ----------

    lcfits : str
        The filename of a MAST Kepler/K2 light curve FITS file.

    headerkeys : list
        A list of FITS header keys that will be extracted from the FITS light
        curve file. These describe the observations. The default value for this
        is given in `LCHEADERKEYS` above.

    datakeys : list
        A list of FITS column names that correspond to the auxiliary
        measurements in the light curve. The default is `LCDATAKEYS` above.

    sapkeys : list
        A list of FITS column names that correspond to the SAP flux
        measurements in the light curve. The default is `LCSAPKEYS` above.

    pdckeys : list
        A list of FITS column names that correspond to the PDC flux
        measurements in the light curve. The default is `LCPDCKEYS` above.

    topkeys : list
        A list of FITS header keys that describe the object in the light
        curve. The default is `LCTOPKEYS` above.

    apkeys : list
        A list of FITS header keys that describe the flux measurement apertures
        used by the TESS pipeline. The default is `LCAPERTUREKEYS` above.

    normalize : bool
        If True, then the light curve's SAP_FLUX and PDCSAP_FLUX measurements
        will be normalized to 1.0 by dividing out the median flux for the
        component light curve.

    appendto : lcdict or None
        If appendto is an `lcdict`, will append measurements of this `lcdict` to
        that `lcdict`. This is used for consolidating light curves for the same
        object across different files (sectors/cameras/CCDs?). The appending
        does not care about the time order. To consolidate light curves in time
        order, use `consolidate_tess_fitslc` below.

    filterqualityflags : bool
        If True, will remove any measurements that have non-zero quality flags
        present. This usually indicates an issue with the instrument or
        spacecraft.

    nanfilter : {'sap','pdc','sap,pdc'} or None
        Indicates the flux measurement type(s) to apply the filtering to.

    timestoignore : list of tuples or None
        This is of the form::

            [(time1_start, time1_end), (time2_start, time2_end), ...]

        and indicates the start and end times to mask out of the final
        lcdict. Use this to remove anything that wasn't caught by the quality
        flags.

    Returns
    -------

    lcdict
        Returns an `lcdict` (this is useable by most astrobase functions for LC
        processing).

    '''

    # read the fits file
    hdulist = pyfits.open(lcfits)
    lchdr, lcdata = hdulist[1].header, hdulist[1].data
    lctophdr, lcaperturehdr, lcaperturedata = (hdulist[0].header,
                                               hdulist[2].header,
                                               hdulist[2].data)
    hdulist.close()

    hdrinfo = {}

    # now get the values we want from the header
    for key in headerkeys:
        if key in lchdr and lchdr[key] is not None:
            hdrinfo[key.lower()] = lchdr[key]
        else:
            hdrinfo[key.lower()] = None

    # get the number of detections
    ndet = lchdr['NAXIS2']

    # get the info from the topheader
    for key in topkeys:
        if key in lctophdr and lctophdr[key] is not None:
            hdrinfo[key.lower()] = lctophdr[key]
        else:
            hdrinfo[key.lower()] = None

    # get the info from the lcaperturehdr
    for key in lcaperturehdr:
        if key in lcaperturehdr and lcaperturehdr[key] is not None:
            hdrinfo[key.lower()] = lcaperturehdr[key]
        else:
            hdrinfo[key.lower()] = None

    # if we're appending to another lcdict
    if appendto and isinstance(appendto, dict):

        lcdict = appendto

        # update lcinfo
        lcdict['lcinfo']['timesys'].append(hdrinfo['timesys'])
        lcdict['lcinfo']['bjdoffset'].append(
            hdrinfo['bjdrefi'] + hdrinfo['bjdreff']
        )
        lcdict['lcinfo']['lcaperture'].append(lcaperturedata)
        lcdict['lcinfo']['aperpix_used'].append(hdrinfo['npixsap'])
        lcdict['lcinfo']['aperpix_unused'].append(hdrinfo['npixmiss'])
        lcdict['lcinfo']['pixarcsec'].append(
            (np.abs(hdrinfo['cdelt1']) +
             np.abs(hdrinfo['cdelt2']))*3600.0/2.0
        )
        lcdict['lcinfo']['ndet'].append(ndet)
        lcdict['lcinfo']['exptime'].append(hdrinfo['exposure'])
        lcdict['lcinfo']['sector'].append(hdrinfo['sector'])
        lcdict['lcinfo']['camera'].append(hdrinfo['camera'])
        lcdict['lcinfo']['ccd'].append(hdrinfo['ccd'])

        lcdict['lcinfo']['date_obs_start'].append(hdrinfo['date-obs'])
        lcdict['lcinfo']['date_obs_end'].append(hdrinfo['date-end'])
        lcdict['lcinfo']['pixel_table_id'].append(hdrinfo['pxtable'])
        lcdict['lcinfo']['origin'].append(hdrinfo['origin'])
        lcdict['lcinfo']['datarelease'].append(hdrinfo['data_rel'])
        lcdict['lcinfo']['procversion'].append(hdrinfo['procver'])

        lcdict['lcinfo']['tic_version'].append(hdrinfo['ticver'])
        lcdict['lcinfo']['cr_mitigation'].append(hdrinfo['crmiten'])
        lcdict['lcinfo']['cr_blocksize'].append(hdrinfo['crblksz'])
        lcdict['lcinfo']['cr_spocclean'].append(hdrinfo['crspoc'])

        # update the varinfo for this light curve
        lcdict['varinfo']['cdpp0_5'].append(hdrinfo['cdpp0_5'])
        lcdict['varinfo']['cdpp1_0'].append(hdrinfo['cdpp1_0'])
        lcdict['varinfo']['cdpp2_0'].append(hdrinfo['cdpp2_0'])
        lcdict['varinfo']['pdcvar'].append(hdrinfo['pdcvar'])
        lcdict['varinfo']['pdcmethod'].append(hdrinfo['pdcmethd'])
        lcdict['varinfo']['target_flux_total_flux_ratio_in_aper'].append(
            hdrinfo['crowdsap']
        )
        lcdict['varinfo']['target_flux_fraction_in_aper'].append(
            hdrinfo['flfrcsap']
        )

        # update the light curve columns now
        for key in datakeys:

            if key.lower() in lcdict:
                lcdict[key.lower()] = (
                    np.concatenate((lcdict[key.lower()], lcdata[key]))
                )

        for key in sapkeys:

            if key.lower() in lcdict['sap']:

                sapflux_median = np.nanmedian(lcdata['SAP_FLUX'])

                # normalize the current flux measurements if needed
                if normalize and key == 'SAP_FLUX':
                    thislcdata = lcdata[key] / sapflux_median
                elif normalize and key == 'SAP_FLUX_ERR':
                    thislcdata = lcdata[key] / sapflux_median
                elif normalize and key == 'SAP_BKG':
                    thislcdata = lcdata[key] / sapflux_median
                elif normalize and key == 'SAP_BKG_ERR':
                    thislcdata = lcdata[key] / sapflux_median
                else:
                    thislcdata = lcdata[key]

                lcdict['sap'][key.lower()] = (
                    np.concatenate((lcdict['sap'][key.lower()], thislcdata))
                )

        for key in pdckeys:

            if key.lower() in lcdict['pdc']:

                pdcsap_flux_median = np.nanmedian(lcdata['PDCSAP_FLUX'])

                # normalize the current flux measurements if needed
                if normalize and key == 'PDCSAP_FLUX':
                    thislcdata = lcdata[key] / pdcsap_flux_median
                elif normalize and key == 'PDCSAP_FLUX_ERR':
                    thislcdata = lcdata[key] / pdcsap_flux_median
                else:
                    thislcdata = lcdata[key]

                lcdict['pdc'][key.lower()] = (
                    np.concatenate((lcdict['pdc'][key.lower()], thislcdata))
                )

        # append some of the light curve information into existing numpy arrays
        # so we can sort on them later
        lcdict['exptime'] = np.concatenate(
            (lcdict['exptime'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['exposure'],
                          dtype=np.float64))
        )
        lcdict['sector'] = np.concatenate(
            (lcdict['sector'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['sector'],
                          dtype=np.int64))
        )
        lcdict['camera'] = np.concatenate(
            (lcdict['camera'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['camera'],
                          dtype=np.int64))
        )
        lcdict['ccd'] = np.concatenate(
            (lcdict['ccd'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['ccd'],
                          dtype=np.int64))
        )

        lcdict['pixel_table_id'] = np.concatenate(
            (lcdict['pixel_table_id'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['pxtable'],
                          dtype=np.int64))
        )
        lcdict['origin'] = np.concatenate(
            (lcdict['origin'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['origin'],
                          dtype='U100'))
        )
        lcdict['date_obs_start'] = np.concatenate(
            (lcdict['date_obs_start'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['date-obs'],
                          dtype='U100'))
        )
        lcdict['date_obs_end'] = np.concatenate(
            (lcdict['date_obs_end'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['date-end'],
                          dtype='U100'))
        )
        lcdict['procversion'] = np.concatenate(
            (lcdict['procversion'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['procver'],
                          dtype='U255'))
        )
        lcdict['datarelease'] = np.concatenate(
            (lcdict['datarelease'],
             np.full_like(lcdata['TIME'],
                          hdrinfo['data_rel'],
                          dtype=np.int64))
        )

    # otherwise, this is a new lcdict
    else:

        # form the lcdict
        # the metadata is one-elem arrays because we might add on to them later
        lcdict = {
            'objectid':hdrinfo['object'],
            'lcinfo':{
                'timesys':[hdrinfo['timesys']],
                'bjdoffset':[hdrinfo['bjdrefi'] + hdrinfo['bjdreff']],
                'exptime':[hdrinfo['exposure']],
                'lcaperture':[lcaperturedata],
                'aperpix_used':[hdrinfo['npixsap']],
                'aperpix_unused':[hdrinfo['npixmiss']],
                'pixarcsec':[(np.abs(hdrinfo['cdelt1']) +
                              np.abs(hdrinfo['cdelt2']))*3600.0/2.0],
                'ndet':[ndet],
                'origin':[hdrinfo['origin']],
                'procversion':[hdrinfo['procver']],
                'datarelease':[hdrinfo['data_rel']],
                'sector':[hdrinfo['sector']],
                'camera':[hdrinfo['camera']],
                'ccd':[hdrinfo['ccd']],
                'pixel_table_id':[hdrinfo['pxtable']],
                'date_obs_start':[hdrinfo['date-obs']],
                'date_obs_end':[hdrinfo['date-end']],
                'tic_version':[hdrinfo['ticver']],
                'cr_mitigation':[hdrinfo['crmiten']],
                'cr_blocksize':[hdrinfo['crblksz']],
                'cr_spocclean':[hdrinfo['crspoc']],
            },
            'objectinfo':{
                'objectid':hdrinfo['object'],
                'ticid':hdrinfo['ticid'],
                'tessmag':hdrinfo['tessmag'],
                'ra':hdrinfo['ra_obj'],
                'decl':hdrinfo['dec_obj'],
                'pmra':hdrinfo['pmra'],
                'pmdecl':hdrinfo['pmdec'],
                'pmtotal':hdrinfo['pmtotal'],
                'star_teff':hdrinfo['teff'],
                'star_logg':hdrinfo['logg'],
                'star_mh':hdrinfo['mh'],
                'star_radius':hdrinfo['radius'],
                'observatory':'TESS',
                'telescope':'TESS photometer',
            },
            'varinfo':{
                'cdpp0_5':[hdrinfo['cdpp0_5']],
                'cdpp1_0':[hdrinfo['cdpp1_0']],
                'cdpp2_0':[hdrinfo['cdpp2_0']],
                'pdcvar':[hdrinfo['pdcvar']],
                'pdcmethod':[hdrinfo['pdcmethd']],
                'target_flux_total_flux_ratio_in_aper':[hdrinfo['crowdsap']],
                'target_flux_fraction_in_aper':[hdrinfo['flfrcsap']],
            },
            'sap':{},
            'pdc':{},
        }

        # get the LC columns
        for key in datakeys:
            lcdict[key.lower()] = lcdata[key]
        for key in sapkeys:
            lcdict['sap'][key.lower()] = lcdata[key]
        for key in pdckeys:
            lcdict['pdc'][key.lower()] = lcdata[key]

        # turn some of the light curve information into numpy arrays so we can
        # sort on them later
        lcdict['exptime'] = np.full_like(lcdict['time'],
                                         lcdict['lcinfo']['exptime'][0],
                                         dtype=np.float64)
        lcdict['sector'] = np.full_like(lcdict['time'],
                                        lcdict['lcinfo']['sector'][0],
                                        dtype=np.int64)
        lcdict['camera'] = np.full_like(lcdict['time'],
                                        lcdict['lcinfo']['camera'][0],
                                        dtype=np.int64)
        lcdict['ccd'] = np.full_like(lcdict['time'],
                                     lcdict['lcinfo']['ccd'][0],
                                     dtype=np.int64)

        lcdict['pixel_table_id'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['pixel_table_id'][0],
            dtype=np.int64,
        )
        lcdict['origin'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['origin'][0],
            dtype='U100',
        )
        lcdict['date_obs_start'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['date_obs_start'][0],
            dtype='U100',
        )
        lcdict['date_obs_end'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['date_obs_end'][0],
            dtype='U100',
        )
        lcdict['procversion'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['procversion'][0],
            dtype='U255',
        )
        lcdict['datarelease'] = np.full_like(
            lcdict['time'],
            lcdict['lcinfo']['datarelease'][0],
            dtype=np.int64,
        )

        # normalize the SAP and PDCSAP fluxes, errs, and backgrounds if needed
        if normalize:

            sapflux_median = np.nanmedian(lcdict['sap']['sap_flux'])
            pdcsap_flux_median = np.nanmedian(lcdict['pdc']['pdcsap_flux'])

            lcdict['sap']['sap_flux'] = (
                lcdict['sap']['sap_flux'] /
                sapflux_median
            )
            lcdict['sap']['sap_flux_err'] = (
                lcdict['sap']['sap_flux_err'] /
                sapflux_median
            )

            lcdict['sap']['sap_bkg'] = (
                lcdict['sap']['sap_bkg'] /
                sapflux_median
            )
            lcdict['sap']['sap_bkg_err'] = (
                lcdict['sap']['sap_bkg_err'] /
                sapflux_median
            )

            lcdict['pdc']['pdcsap_flux'] = (
                lcdict['pdc']['pdcsap_flux'] /
                pdcsap_flux_median
            )
            lcdict['pdc']['pdcsap_flux_err'] = (
                lcdict['pdc']['pdcsap_flux_err'] /
                pdcsap_flux_median
            )

    ## END OF LIGHT CURVE CONSTRUCTION ##

    # update the lcdict columns with the actual columns
    lcdict['columns'] = (
        [x.lower() for x in datakeys] +
        ['sap.%s' % x.lower() for x in sapkeys] +
        ['pdc.%s' % x.lower() for x in pdckeys] +
        ['exptime','sector','camera','ccd', 'pixel_table_id',
         'origin', 'date_obs_start', 'date_obs_end',
         'procversion', 'datarelease']
    )

    # update the ndet key in the objectinfo with the sum of all observations
    lcdict['objectinfo']['ndet'] = sum(lcdict['lcinfo']['ndet'])

    # filter the LC dict if requested
    if (filterqualityflags is not False or
        nanfilter is not None or
        timestoignore is not None):
        lcdict = filter_tess_lcdict(lcdict,
                                    filterqualityflags,
                                    nanfilter=nanfilter,
                                    timestoignore=timestoignore)

    # return the lcdict at the end
    return lcdict


[docs]def consolidate_tess_fitslc(lclist,
                            normalize=True,
                            filterqualityflags=False,
                            nanfilter=None,
                            timestoignore=None,
                            headerkeys=LCHEADERKEYS,
                            datakeys=LCDATAKEYS,
                            sapkeys=LCSAPKEYS,
                            pdckeys=LCPDCKEYS,
                            topkeys=LCTOPKEYS,
                            apkeys=LCAPERTUREKEYS):
    '''This consolidates a list of LCs for a single TIC object.

    NOTE: if light curve time arrays contain nans, these and their associated
    measurements will be sorted to the end of the final combined arrays.

    Parameters
    ----------

    lclist : list of str, or str
        `lclist` is either a list of actual light curve files or a string that
        is valid for glob.glob to search for and generate a light curve list
        based on the file glob. This is useful for consolidating LC FITS files
        across different TESS sectors for a single TIC ID using a glob like
        `*<TICID>*_lc.fits`.

    normalize : bool
        If True, then the light curve's SAP_FLUX and PDCSAP_FLUX measurements
        will be normalized to 1.0 by dividing out the median flux for the
        component light curve.

    filterqualityflags : bool
        If True, will remove any measurements that have non-zero quality flags
        present. This usually indicates an issue with the instrument or
        spacecraft.

    nanfilter : {'sap','pdc','sap,pdc'} or None
        Indicates the flux measurement type(s) to apply the filtering to.

    timestoignore : list of tuples or None
        This is of the form::

            [(time1_start, time1_end), (time2_start, time2_end), ...]

        and indicates the start and end times to mask out of the final
        lcdict. Use this to remove anything that wasn't caught by the quality
        flags.

    headerkeys : list
        A list of FITS header keys that will be extracted from the FITS light
        curve file. These describe the observations. The default value for this
        is given in `LCHEADERKEYS` above.

    datakeys : list
        A list of FITS column names that correspond to the auxiliary
        measurements in the light curve. The default is `LCDATAKEYS` above.

    sapkeys : list
        A list of FITS column names that correspond to the SAP flux
        measurements in the light curve. The default is `LCSAPKEYS` above.

    pdckeys : list
        A list of FITS column names that correspond to the PDC flux
        measurements in the light curve. The default is `LCPDCKEYS` above.

    topkeys : list
        A list of FITS header keys that describe the object in the light
        curve. The default is `LCTOPKEYS` above.

    apkeys : list
        A list of FITS header keys that describe the flux measurement apertures
        used by the TESS pipeline. The default is `LCAPERTUREKEYS` above.

    Returns
    -------

    lcdict
        Returns an `lcdict` (this is useable by most astrobase functions for LC
        processing).

    '''

    # if the lclist is a string, assume that we're passing in a fileglob
    if isinstance(lclist, str):

        matching = glob.glob(lclist,
                             recursive=True)
        LOGINFO('found %s LCs: %r' % (len(matching), matching))

        if len(matching) == 0:
            LOGERROR('could not find any TESS LC files matching glob: %s' %
                     lclist)
            return None

    # if the lclist is an actual list of LCs, then use it directly
    else:

        matching = lclist

    # get the first file
    consolidated = read_tess_fitslc(matching[0],
                                    normalize=normalize,
                                    headerkeys=LCHEADERKEYS,
                                    datakeys=LCDATAKEYS,
                                    sapkeys=LCSAPKEYS,
                                    pdckeys=LCPDCKEYS,
                                    topkeys=LCTOPKEYS,
                                    apkeys=LCAPERTUREKEYS)

    # get the rest of the files
    if len(matching) > 1:

        for lcf in matching[1:]:

            consolidated = read_tess_fitslc(lcf,
                                            appendto=consolidated,
                                            normalize=normalize,
                                            headerkeys=LCHEADERKEYS,
                                            datakeys=LCDATAKEYS,
                                            sapkeys=LCSAPKEYS,
                                            pdckeys=LCPDCKEYS,
                                            topkeys=LCTOPKEYS,
                                            apkeys=LCAPERTUREKEYS)

    # get the sort indices. we use time for the columns and sectors for the
    # bits in lcinfo and varinfo
    LOGINFO('sorting by time...')

    # NOTE: nans in time will be sorted to the end of the array
    finiteind = np.isfinite(consolidated['time'])
    if np.sum(finiteind) < consolidated['time'].size:
        LOGWARNING('some time values are nan! '
                   'measurements at these times will be '
                   'sorted to the end of the column arrays.')

    # get the time sort index
    column_sort_ind = np.argsort(consolidated['time'])

    # sort the columns by time
    for col in consolidated['columns']:
        if '.' in col:
            key, subkey = col.split('.')
            consolidated[key][subkey] = (
                consolidated[key][subkey][column_sort_ind]
            )
        else:
            consolidated[col] = consolidated[col][column_sort_ind]

    info_sort_ind = np.argsort(consolidated['lcinfo']['sector'])

    # sort the keys in lcinfo
    for key in consolidated['lcinfo']:
        consolidated['lcinfo'][key] = (
            np.array(consolidated['lcinfo'][key])[info_sort_ind].tolist()
        )

    # sort the keys in varinfo
    for key in consolidated['varinfo']:
        consolidated['varinfo'][key] = (
            np.array(consolidated['varinfo'][key])[info_sort_ind].tolist()
        )

    # filter the LC dict if requested
    # we do this at the end
    if (filterqualityflags is not False or
        nanfilter is not None or
        timestoignore is not None):
        consolidated = filter_tess_lcdict(consolidated,
                                          filterqualityflags,
                                          nanfilter=nanfilter,
                                          timestoignore=timestoignore)

    return consolidated


##################
## INPUT/OUTPUT ##
##################

[docs]def tess_lcdict_to_pkl(lcdict,
                       outfile=None):
    '''This writes the `lcdict` to a Python pickle.

    Parameters
    ----------

    lcdict : lcdict
        This is the input `lcdict` to write to a pickle.

    outfile : str or None
        If this is None, the object's Kepler ID/EPIC ID will determined from the
        `lcdict` and used to form the filename of the output pickle file. If
        this is a `str`, the provided filename will be used.

    Returns
    -------

    str
        The absolute path to the written pickle file.

    '''

    if not outfile:
        outfile = '%s-tesslc.pkl' % lcdict['objectid'].replace(' ','')

    # we're using pickle.HIGHEST_PROTOCOL here, this will make Py3 pickles
    # unreadable for Python 2.7
    with open(outfile,'wb') as outfd:
        pickle.dump(lcdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

    return os.path.abspath(outfile)


[docs]def read_tess_pklc(picklefile):
    '''This turns the pickled lightcurve file back into an `lcdict`.

    Parameters
    ----------

    picklefile : str
        The path to a previously written Kepler LC picklefile generated by
        `tess_lcdict_to_pkl` above.

    Returns
    -------

    lcdict
        Returns an `lcdict` (this is useable by most astrobase functions for LC
        processing).

    '''

    if picklefile.endswith('.gz'):
        infd = gzip.open(picklefile, 'rb')
    else:
        infd = open(picklefile, 'rb')

    try:
        with infd:
            lcdict = pickle.load(infd)

    except UnicodeDecodeError:

        with open(picklefile,'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')

        LOGWARNING('pickle %s was probably from Python 2 '
                   'and failed to load without using "latin1" encoding. '
                   'This is probably a numpy issue: '
                   'http://stackoverflow.com/q/11305790' % picklefile)

    return lcdict


################################
## TESS LIGHTCURVE PROCESSING ##
################################

[docs]def filter_tess_lcdict(lcdict,
                       filterqualityflags=True,
                       nanfilter='sap,pdc,time',
                       timestoignore=None,
                       quiet=False):
    '''This filters the provided TESS `lcdict`, removing nans and bad
    observations.

    By default, this function removes points in the TESS LC that have ANY
    quality flags set.

    Parameters
    ----------

    lcdict : lcdict
        An `lcdict` produced by `consolidate_tess_fitslc` or
        `read_tess_fitslc`.

    filterflags : bool
        If True, will remove any measurements that have non-zero quality flags
        present. This usually indicates an issue with the instrument or
        spacecraft.

    nanfilter : {'sap','pdc','sap,pdc'}
        Indicates the flux measurement type(s) to apply the filtering to.

    timestoignore : list of tuples or None
        This is of the form::

            [(time1_start, time1_end), (time2_start, time2_end), ...]

        and indicates the start and end times to mask out of the final
        lcdict. Use this to remove anything that wasn't caught by the quality
        flags.

    Returns
    -------

    lcdict
        Returns an `lcdict` (this is useable by most astrobase functions for LC
        processing). The `lcdict` is filtered IN PLACE!

    '''

    cols = lcdict['columns']

    # filter all bad LC points as noted by quality flags
    if filterqualityflags:

        nbefore = lcdict['time'].size
        filterind = lcdict['quality'] == 0

        for col in cols:
            if '.' in col:
                key, subkey = col.split('.')
                lcdict[key][subkey] = lcdict[key][subkey][filterind]
            else:
                lcdict[col] = lcdict[col][filterind]

        nafter = lcdict['time'].size
        if not quiet:
            LOGINFO('applied quality flag filter, '
                    'ndet before = %s, ndet after = %s'
                    % (nbefore, nafter))

    if nanfilter and nanfilter == 'sap,pdc,time':
        notnanind = (
            np.isfinite(lcdict['sap']['sap_flux']) &
            np.isfinite(lcdict['sap']['sap_flux_err']) &
            np.isfinite(lcdict['pdc']['pdcsap_flux']) &
            np.isfinite(lcdict['pdc']['pdcsap_flux_err']) &
            np.isfinite(lcdict['time'])
        )
    elif nanfilter and nanfilter == 'sap,time':
        notnanind = (
            np.isfinite(lcdict['sap']['sap_flux']) &
            np.isfinite(lcdict['sap']['sap_flux_err']) &
            np.isfinite(lcdict['time'])
        )
    elif nanfilter and nanfilter == 'pdc,time':
        notnanind = (
            np.isfinite(lcdict['pdc']['pdcsap_flux']) &
            np.isfinite(lcdict['pdc']['pdcsap_flux_err']) &
            np.isfinite(lcdict['time'])
        )
    elif nanfilter is None:
        pass
    else:
        raise NotImplementedError

    # remove nans from all columns
    if nanfilter:

        nbefore = lcdict['time'].size
        for col in cols:
            if '.' in col:
                key, subkey = col.split('.')
                lcdict[key][subkey] = lcdict[key][subkey][notnanind]
            else:
                lcdict[col] = lcdict[col][notnanind]

        nafter = lcdict['time'].size

        if not quiet:
            LOGINFO('removed nans, ndet before = %s, ndet after = %s'
                    % (nbefore, nafter))

    # exclude all times in timestoignore
    if (timestoignore and
        isinstance(timestoignore, list) and
        len(timestoignore) > 0):

        exclind = np.full_like(lcdict['time'],True).astype(bool)
        nbefore = exclind.size

        # get all the masks
        for ignoretime in timestoignore:
            time0, time1 = ignoretime[0], ignoretime[1]
            thismask = ~((lcdict['time'] >= time0) & (lcdict['time'] <= time1))
            exclind = exclind & thismask

        # apply the masks
        for col in cols:
            if '.' in col:
                key, subkey = col.split('.')
                lcdict[key][subkey] = lcdict[key][subkey][exclind]
            else:
                lcdict[col] = lcdict[col][exclind]

        nafter = lcdict['time'].size
        if not quiet:
            LOGINFO('removed timestoignore, ndet before = %s, ndet after = %s'
                    % (nbefore, nafter))

    return lcdict