#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# checkplot.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Jan 2017
# License: MIT.
'''
The `checkplot_pickle` function takes, for a single object, an arbitrary number
of results from independent period-finding functions (e.g. BLS, PDM, AoV, GLS,
etc.) in periodbase, and generates a pickle file that contains object and
variability information, finder chart, mag series plot, and for each
period-finding result: a periodogram and phased mag series plots for an
arbitrary number of 'best periods'.
Checkplot pickles are intended for use with an external checkplot viewer: the
Tornado webapp `astrobase.cpserver.checkplotserver.py`, but you can also use the
`checkplot.pkl_png.checkplot_pickle_to_png` function to render checkplot pickles
to PNGs that will look something like::
[ finder ] [ objectinfo ] [ variableinfo ] [ unphased LC ]
[ periodogram1 ] [ phased LC P1 ] [ phased LC P2 ] [ phased LC P3 ]
[ periodogram2 ] [ phased LC P1 ] [ phased LC P2 ] [ phased LC P3 ]
.
.
[ periodogramN ] [ phased LC P1 ] [ phased LC P2 ] [ phased LC P3 ]
for N independent period-finding methods producing:
- periodogram1,2,3...N: the periodograms from each method
- phased LC P1,P2,P3: the phased lightcurves using the best 3 peaks in each
periodogram
'''
#############
## LOGGING ##
#############
import logging
from astrobase import log_sub, log_fmt, log_date_fmt
DEBUG = False
if DEBUG:
level = logging.DEBUG
else:
level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
level=level,
style=log_sub,
format=log_fmt,
datefmt=log_date_fmt,
)
LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception
#############
## IMPORTS ##
#############
import os
import os.path
import gzip
import hashlib
import pickle
# we're going to plot using Agg only
import matplotlib
matplotlib.use('Agg')
# import this to check if stimes, smags, serrs are Column objects
from astropy.table import Column as AstColumn
###################
## LOCAL IMPORTS ##
###################
from ..lcmath import normalize_magseries, sigclip_magseries
from ..varclass.varfeatures import all_nonperiodic_features
#
# import the checkplot pickle helper functions
#
from .pkl_io import (
_read_checkplot_picklefile,
_write_checkplot_picklefile
)
from .pkl_utils import (
_pkl_finder_objectinfo,
_pkl_periodogram,
_pkl_magseries_plot,
_pkl_phased_magseries_plot
)
from .pkl_xmatch import xmatch_external_catalogs
#############################
## CHECKPLOT DICT FUNCTION ##
#############################
[docs]def checkplot_dict(
lspinfolist,
times,
mags,
errs,
fast_mode=False,
magsarefluxes=False,
nperiodstouse=3,
objectinfo=None,
deredden_object=True,
custom_bandpasses=None,
gaia_submit_timeout=10.0,
gaia_submit_tries=3,
gaia_max_timeout=180.0,
gaia_mirror=None,
complete_query_later=True,
varinfo=None,
getvarfeatures=True,
lclistpkl=None,
nbrradiusarcsec=60.0,
maxnumneighbors=5,
xmatchinfo=None,
xmatchradiusarcsec=3.0,
lcfitfunc=None,
lcfitparams=None,
externalplots=None,
findercmap='gray_r',
finderconvolve=None,
findercachedir='~/.astrobase/stamp-cache',
normto='globalmedian',
normmingap=4.0,
sigclip=4.0,
varepoch='min',
phasewrap=True,
phasesort=True,
phasebin=0.002,
minbinelems=7,
plotxlim=(-0.8,0.8),
xliminsetmode=False,
plotdpi=100,
bestperiodhighlight=None,
xgridlines=None,
mindet=99,
verbose=True
):
'''This writes a multiple lspinfo checkplot to a dict.
This function can take input from multiple lspinfo dicts (e.g. a list of
output dicts or gzipped pickles of dicts from independent runs of BLS, PDM,
AoV, or GLS period-finders in periodbase).
NOTE: if `lspinfolist` contains more than one lspinfo object with the same
lspmethod ('pdm','gls','sls','aov','bls'), the latest one in the list will
overwrite the earlier ones.
The output dict contains all the plots (magseries and phased magseries),
periodograms, object information, variability information, light curves, and
phased light curves. This can be written to:
- a pickle with `checkplot_pickle` below
- a PNG with `checkplot.pkl_png.checkplot_pickle_to_png`
Parameters
----------
lspinfolist : list of dicts
This is a list of dicts containing period-finder results ('lspinfo'
dicts). These can be from any of the period-finder methods in
astrobase.periodbase. To incorporate external period-finder results into
checkplots, these dicts must be of the form below, including at least
the keys indicated here::
{'periods': np.array of all periods searched by the period-finder,
'lspvals': np.array of periodogram power value for each period,
'bestperiod': a float value that is the period with the highest
peak in the periodogram, i.e. the most-likely actual
period,
'method': a three-letter code naming the period-finder used; must
be one of the keys in the
`astrobase.periodbase.METHODLABELS` dict,
'nbestperiods': a list of the periods corresponding to periodogram
peaks (`nbestlspvals` below) to annotate on the
periodogram plot so they can be called out
visually,
'nbestlspvals': a list of the power values associated with
periodogram peaks to annotate on the periodogram
plot so they can be called out visually; should be
the same length as `nbestperiods` above}
`nbestperiods` and `nbestlspvals` in each lspinfo dict must have at
least as many elements as the `nperiodstouse` kwarg to this function.
times,mags,errs : np.arrays
The magnitude/flux time-series to process for this checkplot along with
their associated measurement errors.
fast_mode : bool or float
This runs the external catalog operations in a "fast" mode, with short
timeouts and not trying to hit external catalogs that take a long time
to respond.
If this is set to True, the default settings for the external requests
will then become::
skyview_lookup = False
skyview_timeout = 45.0
skyview_retry_failed = False
dust_timeout = 10.0
gaia_submit_timeout = 7.0
gaia_max_timeout = 10.0
gaia_submit_tries = 2
complete_query_later = False
search_simbad = False
If this is a float, will run in "fast" mode with the provided timeout
value in seconds and the following settings::
skyview_lookup = True
skyview_timeout = fast_mode
skyview_retry_failed = False
dust_timeout = fast_mode
gaia_submit_timeout = 0.66*fast_mode
gaia_max_timeout = fast_mode
gaia_submit_tries = 2
complete_query_later = False
search_simbad = False
magsarefluxes : bool
If True, indicates the input time-series is fluxes and not mags so the
plot y-axis direction and range can be set appropriately.
nperiodstouse : int
This controls how many 'best' periods to make phased LC plots for. By
default, this is the 3 best. If this is set to None, all 'best' periods
present in each lspinfo dict's 'nbestperiods' key will be processed for
this checkplot.
objectinfo : dict or None
This is a dict containing information on the object whose light
curve is being processed. This function will then be able to
look up and download a finder chart for this object and write
that to the output checkplotdict. External services such as
GAIA, SIMBAD, TIC, etc. will also be used to look up this object
by its coordinates, and will add in information available from
those services.
This dict must be of the form and contain at least the keys described
below::
{'objectid': the name of the object,
'ra': the right ascension of the object in decimal degrees,
'decl': the declination of the object in decimal degrees,
'ndet': the number of observations of this object}
You can also provide magnitudes and proper motions of the object using
the following keys and the appropriate values in the `objectinfo`
dict. These will be used to calculate colors, total and reduced proper
motion, etc. and display these in the output checkplot PNG::
'pmra' -> the proper motion in mas/yr in right ascension,
'pmdecl' -> the proper motion in mas/yr in declination,
'umag' -> U mag -> colors: U-B, U-V, U-g
'bmag' -> B mag -> colors: U-B, B-V
'vmag' -> V mag -> colors: U-V, B-V, V-R, V-I, V-K
'rmag' -> R mag -> colors: V-R, R-I
'imag' -> I mag -> colors: g-I, V-I, R-I, B-I
'jmag' -> 2MASS J mag -> colors: J-H, J-K, g-J, i-J
'hmag' -> 2MASS H mag -> colors: J-H, H-K
'kmag' -> 2MASS Ks mag -> colors: g-Ks, H-Ks, J-Ks, V-Ks
'sdssu' -> SDSS u mag -> colors: u-g, u-V
'sdssg' -> SDSS g mag -> colors: g-r, g-i, g-K, u-g, U-g, g-J
'sdssr' -> SDSS r mag -> colors: r-i, g-r
'sdssi' -> SDSS i mag -> colors: r-i, i-z, g-i, i-J, i-W1
'sdssz' -> SDSS z mag -> colors: i-z, z-W2, g-z
'ujmag' -> UKIRT J mag -> colors: J-H, H-K, J-K, g-J, i-J
'uhmag' -> UKIRT H mag -> colors: J-H, H-K
'ukmag' -> UKIRT K mag -> colors: g-K, H-K, J-K, V-K
'irac1' -> Spitzer IRAC1 mag -> colors: i-I1, I1-I2
'irac2' -> Spitzer IRAC2 mag -> colors: I1-I2, I2-I3
'irac3' -> Spitzer IRAC3 mag -> colors: I2-I3
'irac4' -> Spitzer IRAC4 mag -> colors: I3-I4
'wise1' -> WISE W1 mag -> colors: i-W1, W1-W2
'wise2' -> WISE W2 mag -> colors: W1-W2, W2-W3
'wise3' -> WISE W3 mag -> colors: W2-W3
'wise4' -> WISE W4 mag -> colors: W3-W4
If you have magnitude measurements in other bands, use the
`custom_bandpasses` kwarg to pass these in.
If this is None, no object information will be incorporated into the
checkplot (kind of making it effectively useless for anything other than
glancing at the phased light curves at various 'best' periods from the
period-finder results).
deredden_object : bool
If this is True, will use the 2MASS DUST service to get extinction
coefficients in various bands, and then try to deredden the magnitudes
and colors of the object already present in the checkplot's objectinfo
dict.
custom_bandpasses : dict
This is a dict used to provide custom bandpass definitions for any
magnitude measurements in the objectinfo dict that are not automatically
recognized by :py:func:`astrobase.varclass.starfeatures.color_features`.
gaia_submit_timeout : float
Sets the timeout in seconds to use when submitting a request to look up
the object's information to the GAIA service. Note that if `fast_mode`
is set, this is ignored.
gaia_submit_tries : int
Sets the maximum number of times the GAIA services will be contacted to
obtain this object's information. If `fast_mode` is set, this is
ignored, and the services will be contacted only once (meaning that a
failure to respond will be silently ignored and no GAIA data will be
added to the checkplot's objectinfo dict).
gaia_max_timeout : float
Sets the timeout in seconds to use when waiting for the GAIA service to
respond to our request for the object's information. Note that if
`fast_mode` is set, this is ignored.
gaia_mirror : str or None
This sets the GAIA mirror to use. This is a key in the
`services.gaia.GAIA_URLS` dict which defines the URLs to hit for each
mirror.
complete_query_later : bool
If this is True, saves the state of GAIA queries that are not yet
complete when `gaia_max_timeout` is reached while waiting for the GAIA
service to respond to our request. A later call for GAIA info on the
same object will attempt to pick up the results from the existing query
if it's completed. If `fast_mode` is True, this is ignored.
varinfo : dict
If this is None, a blank dict of the form below will be added to the
checkplotdict::
{'objectisvar': None -> variability flag (None indicates unset),
'vartags': CSV str containing variability type tags from review,
'varisperiodic': None -> periodic variability flag (None -> unset),
'varperiod': the period associated with the periodic variability,
'varepoch': the epoch associated with the periodic variability}
If you provide a dict matching this format in this kwarg, this will be
passed unchanged to the output checkplotdict produced.
getvarfeatures : bool
If this is True, several light curve variability features for this
object will be calculated and added to the output checkpotdict as
checkplotdict['varinfo']['features']. This uses the function
`varclass.varfeatures.all_nonperiodic_features` so see its docstring for
the measures that are calculated (e.g. Stetson J indices, dispersion
measures, etc.)
lclistpkl : dict or str
If this is provided, must be a dict resulting from reading a catalog
produced by the `lcproc.catalogs.make_lclist` function or a str path
pointing to the pickle file produced by that function. This catalog is
used to find neighbors of the current object in the current light curve
collection. Looking at neighbors of the object within the radius
specified by `nbrradiusarcsec` is useful for light curves produced by
instruments that have a large pixel scale, so are susceptible to
blending of variability and potential confusion of neighbor variability
with that of the actual object being looked at. If this is None, no
neighbor lookups will be performed.
nbrradiusarcsec : flaot
The radius in arcseconds to use for a search conducted around the
coordinates of this object to look for any potential confusion and
blending of variability amplitude caused by their proximity.
maxnumneighbors : int
The maximum number of neighbors that will have their light curves and
magnitudes noted in this checkplot as potential blends with the target
object.
xmatchinfo : str or dict
This is either the xmatch dict produced by the function
`load_xmatch_external_catalogs` above, or the path to the xmatch info
pickle file produced by that function.
xmatchradiusarcsec : float
This is the cross-matching radius to use in arcseconds.
lcfitfunc : Python function or None
If provided, this should be a Python function that is used to fit a
model to the light curve. This fit is then overplotted for each phased
light curve in the checkplot. This function should have the following
signature:
`def lcfitfunc(times, mags, errs, period, **lcfitparams)`
where `lcfitparams` encapsulates all external parameters (i.e. number of
knots for a spline function, the degree of a Legendre polynomial fit,
etc., planet transit parameters) This function should return a Python
dict with the following structure (similar to the functions in
`astrobase.lcfit`) and at least the keys below::
{'fittype':<str: name of fit method>,
'fitchisq':<float: the chi-squared value of the fit>,
'fitredchisq':<float: the reduced chi-squared value of the fit>,
'fitinfo':{'fitmags':<ndarray: model mags/fluxes from fit func>},
'magseries':{'times':<ndarray: times where fitmags are evaluated>}}
Additional keys in the dict returned from this function can include
`fitdict['fitinfo']['finalparams']` for the final model fit parameters
(this will be used by the checkplotserver if present),
`fitdict['fitinfo']['fitepoch']` for the minimum light epoch returned by
the model fit, among others.
In any case, the output dict of `lcfitfunc` will be copied to the output
checkplotdict as::
checkplotdict[lspmethod][periodind]['lcfit'][<fittype>]
for each phased light curve.
lcfitparams : dict
A dict containing the LC fit parameters to use when calling the function
provided in `lcfitfunc`. This contains key-val pairs corresponding to
parameter names and their respective initial values to be used by the
fit function.
externalplots : list of tuples of str
If provided, this is a list of 4-element tuples containing:
1. path to PNG of periodogram from an external period-finding method
2. path to PNG of best period phased LC from the external period-finder
3. path to PNG of 2nd-best phased LC from the external period-finder
4. path to PNG of 3rd-best phased LC from the external period-finder
This can be used to incorporate external period-finding method results
into the output checkplot pickle or exported PNG to allow for comparison
with astrobase results.
Example of externalplots::
[('/path/to/external/bls-periodogram.png',
'/path/to/external/bls-phasedlc-plot-bestpeak.png',
'/path/to/external/bls-phasedlc-plot-peak2.png',
'/path/to/external/bls-phasedlc-plot-peak3.png'),
('/path/to/external/pdm-periodogram.png',
'/path/to/external/pdm-phasedlc-plot-bestpeak.png',
'/path/to/external/pdm-phasedlc-plot-peak2.png',
'/path/to/external/pdm-phasedlc-plot-peak3.png'),
...]
If `externalplots` is provided here, these paths will be stored in the
output checkplotdict. The `checkplot.pkl_png.checkplot_pickle_to_png`
function can then automatically retrieve these plot PNGs and put
them into the exported checkplot PNG.
findercmap : str or matplotlib.cm.ColorMap object
The Colormap object to use for the finder chart image.
finderconvolve : astropy.convolution.Kernel object or None
If not None, the Kernel object to use for convolving the finder image.
findercachedir : str
The path to the astrobase cache directory for finder chart downloads
from the NASA SkyView service.
normto : {'globalmedian', 'zero'} or a float
These are specified as below:
- 'globalmedian' -> norms each mag to the global median of the LC column
- 'zero' -> norms each mag to zero
- a float -> norms each mag to this specified float value.
normmingap : float
This defines how much the difference between consecutive measurements is
allowed to be to consider them as parts of different timegroups. By
default it is set to 4.0 days.
sigclip : float or int or sequence of two floats/ints or None
If a single float or int, a symmetric sigma-clip will be performed using
the number provided as the sigma-multiplier to cut out from the input
time-series.
If a list of two ints/floats is provided, the function will perform an
'asymmetric' sigma-clip. The first element in this list is the sigma
value to use for fainter flux/mag values; the second element in this
list is the sigma value to use for brighter flux/mag values. For
example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
dimmings and greater than 3-sigma brightenings. Here the meaning of
"dimming" and "brightening" is set by *physics* (not the magnitude
system), which is why the `magsarefluxes` kwarg must be correctly set.
If `sigclip` is None, no sigma-clipping will be performed, and the
time-series (with non-finite elems removed) will be passed through to
the output.
varepoch : 'min' or float or list of lists or None
The epoch to use for this phased light curve plot tile. If this is a
float, will use the provided value directly. If this is 'min', will
automatically figure out the time-of-minimum of the phased light
curve. If this is None, will use the mimimum value of `stimes` as the
epoch of the phased light curve plot. If this is a list of lists, will
use the provided value of `lspmethodind` to look up the current
period-finder method and the provided value of `periodind` to look up
the epoch associated with that method and the current period. This is
mostly only useful when `twolspmode` is True.
phasewrap : bool
If this is True, the phased time-series will be wrapped around
phase 0.0.
phasesort : bool
If True, will sort the phased light curve in order of increasing phase.
phasebin: float
The bin size to use to group together measurements closer than this
amount in phase. This is in units of phase. If this is a float, a
phase-binned version of the phased light curve will be overplotted on
top of the regular phased light curve.
minbinelems : int
The minimum number of elements required per phase bin to include it in
the phased LC plot.
plotxlim : sequence of two floats or None
The x-range (min, max) of the phased light curve plot. If None, will be
determined automatically.
xliminsetmode : bool
If this is True, the generated phased light curve plot will use the
values of `plotxlim` as the main plot x-axis limits (i.e. zoomed-in if
`plotxlim` is a range smaller than the full phase range), and will show
the full phased light curve plot as an smaller inset. Useful for
planetary transit light curves.
plotdpi : int
The resolution of the output plot PNGs in dots per inch.
bestperiodhighlight : str or None
If not None, this is a str with a matplotlib color specification to use
as the background color to highlight the phased light curve plot of the
'best' period and epoch combination. If None, no highlight will be
applied.
xgridlines : list of floats or None
If this is provided, must be a list of floats corresponding to the phase
values where to draw vertical dashed lines as a means of highlighting
these.
mindet : int
The minimum of observations the input object's mag/flux time-series must
have for this function to plot its light curve and phased light
curve. If the object has less than this number, no light curves will be
plotted, but the checkplotdict will still contain all of the other
information.
verbose : bool
If True, will indicate progress and warn about problems.
Returns
-------
dict
Returns a checkplotdict.
'''
# if an objectinfo dict is absent, we'll generate a fake objectid based on
# the second five time and mag array values. this should be OK to ID the
# object across repeated runs of this function with the same times, mags,
# errs, but should provide enough uniqueness otherwise (across different
# times/mags array inputs). this is all done so we can still save checkplots
# correctly to pickles after reviewing them using checkplotserver
try:
objuuid = hashlib.sha512(times[5:10].tobytes() +
mags[5:10].tobytes()).hexdigest()[:5]
except Exception:
if verbose:
LOGWARNING('times, mags, and errs may have too few items')
objuuid = hashlib.sha512(times.tobytes() +
mags.tobytes()).hexdigest()[:5]
if (objectinfo is None):
if verbose:
LOGWARNING('no objectinfo dict provided as kwarg, '
'adding a randomly generated objectid')
objectinfo = {'objectid':objuuid}
# special for HAT stuff, eventually we should add objectid to
# lcd['objectinfo'] there as well
elif (isinstance(objectinfo, dict) and 'hatid' in objectinfo):
objectinfo['objectid'] = objectinfo['hatid']
elif ((isinstance(objectinfo, dict) and 'objectid' not in objectinfo) or
(isinstance(objectinfo, dict) and 'objectid' in objectinfo and
(objectinfo['objectid'] is None or objectinfo['objectid'] == ''))):
if verbose:
LOGWARNING('adding a randomly generated objectid '
'since none was provided in objectinfo dict')
objectinfo['objectid'] = objuuid
# 0. get the objectinfo and finder chart and initialize the checkplotdict
checkplotdict = _pkl_finder_objectinfo(
objectinfo,
varinfo,
findercmap,
finderconvolve,
sigclip,
normto,
normmingap,
deredden_object=deredden_object,
custom_bandpasses=custom_bandpasses,
lclistpkl=lclistpkl,
nbrradiusarcsec=nbrradiusarcsec,
maxnumneighbors=maxnumneighbors,
plotdpi=plotdpi,
verbose=verbose,
findercachedir=findercachedir,
gaia_submit_timeout=gaia_submit_timeout,
gaia_submit_tries=gaia_submit_tries,
gaia_max_timeout=gaia_max_timeout,
gaia_mirror=gaia_mirror,
complete_query_later=complete_query_later,
fast_mode=fast_mode
)
# try again to get the right objectid
if (objectinfo and isinstance(objectinfo, dict) and
'objectid' in objectinfo and objectinfo['objectid']):
checkplotdict['objectid'] = objectinfo['objectid']
# filter the input times, mags, errs; do sigclipping and normalization
stimes, smags, serrs = sigclip_magseries(times,
mags,
errs,
magsarefluxes=magsarefluxes,
sigclip=sigclip)
# fail early if not enough light curve points
if ((stimes is None) or (smags is None) or (serrs is None) or
(stimes.size < 49) or (smags.size < 49) or (serrs.size < 49)):
LOGERROR("one or more of times, mags, errs appear to be None "
"after sig-clipping. are the measurements all nan? "
"can't make a checkplot for this objectid: %s" %
checkplotdict['objectid'])
checkplotdict['magseries'] = None
checkplotdict['status'] = 'failed: LC points appear to be all nan'
return checkplotdict
# this may fix some unpickling issues for astropy.table.Column objects
# we convert them back to ndarrays
if isinstance(stimes, AstColumn):
stimes = stimes.data
LOGWARNING('times is an astropy.table.Column object, '
'changing to numpy array because of '
'potential unpickling issues')
if isinstance(smags, AstColumn):
smags = smags.data
LOGWARNING('mags is an astropy.table.Column object, '
'changing to numpy array because of '
'potential unpickling issues')
if isinstance(serrs, AstColumn):
serrs = serrs.data
LOGWARNING('errs is an astropy.table.Column object, '
'changing to numpy array because of '
'potential unpickling issues')
# report on how sigclip went
if verbose:
LOGINFO('sigclip = %s: before = %s observations, '
'after = %s observations' %
(sigclip, len(times), len(stimes)))
# take care of the normalization
if normto is not False:
stimes, smags = normalize_magseries(stimes, smags,
normto=normto,
magsarefluxes=magsarefluxes,
mingap=normmingap)
# make sure we have some lightcurve points to plot after sigclip
if len(stimes) > mindet:
# 1. get the mag series plot using these filtered stimes, smags, serrs
magseriesdict = _pkl_magseries_plot(stimes, smags, serrs,
plotdpi=plotdpi,
magsarefluxes=magsarefluxes)
# update the checkplotdict
checkplotdict.update(magseriesdict)
# 2. for each lspinfo in lspinfolist, read it in (from pkl or pkl.gz
# if necessary), make the periodogram, make the phased mag series plots
# for each of the nbestperiods in each lspinfo dict
checkplot_pfmethods = []
for lspind, lspinfo in enumerate(lspinfolist):
# get the LSP from a pickle file transparently
if isinstance(lspinfo,str) and os.path.exists(lspinfo):
LOGINFO('loading LSP info from pickle %s' % lspinfo)
if '.gz' in lspinfo:
with gzip.open(lspinfo,'rb') as infd:
lspinfo = pickle.load(infd)
else:
with open(lspinfo,'rb') as infd:
lspinfo = pickle.load(infd)
# make the periodogram first
# we'll prepend the lspmethod index to allow for multiple same
# lspmethods
override_pfmethod = '%s-%s' % (lspind, lspinfo['method'])
periodogramdict = _pkl_periodogram(
lspinfo,
plotdpi=plotdpi,
override_pfmethod=override_pfmethod
)
# update the checkplotdict.
checkplotdict.update(periodogramdict)
# now, make the phased light curve plots for each of the
# nbestperiods from this periodogram
for nbpind, nbperiod in enumerate(
lspinfo['nbestperiods'][:nperiodstouse]
):
# if there's a function to use for fitting, do the fit
if lcfitfunc:
try:
if lcfitparams is None:
lcfitparams = {}
overplotfit = lcfitfunc(stimes,
smags,
serrs,
nbperiod,
**lcfitparams)
except Exception:
LOGEXCEPTION('the light curve fitting function '
'failed, not plotting a fit over the '
'phased light curve')
overplotfit = None
else:
overplotfit = None
# get the varepoch from a run of bls_snr if available. this
# allows us to use the correct transit center epochs if
# calculated using bls_snr and added back to the kbls function
# result dicts
if (lspinfo is not None and
'bls' in lspinfo['method'] and
'epochs' in lspinfo):
thisvarepoch = lspinfo['epochs'][nbpind]
if verbose:
LOGINFO(
'using pre-calculated transit-center epoch value: '
'%.6f from kbls.bls_snr() for period: %.5f'
% (thisvarepoch, nbperiod)
)
else:
thisvarepoch = varepoch
# this updates things as it runs
checkplotdict = _pkl_phased_magseries_plot(
checkplotdict,
lspinfo['method'],
nbpind,
stimes, smags, serrs,
nbperiod, thisvarepoch,
lspmethodind=lspind,
phasewrap=phasewrap,
phasesort=phasesort,
phasebin=phasebin,
minbinelems=minbinelems,
plotxlim=plotxlim,
overplotfit=overplotfit,
plotdpi=plotdpi,
bestperiodhighlight=bestperiodhighlight,
magsarefluxes=magsarefluxes,
xliminsetmode=xliminsetmode,
xgridlines=xgridlines,
verbose=verbose,
override_pfmethod=override_pfmethod,
)
# if there's an snr key for this lspmethod, add the info in it to
# the checkplotdict as well
if 'snr' in lspinfo:
if override_pfmethod in checkplotdict:
checkplotdict[override_pfmethod]['snr'] = (
lspinfo['snr']
)
if 'transitdepth' in lspinfo:
if override_pfmethod in checkplotdict:
checkplotdict[override_pfmethod]['transitdepth'] = (
lspinfo['transitdepth']
)
if 'transitduration' in lspinfo:
if override_pfmethod in checkplotdict:
checkplotdict[override_pfmethod]['transitduration'] = (
lspinfo['transitduration']
)
checkplot_pfmethods.append(override_pfmethod)
#
# end of processing each pfmethod
#
## update the checkplot dict with some other stuff that's needed by
## checkplotserver
# 3. add a comments key:val
checkplotdict['comments'] = None
# 4. calculate some variability features
if getvarfeatures is True:
checkplotdict['varinfo']['features'] = all_nonperiodic_features(
stimes,
smags,
serrs,
magsarefluxes=magsarefluxes,
)
# 5. add a signals key:val. this will be used by checkplotserver's
# pre-whitening and masking functions. these will write to
# checkplotdict['signals']['whiten'] and
# checkplotdict['signals']['mask'] respectively.
checkplotdict['signals'] = {}
# 6. add any externalplots if we have them
checkplotdict['externalplots'] = []
if (externalplots and
isinstance(externalplots, list) and
len(externalplots) > 0):
for externalrow in externalplots:
if all(os.path.exists(erowfile) for erowfile in externalrow):
if verbose:
LOGINFO('adding external plots: %s to checkplot dict' %
repr(externalrow))
checkplotdict['externalplots'].append(externalrow)
else:
LOGWARNING('could not add some external '
'plots in: %s to checkplot dict'
% repr(externalrow))
# 7. do any xmatches required
if xmatchinfo is not None:
checkplotdict = xmatch_external_catalogs(
checkplotdict,
xmatchinfo,
xmatchradiusarcsec=xmatchradiusarcsec
)
# the checkplotdict now contains everything we need
contents = sorted(checkplotdict.keys())
checkplotdict['status'] = 'ok: contents are %s' % contents
if verbose:
LOGINFO('checkplot dict complete for %s' %
checkplotdict['objectid'])
LOGINFO('checkplot dict contents: %s' % contents)
# 8. update the pfmethods key
checkplotdict['pfmethods'] = checkplot_pfmethods
# otherwise, we don't have enough LC points, return nothing
else:
LOGERROR('not enough light curve points for %s, have %s, need %s' %
(checkplotdict['objectid'],len(stimes),mindet))
checkplotdict['magseries'] = None
checkplotdict['status'] = 'failed: not enough LC points'
# at the end, return the dict
return checkplotdict
################################
## CHECKPLOT PICKLE FUNCTIONS ##
################################
[docs]def checkplot_pickle(
lspinfolist,
times,
mags,
errs,
fast_mode=False,
magsarefluxes=False,
nperiodstouse=3,
objectinfo=None,
deredden_object=True,
custom_bandpasses=None,
gaia_submit_timeout=10.0,
gaia_submit_tries=3,
gaia_max_timeout=180.0,
gaia_mirror=None,
complete_query_later=True,
varinfo=None,
getvarfeatures=True,
lclistpkl=None,
nbrradiusarcsec=60.0,
maxnumneighbors=5,
xmatchinfo=None,
xmatchradiusarcsec=3.0,
lcfitfunc=None,
lcfitparams=None,
externalplots=None,
findercmap='gray_r',
finderconvolve=None,
findercachedir='~/.astrobase/stamp-cache',
normto='globalmedian',
normmingap=4.0,
sigclip=4.0,
varepoch='min',
phasewrap=True,
phasesort=True,
phasebin=0.002,
minbinelems=7,
plotxlim=(-0.8,0.8),
xliminsetmode=False,
plotdpi=100,
bestperiodhighlight=None,
xgridlines=None,
mindet=99,
verbose=True,
outfile=None,
outgzip=False,
pickleprotocol=None,
returndict=False
):
'''This writes a multiple lspinfo checkplot to a (gzipped) pickle file.
This function can take input from multiple lspinfo dicts (e.g. a list of
output dicts or gzipped pickles of dicts from independent runs of BLS, PDM,
AoV, or GLS period-finders in periodbase).
NOTE: if `lspinfolist` contains more than one lspinfo object with the same
lspmethod ('pdm','gls','sls','aov','bls'), the latest one in the list will
overwrite the earlier ones.
The output dict contains all the plots (magseries and phased magseries),
periodograms, object information, variability information, light curves, and
phased light curves. This can be written to:
- a pickle with `checkplot_pickle` below
- a PNG with `checkplot.pkl_png.checkplot_pickle_to_png`
Parameters
----------
lspinfolist : list of dicts
This is a list of dicts containing period-finder results ('lspinfo'
dicts). These can be from any of the period-finder methods in
astrobase.periodbase. To incorporate external period-finder results into
checkplots, these dicts must be of the form below, including at least
the keys indicated here::
{'periods': np.array of all periods searched by the period-finder,
'lspvals': np.array of periodogram power value for each period,
'bestperiod': a float value that is the period with the highest
peak in the periodogram, i.e. the most-likely actual
period,
'method': a three-letter code naming the period-finder used; must
be one of the keys in the
`astrobase.periodbase.METHODLABELS` dict,
'nbestperiods': a list of the periods corresponding to periodogram
peaks (`nbestlspvals` below) to annotate on the
periodogram plot so they can be called out
visually,
'nbestlspvals': a list of the power values associated with
periodogram peaks to annotate on the periodogram
plot so they can be called out visually; should be
the same length as `nbestperiods` above}
`nbestperiods` and `nbestlspvals` in each lspinfo dict must have at
least as many elements as the `nperiodstouse` kwarg to this function.
times,mags,errs : np.arrays
The magnitude/flux time-series to process for this checkplot along with
their associated measurement errors.
fast_mode : bool or float
This runs the external catalog operations in a "fast" mode, with short
timeouts and not trying to hit external catalogs that take a long time
to respond.
If this is set to True, the default settings for the external requests
will then become::
skyview_lookup = False
skyview_timeout = 45.0
skyview_retry_failed = False
dust_timeout = 10.0
gaia_submit_timeout = 7.0
gaia_max_timeout = 10.0
gaia_submit_tries = 2
complete_query_later = False
search_simbad = False
If this is a float, will run in "fast" mode with the provided timeout
value in seconds and the following settings::
skyview_lookup = True
skyview_timeout = fast_mode
skyview_retry_failed = False
dust_timeout = fast_mode
gaia_submit_timeout = 0.66*fast_mode
gaia_max_timeout = fast_mode
gaia_submit_tries = 2
complete_query_later = False
search_simbad = False
magsarefluxes : bool
If True, indicates the input time-series is fluxes and not mags so the
plot y-axis direction and range can be set appropriately.
nperiodstouse : int
This controls how many 'best' periods to make phased LC plots for. By
default, this is the 3 best. If this is set to None, all 'best' periods
present in each lspinfo dict's 'nbestperiods' key will be processed for
this checkplot.
objectinfo : dict or None
If provided, this is a dict containing information on the object whose
light curve is being processed. This function will then be able to look
up and download a finder chart for this object and write that to the
output checkplotdict. External services such as GAIA, SIMBAD, TIC@MAST,
etc. will also be used to look up this object by its coordinates, and
will add in information available from those services.
The `objectinfo` dict must be of the form and contain at least the keys
described below::
{'objectid': the name of the object,
'ra': the right ascension of the object in decimal degrees,
'decl': the declination of the object in decimal degrees,
'ndet': the number of observations of this object}
You can also provide magnitudes and proper motions of the object using
the following keys and the appropriate values in the `objectinfo`
dict. These will be used to calculate colors, total and reduced proper
motion, etc. and display these in the output checkplot PNG::
'pmra' -> the proper motion in mas/yr in right ascension,
'pmdecl' -> the proper motion in mas/yr in the declination,
'umag' -> U mag -> colors: U-B, U-V, U-g
'bmag' -> B mag -> colors: U-B, B-V
'vmag' -> V mag -> colors: U-V, B-V, V-R, V-I, V-K
'rmag' -> R mag -> colors: V-R, R-I
'imag' -> I mag -> colors: g-I, V-I, R-I, B-I
'jmag' -> 2MASS J mag -> colors: J-H, J-K, g-J, i-J
'hmag' -> 2MASS H mag -> colors: J-H, H-K
'kmag' -> 2MASS Ks mag -> colors: g-Ks, H-Ks, J-Ks, V-Ks
'sdssu' -> SDSS u mag -> colors: u-g, u-V
'sdssg' -> SDSS g mag -> colors: g-r, g-i, g-K, u-g, U-g, g-J
'sdssr' -> SDSS r mag -> colors: r-i, g-r
'sdssi' -> SDSS i mag -> colors: r-i, i-z, g-i, i-J, i-W1
'sdssz' -> SDSS z mag -> colors: i-z, z-W2, g-z
'ujmag' -> UKIRT J mag -> colors: J-H, H-K, J-K, g-J, i-J
'uhmag' -> UKIRT H mag -> colors: J-H, H-K
'ukmag' -> UKIRT K mag -> colors: g-K, H-K, J-K, V-K
'irac1' -> Spitzer IRAC1 mag -> colors: i-I1, I1-I2
'irac2' -> Spitzer IRAC2 mag -> colors: I1-I2, I2-I3
'irac3' -> Spitzer IRAC3 mag -> colors: I2-I3
'irac4' -> Spitzer IRAC4 mag -> colors: I3-I4
'wise1' -> WISE W1 mag -> colors: i-W1, W1-W2
'wise2' -> WISE W2 mag -> colors: W1-W2, W2-W3
'wise3' -> WISE W3 mag -> colors: W2-W3
'wise4' -> WISE W4 mag -> colors: W3-W4
If you have magnitude measurements in other bands, use the
`custom_bandpasses` kwarg to pass these in.
If this is None, no object information will be incorporated into the
checkplot (kind of making it effectively useless for anything other than
glancing at the phased light curves at various 'best' periods from the
period-finder results).
deredden_object : bool
If this is True, will use the 2MASS DUST service to get extinction
coefficients in various bands, and then try to deredden the magnitudes
and colors of the object already present in the checkplot's objectinfo
dict.
custom_bandpasses : dict
This is a dict used to provide custom bandpass definitions for any
magnitude measurements in the objectinfo dict that are not automatically
recognized by :py:func:`astrobase.varclass.starfeatures.color_features`.
gaia_submit_timeout : float
Sets the timeout in seconds to use when submitting a request to look up
the object's information to the GAIA service. Note that if `fast_mode`
is set, this is ignored.
gaia_submit_tries : int
Sets the maximum number of times the GAIA services will be contacted to
obtain this object's information. If `fast_mode` is set, this is
ignored, and the services will be contacted only once (meaning that a
failure to respond will be silently ignored and no GAIA data will be
added to the checkplot's objectinfo dict).
gaia_max_timeout : float
Sets the timeout in seconds to use when waiting for the GAIA service to
respond to our request for the object's information. Note that if
`fast_mode` is set, this is ignored.
gaia_mirror : str or None
This sets the GAIA mirror to use. This is a key in the
`services.gaia.GAIA_URLS` dict which defines the URLs to hit for each
mirror.
complete_query_later : bool
If this is True, saves the state of GAIA queries that are not yet
complete when `gaia_max_timeout` is reached while waiting for the GAIA
service to respond to our request. A later call for GAIA info on the
same object will attempt to pick up the results from the existing query
if it's completed. If `fast_mode` is True, this is ignored.
varinfo : dict
If this is None, a blank dict of the form below will be added to the
checkplotdict::
{'objectisvar': None -> variability flag (None indicates unset),
'vartags': CSV str containing variability type tags from review,
'varisperiodic': None -> periodic variability flag (None -> unset),
'varperiod': the period associated with the periodic variability,
'varepoch': the epoch associated with the periodic variability}
If you provide a dict matching this format in this kwarg, this will be
passed unchanged to the output checkplotdict produced.
getvarfeatures : bool
If this is True, several light curve variability features for this
object will be calculated and added to the output checkpotdict as
checkplotdict['varinfo']['features']. This uses the function
`varclass.varfeatures.all_nonperiodic_features` so see its docstring for
the measures that are calculated (e.g. Stetson J indices, dispersion
measures, etc.)
lclistpkl : dict or str
If this is provided, must be a dict resulting from reading a catalog
produced by the `lcproc.catalogs.make_lclist` function or a str path
pointing to the pickle file produced by that function. This catalog is
used to find neighbors of the current object in the current light curve
collection. Looking at neighbors of the object within the radius
specified by `nbrradiusarcsec` is useful for light curves produced by
instruments that have a large pixel scale, so are susceptible to
blending of variability and potential confusion of neighbor variability
with that of the actual object being looked at. If this is None, no
neighbor lookups will be performed.
nbrradiusarcsec : flaot
The radius in arcseconds to use for a search conducted around the
coordinates of this object to look for any potential confusion and
blending of variability amplitude caused by their proximity.
maxnumneighbors : int
The maximum number of neighbors that will have their light curves and
magnitudes noted in this checkplot as potential blends with the target
object.
xmatchinfo : str or dict
This is either the xmatch dict produced by the function
`load_xmatch_external_catalogs` above, or the path to the xmatch info
pickle file produced by that function.
xmatchradiusarcsec : float
This is the cross-matching radius to use in arcseconds.
lcfitfunc : Python function or None
If provided, this should be a Python function that is used to fit a
model to the light curve. This fit is then overplotted for each phased
light curve in the checkplot. This function should have the following
signature:
`def lcfitfunc(times, mags, errs, period, **lcfitparams)`
where `lcfitparams` encapsulates all external parameters (i.e. number of
knots for a spline function, the degree of a Legendre polynomial fit,
etc., planet transit parameters) This function should return a Python
dict with the following structure (similar to the functions in
`astrobase.lcfit`) and at least the keys below::
{'fittype':<str: name of fit method>,
'fitchisq':<float: the chi-squared value of the fit>,
'fitredchisq':<float: the reduced chi-squared value of the fit>,
'fitinfo':{'fitmags':<ndarray: model mags/fluxes from fit func>},
'magseries':{'times':<ndarray: times where fitmags are evaluated>}}
Additional keys in the dict returned from this function can include
`fitdict['fitinfo']['finalparams']` for the final model fit parameters
(this will be used by the checkplotserver if present),
`fitdict['fitinfo']['fitepoch']` for the minimum light epoch returned by
the model fit, among others.
In any case, the output dict of `lcfitfunc` will be copied to the output
checkplotdict as
`checkplotdict[lspmethod][periodind]['lcfit'][<fittype>]` for each
phased light curve.
lcfitparams : dict
A dict containing the LC fit parameters to use when calling the function
provided in `lcfitfunc`. This contains key-val pairs corresponding to
parameter names and their respective initial values to be used by the
fit function.
externalplots : list of tuples of str
If provided, this is a list of 4-element tuples containing:
1. path to PNG of periodogram from an external period-finding method
2. path to PNG of best period phased LC from the external period-finder
3. path to PNG of 2nd-best phased LC from the external period-finder
4. path to PNG of 3rd-best phased LC from the external period-finder
This can be used to incorporate external period-finding method results
into the output checkplot pickle or exported PNG to allow for comparison
with astrobase results. Example of `externalplots`::
[('/path/to/external/bls-periodogram.png',
'/path/to/external/bls-phasedlc-plot-bestpeak.png',
'/path/to/external/bls-phasedlc-plot-peak2.png',
'/path/to/external/bls-phasedlc-plot-peak3.png'),
('/path/to/external/pdm-periodogram.png',
'/path/to/external/pdm-phasedlc-plot-bestpeak.png',
'/path/to/external/pdm-phasedlc-plot-peak2.png',
'/path/to/external/pdm-phasedlc-plot-peak3.png'),
...]
If `externalplots` is provided here, these paths will be stored in the
output checkplotdict. The `checkplot.pkl_png.checkplot_pickle_to_png`
function can then automatically retrieve these plot PNGs and put
them into the exported checkplot PNG.
findercmap : str or matplotlib.cm.ColorMap object
The Colormap object to use for the finder chart image.
finderconvolve : astropy.convolution.Kernel object or None
If not None, the Kernel object to use for convolving the finder image.
findercachedir : str
The path to the astrobase cache directory for finder chart downloads
from the NASA SkyView service.
normto : {'globalmedian', 'zero'} or a float
This specifies the normalization target::
'globalmedian' -> norms each mag to global median of the LC column
'zero' -> norms each mag to zero
a float -> norms each mag to this specified float value.
normmingap : float
This defines how much the difference between consecutive measurements is
allowed to be to consider them as parts of different timegroups. By
default it is set to 4.0 days.
sigclip : float or int or sequence of two floats/ints or None
If a single float or int, a symmetric sigma-clip will be performed using
the number provided as the sigma-multiplier to cut out from the input
time-series.
If a list of two ints/floats is provided, the function will perform an
'asymmetric' sigma-clip. The first element in this list is the sigma
value to use for fainter flux/mag values; the second element in this
list is the sigma value to use for brighter flux/mag values. For
example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
dimmings and greater than 3-sigma brightenings. Here the meaning of
"dimming" and "brightening" is set by *physics* (not the magnitude
system), which is why the `magsarefluxes` kwarg must be correctly set.
If `sigclip` is None, no sigma-clipping will be performed, and the
time-series (with non-finite elems removed) will be passed through to
the output.
varepoch : 'min' or float or list of lists or None
The epoch to use for this phased light curve plot tile. If this is a
float, will use the provided value directly. If this is 'min', will
automatically figure out the time-of-minimum of the phased light
curve. If this is None, will use the mimimum value of `stimes` as the
epoch of the phased light curve plot. If this is a list of lists, will
use the provided value of `lspmethodind` to look up the current
period-finder method and the provided value of `periodind` to look up
the epoch associated with that method and the current period. This is
mostly only useful when `twolspmode` is True.
phasewrap : bool
If this is True, the phased time-series will be wrapped around
phase 0.0.
phasesort : bool
If True, will sort the phased light curve in order of increasing phase.
phasebin: float
The bin size to use to group together measurements closer than this
amount in phase. This is in units of phase. If this is a float, a
phase-binned version of the phased light curve will be overplotted on
top of the regular phased light curve.
minbinelems : int
The minimum number of elements required per phase bin to include it in
the phased LC plot.
plotxlim : sequence of two floats or None
The x-range (min, max) of the phased light curve plot. If None, will be
determined automatically.
xliminsetmode : bool
If this is True, the generated phased light curve plot will use the
values of `plotxlim` as the main plot x-axis limits (i.e. zoomed-in if
`plotxlim` is a range smaller than the full phase range), and will show
the full phased light curve plot as an smaller inset. Useful for
planetary transit light curves.
plotdpi : int
The resolution of the output plot PNGs in dots per inch.
bestperiodhighlight : str or None
If not None, this is a str with a matplotlib color specification to use
as the background color to highlight the phased light curve plot of the
'best' period and epoch combination. If None, no highlight will be
applied.
xgridlines : list of floats or None
If this is provided, must be a list of floats corresponding to the phase
values where to draw vertical dashed lines as a means of highlighting
these.
mindet : int
The minimum of observations the input object's mag/flux time-series must
have for this function to plot its light curve and phased light
curve. If the object has less than this number, no light curves will be
plotted, but the checkplotdict will still contain all of the other
information.
verbose : bool
If True, will indicate progress and warn about problems.
outfile : str or None
The name of the output checkplot pickle file. If this is None, will
write the checkplot pickle to file called 'checkplot.pkl' in the current
working directory.
outgzip : bool
This controls whether to gzip the output pickle. It turns out that this
is the slowest bit in the output process, so if you're after speed, best
not to use this. This is False by default since it turns out that gzip
actually doesn't save that much space (29 MB vs. 35 MB for the average
checkplot pickle).
pickleprotocol : int or None
This sets the pickle file protocol to use when writing the pickle:
If None, will choose a protocol using the following rules:
- 4 -> default in Python >= 3.4 - fast but incompatible with Python 2
- 3 -> default in Python 3.0-3.3 - mildly fast
- 2 -> default in Python 2 - very slow, but compatible with Python 2/3
The default protocol kwarg is None, this will make an automatic choice
for pickle protocol that's best suited for the version of Python in
use. Note that this will make pickles generated by Py3 incompatible with
Py2.
returndict : bool
If this is True, will return the checkplotdict instead of returning the
filename of the output checkplot pickle.
Returns
-------
dict or str
If returndict is False, will return the path to the generated checkplot
pickle file. If returndict is True, will return the checkplotdict
instead.
'''
# call checkplot_dict for most of the work
checkplotdict = checkplot_dict(
lspinfolist,
times,
mags,
errs,
magsarefluxes=magsarefluxes,
nperiodstouse=nperiodstouse,
objectinfo=objectinfo,
deredden_object=deredden_object,
custom_bandpasses=custom_bandpasses,
gaia_submit_timeout=gaia_submit_timeout,
gaia_submit_tries=gaia_submit_tries,
gaia_max_timeout=gaia_max_timeout,
gaia_mirror=gaia_mirror,
complete_query_later=complete_query_later,
varinfo=varinfo,
getvarfeatures=getvarfeatures,
lclistpkl=lclistpkl,
nbrradiusarcsec=nbrradiusarcsec,
maxnumneighbors=maxnumneighbors,
xmatchinfo=xmatchinfo,
xmatchradiusarcsec=xmatchradiusarcsec,
lcfitfunc=lcfitfunc,
lcfitparams=lcfitparams,
externalplots=externalplots,
findercmap=findercmap,
finderconvolve=finderconvolve,
findercachedir=findercachedir,
normto=normto,
normmingap=normmingap,
sigclip=sigclip,
varepoch=varepoch,
phasewrap=phasewrap,
phasesort=phasesort,
phasebin=phasebin,
minbinelems=minbinelems,
plotxlim=plotxlim,
xliminsetmode=xliminsetmode,
plotdpi=plotdpi,
bestperiodhighlight=bestperiodhighlight,
xgridlines=xgridlines,
mindet=mindet,
verbose=verbose,
fast_mode=fast_mode
)
# if no pickle protocol is specified, use v4
if not pickleprotocol:
pickleprotocol = 4
# generate the output file path
if outgzip:
# generate the outfile filename
if (not outfile and
len(lspinfolist) > 0 and
isinstance(lspinfolist[0], str)):
plotfpath = os.path.join(os.path.dirname(lspinfolist[0]),
'checkplot-%s.pkl.gz' %
checkplotdict['objectid'])
elif outfile:
plotfpath = outfile
else:
plotfpath = 'checkplot.pkl.gz'
else:
# generate the outfile filename
if (not outfile and
len(lspinfolist) > 0 and
isinstance(lspinfolist[0], str)):
plotfpath = os.path.join(os.path.dirname(lspinfolist[0]),
'checkplot-%s.pkl' %
checkplotdict['objectid'])
elif outfile:
plotfpath = outfile
else:
plotfpath = 'checkplot.pkl'
# write the completed checkplotdict to a gzipped pickle
picklefname = _write_checkplot_picklefile(checkplotdict,
outfile=plotfpath,
protocol=pickleprotocol,
outgzip=outgzip)
# at the end, return the dict and filename if asked for
if returndict:
if verbose:
LOGINFO('checkplot done -> %s' % picklefname)
return checkplotdict, picklefname
# otherwise, just return the filename
else:
# just to make sure: free up space
del checkplotdict
if verbose:
LOGINFO('checkplot done -> %s' % picklefname)
return picklefname
[docs]def checkplot_pickle_update(
currentcp,
updatedcp,
outfile=None,
outgzip=False,
pickleprotocol=None,
verbose=True
):
'''This updates the current checkplotdict with updated values provided.
Parameters
----------
currentcp : dict or str
This is either a checkplotdict produced by `checkplot_pickle` above or a
checkplot pickle file produced by the same function. This checkplot will
be updated from the `updatedcp` checkplot.
updatedcp : dict or str
This is either a checkplotdict produced by `checkplot_pickle` above or a
checkplot pickle file produced by the same function. This checkplot will
be the source of the update to the `currentcp` checkplot.
outfile : str or None
The name of the output checkplot pickle file. The function will output
the new checkplot gzipped pickle file to `outfile` if outfile is a
filename. If `currentcp` is a file and `outfile`, this will be set to
that filename, so the function updates it in place.
outgzip : bool
This controls whether to gzip the output pickle. It turns out that this
is the slowest bit in the output process, so if you're after speed, best
not to use this. This is False by default since it turns out that gzip
actually doesn't save that much space (29 MB vs. 35 MB for the average
checkplot pickle).
pickleprotocol : int or None
This sets the pickle file protocol to use when writing the pickle:
If None, will choose a protocol using the following rules:
- 4 -> default in Python >= 3.4 - fast but incompatible with Python 2
- 3 -> default in Python 3.0-3.3 - mildly fast
- 2 -> default in Python 2 - very slow, but compatible with Python 2/3
The default protocol kwarg is None, this will make an automatic choice
for pickle protocol that's best suited for the version of Python in
use. Note that this will make pickles generated by Py3 incompatible with
Py2.
verbose : bool
If True, will indicate progress and warn about problems.
Returns
-------
str
The path to the updated checkplot pickle file. If `outfile` was None and
`currentcp` was a filename, this will return `currentcp` to indicate
that the checkplot pickle file was updated in place.
'''
# generate the outfile filename
if not outfile and isinstance(currentcp,str):
plotfpath = currentcp
elif outfile:
plotfpath = outfile
elif isinstance(currentcp, dict) and currentcp['objectid']:
if outgzip:
plotfpath = 'checkplot-%s.pkl.gz' % currentcp['objectid']
else:
plotfpath = 'checkplot-%s.pkl' % currentcp['objectid']
else:
# we'll get this later below
plotfpath = None
if (isinstance(currentcp, str) and os.path.exists(currentcp)):
cp_current = _read_checkplot_picklefile(currentcp)
elif isinstance(currentcp, dict):
cp_current = currentcp
else:
LOGERROR('currentcp: %s of type %s is not a '
'valid checkplot filename (or does not exist), or a dict' %
(os.path.abspath(currentcp), type(currentcp)))
return None
if (isinstance(updatedcp, str) and os.path.exists(updatedcp)):
cp_updated = _read_checkplot_picklefile(updatedcp)
elif isinstance(updatedcp, dict):
cp_updated = updatedcp
else:
LOGERROR('updatedcp: %s of type %s is not a '
'valid checkplot filename (or does not exist), or a dict' %
(os.path.abspath(updatedcp), type(updatedcp)))
return None
# do the update using python's dict update mechanism
# this requires updated to be in the same checkplotdict format as current
# all keys in current will now be from updated
cp_current.update(cp_updated)
# figure out the plotfpath if we haven't by now
if not plotfpath and outgzip:
plotfpath = 'checkplot-%s.pkl.gz' % cp_current['objectid']
elif (not plotfpath) and (not outgzip):
plotfpath = 'checkplot-%s.pkl' % cp_current['objectid']
# make sure we write the correct postfix
if plotfpath.endswith('.gz'):
outgzip = True
# write the new checkplotdict
return _write_checkplot_picklefile(cp_current,
outfile=plotfpath,
outgzip=outgzip,
protocol=pickleprotocol)