Source code for astrobase.checkplot.pkl_postproc

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# pkl_postproc.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Feb 2019
# License: MIT.

'''
This contains utility functions that support the checkplot pickle
post-processing functionality.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

from copy import deepcopy
import numpy as np


###################
## LOCAL IMPORTS ##
###################

from .pkl_io import _read_checkplot_picklefile, _write_checkplot_picklefile
from .pkl_utils import _pkl_finder_objectinfo


################################
## POST-PROCESSING CHECKPLOTS ##
################################

[docs]def update_checkplot_objectinfo(cpf, fast_mode=False, findercmap='gray_r', finderconvolve=None, deredden_object=True, custom_bandpasses=None, gaia_submit_timeout=10.0, gaia_submit_tries=3, gaia_max_timeout=180.0, gaia_mirror=None, complete_query_later=True, lclistpkl=None, nbrradiusarcsec=60.0, maxnumneighbors=5, plotdpi=100, findercachedir='~/.astrobase/stamp-cache', verbose=True): '''This updates a checkplot objectinfo dict. Useful in cases where a previous round of GAIA/finderchart/external catalog acquisition failed. This will preserve the following keys in the checkplot if they exist:: comments varinfo objectinfo.objecttags Parameters ---------- cpf : str The path to the checkplot pickle to update. fast_mode : bool or float This runs the external catalog operations in a "fast" mode, with short timeouts and not trying to hit external catalogs that take a long time to respond. See the docstring for :py:func:`astrobase.checkplot.pkl_utils._pkl_finder_objectinfo` for details on how this works. If this is True, will run in "fast" mode with default timeouts (5 seconds in most cases). If this is a float, will run in "fast" mode with the provided timeout value in seconds. findercmap : str or matplotlib.cm.ColorMap object The Colormap object to use for the finder chart image. finderconvolve : astropy.convolution.Kernel object or None If not None, the Kernel object to use for convolving the finder image. deredden_objects : bool If this is True, will use the 2MASS DUST service to get extinction coefficients in various bands, and then try to deredden the magnitudes and colors of the object already present in the checkplot's objectinfo dict. custom_bandpasses : dict This is a dict used to provide custom bandpass definitions for any magnitude measurements in the objectinfo dict that are not automatically recognized by the `varclass.starfeatures.color_features` function. See its docstring for details on the required format. gaia_submit_timeout : float Sets the timeout in seconds to use when submitting a request to look up the object's information to the GAIA service. Note that if `fast_mode` is set, this is ignored. gaia_submit_tries : int Sets the maximum number of times the GAIA services will be contacted to obtain this object's information. If `fast_mode` is set, this is ignored, and the services will be contacted only once (meaning that a failure to respond will be silently ignored and no GAIA data will be added to the checkplot's objectinfo dict). gaia_max_timeout : float Sets the timeout in seconds to use when waiting for the GAIA service to respond to our request for the object's information. Note that if `fast_mode` is set, this is ignored. gaia_mirror : str This sets the GAIA mirror to use. This is a key in the :py:data:`astrobase.services.gaia.GAIA_URLS` dict which defines the URLs to hit for each mirror. complete_query_later : bool If this is True, saves the state of GAIA queries that are not yet complete when `gaia_max_timeout` is reached while waiting for the GAIA service to respond to our request. A later call for GAIA info on the same object will attempt to pick up the results from the existing query if it's completed. If `fast_mode` is True, this is ignored. lclistpkl : dict or str If this is provided, must be a dict resulting from reading a catalog produced by the `lcproc.catalogs.make_lclist` function or a str path pointing to the pickle file produced by that function. This catalog is used to find neighbors of the current object in the current light curve collection. Looking at neighbors of the object within the radius specified by `nbrradiusarcsec` is useful for light curves produced by instruments that have a large pixel scale, so are susceptible to blending of variability and potential confusion of neighbor variability with that of the actual object being looked at. If this is None, no neighbor lookups will be performed. nbrradiusarcsec : float The radius in arcseconds to use for a search conducted around the coordinates of this object to look for any potential confusion and blending of variability amplitude caused by their proximity. maxnumneighbors : int The maximum number of neighbors that will have their light curves and magnitudes noted in this checkplot as potential blends with the target object. plotdpi : int The resolution in DPI of the plots to generate in this function (e.g. the finder chart, etc.) findercachedir : str The path to the astrobase cache directory for finder chart downloads from the NASA SkyView service. verbose : bool If True, will indicate progress and warn about potential problems. Returns ------- str Path to the updated checkplot pickle file. ''' cpd = _read_checkplot_picklefile(cpf) if cpd['objectinfo']['objecttags'] is not None: objecttags = cpd['objectinfo']['objecttags'][::] else: objecttags = None varinfo = deepcopy(cpd['varinfo']) if 'comments' in cpd and cpd['comments'] is not None: comments = cpd['comments'][::] else: comments = None newcpd = _pkl_finder_objectinfo(cpd['objectinfo'], varinfo, findercmap, finderconvolve, cpd['sigclip'], cpd['normto'], cpd['normmingap'], fast_mode=fast_mode, deredden_object=deredden_object, custom_bandpasses=custom_bandpasses, gaia_submit_timeout=gaia_submit_timeout, gaia_submit_tries=gaia_submit_tries, gaia_max_timeout=gaia_max_timeout, gaia_mirror=gaia_mirror, complete_query_later=complete_query_later, lclistpkl=lclistpkl, nbrradiusarcsec=nbrradiusarcsec, maxnumneighbors=maxnumneighbors, plotdpi=plotdpi, findercachedir=findercachedir, verbose=verbose) # # don't update neighbors or finder chart if the new one is bad # if (newcpd['finderchart'] is None and cpd['finderchart'] is not None): newcpd['finderchart'] = deepcopy( cpd['finderchart'] ) if (newcpd['neighbors'] is None and cpd['neighbors'] is not None): newcpd['neighbors'] = deepcopy( cpd['neighbors'] ) # # if there's existing GAIA info, don't overwrite if the new objectinfo dict # doesn't have any # if (('failed' in newcpd['objectinfo']['gaia_status'] or ('gaiaid' in newcpd['objectinfo'] and newcpd['objectinfo']['gaiaid'] is None)) and 'ok' in cpd['objectinfo']['gaia_status']): newcpd['objectinfo']['gaia_status'] = deepcopy( cpd['objectinfo']['gaia_status'] ) if 'gaiaid' in cpd['objectinfo']: newcpd['objectinfo']['gaiaid'] = deepcopy( cpd['objectinfo']['gaiaid'] ) newcpd['objectinfo']['gaiamag'] = deepcopy( cpd['objectinfo']['gaiamag'] ) newcpd['objectinfo']['gaia_absmag'] = deepcopy( cpd['objectinfo']['gaia_absmag'] ) newcpd['objectinfo']['gaia_parallax'] = deepcopy( cpd['objectinfo']['gaia_parallax'] ) newcpd['objectinfo']['gaia_parallax_err'] = deepcopy( cpd['objectinfo']['gaia_parallax_err'] ) newcpd['objectinfo']['gaia_pmra'] = deepcopy( cpd['objectinfo']['gaia_pmra'] ) newcpd['objectinfo']['gaia_pmra_err'] = deepcopy( cpd['objectinfo']['gaia_pmra_err'] ) newcpd['objectinfo']['gaia_pmdecl'] = deepcopy( cpd['objectinfo']['gaia_pmdecl'] ) newcpd['objectinfo']['gaia_pmdecl_err'] = deepcopy( cpd['objectinfo']['gaia_pmdecl_err'] ) if (not np.isfinite(newcpd['objectinfo']['gaia_neighbors']) and np.isfinite(cpd['objectinfo']['gaia_neighbors'])): newcpd['objectinfo']['gaia_neighbors'] = deepcopy( cpd['objectinfo']['gaia_neighbors'] ) if (not np.isfinite(newcpd['objectinfo']['gaia_closest_distarcsec']) and np.isfinite(cpd['objectinfo']['gaia_closest_distarcsec'])): newcpd['objectinfo']['gaia_closest_distarcsec'] = deepcopy( cpd['objectinfo']['gaia_closest_gmagdiff'] ) if (not np.isfinite(newcpd['objectinfo']['gaia_closest_gmagdiff']) and np.isfinite(cpd['objectinfo']['gaia_closest_gmagdiff'])): newcpd['objectinfo']['gaia_closest_gmagdiff'] = deepcopy( cpd['objectinfo']['gaia_closest_gmagdiff'] ) if (newcpd['objectinfo']['gaia_ids'] is None and cpd['objectinfo']['gaia_ids'] is not None): newcpd['objectinfo']['gaia_ids'] = deepcopy( cpd['objectinfo']['gaia_ids'] ) if (newcpd['objectinfo']['gaia_xypos'] is None and cpd['objectinfo']['gaia_xypos'] is not None): newcpd['objectinfo']['gaia_xypos'] = deepcopy( cpd['objectinfo']['gaia_xypos'] ) if (newcpd['objectinfo']['gaia_mags'] is None and cpd['objectinfo']['gaia_mags'] is not None): newcpd['objectinfo']['gaia_mags'] = deepcopy( cpd['objectinfo']['gaia_mags'] ) if (newcpd['objectinfo']['gaia_parallaxes'] is None and cpd['objectinfo']['gaia_parallaxes'] is not None): newcpd['objectinfo']['gaia_parallaxes'] = deepcopy( cpd['objectinfo']['gaia_parallaxes'] ) if (newcpd['objectinfo']['gaia_parallax_errs'] is None and cpd['objectinfo']['gaia_parallax_errs'] is not None): newcpd['objectinfo']['gaia_parallax_errs'] = deepcopy( cpd['objectinfo']['gaia_parallax_errs'] ) if (newcpd['objectinfo']['gaia_pmras'] is None and cpd['objectinfo']['gaia_pmras'] is not None): newcpd['objectinfo']['gaia_pmras'] = deepcopy( cpd['objectinfo']['gaia_pmras'] ) if (newcpd['objectinfo']['gaia_pmra_errs'] is None and cpd['objectinfo']['gaia_pmra_errs'] is not None): newcpd['objectinfo']['gaia_pmra_errs'] = deepcopy( cpd['objectinfo']['gaia_pmra_errs'] ) if (newcpd['objectinfo']['gaia_pmdecls'] is None and cpd['objectinfo']['gaia_pmdecls'] is not None): newcpd['objectinfo']['gaia_pmdecls'] = deepcopy( cpd['objectinfo']['gaia_pmdecls'] ) if (newcpd['objectinfo']['gaia_pmdecl_errs'] is None and cpd['objectinfo']['gaia_pmdecl_errs'] is not None): newcpd['objectinfo']['gaia_pmdecl_errs'] = deepcopy( cpd['objectinfo']['gaia_pmdecl_errs'] ) if (newcpd['objectinfo']['gaia_absolute_mags'] is None and cpd['objectinfo']['gaia_absolute_mags'] is not None): newcpd['objectinfo']['gaia_absolute_mags'] = deepcopy( cpd['objectinfo']['gaia_absolute_mags'] ) if (newcpd['objectinfo']['gaiak_colors'] is None and cpd['objectinfo']['gaiak_colors'] is not None): newcpd['objectinfo']['gaiak_colors'] = deepcopy( cpd['objectinfo']['gaiak_colors'] ) if (newcpd['objectinfo']['gaia_dists'] is None and cpd['objectinfo']['gaia_dists'] is not None): newcpd['objectinfo']['gaia_dists'] = deepcopy( cpd['objectinfo']['gaia_dists'] ) # # don't overwrite good SIMBAD info with bad # if ('failed' in newcpd['objectinfo']['simbad_status'] and 'ok' in cpd['objectinfo']['simbad_status']): newcpd['objectinfo']['simbad_status'] = deepcopy( cpd['objectinfo']['simbad_status'] ) if (newcpd['objectinfo']['simbad_nmatches'] is None and cpd['objectinfo']['simbad_nmatches'] is not None): newcpd['objectinfo']['simbad_nmatches'] = deepcopy( cpd['objectinfo']['simbad_nmatches'] ) if (newcpd['objectinfo']['simbad_mainid'] is None and cpd['objectinfo']['simbad_mainid'] is not None): newcpd['objectinfo']['simbad_mainid'] = deepcopy( cpd['objectinfo']['simbad_mainid'] ) if (newcpd['objectinfo']['simbad_objtype'] is None and cpd['objectinfo']['simbad_objtype'] is not None): newcpd['objectinfo']['simbad_objtype'] = deepcopy( cpd['objectinfo']['simbad_objtype'] ) if (newcpd['objectinfo']['simbad_allids'] is None and cpd['objectinfo']['simbad_allids'] is not None): newcpd['objectinfo']['simbad_allids'] = deepcopy( cpd['objectinfo']['simbad_allids'] ) if (newcpd['objectinfo']['simbad_distarcsec'] is None and cpd['objectinfo']['simbad_distarcsec'] is not None): newcpd['objectinfo']['simbad_distarcsec'] = deepcopy( cpd['objectinfo']['simbad_distarcsec'] ) if (newcpd['objectinfo']['simbad_best_mainid'] is None and cpd['objectinfo']['simbad_best_mainid'] is not None): newcpd['objectinfo']['simbad_best_mainid'] = deepcopy( cpd['objectinfo']['simbad_best_mainid'] ) if (newcpd['objectinfo']['simbad_best_objtype'] is None and cpd['objectinfo']['simbad_best_objtype'] is not None): newcpd['objectinfo']['simbad_best_objtype'] = deepcopy( cpd['objectinfo']['simbad_best_objtype'] ) if (newcpd['objectinfo']['simbad_best_allids'] is None and cpd['objectinfo']['simbad_best_allids'] is not None): newcpd['objectinfo']['simbad_best_allids'] = deepcopy( cpd['objectinfo']['simbad_best_allids'] ) if (newcpd['objectinfo']['simbad_best_distarcsec'] is None and cpd['objectinfo']['simbad_best_distarcsec'] is not None): newcpd['objectinfo']['simbad_best_distarcsec'] = deepcopy( cpd['objectinfo']['simbad_best_distarcsec'] ) # # update the objectinfo dict # cpd.update(newcpd) cpd['objectinfo']['objecttags'] = objecttags cpd['comments'] = comments newcpf = _write_checkplot_picklefile(cpd, outfile=cpf) return newcpf
######################################################## ## FINALIZING CHECKPLOTS AFTER ALL PROCESSING IS DONE ## ########################################################
[docs]def finalize_checkplot(cpx, outdir, all_lclistpkl, objfits=None): '''This is used to prevent any further changes to the checkplot. TODO: finish this. Use this function after all variable classification, period-finding, and object xmatches are done. This function will add a 'final' key to the checkplot, which will contain: - a phased LC plot with the period and epoch set after review using the times, mags, errs after any appropriate filtering and sigclip was done in the checkplotserver UI - The unphased LC using the times, mags, errs after any appropriate filtering and sigclip was done in the checkplotserver UI - the same plots for any LC collection neighbors - the survey cutout for the object if objfits is provided and checks out - a redone neighbor search using GAIA and all light curves in the collection even if they don't have at least 1000 observations. These items will be shown in a special 'Final' tab in the `checkplotserver` webapp (this should be run in `readonly` mode as well). The final tab will also contain downloadable links for the checkplot pickle in pkl and PNG format, as well as the final times, mags, errs as a gzipped CSV with a header containing all of this info. Parameters ---------- cpx : dict or str This is the path to the checkplot dict or pickle file to process. outdir : str This is the directory to where the final pickle will be written. If this is set to the same dir as `cpx` and `cpx` is a pickle, the function will return a failure. This is meant to keep the in-process checkplots separate from the finalized versions. all_lclistpkl : str or dict This is the path to the pickle or the dict created by :py:func:`astrobase.lcproc.catalogs.make_lclist` with no restrictions on the number of observations (so ALL light curves in the collection). This is used to make sure all neighbors of this object in the light curve collection have had their proximity to this object noted. objfits : str or None If this is not None, should be a file path to a FITS file containing a WCS header and this object from the instrument that was used to observe it. This will be used to make a stamp cutout of the object using the actual image it was detected on. This will be a useful comparison to the usual DSS POSS-RED2 image used by the checkplots. Returns ------- str The path to the updated checkplot pickle file with a 'final' key added to it , as described above. '''
[docs]def parallel_finalize_cplist(cplist, outdir, objfits=None): '''This is a parallel driver for `finalize_checkplot`, operating on a list of checkplots. Parameters ---------- cplist : list of str This is a list of paths of all checkplot pickles to process and run through the finalization process. outdir : str The directory to where the finalized checkplot pickles will be written. objfits : str Path to the FITS file containing a WCS header and detections of all objects observed by the actual instrument that obtained the light curves. This should generally be a high quality 'reference' frame so that all of the objects whose checkplots we're finalizing (in `cplist`) can be seen on the frame. Returns ------- dict Dict indicating the success/failure (as True/False) of the checkplot finalize operations for each checkplot pickle provided in `cplist`. '''
[docs]def parallel_finalize_cpdir(cpdir, outdir, cpfileglob='checkplot-*.pkl*', objfits=None): '''This is a parallel driver for `finalize_checkplot`, operating on a directory of checkplots. Parameters ---------- cpdir : str This is the path to the directory containing all the checkplot pickles to process and run through the finalization process. outdir : str The directory to where the finalized checkplot pickles will be written. objfits : str Path to the FITS file containing a WCS header and detections of all objects observed by the actual instrument that obtained the light curves. This should generally be a high quality 'reference' frame so that all of the objects whose checkplots we're finalizing (in `cplist`) can be seen on the frame. Returns ------- dict Dict indicating the success/failure (as True/False) of the checkplot finalize operations for each checkplot pickle provided in `cplist`. '''