Source code for astrobase.checkplot.pkl_io

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# pkl_io.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Feb 2019
# License: MIT.

'''
This contains utility functions that support the checkplot.pkl input/output
functionality.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

import os
import os.path
import gzip
import base64

import pickle
from io import BytesIO as StrIO

from tornado.escape import squeeze


#######################
## BASE64 OPERATIONS ##
#######################

[docs]def _base64_to_file(b64str, outfpath, writetostrio=False): '''This converts the base64 encoded string to a file. Parameters ---------- b64str : str A base64 encoded strin that is the output of `base64.b64encode`. outfpath : str The path to where the file will be written. This should include an appropriate extension for the file (e.g. a base64 encoded string that represents a PNG should have its `outfpath` end in a '.png') so the OS can open these files correctly. writetostrio : bool If this is True, will return a StringIO object with the binary stream decoded from the base64-encoded input string `b64str`. This can be useful to embed these into other files without having to write them to disk. Returns ------- str or StringIO object If `writetostrio` is False, will return the output file's path as a str. If it is True, will return a StringIO object directly. If writing the file fails in either case, will return None. ''' try: filebytes = base64.b64decode(b64str) # if we're writing back to a stringio object if writetostrio: outobj = StrIO(filebytes) return outobj # otherwise, we're writing to an actual file else: with open(outfpath,'wb') as outfd: outfd.write(filebytes) if os.path.exists(outfpath): return outfpath else: LOGERROR('could not write output file: %s' % outfpath) return None except Exception: LOGEXCEPTION('failed while trying to convert ' 'b64 string to file %s' % outfpath) return None
######################## ## READ/WRITE PICKLES ## ########################
[docs]def _read_checkplot_picklefile(checkplotpickle): '''This reads a checkplot gzipped pickle file back into a dict. NOTE: the try-except is for Python 2 pickles that have numpy arrays in them. Apparently, these aren't compatible with Python 3. See here: http://stackoverflow.com/q/11305790 The workaround is noted in this answer: http://stackoverflow.com/a/41366785 Parameters ---------- checkplotpickle : str The path to a checkplot pickle file. This can be a gzipped file (in which case the file extension should end in '.gz') Returns ------- dict This returns a checkplotdict. ''' if checkplotpickle.endswith('.gz'): try: with gzip.open(checkplotpickle,'rb') as infd: cpdict = pickle.load(infd) except UnicodeDecodeError: with gzip.open(checkplotpickle,'rb') as infd: cpdict = pickle.load(infd, encoding='latin1') else: try: with open(checkplotpickle,'rb') as infd: cpdict = pickle.load(infd) except UnicodeDecodeError: with open(checkplotpickle,'rb') as infd: cpdict = pickle.load(infd, encoding='latin1') return cpdict
[docs]def _write_checkplot_picklefile(checkplotdict, outfile=None, protocol=None, outgzip=False): '''This writes the checkplotdict to a (gzipped) pickle file. Parameters ---------- checkplotdict : dict This the checkplotdict to write to the pickle file. outfile : None or str The path to the output pickle file to write. If `outfile` is None, writes a (gzipped) pickle file of the form: checkplot-{objectid}.pkl(.gz) to the current directory. protocol : int This sets the pickle file protocol to use when writing the pickle: If None, will choose a protocol using the following rules: - 4 -> default in Python >= 3.4 - fast but incompatible with Python 2 - 3 -> default in Python 3.0-3.3 - mildly fast - 2 -> default in Python 2 - very slow, but compatible with Python 2/3 The default protocol kwarg is None, this will make an automatic choice for pickle protocol that's best suited for the version of Python in use. Note that this will make pickles generated by Py3 incompatible with Py2. outgzip : bool If this is True, will gzip the output file. Note that if the `outfile` str ends in a gzip, this will be automatically turned on. Returns ------- str The absolute path to the written checkplot pickle file. None if writing fails. ''' # for Python >= 3.4; use v4 by default if not protocol: protocol = 4 if outgzip: if not outfile: outfile = ( 'checkplot-{objectid}.pkl.gz'.format( objectid=squeeze(checkplotdict['objectid']).replace(' ','-') ) ) with gzip.open(outfile,'wb') as outfd: pickle.dump(checkplotdict,outfd,protocol=protocol) else: if not outfile: outfile = ( 'checkplot-{objectid}.pkl'.format( objectid=squeeze(checkplotdict['objectid']).replace(' ','-') ) ) # make sure to do the right thing if '.gz' is in the filename but # outgzip was False if outfile.endswith('.gz'): LOGWARNING('output filename ends with .gz but kwarg outgzip=False. ' 'will use gzip to compress the output pickle') with gzip.open(outfile,'wb') as outfd: pickle.dump(checkplotdict,outfd,protocol=protocol) else: with open(outfile,'wb') as outfd: pickle.dump(checkplotdict,outfd,protocol=protocol) return os.path.abspath(outfile)