Source code for astrobase.fakelcs.recovery

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# recovery - Waqas Bhatti (wbhatti@astro.princeton.edu) - Oct 2017
# License: MIT. See the LICENSE file for more details.

'''This is a companion module for fakelcs/generation.py. It runs LCs generated
using functions in that module through variable star detection and
classification to see how well they are recovered.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

import os
import os.path
import pickle
import gzip
import glob

import multiprocessing as mp

from math import sqrt as msqrt

# to turn a list of keys into a dict address
# from https://stackoverflow.com/a/14692747
from functools import reduce
from operator import getitem


def _dict_get(datadict, keylist):
    return reduce(getitem, keylist, datadict)


import numpy as np
import numpy.random as npr
# seed the numpy random generator
npr.seed(0xdecaff)

import matplotlib
matplotlib.use('Agg')
matplotlib.rcParams['agg.path.chunksize'] = 10000

import matplotlib.pyplot as plt


###################
## LOCAL IMPORTS ##
###################

from .. import lcproc
from ..lcproc import lcvfeatures, varthreshold, periodsearch


#######################
## LC FORMATS SET UP ##
#######################

[docs]def read_fakelc(fakelcfile):
    '''
    This just reads a pickled fake LC.

    Parameters
    ----------

    fakelcfile : str
        The fake LC file to read.

    Returns
    -------

    dict
        This returns an lcdict.

    '''

    try:
        with open(fakelcfile,'rb') as infd:
            lcdict = pickle.load(infd)
    except UnicodeDecodeError:
        with open(fakelcfile,'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')

    return lcdict


#######################
## UTILITY FUNCTIONS ##
#######################

[docs]def get_varfeatures(simbasedir,
                    mindet=1000,
                    nworkers=None):
    '''This runs `lcproc.lcvfeatures.parallel_varfeatures` on fake LCs in
    `simbasedir`.

    Parameters
    ----------

    simbasedir : str
        The directory containing the fake LCs to process.

    mindet : int
        The minimum number of detections needed to accept an LC and process it.

    nworkers : int or None
        The number of parallel workers to use when extracting variability
        features from the input light curves.

    Returns
    -------

    str
        The path to the `varfeatures` pickle created after running the
        `lcproc.lcvfeatures.parallel_varfeatures` function.

    '''

    # get the info from the simbasedir
    with open(os.path.join(simbasedir, 'fakelcs-info.pkl'),'rb') as infd:
        siminfo = pickle.load(infd)

    lcfpaths = siminfo['lcfpath']
    varfeaturedir = os.path.join(simbasedir,'varfeatures')

    # get the column defs for the fakelcs
    timecols = siminfo['timecols']
    magcols = siminfo['magcols']
    errcols = siminfo['errcols']

    # get the column defs for the fakelcs
    timecols = siminfo['timecols']
    magcols = siminfo['magcols']
    errcols = siminfo['errcols']

    # register the fakelc pklc as a custom lcproc format
    # now we should be able to use all lcproc functions correctly
    fakelc_formatkey = 'fake-%s' % siminfo['lcformat']
    lcproc.register_lcformat(
        fakelc_formatkey,
        '*-fakelc.pkl',
        timecols,
        magcols,
        errcols,
        'astrobase.lcproc',
        '_read_pklc',
        magsarefluxes=siminfo['magsarefluxes']
    )

    # now we can use lcproc.parallel_varfeatures directly
    varinfo = lcvfeatures.parallel_varfeatures(lcfpaths,
                                               varfeaturedir,
                                               lcformat=fakelc_formatkey,
                                               mindet=mindet,
                                               nworkers=nworkers)

    with open(os.path.join(simbasedir,'fakelc-varfeatures.pkl'),'wb') as outfd:
        pickle.dump(varinfo, outfd, pickle.HIGHEST_PROTOCOL)

    return os.path.join(simbasedir,'fakelc-varfeatures.pkl')


[docs]def precision(ntp, nfp):
    '''
    This calculates precision.

    https://en.wikipedia.org/wiki/Precision_and_recall

    Parameters
    ----------

    ntp : int
        The number of true positives.

    nfp : int
        The number of false positives.

    Returns
    -------

    float
        The precision calculated using `ntp/(ntp + nfp)`.

    '''

    if (ntp+nfp) > 0:
        return ntp/(ntp+nfp)
    else:
        return np.nan


[docs]def recall(ntp, nfn):
    '''
    This calculates recall.

    https://en.wikipedia.org/wiki/Precision_and_recall

    Parameters
    ----------

    ntp : int
        The number of true positives.

    nfn : int
        The number of false negatives.

    Returns
    -------

    float
        The precision calculated using `ntp/(ntp + nfn)`.

    '''

    if (ntp+nfn) > 0:
        return ntp/(ntp+nfn)
    else:
        return np.nan


[docs]def matthews_correl_coeff(ntp, ntn, nfp, nfn):
    '''
    This calculates the Matthews correlation coefficent.

    https://en.wikipedia.org/wiki/Matthews_correlation_coefficient

    Parameters
    ----------

    ntp : int
        The number of true positives.

    ntn : int
        The number of true negatives

    nfp : int
        The number of false positives.

    nfn : int
        The number of false negatives.

    Returns
    -------

    float
        The Matthews correlation coefficient.

    '''

    mcc_top = (ntp*ntn - nfp*nfn)
    mcc_bot = msqrt((ntp + nfp)*(ntp + nfn)*(ntn + nfp)*(ntn + nfn))

    if mcc_bot > 0:
        return mcc_top/mcc_bot
    else:
        return np.nan


#######################################
## VARIABILITY RECOVERY (PER MAGBIN) ##
#######################################

[docs]def get_recovered_variables_for_magbin(simbasedir,
                                       magbinmedian,
                                       stetson_stdev_min=2.0,
                                       inveta_stdev_min=2.0,
                                       iqr_stdev_min=2.0,
                                       statsonly=True):
    '''This runs variability selection for the given magbinmedian.

    To generate a full recovery matrix over all magnitude bins, run this
    function for each magbin over the specified stetson_stdev_min and
    inveta_stdev_min grid.

    Parameters
    ----------

    simbasedir : str
        The input directory of fake LCs.

    magbinmedian : float
        The magbin to run the variable recovery for. This is an item from the
        dict from `simbasedir/fakelcs-info.pkl: `fakelcinfo['magrms'][magcol]`
        list for each magcol and designates which magbin to get the recovery
        stats for.

    stetson_stdev_min : float
        The minimum sigma above the trend in the Stetson J variability index
        distribution for this magbin to use to consider objects as variable.

    inveta_stdev_min : float
        The minimum sigma above the trend in the 1/eta variability index
        distribution for this magbin to use to consider objects as variable.

    iqr_stdev_min : float
        The minimum sigma above the trend in the IQR variability index
        distribution for this magbin to use to consider objects as variable.

    statsonly : bool
        If this is True, only the final stats will be returned. If False, the
        full arrays used to generate the stats will also be returned.

    Returns
    -------

    dict
        The returned dict contains statistics for this magbin and if requested,
        the full arrays used to calculate the statistics.

    '''

    # get the info from the simbasedir
    with open(os.path.join(simbasedir, 'fakelcs-info.pkl'),'rb') as infd:
        siminfo = pickle.load(infd)

    objectids = siminfo['objectid']
    varflags = siminfo['isvariable']
    sdssr = siminfo['sdssr']

    # get the column defs for the fakelcs
    timecols = siminfo['timecols']
    magcols = siminfo['magcols']
    errcols = siminfo['errcols']

    # register the fakelc pklc as a custom lcproc format
    # now we should be able to use all lcproc functions correctly
    fakelc_formatkey = 'fake-%s' % siminfo['lcformat']
    lcproc.register_lcformat(
        fakelc_formatkey,
        '*-fakelc.pkl',
        timecols,
        magcols,
        errcols,
        'astrobase.lcproc',
        '_read_pklc',
        magsarefluxes=siminfo['magsarefluxes']
    )

    # make the output directory if it doesn't exit
    outdir = os.path.join(simbasedir, 'recvar-threshold-pkls')
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    # run the variability search
    varfeaturedir = os.path.join(simbasedir, 'varfeatures')
    varthreshinfof = os.path.join(
        outdir,
        'varthresh-magbinmed%.2f-stet%.2f-inveta%.2f.pkl' % (magbinmedian,
                                                             stetson_stdev_min,
                                                             inveta_stdev_min)
    )
    varthresh = varthreshold.variability_threshold(
        varfeaturedir,
        varthreshinfof,
        lcformat=fakelc_formatkey,
        min_stetj_stdev=stetson_stdev_min,
        min_inveta_stdev=inveta_stdev_min,
        min_iqr_stdev=iqr_stdev_min,
        verbose=False
    )

    # get the magbins from the varthresh info
    magbins = varthresh['magbins']

    # get the magbininds
    magbininds = np.digitize(sdssr, magbins)

    # bin the objects according to these magbins
    binned_objectids = []
    binned_actualvars = []
    binned_actualnotvars = []

    # go through all the mag bins and bin up the objectids, actual variables,
    # and actual not-variables
    for mbinind, _magi in zip(np.unique(magbininds),
                              range(len(magbins)-1)):

        thisbinind = np.where(magbininds == mbinind)

        thisbin_objectids = objectids[thisbinind]
        thisbin_varflags = varflags[thisbinind]

        thisbin_actualvars = thisbin_objectids[thisbin_varflags]
        thisbin_actualnotvars = thisbin_objectids[~thisbin_varflags]

        binned_objectids.append(thisbin_objectids)
        binned_actualvars.append(thisbin_actualvars)
        binned_actualnotvars.append(thisbin_actualnotvars)

    # this is the output dict
    recdict = {
        'simbasedir':simbasedir,
        'timecols':timecols,
        'magcols':magcols,
        'errcols':errcols,
        'magsarefluxes':siminfo['magsarefluxes'],
        'stetj_min_stdev':stetson_stdev_min,
        'inveta_min_stdev':inveta_stdev_min,
        'iqr_min_stdev':iqr_stdev_min,
        'magbinmedian':magbinmedian,
    }

    # now, for each magcol, find the magbin corresponding to magbinmedian, and
    # get its stats
    for magcol in magcols:

        # this is the index of the matching magnitude bin for the magbinmedian
        # provided
        magbinind = np.where(
            np.array(varthresh[magcol]['binned_sdssr_median']) == magbinmedian
        )

        magbinind = np.asscalar(magbinind[0])

        # get the objectids, actual vars and actual notvars in this magbin
        thisbin_objectids = binned_objectids[magbinind]
        thisbin_actualvars = binned_actualvars[magbinind]
        thisbin_actualnotvars = binned_actualnotvars[magbinind]

        # stetson recovered variables in this magbin
        stet_recoveredvars = varthresh[magcol][
            'binned_objectids_thresh_stetsonj'
        ][magbinind]

        # calculate TP, FP, TN, FN
        stet_recoverednotvars = np.setdiff1d(thisbin_objectids,
                                             stet_recoveredvars)

        stet_truepositives = np.intersect1d(stet_recoveredvars,
                                            thisbin_actualvars)
        stet_falsepositives = np.intersect1d(stet_recoveredvars,
                                             thisbin_actualnotvars)
        stet_truenegatives = np.intersect1d(stet_recoverednotvars,
                                            thisbin_actualnotvars)
        stet_falsenegatives = np.intersect1d(stet_recoverednotvars,
                                             thisbin_actualvars)

        # calculate stetson recall, precision, Matthews correl coeff
        stet_recall = recall(stet_truepositives.size,
                             stet_falsenegatives.size)

        stet_precision = precision(stet_truepositives.size,
                                   stet_falsepositives.size)

        stet_mcc = matthews_correl_coeff(stet_truepositives.size,
                                         stet_truenegatives.size,
                                         stet_falsepositives.size,
                                         stet_falsenegatives.size)

        # inveta recovered variables in this magbin
        inveta_recoveredvars = varthresh[magcol][
            'binned_objectids_thresh_inveta'
        ][magbinind]
        inveta_recoverednotvars = np.setdiff1d(thisbin_objectids,
                                               inveta_recoveredvars)

        inveta_truepositives = np.intersect1d(inveta_recoveredvars,
                                              thisbin_actualvars)
        inveta_falsepositives = np.intersect1d(inveta_recoveredvars,
                                               thisbin_actualnotvars)
        inveta_truenegatives = np.intersect1d(inveta_recoverednotvars,
                                              thisbin_actualnotvars)
        inveta_falsenegatives = np.intersect1d(inveta_recoverednotvars,
                                               thisbin_actualvars)

        # calculate inveta recall, precision, Matthews correl coeff
        inveta_recall = recall(inveta_truepositives.size,
                               inveta_falsenegatives.size)

        inveta_precision = precision(inveta_truepositives.size,
                                     inveta_falsepositives.size)

        inveta_mcc = matthews_correl_coeff(inveta_truepositives.size,
                                           inveta_truenegatives.size,
                                           inveta_falsepositives.size,
                                           inveta_falsenegatives.size)

        # iqr recovered variables in this magbin
        iqr_recoveredvars = varthresh[magcol][
            'binned_objectids_thresh_iqr'
        ][magbinind]
        iqr_recoverednotvars = np.setdiff1d(thisbin_objectids,
                                            iqr_recoveredvars)

        iqr_truepositives = np.intersect1d(iqr_recoveredvars,
                                           thisbin_actualvars)
        iqr_falsepositives = np.intersect1d(iqr_recoveredvars,
                                            thisbin_actualnotvars)
        iqr_truenegatives = np.intersect1d(iqr_recoverednotvars,
                                           thisbin_actualnotvars)
        iqr_falsenegatives = np.intersect1d(iqr_recoverednotvars,
                                            thisbin_actualvars)

        # calculate iqr recall, precision, Matthews correl coeff
        iqr_recall = recall(iqr_truepositives.size,
                            iqr_falsenegatives.size)

        iqr_precision = precision(iqr_truepositives.size,
                                  iqr_falsepositives.size)

        iqr_mcc = matthews_correl_coeff(iqr_truepositives.size,
                                        iqr_truenegatives.size,
                                        iqr_falsepositives.size,
                                        iqr_falsenegatives.size)

        # calculate the items missed by one method but found by the other
        # methods
        stet_missed_inveta_found = np.setdiff1d(inveta_truepositives,
                                                stet_truepositives)
        stet_missed_iqr_found = np.setdiff1d(iqr_truepositives,
                                             stet_truepositives)

        inveta_missed_stet_found = np.setdiff1d(stet_truepositives,
                                                inveta_truepositives)
        inveta_missed_iqr_found = np.setdiff1d(iqr_truepositives,
                                               inveta_truepositives)

        iqr_missed_stet_found = np.setdiff1d(stet_truepositives,
                                             iqr_truepositives)
        iqr_missed_inveta_found = np.setdiff1d(inveta_truepositives,
                                               iqr_truepositives)

        if not statsonly:

            recdict[magcol] = {
                # stetson J alone
                'stet_recoveredvars':stet_recoveredvars,
                'stet_truepositives':stet_truepositives,
                'stet_falsepositives':stet_falsepositives,
                'stet_truenegatives':stet_truenegatives,
                'stet_falsenegatives':stet_falsenegatives,
                'stet_precision':stet_precision,
                'stet_recall':stet_recall,
                'stet_mcc':stet_mcc,
                # inveta alone
                'inveta_recoveredvars':inveta_recoveredvars,
                'inveta_truepositives':inveta_truepositives,
                'inveta_falsepositives':inveta_falsepositives,
                'inveta_truenegatives':inveta_truenegatives,
                'inveta_falsenegatives':inveta_falsenegatives,
                'inveta_precision':inveta_precision,
                'inveta_recall':inveta_recall,
                'inveta_mcc':inveta_mcc,
                # iqr alone
                'iqr_recoveredvars':iqr_recoveredvars,
                'iqr_truepositives':iqr_truepositives,
                'iqr_falsepositives':iqr_falsepositives,
                'iqr_truenegatives':iqr_truenegatives,
                'iqr_falsenegatives':iqr_falsenegatives,
                'iqr_precision':iqr_precision,
                'iqr_recall':iqr_recall,
                'iqr_mcc':iqr_mcc,
                # true positive variables missed by one method but picked up by
                # the others
                'stet_missed_inveta_found':stet_missed_inveta_found,
                'stet_missed_iqr_found':stet_missed_iqr_found,
                'inveta_missed_stet_found':inveta_missed_stet_found,
                'inveta_missed_iqr_found':inveta_missed_iqr_found,
                'iqr_missed_stet_found':iqr_missed_stet_found,
                'iqr_missed_inveta_found':iqr_missed_inveta_found,
                # bin info
                'actual_variables':thisbin_actualvars,
                'actual_nonvariables':thisbin_actualnotvars,
                'all_objectids':thisbin_objectids,
                'magbinind':magbinind,

            }

        # if statsonly is set, then we only return the numbers but not the
        # arrays themselves
        else:

            recdict[magcol] = {
                # stetson J alone
                'stet_recoveredvars':stet_recoveredvars.size,
                'stet_truepositives':stet_truepositives.size,
                'stet_falsepositives':stet_falsepositives.size,
                'stet_truenegatives':stet_truenegatives.size,
                'stet_falsenegatives':stet_falsenegatives.size,
                'stet_precision':stet_precision,
                'stet_recall':stet_recall,
                'stet_mcc':stet_mcc,
                # inveta alone
                'inveta_recoveredvars':inveta_recoveredvars.size,
                'inveta_truepositives':inveta_truepositives.size,
                'inveta_falsepositives':inveta_falsepositives.size,
                'inveta_truenegatives':inveta_truenegatives.size,
                'inveta_falsenegatives':inveta_falsenegatives.size,
                'inveta_precision':inveta_precision,
                'inveta_recall':inveta_recall,
                'inveta_mcc':inveta_mcc,
                # iqr alone
                'iqr_recoveredvars':iqr_recoveredvars.size,
                'iqr_truepositives':iqr_truepositives.size,
                'iqr_falsepositives':iqr_falsepositives.size,
                'iqr_truenegatives':iqr_truenegatives.size,
                'iqr_falsenegatives':iqr_falsenegatives.size,
                'iqr_precision':iqr_precision,
                'iqr_recall':iqr_recall,
                'iqr_mcc':iqr_mcc,
                # true positive variables missed by one method but picked up by
                # the others
                'stet_missed_inveta_found':stet_missed_inveta_found.size,
                'stet_missed_iqr_found':stet_missed_iqr_found.size,
                'inveta_missed_stet_found':inveta_missed_stet_found.size,
                'inveta_missed_iqr_found':inveta_missed_iqr_found.size,
                'iqr_missed_stet_found':iqr_missed_stet_found.size,
                'iqr_missed_inveta_found':iqr_missed_inveta_found.size,
                # bin info
                'actual_variables':thisbin_actualvars.size,
                'actual_nonvariables':thisbin_actualnotvars.size,
                'all_objectids':thisbin_objectids.size,
                'magbinind':magbinind,
            }

    #
    # done with per magcol
    #

    return recdict


[docs]def magbin_varind_gridsearch_worker(task):
    '''
    This is a parallel grid search worker for the function below.

    '''

    simbasedir, gridpoint, magbinmedian = task

    try:
        res = get_recovered_variables_for_magbin(simbasedir,
                                                 magbinmedian,
                                                 stetson_stdev_min=gridpoint[0],
                                                 inveta_stdev_min=gridpoint[1],
                                                 iqr_stdev_min=gridpoint[2],
                                                 statsonly=True)
        return res
    except Exception:
        LOGEXCEPTION('failed to get info for %s' % gridpoint)
        return None


[docs]def variable_index_gridsearch_magbin(simbasedir,
                                     stetson_stdev_range=(1.0,20.0),
                                     inveta_stdev_range=(1.0,20.0),
                                     iqr_stdev_range=(1.0,20.0),
                                     ngridpoints=32,
                                     ngridworkers=None):
    '''This runs a variable index grid search per magbin.

    For each magbin, this does a grid search using the stetson and inveta ranges
    provided and tries to optimize the Matthews Correlation Coefficient (best
    value is +1.0), indicating the best possible separation of variables
    vs. nonvariables. The thresholds on these two variable indexes that produce
    the largest coeff for the collection of fake LCs will probably be the ones
    that work best for actual variable classification on the real LCs.

    https://en.wikipedia.org/wiki/Matthews_correlation_coefficient

    For each grid-point, calculates the true positives, false positives, true
    negatives, false negatives. Then gets the precision and recall, confusion
    matrix, and the ROC curve for variable vs. nonvariable.

    Once we've identified the best thresholds to use, we can then calculate
    variable object numbers:

    - as a function of magnitude
    - as a function of period
    - as a function of number of detections
    - as a function of amplitude of variability


    Writes everything back to `simbasedir/fakevar-recovery.pkl`. Use the
    plotting function below to make plots for the results.

    Parameters
    ----------

    simbasedir : str
        The directory where the fake LCs are located.

    stetson_stdev_range : sequence of 2 floats
        The min and max values of the Stetson J variability index to generate a
        grid over these to test for the values of this index that produce the
        'best' recovery rate for the injected variable stars.

    inveta_stdev_range : sequence of 2 floats
        The min and max values of the 1/eta variability index to generate a
        grid over these to test for the values of this index that produce the
        'best' recovery rate for the injected variable stars.

    iqr_stdev_range : sequence of 2 floats
        The min and max values of the IQR variability index to generate a
        grid over these to test for the values of this index that produce the
        'best' recovery rate for the injected variable stars.

    ngridpoints : int
        The number of grid points for each variability index grid. Remember that
        this function will be searching in 3D and will require lots of time to
        run if ngridpoints is too large.

        For the default number of grid points and 25000 simulated light curves,
        this takes about 3 days to run on a 40 (effective) core machine with 2 x
        Xeon E5-2650v3 CPUs.

    ngridworkers : int or None
        The number of parallel grid search workers that will be launched.

    Returns
    -------

    dict
        The returned dict contains a list of recovery stats for each magbin and
        each grid point in the variability index grids that were used. This dict
        can be passed to the plotting function below to plot the results.

    '''

    # make the output directory where all the pkls from the variability
    # threshold runs will go
    outdir = os.path.join(simbasedir,'recvar-threshold-pkls')
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    # get the info from the simbasedir
    with open(os.path.join(simbasedir, 'fakelcs-info.pkl'),'rb') as infd:
        siminfo = pickle.load(infd)

    # get the column defs for the fakelcs
    timecols = siminfo['timecols']
    magcols = siminfo['magcols']
    errcols = siminfo['errcols']

    # get the magbinmedians to use for the recovery processing
    magbinmedians = siminfo['magrms'][magcols[0]]['binned_sdssr_median']

    # generate the grids for stetson and inveta
    stetson_grid = np.linspace(stetson_stdev_range[0],
                               stetson_stdev_range[1],
                               num=ngridpoints)
    inveta_grid = np.linspace(inveta_stdev_range[0],
                              inveta_stdev_range[1],
                              num=ngridpoints)
    iqr_grid = np.linspace(iqr_stdev_range[0],
                           iqr_stdev_range[1],
                           num=ngridpoints)

    # generate the grid
    stet_inveta_iqr_grid = []
    for stet in stetson_grid:
        for inveta in inveta_grid:
            for iqr in iqr_grid:
                grid_point = [stet, inveta, iqr]
                stet_inveta_iqr_grid.append(grid_point)

    # the output dict
    grid_results = {'stetson_grid':stetson_grid,
                    'inveta_grid':inveta_grid,
                    'iqr_grid':iqr_grid,
                    'stet_inveta_iqr_grid':stet_inveta_iqr_grid,
                    'magbinmedians':magbinmedians,
                    'timecols':timecols,
                    'magcols':magcols,
                    'errcols':errcols,
                    'simbasedir':os.path.abspath(simbasedir),
                    'recovery':[]}

    # set up the pool
    pool = mp.Pool(ngridworkers)

    # run the grid search per magbinmedian
    for magbinmedian in magbinmedians:

        LOGINFO('running stetson J-inveta grid-search '
                'for magbinmedian = %.3f...' % magbinmedian)

        tasks = [(simbasedir, gp, magbinmedian) for gp in stet_inveta_iqr_grid]
        thisbin_results = pool.map(magbin_varind_gridsearch_worker, tasks)
        grid_results['recovery'].append(thisbin_results)

    pool.close()
    pool.join()

    LOGINFO('done.')
    with open(os.path.join(simbasedir,
                           'fakevar-recovery-per-magbin.pkl'),'wb') as outfd:
        pickle.dump(grid_results,outfd,pickle.HIGHEST_PROTOCOL)

    return grid_results


[docs]def plot_varind_gridsearch_magbin_results(gridsearch_results):
    '''This plots the gridsearch results from `variable_index_gridsearch_magbin`.

    Parameters
    ----------

    gridsearch_results : dict
        This is the dict produced by `variable_index_gridsearch_magbin` above.

    Returns
    -------

    dict
        The returned dict contains filenames of the recovery rate plots made for
        each variability index. These include plots of the precision, recall,
        and Matthews Correlation Coefficient over each magbin and a heatmap of
        these values over the grid points of the variability index stdev values
        arrays used.

    '''

    # get the result pickle/dict
    if (isinstance(gridsearch_results, str) and
        os.path.exists(gridsearch_results)):

        with open(gridsearch_results,'rb') as infd:
            gridresults = pickle.load(infd)

    elif isinstance(gridsearch_results, dict):

        gridresults = gridsearch_results

    else:
        LOGERROR('could not understand the input '
                 'variable index grid-search result dict/pickle')
        return None

    plotres = {'simbasedir':gridresults['simbasedir']}

    recgrid = gridresults['recovery']
    simbasedir = gridresults['simbasedir']

    for magcol in gridresults['magcols']:

        plotres[magcol] = {'best_stetsonj':[],
                           'best_inveta':[],
                           'best_iqr':[],
                           'magbinmedians':gridresults['magbinmedians']}

        # go through all the magbins
        for magbinind, magbinmedian in enumerate(gridresults['magbinmedians']):

            LOGINFO('plotting results for %s: magbin: %.3f' %
                    (magcol, magbinmedian))

            stet_mcc = np.array(
                [x[magcol]['stet_mcc']
                 for x in recgrid[magbinind]]
            )[::(gridresults['inveta_grid'].size *
                 gridresults['stetson_grid'].size)]
            stet_precision = np.array(
                [x[magcol]['stet_precision']
                 for x in recgrid[magbinind]]
            )[::(gridresults['inveta_grid'].size *
                 gridresults['stetson_grid'].size)]
            stet_recall = np.array(
                [x[magcol]['stet_recall']
                 for x in recgrid[magbinind]]
            )[::(gridresults['inveta_grid'].size *
                 gridresults['stetson_grid'].size)]
            stet_missed_inveta_found = np.array(
                [x[magcol]['stet_missed_inveta_found']
                 for x in recgrid[magbinind]]
            )[::(gridresults['inveta_grid'].size *
                 gridresults['stetson_grid'].size)]
            stet_missed_iqr_found = np.array(
                [x[magcol]['stet_missed_iqr_found']
                 for x in recgrid[magbinind]]
            )[::(gridresults['inveta_grid'].size *
                 gridresults['stetson_grid'].size)]

            inveta_mcc = np.array(
                [x[magcol]['inveta_mcc']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    ::gridresults['inveta_grid'].size
                ]
            inveta_precision = np.array(
                [x[magcol]['inveta_precision']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    ::gridresults['inveta_grid'].size
                ]
            inveta_recall = np.array(
                [x[magcol]['inveta_recall']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    ::gridresults['inveta_grid'].size
                ]
            inveta_missed_stet_found = np.array(
                [x[magcol]['inveta_missed_stet_found']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    ::gridresults['inveta_grid'].size
                ]
            inveta_missed_iqr_found = np.array(
                [x[magcol]['inveta_missed_iqr_found']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    ::gridresults['inveta_grid'].size
                ]

            iqr_mcc = np.array(
                [x[magcol]['iqr_mcc']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    :gridresults['inveta_grid'].size
                ]
            iqr_precision = np.array(
                [x[magcol]['iqr_precision']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    :gridresults['inveta_grid'].size
                ]
            iqr_recall = np.array(
                [x[magcol]['iqr_recall']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    :gridresults['inveta_grid'].size
                ]
            iqr_missed_stet_found = np.array(
                [x[magcol]['iqr_missed_stet_found']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    :gridresults['inveta_grid'].size
                ]
            iqr_missed_inveta_found = np.array(
                [x[magcol]['iqr_missed_inveta_found']
                 for x in recgrid[magbinind]]
            )[:(gridresults['iqr_grid'].size *
                gridresults['stetson_grid'].size)][
                    :gridresults['inveta_grid'].size
                ]

            plt.figure(figsize=(6.4*5, 4.8*3))

            # FIRST ROW: stetson J plot

            plt.subplot(3,5,1)
            if np.any(np.isfinite(stet_mcc)):
                plt.plot(gridresults['stetson_grid'],
                         stet_mcc)
                plt.xlabel('stetson J stdev multiplier threshold')
                plt.ylabel('MCC')
                plt.title('MCC for stetson J')
            else:
                plt.text(0.5,0.5,
                         'stet MCC values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,2)
            if np.any(np.isfinite(stet_precision)):
                plt.plot(gridresults['stetson_grid'],
                         stet_precision)
                plt.xlabel('stetson J stdev multiplier threshold')
                plt.ylabel('precision')
                plt.title('precision for stetson J')
            else:
                plt.text(0.5,0.5,
                         'stet precision values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,3)
            if np.any(np.isfinite(stet_recall)):
                plt.plot(gridresults['stetson_grid'],
                         stet_recall)
                plt.xlabel('stetson J stdev multiplier threshold')
                plt.ylabel('recall')
                plt.title('recall for stetson J')
            else:
                plt.text(0.5,0.5,
                         'stet recall values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,4)
            if np.any(np.isfinite(stet_missed_inveta_found)):
                plt.plot(gridresults['stetson_grid'],
                         stet_missed_inveta_found)
                plt.xlabel('stetson J stdev multiplier threshold')
                plt.ylabel('# objects stetson missed but inveta found')
                plt.title('stetson J missed, inveta found')
            else:
                plt.text(0.5,0.5,
                         'stet-missed/inveta-found values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,5)
            if np.any(np.isfinite(stet_missed_iqr_found)):
                plt.plot(gridresults['stetson_grid'],
                         stet_missed_iqr_found)
                plt.xlabel('stetson J stdev multiplier threshold')
                plt.ylabel('# objects stetson missed but IQR found')
                plt.title('stetson J missed, IQR found')
            else:
                plt.text(0.5,0.5,
                         'stet-missed/IQR-found values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            # SECOND ROW: inveta plots

            plt.subplot(3,5,6)
            if np.any(np.isfinite(inveta_mcc)):
                plt.plot(gridresults['inveta_grid'],
                         inveta_mcc)
                plt.xlabel('inveta stdev multiplier threshold')
                plt.ylabel('MCC')
                plt.title('MCC for inveta')
            else:
                plt.text(0.5,0.5,
                         'inveta MCC values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,7)
            if np.any(np.isfinite(inveta_precision)):
                plt.plot(gridresults['inveta_grid'],
                         inveta_precision)
                plt.xlabel('inveta stdev multiplier threshold')
                plt.ylabel('precision')
                plt.title('precision for inveta')
            else:
                plt.text(0.5,0.5,
                         'inveta precision values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,8)
            if np.any(np.isfinite(inveta_recall)):
                plt.plot(gridresults['inveta_grid'],
                         inveta_recall)
                plt.xlabel('inveta stdev multiplier threshold')
                plt.ylabel('recall')
                plt.title('recall for inveta')
            else:
                plt.text(0.5,0.5,
                         'inveta recall values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,9)
            if np.any(np.isfinite(inveta_missed_stet_found)):
                plt.plot(gridresults['inveta_grid'],
                         inveta_missed_stet_found)
                plt.xlabel('inveta stdev multiplier threshold')
                plt.ylabel('# objects inveta missed but stetson found')
                plt.title('inveta missed, stetson J found')
            else:
                plt.text(0.5,0.5,
                         'inveta-missed-stet-found values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,10)
            if np.any(np.isfinite(inveta_missed_iqr_found)):
                plt.plot(gridresults['inveta_grid'],
                         inveta_missed_iqr_found)
                plt.xlabel('inveta stdev multiplier threshold')
                plt.ylabel('# objects inveta missed but IQR found')
                plt.title('inveta missed, IQR found')
            else:
                plt.text(0.5,0.5,
                         'inveta-missed-iqr-found values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            # THIRD ROW: inveta plots

            plt.subplot(3,5,11)
            if np.any(np.isfinite(iqr_mcc)):
                plt.plot(gridresults['iqr_grid'],
                         iqr_mcc)
                plt.xlabel('IQR stdev multiplier threshold')
                plt.ylabel('MCC')
                plt.title('MCC for IQR')
            else:
                plt.text(0.5,0.5,
                         'IQR MCC values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,12)
            if np.any(np.isfinite(iqr_precision)):
                plt.plot(gridresults['iqr_grid'],
                         iqr_precision)
                plt.xlabel('IQR stdev multiplier threshold')
                plt.ylabel('precision')
                plt.title('precision for IQR')
            else:
                plt.text(0.5,0.5,
                         'IQR precision values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,13)
            if np.any(np.isfinite(iqr_recall)):
                plt.plot(gridresults['iqr_grid'],
                         iqr_recall)
                plt.xlabel('IQR stdev multiplier threshold')
                plt.ylabel('recall')
                plt.title('recall for IQR')
            else:
                plt.text(0.5,0.5,
                         'IQR recall values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,14)
            if np.any(np.isfinite(iqr_missed_stet_found)):
                plt.plot(gridresults['iqr_grid'],
                         iqr_missed_stet_found)
                plt.xlabel('IQR stdev multiplier threshold')
                plt.ylabel('# objects IQR missed but stetson found')
                plt.title('IQR missed, stetson J found')
            else:
                plt.text(0.5,0.5,
                         'iqr-missed-stet-found values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplot(3,5,15)
            if np.any(np.isfinite(iqr_missed_inveta_found)):
                plt.plot(gridresults['iqr_grid'],
                         iqr_missed_inveta_found)
                plt.xlabel('IQR stdev multiplier threshold')
                plt.ylabel('# objects IQR missed but inveta found')
                plt.title('IQR missed, inveta found')
            else:
                plt.text(0.5,0.5,
                         'iqr-missed-inveta-found values are all nan '
                         'for this magbin',
                         transform=plt.gca().transAxes,
                         horizontalalignment='center',
                         verticalalignment='center')
                plt.xticks([])
                plt.yticks([])

            plt.subplots_adjust(hspace=0.25,wspace=0.25)

            plt.suptitle('magcol: %s, magbin: %.3f' % (magcol, magbinmedian))

            plotdir = os.path.join(gridresults['simbasedir'],
                                   'varindex-gridsearch-plots')
            if not os.path.exists(plotdir):
                os.mkdir(plotdir)

            gridplotf = os.path.join(
                plotdir,
                '%s-magbin-%.3f-var-recoverygrid-permagbin.png' %
                (magcol, magbinmedian)
            )

            plt.savefig(gridplotf,dpi=100,bbox_inches='tight')
            plt.close('all')

            # get the best values of MCC, recall, precision and their associated
            # stet, inveta
            stet_mcc_maxind = np.where(stet_mcc == np.max(stet_mcc))
            stet_precision_maxind = np.where(
                stet_precision == np.max(stet_precision)
            )
            stet_recall_maxind = np.where(stet_recall == np.max(stet_recall))

            best_stet_mcc = stet_mcc[stet_mcc_maxind]
            best_stet_precision = stet_mcc[stet_precision_maxind]
            best_stet_recall = stet_mcc[stet_recall_maxind]

            stet_with_best_mcc = gridresults['stetson_grid'][stet_mcc_maxind]
            stet_with_best_precision = gridresults['stetson_grid'][
                stet_precision_maxind
            ]
            stet_with_best_recall = (
                gridresults['stetson_grid'][stet_recall_maxind]
            )

            inveta_mcc_maxind = np.where(inveta_mcc == np.max(inveta_mcc))
            inveta_precision_maxind = np.where(
                inveta_precision == np.max(inveta_precision)
            )
            inveta_recall_maxind = (
                np.where(inveta_recall == np.max(inveta_recall))
            )

            best_inveta_mcc = inveta_mcc[inveta_mcc_maxind]
            best_inveta_precision = inveta_mcc[inveta_precision_maxind]
            best_inveta_recall = inveta_mcc[inveta_recall_maxind]

            inveta_with_best_mcc = gridresults['inveta_grid'][inveta_mcc_maxind]
            inveta_with_best_precision = gridresults['inveta_grid'][
                inveta_precision_maxind
            ]
            inveta_with_best_recall = gridresults['inveta_grid'][
                inveta_recall_maxind
            ]

            iqr_mcc_maxind = np.where(iqr_mcc == np.max(iqr_mcc))
            iqr_precision_maxind = np.where(
                iqr_precision == np.max(iqr_precision)
            )
            iqr_recall_maxind = (
                np.where(iqr_recall == np.max(iqr_recall))
            )

            best_iqr_mcc = iqr_mcc[iqr_mcc_maxind]
            best_iqr_precision = iqr_mcc[iqr_precision_maxind]
            best_iqr_recall = iqr_mcc[iqr_recall_maxind]

            iqr_with_best_mcc = gridresults['iqr_grid'][iqr_mcc_maxind]
            iqr_with_best_precision = gridresults['iqr_grid'][
                iqr_precision_maxind
            ]
            iqr_with_best_recall = gridresults['iqr_grid'][
                iqr_recall_maxind
            ]

            plotres[magcol][magbinmedian] = {
                # stetson
                'stet_grid':gridresults['stetson_grid'],
                'stet_mcc':stet_mcc,
                'stet_precision':stet_precision,
                'stet_recall':stet_recall,
                'stet_missed_inveta_found':stet_missed_inveta_found,
                'best_stet_mcc':best_stet_mcc,
                'stet_with_best_mcc':stet_with_best_mcc,
                'best_stet_precision':best_stet_precision,
                'stet_with_best_precision':stet_with_best_precision,
                'best_stet_recall':best_stet_recall,
                'stet_with_best_recall':stet_with_best_recall,
                # inveta
                'inveta_grid':gridresults['inveta_grid'],
                'inveta_mcc':inveta_mcc,
                'inveta_precision':inveta_precision,
                'inveta_recall':inveta_recall,
                'inveta_missed_stet_found':inveta_missed_stet_found,
                'best_inveta_mcc':best_inveta_mcc,
                'inveta_with_best_mcc':inveta_with_best_mcc,
                'best_inveta_precision':best_inveta_precision,
                'inveta_with_best_precision':inveta_with_best_precision,
                'best_inveta_recall':best_inveta_recall,
                'inveta_with_best_recall':inveta_with_best_recall,
                # iqr
                'iqr_grid':gridresults['iqr_grid'],
                'iqr_mcc':iqr_mcc,
                'iqr_precision':iqr_precision,
                'iqr_recall':iqr_recall,
                'iqr_missed_stet_found':iqr_missed_stet_found,
                'best_iqr_mcc':best_iqr_mcc,
                'iqr_with_best_mcc':iqr_with_best_mcc,
                'best_iqr_precision':best_iqr_precision,
                'iqr_with_best_precision':iqr_with_best_precision,
                'best_iqr_recall':best_iqr_recall,
                'iqr_with_best_recall':iqr_with_best_recall,
                # plot info
                'recoveryplot':gridplotf
            }

            # recommend inveta, stetson index, and iqr for this magbin

            # if there are multiple stets, choose the smallest one
            if stet_with_best_mcc.size > 1:
                plotres[magcol]['best_stetsonj'].append(stet_with_best_mcc[0])
            elif stet_with_best_mcc.size > 0:
                plotres[magcol]['best_stetsonj'].append(stet_with_best_mcc[0])
            else:
                plotres[magcol]['best_stetsonj'].append(np.nan)

            # if there are multiple best invetas, choose the smallest one
            if inveta_with_best_mcc.size > 1:
                plotres[magcol]['best_inveta'].append(inveta_with_best_mcc[0])
            elif inveta_with_best_mcc.size > 0:
                plotres[magcol]['best_inveta'].append(inveta_with_best_mcc[0])
            else:
                plotres[magcol]['best_inveta'].append(np.nan)

            # if there are multiple best iqrs, choose the smallest one
            if iqr_with_best_mcc.size > 1:
                plotres[magcol]['best_iqr'].append(iqr_with_best_mcc[0])
            elif iqr_with_best_mcc.size > 0:
                plotres[magcol]['best_iqr'].append(iqr_with_best_mcc[0])
            else:
                plotres[magcol]['best_iqr'].append(np.nan)

    # write the plotresults to a pickle
    plotrespicklef = os.path.join(simbasedir,
                                  'varindex-gridsearch-magbin-results.pkl')
    with open(plotrespicklef, 'wb') as outfd:
        pickle.dump(plotres, outfd, pickle.HIGHEST_PROTOCOL)

    # recommend the values of stetson J and inveta to use
    for magcol in gridresults['magcols']:

        LOGINFO('best stdev multipliers for each %s magbin:' % magcol)
        LOGINFO('magbin    inveta    stetson J    IQR')

        for magbin, inveta, stet, iqr in zip(
                plotres[magcol]['magbinmedians'],
                plotres[magcol]['best_inveta'],
                plotres[magcol]['best_stetsonj'],
                plotres[magcol]['best_iqr']):
            LOGINFO('%.3f    %.3f    %.3f    %.3f' % (magbin,
                                                      inveta,
                                                      stet,
                                                      iqr))
    return plotres


################################
## PERIODIC VARIABLE RECOVERY ##
################################

PERIODIC_VARTYPES = ['EB','RRab','RRc','rotator',
                     'HADS','planet','LPV','cepheid']

ALIAS_TYPES = ['actual',
               'twice',
               'half',
               'ratio_over_1plus',
               'ratio_over_1minus',
               'ratio_over_1plus_twice',
               'ratio_over_1minus_twice',
               'ratio_over_1plus_thrice',
               'ratio_over_1minus_thrice',
               'ratio_over_minus1',
               'ratio_over_twice_minus1']


[docs]def run_periodfinding(simbasedir,
                      pfmethods=('gls','pdm','bls'),
                      pfkwargs=({},{},{'startp':1.0,'maxtransitduration':0.3}),
                      getblssnr=False,
                      sigclip=5.0,
                      nperiodworkers=10,
                      ncontrolworkers=4,
                      liststartindex=None,
                      listmaxobjects=None):
    '''This runs periodfinding using several period-finders on a collection of
    fake LCs.

    As a rough benchmark, 25000 fake LCs with 10000--50000 points per LC take
    about 26 days in total to run on an invocation of this function using
    GLS+PDM+BLS and 10 periodworkers and 4 controlworkers (so all 40 'cores') on
    a 2 x Xeon E5-2660v3 machine.

    Parameters
    ----------

    pfmethods : sequence of str
        This is used to specify which periodfinders to run. These must be in the
        `lcproc.periodsearch.PFMETHODS` dict.

    pfkwargs : sequence of dict
        This is used to provide optional kwargs to the period-finders.

    getblssnr : bool
        If this is True, will run BLS SNR calculations for each object and
        magcol. This takes a while to run, so it's disabled (False) by default.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    nperiodworkers : int
        This is the number of parallel period-finding worker processes to use.

    ncontrolworkers : int
        This is the number of parallel period-finding control workers to
        use. Each control worker will launch `nperiodworkers` worker processes.

    liststartindex : int
        The starting index of processing. This refers to the filename list
        generated by running `glob.glob` on the fake LCs in `simbasedir`.

    maxobjects : int
        The maximum number of objects to process in this run. Use this with
        `liststartindex` to effectively distribute working on a large list of
        input light curves over several sessions or machines.

    Returns
    -------

    str
        The path to the output summary pickle produced by
        `lcproc.periodsearch.parallel_pf`

    '''

    # get the info from the simbasedir
    with open(os.path.join(simbasedir, 'fakelcs-info.pkl'),'rb') as infd:
        siminfo = pickle.load(infd)

    lcfpaths = siminfo['lcfpath']
    pfdir = os.path.join(simbasedir,'periodfinding')

    # get the column defs for the fakelcs
    timecols = siminfo['timecols']
    magcols = siminfo['magcols']
    errcols = siminfo['errcols']

    # register the fakelc pklc as a custom lcproc format
    # now we should be able to use all lcproc functions correctly
    fakelc_formatkey = 'fake-%s' % siminfo['lcformat']
    lcproc.register_lcformat(
        fakelc_formatkey,
        '*-fakelc.pkl',
        timecols,
        magcols,
        errcols,
        'astrobase.lcproc',
        '_read_pklc',
        magsarefluxes=siminfo['magsarefluxes']
    )

    if liststartindex:
        lcfpaths = lcfpaths[liststartindex:]

    if listmaxobjects:
        lcfpaths = lcfpaths[:listmaxobjects]

    pfinfo = periodsearch.parallel_pf(lcfpaths,
                                      pfdir,
                                      lcformat=fakelc_formatkey,
                                      pfmethods=pfmethods,
                                      pfkwargs=pfkwargs,
                                      getblssnr=getblssnr,
                                      sigclip=sigclip,
                                      nperiodworkers=nperiodworkers,
                                      ncontrolworkers=ncontrolworkers)

    with open(os.path.join(simbasedir,
                           'fakelc-periodsearch.pkl'),'wb') as outfd:
        pickle.dump(pfinfo, outfd, pickle.HIGHEST_PROTOCOL)

    return os.path.join(simbasedir,'fakelc-periodsearch.pkl')


[docs]def check_periodrec_alias(actualperiod,
                          recoveredperiod,
                          tolerance=1.0e-3):
    '''This determines what kind of aliasing (if any) exists between
    `recoveredperiod` and `actualperiod`.

    Parameters
    ----------

    actualperiod : float
        The actual period of the object.

    recoveredperiod : float
        The recovered period of the object.

    tolerance : float
        The absolute difference required between the input periods to mark the
        recovered period as close to the actual period.

    Returns
    -------

    str
        The type of alias determined for the input combination of periods. This
        will be CSV string with values taken from the following list, based on
        the types of alias found::

            ['actual',
             'twice',
             'half',
             'ratio_over_1plus',
             'ratio_over_1minus',
             'ratio_over_1plus_twice',
             'ratio_over_1minus_twice',
             'ratio_over_1plus_thrice',
             'ratio_over_1minus_thrice',
             'ratio_over_minus1',
             'ratio_over_twice_minus1']

    '''

    if not (np.isfinite(actualperiod) and np.isfinite(recoveredperiod)):

        LOGERROR("can't compare nan values for actual/recovered periods")
        return 'unknown'

    else:

        #################
        ## ALIAS TYPES ##
        #################

        # simple ratios
        twotimes_p = actualperiod*2.0
        half_p = actualperiod*0.5

        # first kind of alias
        alias_1a = actualperiod/(1.0+actualperiod)
        alias_1b = actualperiod/(1.0-actualperiod)

        # second kind of alias
        alias_2a = actualperiod/(1.0+2.0*actualperiod)
        alias_2b = actualperiod/(1.0-2.0*actualperiod)

        # third kind of alias
        alias_3a = actualperiod/(1.0+3.0*actualperiod)
        alias_3b = actualperiod/(1.0-3.0*actualperiod)

        # fourth kind of alias
        alias_4a = actualperiod/(actualperiod - 1.0)
        alias_4b = actualperiod/(2.0*actualperiod - 1.0)

        aliases = np.ravel(np.array([
            actualperiod,
            twotimes_p,
            half_p,
            alias_1a,
            alias_1b,
            alias_2a,
            alias_2b,
            alias_3a,
            alias_3b,
            alias_4a,
            alias_4b]
        ))
        alias_labels = np.array(ALIAS_TYPES)

        # check type of alias
        closest_alias = np.isclose(recoveredperiod, aliases, atol=tolerance)

        if np.any(closest_alias):

            closest_alias_type = alias_labels[closest_alias]
            return ','.join(closest_alias_type.tolist())

        else:

            return 'other'


[docs]def periodicvar_recovery(fakepfpkl,
                         simbasedir,
                         period_tolerance=1.0e-3):

    '''Recovers the periodic variable status/info for the simulated PF result.

    - Uses simbasedir and the lcfbasename stored in fakepfpkl to figure out
      where the LC for this object is.
    - Gets the actual_varparams, actual_varperiod, actual_vartype,
      actual_varamplitude elements from the LC.
    - Figures out if the current objectid is a periodic variable (using
      actual_vartype).
    - If it is a periodic variable, gets the canonical period assigned to it.
    - Checks if the period was recovered in any of the five best periods
      reported by any of the period-finders, checks if the period recovered was
      a harmonic of the period.
    - Returns the objectid, actual period and vartype, recovered period, and
      recovery status.


    Parameters
    ----------

    fakepfpkl : str
        This is a periodfinding-<objectid>.pkl[.gz] file produced in the
        `simbasedir/periodfinding` subdirectory after `run_periodfinding` above
        is done.

    simbasedir : str
        The base directory where all of the fake LCs and period-finding results
        are.

    period_tolerance : float
        The maximum difference that this function will consider between an
        actual period (or its aliases) and a recovered period to consider it as
        as a 'recovered' period.

    Returns
    -------

    dict
        Returns a dict of period-recovery results.

    '''

    if fakepfpkl.endswith('.gz'):
        infd = gzip.open(fakepfpkl,'rb')
    else:
        infd = open(fakepfpkl,'rb')

    fakepf = pickle.load(infd)
    infd.close()

    # get info from the fakepf dict
    objectid, lcfbasename = fakepf['objectid'], fakepf['lcfbasename']
    lcfpath = os.path.join(simbasedir,'lightcurves',lcfbasename)

    # if the LC doesn't exist, bail out
    if not os.path.exists(lcfpath):
        LOGERROR('light curve for %s does not exist at: %s' % (objectid,
                                                               lcfpath))
        return None

    # now, open the fakelc
    fakelc = lcproc._read_pklc(lcfpath)

    # get the actual_varparams, actual_varperiod, actual_varamplitude
    actual_varparams, actual_varperiod, actual_varamplitude, actual_vartype = (
        fakelc['actual_varparams'],
        fakelc['actual_varperiod'],
        fakelc['actual_varamplitude'],
        fakelc['actual_vartype']
    )

    # get the moments too so we can track LC noise, etc.
    actual_moments = fakelc['moments']

    # get the magcols for this LC
    magcols = fakelc['magcols']

    # get the recovered info from each of the available methods
    pfres = {
        'objectid':objectid,
        'simbasedir':simbasedir,
        'magcols':magcols,
        'fakelc':os.path.abspath(lcfpath),
        'fakepf':os.path.abspath(fakepfpkl),
        'actual_vartype':actual_vartype,
        'actual_varperiod':actual_varperiod,
        'actual_varamplitude':actual_varamplitude,
        'actual_varparams':actual_varparams,
        'actual_moments':actual_moments,
        'recovery_periods':[],
        'recovery_lspvals':[],
        'recovery_pfmethods':[],
        'recovery_magcols':[],
        'recovery_status':[],
        'recovery_pdiff':[],
    }

    # populate the pfres dict with the periods, pfmethods, and magcols
    for magcol in magcols:

        for pfm in lcproc.PFMETHODS:

            if pfm in fakepf[magcol]:

                # only get the unique recovered periods by using
                # period_tolerance
                for rpi, rp in enumerate(
                        fakepf[magcol][pfm]['nbestperiods']
                ):

                    if ((not np.any(np.isclose(
                            rp,
                            np.array(pfres['recovery_periods']),
                            rtol=period_tolerance
                    ))) and np.isfinite(rp)):

                        # populate the recovery periods, pfmethods, and magcols
                        pfres['recovery_periods'].append(rp)
                        pfres['recovery_pfmethods'].append(pfm)
                        pfres['recovery_magcols'].append(magcol)

                        # normalize the periodogram peak value to between
                        # 0 and 1 so we can put in the results of multiple
                        # periodfinders on one scale
                        if pfm == 'pdm':

                            this_lspval = (
                                np.max(fakepf[magcol][pfm]['lspvals']) -
                                fakepf[magcol][pfm]['nbestlspvals'][rpi]
                            )

                        else:

                            this_lspval = (
                                fakepf[magcol][pfm]['nbestlspvals'][rpi] /
                                np.max(fakepf[magcol][pfm]['lspvals'])
                            )

                        # add the normalized lspval to the outdict for
                        # this object as well. later, we'll use this to
                        # construct a periodogram for objects that were actually
                        # not variables
                        pfres['recovery_lspvals'].append(this_lspval)

    # convert the recovery_* lists to arrays
    pfres['recovery_periods'] = np.array(pfres['recovery_periods'])
    pfres['recovery_lspvals'] = np.array(pfres['recovery_lspvals'])
    pfres['recovery_pfmethods'] = np.array(pfres['recovery_pfmethods'])
    pfres['recovery_magcols'] = np.array(pfres['recovery_magcols'])

    #
    # now figure out recovery status
    #

    # if this is an actual periodic variable, characterize the recovery
    if (actual_vartype and
        actual_vartype in PERIODIC_VARTYPES and
        np.isfinite(actual_varperiod)):

        if pfres['recovery_periods'].size > 0:

            for ri in range(pfres['recovery_periods'].size):

                pfres['recovery_pdiff'].append(pfres['recovery_periods'][ri] -
                                               np.asscalar(actual_varperiod))

                # get the alias types
                pfres['recovery_status'].append(
                    check_periodrec_alias(actual_varperiod,
                                          pfres['recovery_periods'][ri],
                                          tolerance=period_tolerance)
                )

            # turn the recovery_pdiff/status lists into arrays
            pfres['recovery_status'] = np.array(pfres['recovery_status'])
            pfres['recovery_pdiff'] = np.array(pfres['recovery_pdiff'])

            # find the best recovered period and its status
            rec_absdiff = np.abs(pfres['recovery_pdiff'])
            best_recp_ind = rec_absdiff == rec_absdiff.min()

            pfres['best_recovered_period'] = (
                pfres['recovery_periods'][best_recp_ind]
            )
            pfres['best_recovered_pfmethod'] = (
                pfres['recovery_pfmethods'][best_recp_ind]
            )
            pfres['best_recovered_magcol'] = (
                pfres['recovery_magcols'][best_recp_ind]
            )
            pfres['best_recovered_status'] = (
                pfres['recovery_status'][best_recp_ind]
            )
            pfres['best_recovered_pdiff'] = (
                pfres['recovery_pdiff'][best_recp_ind]
            )

        else:

            LOGWARNING(
                'no finite periods recovered from period-finding for %s' %
                fakepfpkl
            )

            pfres['recovery_status'] = np.array(['no_finite_periods_recovered'])
            pfres['recovery_pdiff'] = np.array([np.nan])
            pfres['best_recovered_period'] = np.array([np.nan])
            pfres['best_recovered_pfmethod'] = np.array([],dtype=np.unicode_)
            pfres['best_recovered_magcol'] = np.array([],dtype=np.unicode_)
            pfres['best_recovered_status'] = np.array([],dtype=np.unicode_)
            pfres['best_recovered_pdiff'] = np.array([np.nan])

    # if this is not actually a variable, get the recovered period,
    # etc. anyway. this way, we can see what we need to look out for and avoid
    # when getting these values for actual objects
    else:

        pfres['recovery_status'] = np.array(
            ['not_variable']*pfres['recovery_periods'].size
        )
        pfres['recovery_pdiff'] = np.zeros(pfres['recovery_periods'].size)

        pfres['best_recovered_period'] = np.array([np.nan])
        pfres['best_recovered_pfmethod'] = np.array([],dtype=np.unicode_)
        pfres['best_recovered_magcol'] = np.array([],dtype=np.unicode_)
        pfres['best_recovered_status'] = np.array(['not_variable'])
        pfres['best_recovered_pdiff'] = np.array([np.nan])

    return pfres


[docs]def periodrec_worker(task):
    '''This is a parallel worker for running period-recovery.

    Parameters
    ----------

    task : tuple
        This is used to pass args to the `periodicvar_recovery` function::

            task[0] = period-finding result pickle to work on
            task[1] = simbasedir
            task[2] = period_tolerance

    Returns
    -------

    dict
        This is the dict produced by the `periodicvar_recovery` function for the
        input period-finding result pickle.

    '''

    pfpkl, simbasedir, period_tolerance = task

    try:
        return periodicvar_recovery(pfpkl,
                                    simbasedir,
                                    period_tolerance=period_tolerance)

    except Exception:
        LOGEXCEPTION('periodic var recovery failed for %s' % repr(task))
        return None


[docs]def parallel_periodicvar_recovery(simbasedir,
                                  period_tolerance=1.0e-3,
                                  liststartind=None,
                                  listmaxobjects=None,
                                  nworkers=None):
    '''This is a parallel driver for `periodicvar_recovery`.

    Parameters
    ----------

    simbasedir : str
        The base directory where all of the fake LCs and period-finding results
        are.

    period_tolerance : float
        The maximum difference that this function will consider between an
        actual period (or its aliases) and a recovered period to consider it as
        as a 'recovered' period.

    liststartindex : int
        The starting index of processing. This refers to the filename list
        generated by running `glob.glob` on the period-finding result pickles in
        `simbasedir/periodfinding`.

    listmaxobjects : int
        The maximum number of objects to process in this run. Use this with
        `liststartindex` to effectively distribute working on a large list of
        input period-finding result pickles over several sessions or machines.

    nperiodworkers : int
        This is the number of parallel period-finding worker processes to use.

    Returns
    -------

    str
        Returns the filename of the pickle produced containing all of the period
        recovery results.

    '''

    # figure out the periodfinding pickles directory
    pfpkldir = os.path.join(simbasedir,'periodfinding')

    if not os.path.exists(pfpkldir):
        LOGERROR('no "periodfinding" subdirectory in %s, can\'t continue' %
                 simbasedir)
        return None

    # find all the periodfinding pickles
    pfpkl_list = glob.glob(os.path.join(pfpkldir,'*periodfinding*pkl*'))

    if len(pfpkl_list) > 0:

        if liststartind:
            pfpkl_list = pfpkl_list[liststartind:]

        if listmaxobjects:
            pfpkl_list = pfpkl_list[:listmaxobjects]

        tasks = [(x, simbasedir, period_tolerance) for x in pfpkl_list]

        pool = mp.Pool(nworkers)
        results = pool.map(periodrec_worker, tasks)
        pool.close()
        pool.join()

        resdict = {x['objectid']:x for x in results if x is not None}

        actual_periodicvars = np.array(
            [x['objectid'] for x in results
             if (x is not None and x['actual_vartype'] in PERIODIC_VARTYPES)],
            dtype=np.unicode_
        )

        recovered_periodicvars = np.array(
            [x['objectid'] for x in results
             if (x is not None and 'actual' in x['best_recovered_status'])],
            dtype=np.unicode_
        )
        alias_twice_periodicvars = np.array(
            [x['objectid'] for x in results
             if (x is not None and 'twice' in x['best_recovered_status'])],
            dtype=np.unicode_
        )
        alias_half_periodicvars = np.array(
            [x['objectid'] for x in results
             if (x is not None and 'half' in x['best_recovered_status'])],
            dtype=np.unicode_
        )

        all_objectids = [x['objectid'] for x in results]

        outdict = {'simbasedir':os.path.abspath(simbasedir),
                   'objectids':all_objectids,
                   'period_tolerance':period_tolerance,
                   'actual_periodicvars':actual_periodicvars,
                   'recovered_periodicvars':recovered_periodicvars,
                   'alias_twice_periodicvars':alias_twice_periodicvars,
                   'alias_half_periodicvars':alias_half_periodicvars,
                   'details':resdict}

        outfile = os.path.join(simbasedir,'periodicvar-recovery.pkl')
        with open(outfile, 'wb') as outfd:
            pickle.dump(outdict, outfd, pickle.HIGHEST_PROTOCOL)

        return outdict

    else:

        LOGERROR(
            'no periodfinding result pickles found in %s, can\'t continue' %
            pfpkldir
        )
        return None


PERIODREC_DEFAULT_MAGBINS = np.arange(8.0,16.25,0.25)
PERIODREC_DEFAULT_PERIODBINS = np.arange(0.0,500.0,0.5)
PERIODREC_DEFAULT_AMPBINS = np.arange(0.0,2.0,0.05)
PERIODREC_DEFAULT_NDETBINS = np.arange(0.0,60000.0,1000.0)


[docs]def plot_periodicvar_recovery_results(
        precvar_results,
        aliases_count_as_recovered=None,
        magbins=None,
        periodbins=None,
        amplitudebins=None,
        ndetbins=None,
        minbinsize=1,
        plotfile_ext='png',
):
    '''This plots the results of periodic var recovery.

    This function makes plots for periodicvar recovered fraction as a function
    of:

    - magbin
    - periodbin
    - amplitude of variability
    - ndet

    with plot lines broken down by:

    - magcol
    - periodfinder
    - vartype
    - recovery status

    The kwargs `magbins`, `periodbins`, `amplitudebins`, and `ndetbins` can be
    used to set the bin lists as needed. The kwarg `minbinsize` controls how
    many elements per bin are required to accept a bin in processing its
    recovery characteristics for mags, periods, amplitudes, and ndets.

    Parameters
    ----------

    precvar_results : dict or str
        This is either a dict returned by parallel_periodicvar_recovery or the
        pickle created by that function.

    aliases_count_as_recovered : list of str or 'all'
        This is used to set which kinds of aliases this function considers as
        'recovered' objects. Normally, we require that recovered objects have a
        recovery status of 'actual' to indicate the actual period was
        recovered. To change this default behavior, aliases_count_as_recovered
        can be set to a list of alias status strings that should be considered
        as 'recovered' objects as well. Choose from the following alias types::

          'twice'                    recovered_p = 2.0*actual_p
          'half'                     recovered_p = 0.5*actual_p
          'ratio_over_1plus'         recovered_p = actual_p/(1.0+actual_p)
          'ratio_over_1minus'        recovered_p = actual_p/(1.0-actual_p)
          'ratio_over_1plus_twice'   recovered_p = actual_p/(1.0+2.0*actual_p)
          'ratio_over_1minus_twice'  recovered_p = actual_p/(1.0-2.0*actual_p)
          'ratio_over_1plus_thrice'  recovered_p = actual_p/(1.0+3.0*actual_p)
          'ratio_over_1minus_thrice' recovered_p = actual_p/(1.0-3.0*actual_p)
          'ratio_over_minus1'        recovered_p = actual_p/(actual_p - 1.0)
          'ratio_over_twice_minus1'  recovered_p = actual_p/(2.0*actual_p - 1.0)

        or set `aliases_count_as_recovered='all'` to include all of the above in
        the 'recovered' periodic var list.

    magbins : np.array
        The magnitude bins to plot the recovery rate results over. If None, the
        default mag bins will be used: `np.arange(8.0,16.25,0.25)`.

    periodbins : np.array
        The period bins to plot the recovery rate results over. If None, the
        default period bins will be used: `np.arange(0.0,500.0,0.5)`.

    amplitudebins : np.array
        The variability amplitude bins to plot the recovery rate results
        over. If None, the default amplitude bins will be used:
        `np.arange(0.0,2.0,0.05)`.

    ndetbins : np.array
        The ndet bins to plot the recovery rate results over. If None, the
        default ndet bins will be used: `np.arange(0.0,60000.0,1000.0)`.

    minbinsize : int
        The minimum number of objects per bin required to plot a bin and its
        recovery fraction on the plot.

    plotfile_ext : {'png','pdf'}
        Sets the plot output files' extension.

    Returns
    -------

    dict
        A dict containing recovery fraction statistics and the paths to each of
        the plots made.

    '''

    # get the result pickle/dict
    if isinstance(precvar_results, str) and os.path.exists(precvar_results):

        with open(precvar_results,'rb') as infd:
            precvar = pickle.load(infd)

    elif isinstance(precvar_results, dict):

        precvar = precvar_results

    else:
        LOGERROR('could not understand the input '
                 'periodic var recovery dict/pickle')
        return None

    # get the simbasedir and open the fakelc-info.pkl. we'll need the magbins
    # definition from here.
    simbasedir = precvar['simbasedir']

    lcinfof = os.path.join(simbasedir,'fakelcs-info.pkl')

    if not os.path.exists(lcinfof):
        LOGERROR('fakelcs-info.pkl does not exist in %s, can\'t continue' %
                 simbasedir)
        return None

    with open(lcinfof,'rb') as infd:
        lcinfo = pickle.load(infd)

    # get the magcols, vartypes, sdssr, isvariable flags
    magcols = lcinfo['magcols']
    objectid = lcinfo['objectid']
    ndet = lcinfo['ndet']
    sdssr = lcinfo['sdssr']

    # get the actual periodic vars
    actual_periodicvars = precvar['actual_periodicvars']

    # generate lists of objects binned by magbins and periodbins
    LOGINFO('getting sdssr and ndet for actual periodic vars...')

    # get the sdssr and ndet for all periodic vars
    periodicvar_sdssr = []
    periodicvar_ndet = []
    periodicvar_objectids = []

    for pobj in actual_periodicvars:

        pobjind = objectid == pobj
        periodicvar_objectids.append(pobj)
        periodicvar_sdssr.append(sdssr[pobjind])
        periodicvar_ndet.append(ndet[pobjind])

    periodicvar_sdssr = np.array(periodicvar_sdssr)
    periodicvar_objectids = np.array(periodicvar_objectids)
    periodicvar_ndet = np.array(periodicvar_ndet)

    LOGINFO('getting periods, vartypes, '
            'amplitudes, ndet for actual periodic vars...')

    # get the periods, vartypes, amplitudes for the actual periodic vars
    periodicvar_periods = [
        np.asscalar(precvar['details'][x]['actual_varperiod'])
        for x in periodicvar_objectids
    ]
    periodicvar_amplitudes = [
        np.asscalar(precvar['details'][x]['actual_varamplitude'])
        for x in periodicvar_objectids
    ]

    #
    # do the binning
    #

    # bin by mag
    LOGINFO('binning actual periodic vars by magnitude...')

    magbinned_sdssr = []
    magbinned_periodicvars = []

    if not magbins:
        magbins = PERIODREC_DEFAULT_MAGBINS
    magbininds = np.digitize(np.ravel(periodicvar_sdssr), magbins)

    for mbinind, magi in zip(np.unique(magbininds),
                             range(len(magbins)-1)):

        thisbin_periodicvars = periodicvar_objectids[magbininds == mbinind]

        if (thisbin_periodicvars.size > (minbinsize-1)):

            magbinned_sdssr.append((magbins[magi] + magbins[magi+1])/2.0)
            magbinned_periodicvars.append(thisbin_periodicvars)

    # bin by period
    LOGINFO('binning actual periodic vars by period...')

    periodbinned_periods = []
    periodbinned_periodicvars = []

    if not periodbins:
        periodbins = PERIODREC_DEFAULT_PERIODBINS
    periodbininds = np.digitize(np.ravel(periodicvar_periods), periodbins)

    for pbinind, peri in zip(np.unique(periodbininds),
                             range(len(periodbins)-1)):

        thisbin_periodicvars = periodicvar_objectids[periodbininds == pbinind]

        if (thisbin_periodicvars.size > (minbinsize-1)):

            periodbinned_periods.append((periodbins[peri] +
                                         periodbins[peri+1])/2.0)
            periodbinned_periodicvars.append(thisbin_periodicvars)

    # bin by amplitude of variability
    LOGINFO('binning actual periodic vars by variability amplitude...')

    amplitudebinned_amplitudes = []
    amplitudebinned_periodicvars = []

    if not amplitudebins:
        amplitudebins = PERIODREC_DEFAULT_AMPBINS
    amplitudebininds = np.digitize(np.ravel(np.abs(periodicvar_amplitudes)),
                                   amplitudebins)

    for abinind, ampi in zip(np.unique(amplitudebininds),
                             range(len(amplitudebins)-1)):

        thisbin_periodicvars = periodicvar_objectids[
            amplitudebininds == abinind
        ]

        if (thisbin_periodicvars.size > (minbinsize-1)):

            amplitudebinned_amplitudes.append(
                (amplitudebins[ampi] +
                 amplitudebins[ampi+1])/2.0
            )
            amplitudebinned_periodicvars.append(thisbin_periodicvars)

    # bin by ndet
    LOGINFO('binning actual periodic vars by ndet...')

    ndetbinned_ndets = []
    ndetbinned_periodicvars = []

    if not ndetbins:
        ndetbins = PERIODREC_DEFAULT_NDETBINS
    ndetbininds = np.digitize(np.ravel(periodicvar_ndet), ndetbins)

    for nbinind, ndeti in zip(np.unique(ndetbininds),
                              range(len(ndetbins)-1)):

        thisbin_periodicvars = periodicvar_objectids[ndetbininds == nbinind]

        if (thisbin_periodicvars.size > (minbinsize-1)):

            ndetbinned_ndets.append(
                (ndetbins[ndeti] +
                 ndetbins[ndeti+1])/2.0
            )
            ndetbinned_periodicvars.append(thisbin_periodicvars)

    # now figure out what 'recovered' means using the provided
    # aliases_count_as_recovered kwarg
    recovered_status = ['actual']

    if isinstance(aliases_count_as_recovered, list):

        for atype in aliases_count_as_recovered:
            if atype in ALIAS_TYPES:
                recovered_status.append(atype)
            else:
                LOGWARNING('unknown alias type: %s, skipping' % atype)

    elif aliases_count_as_recovered and aliases_count_as_recovered == 'all':
        for atype in ALIAS_TYPES[1:]:
            recovered_status.append(atype)

    # find all the matching objects for these recovered statuses
    recovered_periodicvars = np.array(
        [precvar['details'][x]['objectid'] for x in precvar['details']
         if (precvar['details'][x] is not None and
             precvar['details'][x]['best_recovered_status']
             in recovered_status)],
        dtype=np.unicode_
    )

    LOGINFO('recovered %s/%s periodic variables (frac: %.3f) with '
            'period recovery status: %s' %
            (recovered_periodicvars.size,
             actual_periodicvars.size,
             float(recovered_periodicvars.size/actual_periodicvars.size),
             ', '.join(recovered_status)))

    # get the objects recovered per bin and overall recovery fractions per bin
    magbinned_recovered_objects = [
        np.intersect1d(x,recovered_periodicvars)
        for x in magbinned_periodicvars
    ]
    magbinned_recfrac = np.array([float(x.size/y.size) for x,y
                                  in zip(magbinned_recovered_objects,
                                         magbinned_periodicvars)])

    periodbinned_recovered_objects = [
        np.intersect1d(x,recovered_periodicvars)
        for x in periodbinned_periodicvars
    ]
    periodbinned_recfrac = np.array([float(x.size/y.size) for x,y
                                     in zip(periodbinned_recovered_objects,
                                            periodbinned_periodicvars)])

    amplitudebinned_recovered_objects = [
        np.intersect1d(x,recovered_periodicvars)
        for x in amplitudebinned_periodicvars
    ]
    amplitudebinned_recfrac = np.array(
        [float(x.size/y.size) for x,y
         in zip(amplitudebinned_recovered_objects,
                amplitudebinned_periodicvars)]
    )

    ndetbinned_recovered_objects = [
        np.intersect1d(x,recovered_periodicvars)
        for x in ndetbinned_periodicvars
    ]
    ndetbinned_recfrac = np.array([float(x.size/y.size) for x,y
                                   in zip(ndetbinned_recovered_objects,
                                          ndetbinned_periodicvars)])

    # convert the bin medians to arrays
    magbinned_sdssr = np.array(magbinned_sdssr)
    periodbinned_periods = np.array(periodbinned_periods)
    amplitudebinned_amplitudes = np.array(amplitudebinned_amplitudes)
    ndetbinned_ndets = np.array(ndetbinned_ndets)

    # this is the initial output dict
    outdict = {
        'simbasedir':simbasedir,
        'precvar_results':precvar,
        'magcols':magcols,
        'objectids':objectid,
        'ndet':ndet,
        'sdssr':sdssr,
        'actual_periodicvars':actual_periodicvars,
        'recovered_periodicvars':recovered_periodicvars,
        'recovery_definition':recovered_status,
        # mag binned actual periodicvars
        # note that only bins with nobjects > minbinsize are included
        'magbins':magbins,
        'magbinned_mags':magbinned_sdssr,
        'magbinned_periodicvars':magbinned_periodicvars,
        'magbinned_recoveredvars':magbinned_recovered_objects,
        'magbinned_recfrac':magbinned_recfrac,
        # period binned actual periodicvars
        # note that only bins with nobjects > minbinsize are included
        'periodbins':periodbins,
        'periodbinned_periods':periodbinned_periods,
        'periodbinned_periodicvars':periodbinned_periodicvars,
        'periodbinned_recoveredvars':periodbinned_recovered_objects,
        'periodbinned_recfrac':periodbinned_recfrac,
        # amplitude binned actual periodicvars
        # note that only bins with nobjects > minbinsize are included
        'amplitudebins':amplitudebins,
        'amplitudebinned_amplitudes':amplitudebinned_amplitudes,
        'amplitudebinned_periodicvars':amplitudebinned_periodicvars,
        'amplitudebinned_recoveredvars':amplitudebinned_recovered_objects,
        'amplitudebinned_recfrac':amplitudebinned_recfrac,
        # ndet binned actual periodicvars
        # note that only bins with nobjects > minbinsize are included
        'ndetbins':ndetbins,
        'ndetbinned_ndets':ndetbinned_ndets,
        'ndetbinned_periodicvars':ndetbinned_periodicvars,
        'ndetbinned_recoveredvars':ndetbinned_recovered_objects,
        'ndetbinned_recfrac':ndetbinned_recfrac,
    }

    # figure out which pfmethods were used
    all_pfmethods = np.unique(
        np.concatenate(
            [np.unique(precvar['details'][x]['recovery_pfmethods'])
             for x in precvar['details']]
        )
    )

    # figure out all vartypes
    all_vartypes = np.unique(
        [(precvar['details'][x]['actual_vartype'])
         for x in precvar['details'] if
         (precvar['details'][x]['actual_vartype'] is not None)]
    )

    # figure out all alias types
    all_aliastypes = recovered_status

    # add these to the outdict
    outdict['aliastypes'] = all_aliastypes
    outdict['pfmethods'] = all_pfmethods
    outdict['vartypes'] = all_vartypes

    # these are recfracs per-magcol, -vartype, -periodfinder, -aliastype
    # binned appropriately by mags, periods, amplitudes, and ndet
    # all of these have the shape as the magcols, aliastypes, pfmethods, and
    # vartypes lists above.

    magbinned_per_magcol_recfracs = []
    magbinned_per_vartype_recfracs = []
    magbinned_per_pfmethod_recfracs = []
    magbinned_per_aliastype_recfracs = []

    periodbinned_per_magcol_recfracs = []
    periodbinned_per_vartype_recfracs = []
    periodbinned_per_pfmethod_recfracs = []
    periodbinned_per_aliastype_recfracs = []

    amplitudebinned_per_magcol_recfracs = []
    amplitudebinned_per_vartype_recfracs = []
    amplitudebinned_per_pfmethod_recfracs = []
    amplitudebinned_per_aliastype_recfracs = []

    ndetbinned_per_magcol_recfracs = []
    ndetbinned_per_vartype_recfracs = []
    ndetbinned_per_pfmethod_recfracs = []
    ndetbinned_per_aliastype_recfracs = []

    #
    # finally, we do stuff for the plots!
    #
    recplotdir = os.path.join(simbasedir, 'periodic-variable-recovery-plots')
    if not os.path.exists(recplotdir):
        os.mkdir(recplotdir)

    # 1. recovery-rate by magbin

    # 1a. plot of overall recovery rate per magbin
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    plt.plot(magbinned_sdssr, magbinned_recfrac,marker='.',ms=0.0)
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('overall recovery fraction by periodic var magnitudes')
    plt.ylim((0,1))
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-magnitudes-overall.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 1b. plot of recovery rate per magbin per magcol
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    for magcol in magcols:

        thismagcol_recfracs = []

        for magbin_pv, magbin_rv in zip(magbinned_periodicvars,
                                        magbinned_recovered_objects):

            thisbin_thismagcol_recvars = [
                x for x in magbin_rv
                if (precvar['details'][x]['best_recovered_magcol'] == magcol)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thismagcol_recvars).size /
                magbin_pv.size
            )
            thismagcol_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(magbinned_sdssr,
                 np.array(thismagcol_recfracs),
                 marker='.',
                 label='magcol: %s' % magcol,
                 ms=0.0)

        # add this to the outdict array
        magbinned_per_magcol_recfracs.append(np.array(thismagcol_recfracs))

    # finish up the plot
    plt.plot(magbinned_sdssr, magbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per magcol recovery fraction by periodic var magnitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-magnitudes-magcols.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 1c. plot of recovery rate per magbin per periodfinder
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out which pfmethods were used
    all_pfmethods = np.unique(
        np.concatenate(
            [np.unique(precvar['details'][x]['recovery_pfmethods'])
             for x in precvar['details']]
        )
    )

    for pfm in all_pfmethods:

        thispf_recfracs = []

        for magbin_pv, magbin_rv in zip(magbinned_periodicvars,
                                        magbinned_recovered_objects):

            thisbin_thispf_recvars = [
                x for x in magbin_rv
                if (precvar['details'][x]['best_recovered_pfmethod'] == pfm)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thispf_recvars).size /
                magbin_pv.size
            )
            thispf_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(magbinned_sdssr,
                 np.array(thispf_recfracs),
                 marker='.',
                 label='%s' % pfm.upper(),
                 ms=0.0)

        # add this to the outdict array
        magbinned_per_pfmethod_recfracs.append(np.array(thispf_recfracs))

    # finish up the plot
    plt.plot(magbinned_sdssr, magbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per period-finder recovery fraction by periodic var magnitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-magnitudes-pfmethod.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 1d. plot of recovery rate per magbin per variable type
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out all vartypes
    all_vartypes = np.unique(
        [(precvar['details'][x]['actual_vartype'])
         for x in precvar['details'] if
         (precvar['details'][x]['actual_vartype'] is not None)]
    )

    for vt in all_vartypes:

        thisvt_recfracs = []

        for magbin_pv, magbin_rv in zip(magbinned_periodicvars,
                                        magbinned_recovered_objects):

            thisbin_thisvt_recvars = [
                x for x in magbin_rv
                if (precvar['details'][x]['actual_vartype'] == vt)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thisvt_recvars).size /
                magbin_pv.size
            )
            thisvt_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(magbinned_sdssr,
                 np.array(thisvt_recfracs),
                 marker='.',
                 label='%s' % vt,
                 ms=0.0)

        # add this to the outdict array
        magbinned_per_vartype_recfracs.append(np.array(thisvt_recfracs))

    # finish up the plot
    plt.plot(magbinned_sdssr, magbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per vartype recovery fraction by periodic var magnitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-magnitudes-vartype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 1e. plot of recovery rate per magbin per alias type
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out all alias types
    all_aliastypes = recovered_status

    for at in all_aliastypes:

        thisat_recfracs = []

        for magbin_pv, magbin_rv in zip(magbinned_periodicvars,
                                        magbinned_recovered_objects):

            thisbin_thisat_recvars = [
                x for x in magbin_rv
                if (precvar['details'][x]['best_recovered_status'][0] == at)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thisat_recvars).size /
                magbin_pv.size
            )
            thisat_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(magbinned_sdssr,
                 np.array(thisat_recfracs),
                 marker='.',
                 label='%s' % at,
                 ms=0.0)

        # add this to the outdict array
        magbinned_per_aliastype_recfracs.append(np.array(thisat_recfracs))

    # finish up the plot
    plt.plot(magbinned_sdssr, magbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per alias-type recovery fraction by periodic var magnitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-magnitudes-aliastype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 2. recovery-rate by periodbin

    # 2a. plot of overall recovery rate per periodbin
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    plt.plot(periodbinned_periods, periodbinned_recfrac,
             marker='.',ms=0.0)
    plt.xlabel('periodic variable period [days]')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('overall recovery fraction by periodic var periods')
    plt.ylim((0,1))
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-periods-overall.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 2b. plot of recovery rate per periodbin per magcol
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    for magcol in magcols:

        thismagcol_recfracs = []

        for periodbin_pv, periodbin_rv in zip(periodbinned_periodicvars,
                                              periodbinned_recovered_objects):

            thisbin_thismagcol_recvars = [
                x for x in periodbin_rv
                if (precvar['details'][x]['best_recovered_magcol'] == magcol)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thismagcol_recvars).size /
                periodbin_pv.size
            )
            thismagcol_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(periodbinned_periods,
                 np.array(thismagcol_recfracs),
                 marker='.',
                 label='magcol: %s' % magcol,
                 ms=0.0)

        # add this to the outdict array
        periodbinned_per_magcol_recfracs.append(np.array(thismagcol_recfracs))

    # finish up the plot
    plt.plot(periodbinned_periods, periodbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per magcol recovery fraction by periodic var periods')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-periods-magcols.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 2c. plot of recovery rate per periodbin per periodfinder
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out which pfmethods were used
    all_pfmethods = np.unique(
        np.concatenate(
            [np.unique(precvar['details'][x]['recovery_pfmethods'])
             for x in precvar['details']]
        )
    )

    for pfm in all_pfmethods:

        thispf_recfracs = []

        for periodbin_pv, periodbin_rv in zip(periodbinned_periodicvars,
                                              periodbinned_recovered_objects):

            thisbin_thispf_recvars = [
                x for x in periodbin_rv
                if (precvar['details'][x]['best_recovered_pfmethod'] == pfm)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thispf_recvars).size /
                periodbin_pv.size
            )
            thispf_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(periodbinned_periods,
                 np.array(thispf_recfracs),
                 marker='.',
                 label='%s' % pfm.upper(),
                 ms=0.0)

        # add this to the outdict array
        periodbinned_per_pfmethod_recfracs.append(np.array(thispf_recfracs))

    # finish up the plot
    plt.plot(periodbinned_periods, periodbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per period-finder recovery fraction by periodic var periods')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-periods-pfmethod.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 2d. plot of recovery rate per periodbin per variable type
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out all vartypes
    all_vartypes = np.unique(
        [(precvar['details'][x]['actual_vartype'])
         for x in precvar['details'] if
         (precvar['details'][x]['actual_vartype'] is not None)]
    )

    for vt in all_vartypes:

        thisvt_recfracs = []

        for periodbin_pv, periodbin_rv in zip(periodbinned_periodicvars,
                                              periodbinned_recovered_objects):

            thisbin_thisvt_recvars = [
                x for x in periodbin_rv
                if (precvar['details'][x]['actual_vartype'] == vt)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thisvt_recvars).size /
                periodbin_pv.size
            )
            thisvt_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(periodbinned_periods,
                 np.array(thisvt_recfracs),
                 marker='.',
                 label='%s' % vt,
                 ms=0.0)

        # add this to the outdict array
        periodbinned_per_vartype_recfracs.append(np.array(thisvt_recfracs))

    # finish up the plot
    plt.plot(periodbinned_periods, periodbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per vartype recovery fraction by periodic var magnitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-periods-vartype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 2e. plot of recovery rate per periodbin per alias type
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out all vartypes
    all_aliastypes = recovered_status

    for at in all_aliastypes:

        thisat_recfracs = []

        for periodbin_pv, periodbin_rv in zip(
                periodbinned_periodicvars,
                periodbinned_recovered_objects
        ):

            thisbin_thisat_recvars = [
                x for x in periodbin_rv
                if (precvar['details'][x]['best_recovered_status'][0] == at)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thisat_recvars).size /
                periodbin_pv.size
            )
            thisat_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(periodbinned_periods,
                 np.array(thisat_recfracs),
                 marker='.',
                 label='%s' % at,
                 ms=0.0)

        # add this to the outdict array
        periodbinned_per_aliastype_recfracs.append(np.array(thisat_recfracs))

    # finish up the plot
    plt.plot(periodbinned_periods, periodbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per alias-type recovery fraction by periodic var magnitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-periods-aliastype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 3. recovery-rate by amplitude bin

    # 3a. plot of overall recovery rate per amplitude bin
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac,
             marker='.',ms=0.0)
    plt.xlabel('periodic variable amplitude [mag]')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('overall recovery fraction by periodic var amplitudes')
    plt.ylim((0,1))
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-amplitudes-overall.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 3b. plot of recovery rate per amplitude bin per magcol
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    for magcol in magcols:

        thismagcol_recfracs = []

        for amplitudebin_pv, amplitudebin_rv in zip(
                amplitudebinned_periodicvars,
                amplitudebinned_recovered_objects
        ):

            thisbin_thismagcol_recvars = [
                x for x in amplitudebin_rv
                if (precvar['details'][x]['best_recovered_magcol'] == magcol)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thismagcol_recvars).size /
                amplitudebin_pv.size
            )
            thismagcol_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(amplitudebinned_amplitudes,
                 np.array(thismagcol_recfracs),
                 marker='.',
                 label='magcol: %s' % magcol,
                 ms=0.0)

        # add this to the outdict array
        amplitudebinned_per_magcol_recfracs.append(
            np.array(thismagcol_recfracs)
        )

    # finish up the plot
    plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per magcol recovery fraction by periodic var amplitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-amplitudes-magcols.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 3c. plot of recovery rate per amplitude bin per periodfinder
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out which pfmethods were used
    all_pfmethods = np.unique(
        np.concatenate(
            [np.unique(precvar['details'][x]['recovery_pfmethods'])
             for x in precvar['details']]
        )
    )

    for pfm in all_pfmethods:

        thispf_recfracs = []

        for amplitudebin_pv, amplitudebin_rv in zip(
                amplitudebinned_periodicvars,
                amplitudebinned_recovered_objects
        ):

            thisbin_thispf_recvars = [
                x for x in amplitudebin_rv
                if (precvar['details'][x]['best_recovered_pfmethod'] == pfm)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thispf_recvars).size /
                amplitudebin_pv.size
            )
            thispf_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(amplitudebinned_amplitudes,
                 np.array(thispf_recfracs),
                 marker='.',
                 label='%s' % pfm.upper(),
                 ms=0.0)

        # add this to the outdict array
        amplitudebinned_per_pfmethod_recfracs.append(
            np.array(thispf_recfracs)
        )

    # finish up the plot
    plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per period-finder recovery fraction by periodic var amplitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-amplitudes-pfmethod.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 3d. plot of recovery rate per amplitude bin per variable type
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out all vartypes
    all_vartypes = np.unique(
        [(precvar['details'][x]['actual_vartype'])
         for x in precvar['details'] if
         (precvar['details'][x]['actual_vartype'] is not None)]
    )

    for vt in all_vartypes:

        thisvt_recfracs = []

        for amplitudebin_pv, amplitudebin_rv in zip(
                amplitudebinned_periodicvars,
                amplitudebinned_recovered_objects
        ):

            thisbin_thisvt_recvars = [
                x for x in amplitudebin_rv
                if (precvar['details'][x]['actual_vartype'] == vt)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thisvt_recvars).size /
                amplitudebin_pv.size
            )
            thisvt_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(amplitudebinned_amplitudes,
                 np.array(thisvt_recfracs),
                 marker='.',
                 label='%s' % vt,
                 ms=0.0)

        # add this to the outdict array
        amplitudebinned_per_vartype_recfracs.append(
            np.array(thisvt_recfracs)
        )

    # finish up the plot
    plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per vartype recovery fraction by periodic var amplitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-amplitudes-vartype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 3e. plot of recovery rate per amplitude bin per alias type
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out all vartypes
    all_aliastypes = recovered_status

    for at in all_aliastypes:

        thisat_recfracs = []

        for amplitudebin_pv, amplitudebin_rv in zip(
                amplitudebinned_periodicvars,
                amplitudebinned_recovered_objects
        ):

            thisbin_thisat_recvars = [
                x for x in amplitudebin_rv
                if (precvar['details'][x]['best_recovered_status'][0] == at)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thisat_recvars).size /
                amplitudebin_pv.size
            )
            thisat_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(amplitudebinned_amplitudes,
                 np.array(thisat_recfracs),
                 marker='.',
                 label='%s' % at,
                 ms=0.0)

        # add this to the outdict array
        amplitudebinned_per_aliastype_recfracs.append(
            np.array(thisat_recfracs)
        )

    # finish up the plot
    plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per alias-type recovery fraction by periodic var amplitudes')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-amplitudes-aliastype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 4. recovery-rate by ndet bin

    # 4a. plot of overall recovery rate per ndet bin
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    plt.plot(ndetbinned_ndets, ndetbinned_recfrac,
             marker='.',ms=0.0)
    plt.xlabel('periodic variable light curve points')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('overall recovery fraction by periodic var ndet')
    plt.ylim((0,1))
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-ndet-overall.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 4b. plot of recovery rate per ndet bin per magcol
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    for magcol in magcols:

        thismagcol_recfracs = []

        for ndetbin_pv, ndetbin_rv in zip(ndetbinned_periodicvars,
                                          ndetbinned_recovered_objects):

            thisbin_thismagcol_recvars = [
                x for x in ndetbin_rv
                if (precvar['details'][x]['best_recovered_magcol'] == magcol)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thismagcol_recvars).size /
                ndetbin_pv.size
            )
            thismagcol_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(ndetbinned_ndets,
                 np.array(thismagcol_recfracs),
                 marker='.',
                 label='magcol: %s' % magcol,
                 ms=0.0)

        # add this to the outdict array
        ndetbinned_per_magcol_recfracs.append(
            np.array(thismagcol_recfracs)
        )

    # finish up the plot
    plt.plot(ndetbinned_ndets, ndetbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per magcol recovery fraction by periodic var ndets')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-ndet-magcols.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 4c. plot of recovery rate per ndet bin per periodfinder
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out which pfmethods were used
    all_pfmethods = np.unique(
        np.concatenate(
            [np.unique(precvar['details'][x]['recovery_pfmethods'])
             for x in precvar['details']]
        )
    )

    for pfm in all_pfmethods:

        thispf_recfracs = []

        for ndetbin_pv, ndetbin_rv in zip(ndetbinned_periodicvars,
                                          ndetbinned_recovered_objects):

            thisbin_thispf_recvars = [
                x for x in ndetbin_rv
                if (precvar['details'][x]['best_recovered_pfmethod'] == pfm)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thispf_recvars).size /
                ndetbin_pv.size
            )
            thispf_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(ndetbinned_ndets,
                 np.array(thispf_recfracs),
                 marker='.',
                 label='%s' % pfm.upper(),
                 ms=0.0)

        # add this to the outdict array
        ndetbinned_per_pfmethod_recfracs.append(
            np.array(thispf_recfracs)
        )

    # finish up the plot
    plt.plot(ndetbinned_ndets, ndetbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per period-finder recovery fraction by periodic var ndets')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-ndet-pfmethod.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 4d. plot of recovery rate per ndet bin per variable type
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out all vartypes
    all_vartypes = np.unique(
        [(precvar['details'][x]['actual_vartype'])
         for x in precvar['details'] if
         (precvar['details'][x]['actual_vartype'] in PERIODIC_VARTYPES)]
    )

    for vt in all_vartypes:

        thisvt_recfracs = []

        for ndetbin_pv, ndetbin_rv in zip(ndetbinned_periodicvars,
                                          ndetbinned_recovered_objects):

            thisbin_thisvt_recvars = [
                x for x in ndetbin_rv
                if (precvar['details'][x]['actual_vartype'] == vt)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thisvt_recvars).size /
                ndetbin_pv.size
            )
            thisvt_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(ndetbinned_ndets,
                 np.array(thisvt_recfracs),
                 marker='.',
                 label='%s' % vt,
                 ms=0.0)

        # add this to the outdict array
        ndetbinned_per_vartype_recfracs.append(
            np.array(thisvt_recfracs)
        )

    # finish up the plot
    plt.plot(ndetbinned_ndets, ndetbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per vartype recovery fraction by periodic var ndets')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-ndet-vartype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 4e. plot of recovery rate per ndet bin per alias type
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    # figure out all vartypes
    all_aliastypes = recovered_status

    for at in all_aliastypes:

        thisat_recfracs = []

        for ndetbin_pv, ndetbin_rv in zip(ndetbinned_periodicvars,
                                          ndetbinned_recovered_objects):

            thisbin_thisat_recvars = [
                x for x in ndetbin_rv
                if (precvar['details'][x]['best_recovered_status'][0] == at)
            ]
            thisbin_thismagcol_recfrac = (
                np.array(thisbin_thisat_recvars).size /
                ndetbin_pv.size
            )
            thisat_recfracs.append(thisbin_thismagcol_recfrac)

        # now that we have per magcol recfracs, plot them
        plt.plot(ndetbinned_ndets,
                 np.array(thisat_recfracs),
                 marker='.',
                 label='%s' % at,
                 ms=0.0)

        # add this to the outdict array
        ndetbinned_per_aliastype_recfracs.append(
            np.array(thisat_recfracs)
        )

    # finish up the plot
    plt.plot(ndetbinned_ndets, ndetbinned_recfrac,
             marker='.',ms=0.0, label='overall', color='k')
    plt.xlabel(r'SDSS $r$ magnitude')
    plt.ylabel('recovered fraction of periodic variables')
    plt.title('per alias-type recovery fraction by periodic var ndets')
    plt.ylim((0,1))
    plt.legend(markerscale=10.0)
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-binned-ndet-aliastype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # update the lists in the outdict
    outdict['magbinned_per_magcol_recfracs'] = (
        magbinned_per_magcol_recfracs
    )
    outdict['magbinned_per_pfmethod_recfracs'] = (
        magbinned_per_pfmethod_recfracs
    )
    outdict['magbinned_per_vartype_recfracs'] = (
        magbinned_per_vartype_recfracs
    )
    outdict['magbinned_per_aliastype_recfracs'] = (
        magbinned_per_aliastype_recfracs
    )

    outdict['periodbinned_per_magcol_recfracs'] = (
        periodbinned_per_magcol_recfracs
    )
    outdict['periodbinned_per_pfmethod_recfracs'] = (
        periodbinned_per_pfmethod_recfracs
    )
    outdict['periodbinned_per_vartype_recfracs'] = (
        periodbinned_per_vartype_recfracs
    )
    outdict['periodbinned_per_aliastype_recfracs'] = (
        periodbinned_per_aliastype_recfracs
    )

    outdict['amplitudebinned_per_magcol_recfracs'] = (
        amplitudebinned_per_magcol_recfracs
    )
    outdict['amplitudebinned_per_pfmethod_recfracs'] = (
        amplitudebinned_per_pfmethod_recfracs
    )
    outdict['amplitudebinned_per_vartype_recfracs'] = (
        amplitudebinned_per_vartype_recfracs
    )
    outdict['amplitudebinned_per_aliastype_recfracs'] = (
        amplitudebinned_per_aliastype_recfracs
    )

    outdict['ndetbinned_per_magcol_recfracs'] = (
        ndetbinned_per_magcol_recfracs
    )
    outdict['ndetbinned_per_pfmethod_recfracs'] = (
        ndetbinned_per_pfmethod_recfracs
    )
    outdict['ndetbinned_per_vartype_recfracs'] = (
        ndetbinned_per_vartype_recfracs
    )
    outdict['ndetbinned_per_aliastype_recfracs'] = (
        ndetbinned_per_aliastype_recfracs
    )

    # get the overall recovered vars per pfmethod
    overall_recvars_per_pfmethod = []

    for pfm in all_pfmethods:

        thispfm_recvars = np.array([
            x for x in precvar['details'] if
            ((x in recovered_periodicvars) and
             (precvar['details'][x]['best_recovered_pfmethod'] == pfm))
        ])
        overall_recvars_per_pfmethod.append(thispfm_recvars)

    # get the overall recovered vars per vartype
    overall_recvars_per_vartype = []

    for vt in all_vartypes:

        thisvt_recvars = np.array([
            x for x in precvar['details'] if
            ((x in recovered_periodicvars) and
             (precvar['details'][x]['actual_vartype'] == vt))
        ])
        overall_recvars_per_vartype.append(thisvt_recvars)

    # get the overall recovered vars per magcol
    overall_recvars_per_magcol = []

    for mc in magcols:

        thismc_recvars = np.array([
            x for x in precvar['details'] if
            ((x in recovered_periodicvars) and
             (precvar['details'][x]['best_recovered_magcol'] == mc))
        ])
        overall_recvars_per_magcol.append(thismc_recvars)

    # get the overall recovered vars per aliastype
    overall_recvars_per_aliastype = []

    for at in all_aliastypes:

        thisat_recvars = np.array([
            x for x in precvar['details'] if
            ((x in recovered_periodicvars) and
             (precvar['details'][x]['best_recovered_status'] == at))
        ])
        overall_recvars_per_aliastype.append(thisat_recvars)

    # update the outdict with these
    outdict['overall_recfrac_per_pfmethod'] = np.array([
        x.size/actual_periodicvars.size for x in overall_recvars_per_pfmethod
    ])
    outdict['overall_recfrac_per_vartype'] = np.array([
        x.size/actual_periodicvars.size for x in overall_recvars_per_vartype
    ])
    outdict['overall_recfrac_per_magcol'] = np.array([
        x.size/actual_periodicvars.size for x in overall_recvars_per_magcol
    ])
    outdict['overall_recfrac_per_aliastype'] = np.array([
        x.size/actual_periodicvars.size for x in overall_recvars_per_aliastype
    ])

    # 5. bar plot of overall recovery rate per pfmethod
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    xt = np.arange(len(all_pfmethods))
    xl = all_pfmethods

    plt.barh(xt, outdict['overall_recfrac_per_pfmethod'], 0.50)
    plt.yticks(xt, xl)
    plt.xlabel('period-finding method')
    plt.ylabel('overall recovery rate')
    plt.title('overall recovery rate per period-finding method')
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-overall-pfmethod.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 6. bar plot of overall recovery rate per magcol
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    xt = np.arange(len(magcols))
    xl = magcols

    plt.barh(xt, outdict['overall_recfrac_per_magcol'], 0.50)
    plt.yticks(xt, xl)
    plt.xlabel('light curve magnitude column')
    plt.ylabel('overall recovery rate')
    plt.title('overall recovery rate per light curve magcol')
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-overall-magcol.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 7. bar plot of overall recovery rate per aliastype
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    xt = np.arange(len(all_aliastypes))
    xl = all_aliastypes

    plt.barh(xt, outdict['overall_recfrac_per_aliastype'], 0.50)
    plt.yticks(xt, xl)
    plt.xlabel('period recovery status')
    plt.ylabel('overall recovery rate')
    plt.title('overall recovery rate per period recovery status')
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-overall-aliastype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 8. bar plot of overall recovery rate per vartype
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))

    xt = np.arange(len(all_vartypes))
    xl = all_vartypes

    plt.barh(xt, outdict['overall_recfrac_per_vartype'], 0.50)
    plt.yticks(xt, xl)
    plt.xlabel('periodic variable type')
    plt.ylabel('overall recovery rate')
    plt.title('overall recovery rate per periodic variable type')
    plt.savefig(
        os.path.join(recplotdir,
                     'recfrac-overall-vartype.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 9. overall recovered period periodogram for objects that aren't actual
    # periodic variables. this effectively should give us the window function of
    # the observations

    notvariable_recovered_periods = np.concatenate([
        precvar['details'][x]['recovery_periods']
        for x in precvar['details'] if
        (precvar['details'][x]['actual_vartype'] is None)
    ])
    notvariable_recovered_lspvals = np.concatenate([
        precvar['details'][x]['recovery_lspvals']
        for x in precvar['details'] if
        (precvar['details'][x]['actual_vartype'] is None)
    ])

    sortind = np.argsort(notvariable_recovered_periods)
    notvariable_recovered_periods = notvariable_recovered_periods[sortind]
    notvariable_recovered_lspvals = notvariable_recovered_lspvals[sortind]

    outdict['notvariable_recovered_periods'] = notvariable_recovered_periods
    outdict['notvariable_recovered_lspvals'] = notvariable_recovered_lspvals

    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))
    plt.plot(notvariable_recovered_periods,
             notvariable_recovered_lspvals,
             ms=1.0,linestyle='none',marker='.')
    plt.xscale('log')
    plt.xlabel('recovered periods [days]')
    plt.ylabel('recovered normalized periodogram power')
    plt.title('periodogram for actual not-variable objects')
    plt.savefig(
        os.path.join(recplotdir,
                     'recovered-periodogram-nonvariables.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # 10. overall recovered period histogram for objects marked
    # not-variable. this gives us the most common periods
    fig = plt.figure(figsize=(6.4*1.5,4.8*1.5))
    plt.hist(notvariable_recovered_periods,bins=np.arange(0.02,300.0,1.0e-3),
             histtype='step')
    plt.xscale('log')
    plt.xlabel('recovered periods [days]')
    plt.ylabel('number of times periods recovered')
    plt.title('recovered period histogram for non-variable objects')
    plt.savefig(
        os.path.join(recplotdir,
                     'recovered-period-hist-nonvariables.%s' % plotfile_ext),
        dpi=100,
        bbox_inches='tight'
    )
    plt.close('all')

    # at the end, write the outdict to a pickle and return it
    outfile = os.path.join(simbasedir, 'periodicvar-recovery-plotresults.pkl')
    with open(outfile,'wb') as outfd:
        pickle.dump(outdict, outfd, pickle.HIGHEST_PROTOCOL)

    return outdict