Source code for astrobase.fakelcs.recovery

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# recovery - Waqas Bhatti (wbhatti@astro.princeton.edu) - Oct 2017
# License: MIT. See the LICENSE file for more details.

'''This is a companion module for fakelcs/generation.py. It runs LCs generated
using functions in that module through variable star detection and
classification to see how well they are recovered.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

import os
import os.path
import pickle
import gzip
import glob

import multiprocessing as mp

from math import sqrt as msqrt

# to turn a list of keys into a dict address
# from https://stackoverflow.com/a/14692747
from functools import reduce
from operator import getitem


def _dict_get(datadict, keylist):
    return reduce(getitem, keylist, datadict)


import numpy as np
import numpy.random as npr
# seed the numpy random generator
npr.seed(0xdecaff)

import matplotlib
matplotlib.use('Agg')
matplotlib.rcParams['agg.path.chunksize'] = 10000

import matplotlib.pyplot as plt


###################
## LOCAL IMPORTS ##
###################

from .. import lcproc
from ..lcproc import lcvfeatures, varthreshold, periodsearch


#######################
## LC FORMATS SET UP ##
#######################

[docs]def read_fakelc(fakelcfile): ''' This just reads a pickled fake LC. Parameters ---------- fakelcfile : str The fake LC file to read. Returns ------- dict This returns an lcdict. ''' try: with open(fakelcfile,'rb') as infd: lcdict = pickle.load(infd) except UnicodeDecodeError: with open(fakelcfile,'rb') as infd: lcdict = pickle.load(infd, encoding='latin1') return lcdict
####################### ## UTILITY FUNCTIONS ## #######################
[docs]def get_varfeatures(simbasedir, mindet=1000, nworkers=None): '''This runs `lcproc.lcvfeatures.parallel_varfeatures` on fake LCs in `simbasedir`. Parameters ---------- simbasedir : str The directory containing the fake LCs to process. mindet : int The minimum number of detections needed to accept an LC and process it. nworkers : int or None The number of parallel workers to use when extracting variability features from the input light curves. Returns ------- str The path to the `varfeatures` pickle created after running the `lcproc.lcvfeatures.parallel_varfeatures` function. ''' # get the info from the simbasedir with open(os.path.join(simbasedir, 'fakelcs-info.pkl'),'rb') as infd: siminfo = pickle.load(infd) lcfpaths = siminfo['lcfpath'] varfeaturedir = os.path.join(simbasedir,'varfeatures') # get the column defs for the fakelcs timecols = siminfo['timecols'] magcols = siminfo['magcols'] errcols = siminfo['errcols'] # get the column defs for the fakelcs timecols = siminfo['timecols'] magcols = siminfo['magcols'] errcols = siminfo['errcols'] # register the fakelc pklc as a custom lcproc format # now we should be able to use all lcproc functions correctly fakelc_formatkey = 'fake-%s' % siminfo['lcformat'] lcproc.register_lcformat( fakelc_formatkey, '*-fakelc.pkl', timecols, magcols, errcols, 'astrobase.lcproc', '_read_pklc', magsarefluxes=siminfo['magsarefluxes'] ) # now we can use lcproc.parallel_varfeatures directly varinfo = lcvfeatures.parallel_varfeatures(lcfpaths, varfeaturedir, lcformat=fakelc_formatkey, mindet=mindet, nworkers=nworkers) with open(os.path.join(simbasedir,'fakelc-varfeatures.pkl'),'wb') as outfd: pickle.dump(varinfo, outfd, pickle.HIGHEST_PROTOCOL) return os.path.join(simbasedir,'fakelc-varfeatures.pkl')
[docs]def precision(ntp, nfp): ''' This calculates precision. https://en.wikipedia.org/wiki/Precision_and_recall Parameters ---------- ntp : int The number of true positives. nfp : int The number of false positives. Returns ------- float The precision calculated using `ntp/(ntp + nfp)`. ''' if (ntp+nfp) > 0: return ntp/(ntp+nfp) else: return np.nan
[docs]def recall(ntp, nfn): ''' This calculates recall. https://en.wikipedia.org/wiki/Precision_and_recall Parameters ---------- ntp : int The number of true positives. nfn : int The number of false negatives. Returns ------- float The precision calculated using `ntp/(ntp + nfn)`. ''' if (ntp+nfn) > 0: return ntp/(ntp+nfn) else: return np.nan
[docs]def matthews_correl_coeff(ntp, ntn, nfp, nfn): ''' This calculates the Matthews correlation coefficent. https://en.wikipedia.org/wiki/Matthews_correlation_coefficient Parameters ---------- ntp : int The number of true positives. ntn : int The number of true negatives nfp : int The number of false positives. nfn : int The number of false negatives. Returns ------- float The Matthews correlation coefficient. ''' mcc_top = (ntp*ntn - nfp*nfn) mcc_bot = msqrt((ntp + nfp)*(ntp + nfn)*(ntn + nfp)*(ntn + nfn)) if mcc_bot > 0: return mcc_top/mcc_bot else: return np.nan
####################################### ## VARIABILITY RECOVERY (PER MAGBIN) ## #######################################
[docs]def get_recovered_variables_for_magbin(simbasedir, magbinmedian, stetson_stdev_min=2.0, inveta_stdev_min=2.0, iqr_stdev_min=2.0, statsonly=True): '''This runs variability selection for the given magbinmedian. To generate a full recovery matrix over all magnitude bins, run this function for each magbin over the specified stetson_stdev_min and inveta_stdev_min grid. Parameters ---------- simbasedir : str The input directory of fake LCs. magbinmedian : float The magbin to run the variable recovery for. This is an item from the dict from `simbasedir/fakelcs-info.pkl: `fakelcinfo['magrms'][magcol]` list for each magcol and designates which magbin to get the recovery stats for. stetson_stdev_min : float The minimum sigma above the trend in the Stetson J variability index distribution for this magbin to use to consider objects as variable. inveta_stdev_min : float The minimum sigma above the trend in the 1/eta variability index distribution for this magbin to use to consider objects as variable. iqr_stdev_min : float The minimum sigma above the trend in the IQR variability index distribution for this magbin to use to consider objects as variable. statsonly : bool If this is True, only the final stats will be returned. If False, the full arrays used to generate the stats will also be returned. Returns ------- dict The returned dict contains statistics for this magbin and if requested, the full arrays used to calculate the statistics. ''' # get the info from the simbasedir with open(os.path.join(simbasedir, 'fakelcs-info.pkl'),'rb') as infd: siminfo = pickle.load(infd) objectids = siminfo['objectid'] varflags = siminfo['isvariable'] sdssr = siminfo['sdssr'] # get the column defs for the fakelcs timecols = siminfo['timecols'] magcols = siminfo['magcols'] errcols = siminfo['errcols'] # register the fakelc pklc as a custom lcproc format # now we should be able to use all lcproc functions correctly fakelc_formatkey = 'fake-%s' % siminfo['lcformat'] lcproc.register_lcformat( fakelc_formatkey, '*-fakelc.pkl', timecols, magcols, errcols, 'astrobase.lcproc', '_read_pklc', magsarefluxes=siminfo['magsarefluxes'] ) # make the output directory if it doesn't exit outdir = os.path.join(simbasedir, 'recvar-threshold-pkls') if not os.path.exists(outdir): os.mkdir(outdir) # run the variability search varfeaturedir = os.path.join(simbasedir, 'varfeatures') varthreshinfof = os.path.join( outdir, 'varthresh-magbinmed%.2f-stet%.2f-inveta%.2f.pkl' % (magbinmedian, stetson_stdev_min, inveta_stdev_min) ) varthresh = varthreshold.variability_threshold( varfeaturedir, varthreshinfof, lcformat=fakelc_formatkey, min_stetj_stdev=stetson_stdev_min, min_inveta_stdev=inveta_stdev_min, min_iqr_stdev=iqr_stdev_min, verbose=False ) # get the magbins from the varthresh info magbins = varthresh['magbins'] # get the magbininds magbininds = np.digitize(sdssr, magbins) # bin the objects according to these magbins binned_objectids = [] binned_actualvars = [] binned_actualnotvars = [] # go through all the mag bins and bin up the objectids, actual variables, # and actual not-variables for mbinind, _magi in zip(np.unique(magbininds), range(len(magbins)-1)): thisbinind = np.where(magbininds == mbinind) thisbin_objectids = objectids[thisbinind] thisbin_varflags = varflags[thisbinind] thisbin_actualvars = thisbin_objectids[thisbin_varflags] thisbin_actualnotvars = thisbin_objectids[~thisbin_varflags] binned_objectids.append(thisbin_objectids) binned_actualvars.append(thisbin_actualvars) binned_actualnotvars.append(thisbin_actualnotvars) # this is the output dict recdict = { 'simbasedir':simbasedir, 'timecols':timecols, 'magcols':magcols, 'errcols':errcols, 'magsarefluxes':siminfo['magsarefluxes'], 'stetj_min_stdev':stetson_stdev_min, 'inveta_min_stdev':inveta_stdev_min, 'iqr_min_stdev':iqr_stdev_min, 'magbinmedian':magbinmedian, } # now, for each magcol, find the magbin corresponding to magbinmedian, and # get its stats for magcol in magcols: # this is the index of the matching magnitude bin for the magbinmedian # provided magbinind = np.where( np.array(varthresh[magcol]['binned_sdssr_median']) == magbinmedian ) magbinind = np.asscalar(magbinind[0]) # get the objectids, actual vars and actual notvars in this magbin thisbin_objectids = binned_objectids[magbinind] thisbin_actualvars = binned_actualvars[magbinind] thisbin_actualnotvars = binned_actualnotvars[magbinind] # stetson recovered variables in this magbin stet_recoveredvars = varthresh[magcol][ 'binned_objectids_thresh_stetsonj' ][magbinind] # calculate TP, FP, TN, FN stet_recoverednotvars = np.setdiff1d(thisbin_objectids, stet_recoveredvars) stet_truepositives = np.intersect1d(stet_recoveredvars, thisbin_actualvars) stet_falsepositives = np.intersect1d(stet_recoveredvars, thisbin_actualnotvars) stet_truenegatives = np.intersect1d(stet_recoverednotvars, thisbin_actualnotvars) stet_falsenegatives = np.intersect1d(stet_recoverednotvars, thisbin_actualvars) # calculate stetson recall, precision, Matthews correl coeff stet_recall = recall(stet_truepositives.size, stet_falsenegatives.size) stet_precision = precision(stet_truepositives.size, stet_falsepositives.size) stet_mcc = matthews_correl_coeff(stet_truepositives.size, stet_truenegatives.size, stet_falsepositives.size, stet_falsenegatives.size) # inveta recovered variables in this magbin inveta_recoveredvars = varthresh[magcol][ 'binned_objectids_thresh_inveta' ][magbinind] inveta_recoverednotvars = np.setdiff1d(thisbin_objectids, inveta_recoveredvars) inveta_truepositives = np.intersect1d(inveta_recoveredvars, thisbin_actualvars) inveta_falsepositives = np.intersect1d(inveta_recoveredvars, thisbin_actualnotvars) inveta_truenegatives = np.intersect1d(inveta_recoverednotvars, thisbin_actualnotvars) inveta_falsenegatives = np.intersect1d(inveta_recoverednotvars, thisbin_actualvars) # calculate inveta recall, precision, Matthews correl coeff inveta_recall = recall(inveta_truepositives.size, inveta_falsenegatives.size) inveta_precision = precision(inveta_truepositives.size, inveta_falsepositives.size) inveta_mcc = matthews_correl_coeff(inveta_truepositives.size, inveta_truenegatives.size, inveta_falsepositives.size, inveta_falsenegatives.size) # iqr recovered variables in this magbin iqr_recoveredvars = varthresh[magcol][ 'binned_objectids_thresh_iqr' ][magbinind] iqr_recoverednotvars = np.setdiff1d(thisbin_objectids, iqr_recoveredvars) iqr_truepositives = np.intersect1d(iqr_recoveredvars, thisbin_actualvars) iqr_falsepositives = np.intersect1d(iqr_recoveredvars, thisbin_actualnotvars) iqr_truenegatives = np.intersect1d(iqr_recoverednotvars, thisbin_actualnotvars) iqr_falsenegatives = np.intersect1d(iqr_recoverednotvars, thisbin_actualvars) # calculate iqr recall, precision, Matthews correl coeff iqr_recall = recall(iqr_truepositives.size, iqr_falsenegatives.size) iqr_precision = precision(iqr_truepositives.size, iqr_falsepositives.size) iqr_mcc = matthews_correl_coeff(iqr_truepositives.size, iqr_truenegatives.size, iqr_falsepositives.size, iqr_falsenegatives.size) # calculate the items missed by one method but found by the other # methods stet_missed_inveta_found = np.setdiff1d(inveta_truepositives, stet_truepositives) stet_missed_iqr_found = np.setdiff1d(iqr_truepositives, stet_truepositives) inveta_missed_stet_found = np.setdiff1d(stet_truepositives, inveta_truepositives) inveta_missed_iqr_found = np.setdiff1d(iqr_truepositives, inveta_truepositives) iqr_missed_stet_found = np.setdiff1d(stet_truepositives, iqr_truepositives) iqr_missed_inveta_found = np.setdiff1d(inveta_truepositives, iqr_truepositives) if not statsonly: recdict[magcol] = { # stetson J alone 'stet_recoveredvars':stet_recoveredvars, 'stet_truepositives':stet_truepositives, 'stet_falsepositives':stet_falsepositives, 'stet_truenegatives':stet_truenegatives, 'stet_falsenegatives':stet_falsenegatives, 'stet_precision':stet_precision, 'stet_recall':stet_recall, 'stet_mcc':stet_mcc, # inveta alone 'inveta_recoveredvars':inveta_recoveredvars, 'inveta_truepositives':inveta_truepositives, 'inveta_falsepositives':inveta_falsepositives, 'inveta_truenegatives':inveta_truenegatives, 'inveta_falsenegatives':inveta_falsenegatives, 'inveta_precision':inveta_precision, 'inveta_recall':inveta_recall, 'inveta_mcc':inveta_mcc, # iqr alone 'iqr_recoveredvars':iqr_recoveredvars, 'iqr_truepositives':iqr_truepositives, 'iqr_falsepositives':iqr_falsepositives, 'iqr_truenegatives':iqr_truenegatives, 'iqr_falsenegatives':iqr_falsenegatives, 'iqr_precision':iqr_precision, 'iqr_recall':iqr_recall, 'iqr_mcc':iqr_mcc, # true positive variables missed by one method but picked up by # the others 'stet_missed_inveta_found':stet_missed_inveta_found, 'stet_missed_iqr_found':stet_missed_iqr_found, 'inveta_missed_stet_found':inveta_missed_stet_found, 'inveta_missed_iqr_found':inveta_missed_iqr_found, 'iqr_missed_stet_found':iqr_missed_stet_found, 'iqr_missed_inveta_found':iqr_missed_inveta_found, # bin info 'actual_variables':thisbin_actualvars, 'actual_nonvariables':thisbin_actualnotvars, 'all_objectids':thisbin_objectids, 'magbinind':magbinind, } # if statsonly is set, then we only return the numbers but not the # arrays themselves else: recdict[magcol] = { # stetson J alone 'stet_recoveredvars':stet_recoveredvars.size, 'stet_truepositives':stet_truepositives.size, 'stet_falsepositives':stet_falsepositives.size, 'stet_truenegatives':stet_truenegatives.size, 'stet_falsenegatives':stet_falsenegatives.size, 'stet_precision':stet_precision, 'stet_recall':stet_recall, 'stet_mcc':stet_mcc, # inveta alone 'inveta_recoveredvars':inveta_recoveredvars.size, 'inveta_truepositives':inveta_truepositives.size, 'inveta_falsepositives':inveta_falsepositives.size, 'inveta_truenegatives':inveta_truenegatives.size, 'inveta_falsenegatives':inveta_falsenegatives.size, 'inveta_precision':inveta_precision, 'inveta_recall':inveta_recall, 'inveta_mcc':inveta_mcc, # iqr alone 'iqr_recoveredvars':iqr_recoveredvars.size, 'iqr_truepositives':iqr_truepositives.size, 'iqr_falsepositives':iqr_falsepositives.size, 'iqr_truenegatives':iqr_truenegatives.size, 'iqr_falsenegatives':iqr_falsenegatives.size, 'iqr_precision':iqr_precision, 'iqr_recall':iqr_recall, 'iqr_mcc':iqr_mcc, # true positive variables missed by one method but picked up by # the others 'stet_missed_inveta_found':stet_missed_inveta_found.size, 'stet_missed_iqr_found':stet_missed_iqr_found.size, 'inveta_missed_stet_found':inveta_missed_stet_found.size, 'inveta_missed_iqr_found':inveta_missed_iqr_found.size, 'iqr_missed_stet_found':iqr_missed_stet_found.size, 'iqr_missed_inveta_found':iqr_missed_inveta_found.size, # bin info 'actual_variables':thisbin_actualvars.size, 'actual_nonvariables':thisbin_actualnotvars.size, 'all_objectids':thisbin_objectids.size, 'magbinind':magbinind, } # # done with per magcol # return recdict
[docs]def magbin_varind_gridsearch_worker(task): ''' This is a parallel grid search worker for the function below. ''' simbasedir, gridpoint, magbinmedian = task try: res = get_recovered_variables_for_magbin(simbasedir, magbinmedian, stetson_stdev_min=gridpoint[0], inveta_stdev_min=gridpoint[1], iqr_stdev_min=gridpoint[2], statsonly=True) return res except Exception: LOGEXCEPTION('failed to get info for %s' % gridpoint) return None
[docs]def variable_index_gridsearch_magbin(simbasedir, stetson_stdev_range=(1.0,20.0), inveta_stdev_range=(1.0,20.0), iqr_stdev_range=(1.0,20.0), ngridpoints=32, ngridworkers=None): '''This runs a variable index grid search per magbin. For each magbin, this does a grid search using the stetson and inveta ranges provided and tries to optimize the Matthews Correlation Coefficient (best value is +1.0), indicating the best possible separation of variables vs. nonvariables. The thresholds on these two variable indexes that produce the largest coeff for the collection of fake LCs will probably be the ones that work best for actual variable classification on the real LCs. https://en.wikipedia.org/wiki/Matthews_correlation_coefficient For each grid-point, calculates the true positives, false positives, true negatives, false negatives. Then gets the precision and recall, confusion matrix, and the ROC curve for variable vs. nonvariable. Once we've identified the best thresholds to use, we can then calculate variable object numbers: - as a function of magnitude - as a function of period - as a function of number of detections - as a function of amplitude of variability Writes everything back to `simbasedir/fakevar-recovery.pkl`. Use the plotting function below to make plots for the results. Parameters ---------- simbasedir : str The directory where the fake LCs are located. stetson_stdev_range : sequence of 2 floats The min and max values of the Stetson J variability index to generate a grid over these to test for the values of this index that produce the 'best' recovery rate for the injected variable stars. inveta_stdev_range : sequence of 2 floats The min and max values of the 1/eta variability index to generate a grid over these to test for the values of this index that produce the 'best' recovery rate for the injected variable stars. iqr_stdev_range : sequence of 2 floats The min and max values of the IQR variability index to generate a grid over these to test for the values of this index that produce the 'best' recovery rate for the injected variable stars. ngridpoints : int The number of grid points for each variability index grid. Remember that this function will be searching in 3D and will require lots of time to run if ngridpoints is too large. For the default number of grid points and 25000 simulated light curves, this takes about 3 days to run on a 40 (effective) core machine with 2 x Xeon E5-2650v3 CPUs. ngridworkers : int or None The number of parallel grid search workers that will be launched. Returns ------- dict The returned dict contains a list of recovery stats for each magbin and each grid point in the variability index grids that were used. This dict can be passed to the plotting function below to plot the results. ''' # make the output directory where all the pkls from the variability # threshold runs will go outdir = os.path.join(simbasedir,'recvar-threshold-pkls') if not os.path.exists(outdir): os.mkdir(outdir) # get the info from the simbasedir with open(os.path.join(simbasedir, 'fakelcs-info.pkl'),'rb') as infd: siminfo = pickle.load(infd) # get the column defs for the fakelcs timecols = siminfo['timecols'] magcols = siminfo['magcols'] errcols = siminfo['errcols'] # get the magbinmedians to use for the recovery processing magbinmedians = siminfo['magrms'][magcols[0]]['binned_sdssr_median'] # generate the grids for stetson and inveta stetson_grid = np.linspace(stetson_stdev_range[0], stetson_stdev_range[1], num=ngridpoints) inveta_grid = np.linspace(inveta_stdev_range[0], inveta_stdev_range[1], num=ngridpoints) iqr_grid = np.linspace(iqr_stdev_range[0], iqr_stdev_range[1], num=ngridpoints) # generate the grid stet_inveta_iqr_grid = [] for stet in stetson_grid: for inveta in inveta_grid: for iqr in iqr_grid: grid_point = [stet, inveta, iqr] stet_inveta_iqr_grid.append(grid_point) # the output dict grid_results = {'stetson_grid':stetson_grid, 'inveta_grid':inveta_grid, 'iqr_grid':iqr_grid, 'stet_inveta_iqr_grid':stet_inveta_iqr_grid, 'magbinmedians':magbinmedians, 'timecols':timecols, 'magcols':magcols, 'errcols':errcols, 'simbasedir':os.path.abspath(simbasedir), 'recovery':[]} # set up the pool pool = mp.Pool(ngridworkers) # run the grid search per magbinmedian for magbinmedian in magbinmedians: LOGINFO('running stetson J-inveta grid-search ' 'for magbinmedian = %.3f...' % magbinmedian) tasks = [(simbasedir, gp, magbinmedian) for gp in stet_inveta_iqr_grid] thisbin_results = pool.map(magbin_varind_gridsearch_worker, tasks) grid_results['recovery'].append(thisbin_results) pool.close() pool.join() LOGINFO('done.') with open(os.path.join(simbasedir, 'fakevar-recovery-per-magbin.pkl'),'wb') as outfd: pickle.dump(grid_results,outfd,pickle.HIGHEST_PROTOCOL) return grid_results
[docs]def plot_varind_gridsearch_magbin_results(gridsearch_results): '''This plots the gridsearch results from `variable_index_gridsearch_magbin`. Parameters ---------- gridsearch_results : dict This is the dict produced by `variable_index_gridsearch_magbin` above. Returns ------- dict The returned dict contains filenames of the recovery rate plots made for each variability index. These include plots of the precision, recall, and Matthews Correlation Coefficient over each magbin and a heatmap of these values over the grid points of the variability index stdev values arrays used. ''' # get the result pickle/dict if (isinstance(gridsearch_results, str) and os.path.exists(gridsearch_results)): with open(gridsearch_results,'rb') as infd: gridresults = pickle.load(infd) elif isinstance(gridsearch_results, dict): gridresults = gridsearch_results else: LOGERROR('could not understand the input ' 'variable index grid-search result dict/pickle') return None plotres = {'simbasedir':gridresults['simbasedir']} recgrid = gridresults['recovery'] simbasedir = gridresults['simbasedir'] for magcol in gridresults['magcols']: plotres[magcol] = {'best_stetsonj':[], 'best_inveta':[], 'best_iqr':[], 'magbinmedians':gridresults['magbinmedians']} # go through all the magbins for magbinind, magbinmedian in enumerate(gridresults['magbinmedians']): LOGINFO('plotting results for %s: magbin: %.3f' % (magcol, magbinmedian)) stet_mcc = np.array( [x[magcol]['stet_mcc'] for x in recgrid[magbinind]] )[::(gridresults['inveta_grid'].size * gridresults['stetson_grid'].size)] stet_precision = np.array( [x[magcol]['stet_precision'] for x in recgrid[magbinind]] )[::(gridresults['inveta_grid'].size * gridresults['stetson_grid'].size)] stet_recall = np.array( [x[magcol]['stet_recall'] for x in recgrid[magbinind]] )[::(gridresults['inveta_grid'].size * gridresults['stetson_grid'].size)] stet_missed_inveta_found = np.array( [x[magcol]['stet_missed_inveta_found'] for x in recgrid[magbinind]] )[::(gridresults['inveta_grid'].size * gridresults['stetson_grid'].size)] stet_missed_iqr_found = np.array( [x[magcol]['stet_missed_iqr_found'] for x in recgrid[magbinind]] )[::(gridresults['inveta_grid'].size * gridresults['stetson_grid'].size)] inveta_mcc = np.array( [x[magcol]['inveta_mcc'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ ::gridresults['inveta_grid'].size ] inveta_precision = np.array( [x[magcol]['inveta_precision'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ ::gridresults['inveta_grid'].size ] inveta_recall = np.array( [x[magcol]['inveta_recall'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ ::gridresults['inveta_grid'].size ] inveta_missed_stet_found = np.array( [x[magcol]['inveta_missed_stet_found'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ ::gridresults['inveta_grid'].size ] inveta_missed_iqr_found = np.array( [x[magcol]['inveta_missed_iqr_found'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ ::gridresults['inveta_grid'].size ] iqr_mcc = np.array( [x[magcol]['iqr_mcc'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ :gridresults['inveta_grid'].size ] iqr_precision = np.array( [x[magcol]['iqr_precision'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ :gridresults['inveta_grid'].size ] iqr_recall = np.array( [x[magcol]['iqr_recall'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ :gridresults['inveta_grid'].size ] iqr_missed_stet_found = np.array( [x[magcol]['iqr_missed_stet_found'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ :gridresults['inveta_grid'].size ] iqr_missed_inveta_found = np.array( [x[magcol]['iqr_missed_inveta_found'] for x in recgrid[magbinind]] )[:(gridresults['iqr_grid'].size * gridresults['stetson_grid'].size)][ :gridresults['inveta_grid'].size ] plt.figure(figsize=(6.4*5, 4.8*3)) # FIRST ROW: stetson J plot plt.subplot(3,5,1) if np.any(np.isfinite(stet_mcc)): plt.plot(gridresults['stetson_grid'], stet_mcc) plt.xlabel('stetson J stdev multiplier threshold') plt.ylabel('MCC') plt.title('MCC for stetson J') else: plt.text(0.5,0.5, 'stet MCC values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,2) if np.any(np.isfinite(stet_precision)): plt.plot(gridresults['stetson_grid'], stet_precision) plt.xlabel('stetson J stdev multiplier threshold') plt.ylabel('precision') plt.title('precision for stetson J') else: plt.text(0.5,0.5, 'stet precision values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,3) if np.any(np.isfinite(stet_recall)): plt.plot(gridresults['stetson_grid'], stet_recall) plt.xlabel('stetson J stdev multiplier threshold') plt.ylabel('recall') plt.title('recall for stetson J') else: plt.text(0.5,0.5, 'stet recall values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,4) if np.any(np.isfinite(stet_missed_inveta_found)): plt.plot(gridresults['stetson_grid'], stet_missed_inveta_found) plt.xlabel('stetson J stdev multiplier threshold') plt.ylabel('# objects stetson missed but inveta found') plt.title('stetson J missed, inveta found') else: plt.text(0.5,0.5, 'stet-missed/inveta-found values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,5) if np.any(np.isfinite(stet_missed_iqr_found)): plt.plot(gridresults['stetson_grid'], stet_missed_iqr_found) plt.xlabel('stetson J stdev multiplier threshold') plt.ylabel('# objects stetson missed but IQR found') plt.title('stetson J missed, IQR found') else: plt.text(0.5,0.5, 'stet-missed/IQR-found values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) # SECOND ROW: inveta plots plt.subplot(3,5,6) if np.any(np.isfinite(inveta_mcc)): plt.plot(gridresults['inveta_grid'], inveta_mcc) plt.xlabel('inveta stdev multiplier threshold') plt.ylabel('MCC') plt.title('MCC for inveta') else: plt.text(0.5,0.5, 'inveta MCC values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,7) if np.any(np.isfinite(inveta_precision)): plt.plot(gridresults['inveta_grid'], inveta_precision) plt.xlabel('inveta stdev multiplier threshold') plt.ylabel('precision') plt.title('precision for inveta') else: plt.text(0.5,0.5, 'inveta precision values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,8) if np.any(np.isfinite(inveta_recall)): plt.plot(gridresults['inveta_grid'], inveta_recall) plt.xlabel('inveta stdev multiplier threshold') plt.ylabel('recall') plt.title('recall for inveta') else: plt.text(0.5,0.5, 'inveta recall values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,9) if np.any(np.isfinite(inveta_missed_stet_found)): plt.plot(gridresults['inveta_grid'], inveta_missed_stet_found) plt.xlabel('inveta stdev multiplier threshold') plt.ylabel('# objects inveta missed but stetson found') plt.title('inveta missed, stetson J found') else: plt.text(0.5,0.5, 'inveta-missed-stet-found values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,10) if np.any(np.isfinite(inveta_missed_iqr_found)): plt.plot(gridresults['inveta_grid'], inveta_missed_iqr_found) plt.xlabel('inveta stdev multiplier threshold') plt.ylabel('# objects inveta missed but IQR found') plt.title('inveta missed, IQR found') else: plt.text(0.5,0.5, 'inveta-missed-iqr-found values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) # THIRD ROW: inveta plots plt.subplot(3,5,11) if np.any(np.isfinite(iqr_mcc)): plt.plot(gridresults['iqr_grid'], iqr_mcc) plt.xlabel('IQR stdev multiplier threshold') plt.ylabel('MCC') plt.title('MCC for IQR') else: plt.text(0.5,0.5, 'IQR MCC values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,12) if np.any(np.isfinite(iqr_precision)): plt.plot(gridresults['iqr_grid'], iqr_precision) plt.xlabel('IQR stdev multiplier threshold') plt.ylabel('precision') plt.title('precision for IQR') else: plt.text(0.5,0.5, 'IQR precision values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,13) if np.any(np.isfinite(iqr_recall)): plt.plot(gridresults['iqr_grid'], iqr_recall) plt.xlabel('IQR stdev multiplier threshold') plt.ylabel('recall') plt.title('recall for IQR') else: plt.text(0.5,0.5, 'IQR recall values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,14) if np.any(np.isfinite(iqr_missed_stet_found)): plt.plot(gridresults['iqr_grid'], iqr_missed_stet_found) plt.xlabel('IQR stdev multiplier threshold') plt.ylabel('# objects IQR missed but stetson found') plt.title('IQR missed, stetson J found') else: plt.text(0.5,0.5, 'iqr-missed-stet-found values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplot(3,5,15) if np.any(np.isfinite(iqr_missed_inveta_found)): plt.plot(gridresults['iqr_grid'], iqr_missed_inveta_found) plt.xlabel('IQR stdev multiplier threshold') plt.ylabel('# objects IQR missed but inveta found') plt.title('IQR missed, inveta found') else: plt.text(0.5,0.5, 'iqr-missed-inveta-found values are all nan ' 'for this magbin', transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='center') plt.xticks([]) plt.yticks([]) plt.subplots_adjust(hspace=0.25,wspace=0.25) plt.suptitle('magcol: %s, magbin: %.3f' % (magcol, magbinmedian)) plotdir = os.path.join(gridresults['simbasedir'], 'varindex-gridsearch-plots') if not os.path.exists(plotdir): os.mkdir(plotdir) gridplotf = os.path.join( plotdir, '%s-magbin-%.3f-var-recoverygrid-permagbin.png' % (magcol, magbinmedian) ) plt.savefig(gridplotf,dpi=100,bbox_inches='tight') plt.close('all') # get the best values of MCC, recall, precision and their associated # stet, inveta stet_mcc_maxind = np.where(stet_mcc == np.max(stet_mcc)) stet_precision_maxind = np.where( stet_precision == np.max(stet_precision) ) stet_recall_maxind = np.where(stet_recall == np.max(stet_recall)) best_stet_mcc = stet_mcc[stet_mcc_maxind] best_stet_precision = stet_mcc[stet_precision_maxind] best_stet_recall = stet_mcc[stet_recall_maxind] stet_with_best_mcc = gridresults['stetson_grid'][stet_mcc_maxind] stet_with_best_precision = gridresults['stetson_grid'][ stet_precision_maxind ] stet_with_best_recall = ( gridresults['stetson_grid'][stet_recall_maxind] ) inveta_mcc_maxind = np.where(inveta_mcc == np.max(inveta_mcc)) inveta_precision_maxind = np.where( inveta_precision == np.max(inveta_precision) ) inveta_recall_maxind = ( np.where(inveta_recall == np.max(inveta_recall)) ) best_inveta_mcc = inveta_mcc[inveta_mcc_maxind] best_inveta_precision = inveta_mcc[inveta_precision_maxind] best_inveta_recall = inveta_mcc[inveta_recall_maxind] inveta_with_best_mcc = gridresults['inveta_grid'][inveta_mcc_maxind] inveta_with_best_precision = gridresults['inveta_grid'][ inveta_precision_maxind ] inveta_with_best_recall = gridresults['inveta_grid'][ inveta_recall_maxind ] iqr_mcc_maxind = np.where(iqr_mcc == np.max(iqr_mcc)) iqr_precision_maxind = np.where( iqr_precision == np.max(iqr_precision) ) iqr_recall_maxind = ( np.where(iqr_recall == np.max(iqr_recall)) ) best_iqr_mcc = iqr_mcc[iqr_mcc_maxind] best_iqr_precision = iqr_mcc[iqr_precision_maxind] best_iqr_recall = iqr_mcc[iqr_recall_maxind] iqr_with_best_mcc = gridresults['iqr_grid'][iqr_mcc_maxind] iqr_with_best_precision = gridresults['iqr_grid'][ iqr_precision_maxind ] iqr_with_best_recall = gridresults['iqr_grid'][ iqr_recall_maxind ] plotres[magcol][magbinmedian] = { # stetson 'stet_grid':gridresults['stetson_grid'], 'stet_mcc':stet_mcc, 'stet_precision':stet_precision, 'stet_recall':stet_recall, 'stet_missed_inveta_found':stet_missed_inveta_found, 'best_stet_mcc':best_stet_mcc, 'stet_with_best_mcc':stet_with_best_mcc, 'best_stet_precision':best_stet_precision, 'stet_with_best_precision':stet_with_best_precision, 'best_stet_recall':best_stet_recall, 'stet_with_best_recall':stet_with_best_recall, # inveta 'inveta_grid':gridresults['inveta_grid'], 'inveta_mcc':inveta_mcc, 'inveta_precision':inveta_precision, 'inveta_recall':inveta_recall, 'inveta_missed_stet_found':inveta_missed_stet_found, 'best_inveta_mcc':best_inveta_mcc, 'inveta_with_best_mcc':inveta_with_best_mcc, 'best_inveta_precision':best_inveta_precision, 'inveta_with_best_precision':inveta_with_best_precision, 'best_inveta_recall':best_inveta_recall, 'inveta_with_best_recall':inveta_with_best_recall, # iqr 'iqr_grid':gridresults['iqr_grid'], 'iqr_mcc':iqr_mcc, 'iqr_precision':iqr_precision, 'iqr_recall':iqr_recall, 'iqr_missed_stet_found':iqr_missed_stet_found, 'best_iqr_mcc':best_iqr_mcc, 'iqr_with_best_mcc':iqr_with_best_mcc, 'best_iqr_precision':best_iqr_precision, 'iqr_with_best_precision':iqr_with_best_precision, 'best_iqr_recall':best_iqr_recall, 'iqr_with_best_recall':iqr_with_best_recall, # plot info 'recoveryplot':gridplotf } # recommend inveta, stetson index, and iqr for this magbin # if there are multiple stets, choose the smallest one if stet_with_best_mcc.size > 1: plotres[magcol]['best_stetsonj'].append(stet_with_best_mcc[0]) elif stet_with_best_mcc.size > 0: plotres[magcol]['best_stetsonj'].append(stet_with_best_mcc[0]) else: plotres[magcol]['best_stetsonj'].append(np.nan) # if there are multiple best invetas, choose the smallest one if inveta_with_best_mcc.size > 1: plotres[magcol]['best_inveta'].append(inveta_with_best_mcc[0]) elif inveta_with_best_mcc.size > 0: plotres[magcol]['best_inveta'].append(inveta_with_best_mcc[0]) else: plotres[magcol]['best_inveta'].append(np.nan) # if there are multiple best iqrs, choose the smallest one if iqr_with_best_mcc.size > 1: plotres[magcol]['best_iqr'].append(iqr_with_best_mcc[0]) elif iqr_with_best_mcc.size > 0: plotres[magcol]['best_iqr'].append(iqr_with_best_mcc[0]) else: plotres[magcol]['best_iqr'].append(np.nan) # write the plotresults to a pickle plotrespicklef = os.path.join(simbasedir, 'varindex-gridsearch-magbin-results.pkl') with open(plotrespicklef, 'wb') as outfd: pickle.dump(plotres, outfd, pickle.HIGHEST_PROTOCOL) # recommend the values of stetson J and inveta to use for magcol in gridresults['magcols']: LOGINFO('best stdev multipliers for each %s magbin:' % magcol) LOGINFO('magbin inveta stetson J IQR') for magbin, inveta, stet, iqr in zip( plotres[magcol]['magbinmedians'], plotres[magcol]['best_inveta'], plotres[magcol]['best_stetsonj'], plotres[magcol]['best_iqr']): LOGINFO('%.3f %.3f %.3f %.3f' % (magbin, inveta, stet, iqr)) return plotres
################################ ## PERIODIC VARIABLE RECOVERY ## ################################ PERIODIC_VARTYPES = ['EB','RRab','RRc','rotator', 'HADS','planet','LPV','cepheid'] ALIAS_TYPES = ['actual', 'twice', 'half', 'ratio_over_1plus', 'ratio_over_1minus', 'ratio_over_1plus_twice', 'ratio_over_1minus_twice', 'ratio_over_1plus_thrice', 'ratio_over_1minus_thrice', 'ratio_over_minus1', 'ratio_over_twice_minus1']
[docs]def run_periodfinding(simbasedir, pfmethods=('gls','pdm','bls'), pfkwargs=({},{},{'startp':1.0,'maxtransitduration':0.3}), getblssnr=False, sigclip=5.0, nperiodworkers=10, ncontrolworkers=4, liststartindex=None, listmaxobjects=None): '''This runs periodfinding using several period-finders on a collection of fake LCs. As a rough benchmark, 25000 fake LCs with 10000--50000 points per LC take about 26 days in total to run on an invocation of this function using GLS+PDM+BLS and 10 periodworkers and 4 controlworkers (so all 40 'cores') on a 2 x Xeon E5-2660v3 machine. Parameters ---------- pfmethods : sequence of str This is used to specify which periodfinders to run. These must be in the `lcproc.periodsearch.PFMETHODS` dict. pfkwargs : sequence of dict This is used to provide optional kwargs to the period-finders. getblssnr : bool If this is True, will run BLS SNR calculations for each object and magcol. This takes a while to run, so it's disabled (False) by default. sigclip : float or int or sequence of two floats/ints or None If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. nperiodworkers : int This is the number of parallel period-finding worker processes to use. ncontrolworkers : int This is the number of parallel period-finding control workers to use. Each control worker will launch `nperiodworkers` worker processes. liststartindex : int The starting index of processing. This refers to the filename list generated by running `glob.glob` on the fake LCs in `simbasedir`. maxobjects : int The maximum number of objects to process in this run. Use this with `liststartindex` to effectively distribute working on a large list of input light curves over several sessions or machines. Returns ------- str The path to the output summary pickle produced by `lcproc.periodsearch.parallel_pf` ''' # get the info from the simbasedir with open(os.path.join(simbasedir, 'fakelcs-info.pkl'),'rb') as infd: siminfo = pickle.load(infd) lcfpaths = siminfo['lcfpath'] pfdir = os.path.join(simbasedir,'periodfinding') # get the column defs for the fakelcs timecols = siminfo['timecols'] magcols = siminfo['magcols'] errcols = siminfo['errcols'] # register the fakelc pklc as a custom lcproc format # now we should be able to use all lcproc functions correctly fakelc_formatkey = 'fake-%s' % siminfo['lcformat'] lcproc.register_lcformat( fakelc_formatkey, '*-fakelc.pkl', timecols, magcols, errcols, 'astrobase.lcproc', '_read_pklc', magsarefluxes=siminfo['magsarefluxes'] ) if liststartindex: lcfpaths = lcfpaths[liststartindex:] if listmaxobjects: lcfpaths = lcfpaths[:listmaxobjects] pfinfo = periodsearch.parallel_pf(lcfpaths, pfdir, lcformat=fakelc_formatkey, pfmethods=pfmethods, pfkwargs=pfkwargs, getblssnr=getblssnr, sigclip=sigclip, nperiodworkers=nperiodworkers, ncontrolworkers=ncontrolworkers) with open(os.path.join(simbasedir, 'fakelc-periodsearch.pkl'),'wb') as outfd: pickle.dump(pfinfo, outfd, pickle.HIGHEST_PROTOCOL) return os.path.join(simbasedir,'fakelc-periodsearch.pkl')
[docs]def check_periodrec_alias(actualperiod, recoveredperiod, tolerance=1.0e-3): '''This determines what kind of aliasing (if any) exists between `recoveredperiod` and `actualperiod`. Parameters ---------- actualperiod : float The actual period of the object. recoveredperiod : float The recovered period of the object. tolerance : float The absolute difference required between the input periods to mark the recovered period as close to the actual period. Returns ------- str The type of alias determined for the input combination of periods. This will be CSV string with values taken from the following list, based on the types of alias found:: ['actual', 'twice', 'half', 'ratio_over_1plus', 'ratio_over_1minus', 'ratio_over_1plus_twice', 'ratio_over_1minus_twice', 'ratio_over_1plus_thrice', 'ratio_over_1minus_thrice', 'ratio_over_minus1', 'ratio_over_twice_minus1'] ''' if not (np.isfinite(actualperiod) and np.isfinite(recoveredperiod)): LOGERROR("can't compare nan values for actual/recovered periods") return 'unknown' else: ################# ## ALIAS TYPES ## ################# # simple ratios twotimes_p = actualperiod*2.0 half_p = actualperiod*0.5 # first kind of alias alias_1a = actualperiod/(1.0+actualperiod) alias_1b = actualperiod/(1.0-actualperiod) # second kind of alias alias_2a = actualperiod/(1.0+2.0*actualperiod) alias_2b = actualperiod/(1.0-2.0*actualperiod) # third kind of alias alias_3a = actualperiod/(1.0+3.0*actualperiod) alias_3b = actualperiod/(1.0-3.0*actualperiod) # fourth kind of alias alias_4a = actualperiod/(actualperiod - 1.0) alias_4b = actualperiod/(2.0*actualperiod - 1.0) aliases = np.ravel(np.array([ actualperiod, twotimes_p, half_p, alias_1a, alias_1b, alias_2a, alias_2b, alias_3a, alias_3b, alias_4a, alias_4b] )) alias_labels = np.array(ALIAS_TYPES) # check type of alias closest_alias = np.isclose(recoveredperiod, aliases, atol=tolerance) if np.any(closest_alias): closest_alias_type = alias_labels[closest_alias] return ','.join(closest_alias_type.tolist()) else: return 'other'
[docs]def periodicvar_recovery(fakepfpkl, simbasedir, period_tolerance=1.0e-3): '''Recovers the periodic variable status/info for the simulated PF result. - Uses simbasedir and the lcfbasename stored in fakepfpkl to figure out where the LC for this object is. - Gets the actual_varparams, actual_varperiod, actual_vartype, actual_varamplitude elements from the LC. - Figures out if the current objectid is a periodic variable (using actual_vartype). - If it is a periodic variable, gets the canonical period assigned to it. - Checks if the period was recovered in any of the five best periods reported by any of the period-finders, checks if the period recovered was a harmonic of the period. - Returns the objectid, actual period and vartype, recovered period, and recovery status. Parameters ---------- fakepfpkl : str This is a periodfinding-<objectid>.pkl[.gz] file produced in the `simbasedir/periodfinding` subdirectory after `run_periodfinding` above is done. simbasedir : str The base directory where all of the fake LCs and period-finding results are. period_tolerance : float The maximum difference that this function will consider between an actual period (or its aliases) and a recovered period to consider it as as a 'recovered' period. Returns ------- dict Returns a dict of period-recovery results. ''' if fakepfpkl.endswith('.gz'): infd = gzip.open(fakepfpkl,'rb') else: infd = open(fakepfpkl,'rb') fakepf = pickle.load(infd) infd.close() # get info from the fakepf dict objectid, lcfbasename = fakepf['objectid'], fakepf['lcfbasename'] lcfpath = os.path.join(simbasedir,'lightcurves',lcfbasename) # if the LC doesn't exist, bail out if not os.path.exists(lcfpath): LOGERROR('light curve for %s does not exist at: %s' % (objectid, lcfpath)) return None # now, open the fakelc fakelc = lcproc._read_pklc(lcfpath) # get the actual_varparams, actual_varperiod, actual_varamplitude actual_varparams, actual_varperiod, actual_varamplitude, actual_vartype = ( fakelc['actual_varparams'], fakelc['actual_varperiod'], fakelc['actual_varamplitude'], fakelc['actual_vartype'] ) # get the moments too so we can track LC noise, etc. actual_moments = fakelc['moments'] # get the magcols for this LC magcols = fakelc['magcols'] # get the recovered info from each of the available methods pfres = { 'objectid':objectid, 'simbasedir':simbasedir, 'magcols':magcols, 'fakelc':os.path.abspath(lcfpath), 'fakepf':os.path.abspath(fakepfpkl), 'actual_vartype':actual_vartype, 'actual_varperiod':actual_varperiod, 'actual_varamplitude':actual_varamplitude, 'actual_varparams':actual_varparams, 'actual_moments':actual_moments, 'recovery_periods':[], 'recovery_lspvals':[], 'recovery_pfmethods':[], 'recovery_magcols':[], 'recovery_status':[], 'recovery_pdiff':[], } # populate the pfres dict with the periods, pfmethods, and magcols for magcol in magcols: for pfm in lcproc.PFMETHODS: if pfm in fakepf[magcol]: # only get the unique recovered periods by using # period_tolerance for rpi, rp in enumerate( fakepf[magcol][pfm]['nbestperiods'] ): if ((not np.any(np.isclose( rp, np.array(pfres['recovery_periods']), rtol=period_tolerance ))) and np.isfinite(rp)): # populate the recovery periods, pfmethods, and magcols pfres['recovery_periods'].append(rp) pfres['recovery_pfmethods'].append(pfm) pfres['recovery_magcols'].append(magcol) # normalize the periodogram peak value to between # 0 and 1 so we can put in the results of multiple # periodfinders on one scale if pfm == 'pdm': this_lspval = ( np.max(fakepf[magcol][pfm]['lspvals']) - fakepf[magcol][pfm]['nbestlspvals'][rpi] ) else: this_lspval = ( fakepf[magcol][pfm]['nbestlspvals'][rpi] / np.max(fakepf[magcol][pfm]['lspvals']) ) # add the normalized lspval to the outdict for # this object as well. later, we'll use this to # construct a periodogram for objects that were actually # not variables pfres['recovery_lspvals'].append(this_lspval) # convert the recovery_* lists to arrays pfres['recovery_periods'] = np.array(pfres['recovery_periods']) pfres['recovery_lspvals'] = np.array(pfres['recovery_lspvals']) pfres['recovery_pfmethods'] = np.array(pfres['recovery_pfmethods']) pfres['recovery_magcols'] = np.array(pfres['recovery_magcols']) # # now figure out recovery status # # if this is an actual periodic variable, characterize the recovery if (actual_vartype and actual_vartype in PERIODIC_VARTYPES and np.isfinite(actual_varperiod)): if pfres['recovery_periods'].size > 0: for ri in range(pfres['recovery_periods'].size): pfres['recovery_pdiff'].append(pfres['recovery_periods'][ri] - np.asscalar(actual_varperiod)) # get the alias types pfres['recovery_status'].append( check_periodrec_alias(actual_varperiod, pfres['recovery_periods'][ri], tolerance=period_tolerance) ) # turn the recovery_pdiff/status lists into arrays pfres['recovery_status'] = np.array(pfres['recovery_status']) pfres['recovery_pdiff'] = np.array(pfres['recovery_pdiff']) # find the best recovered period and its status rec_absdiff = np.abs(pfres['recovery_pdiff']) best_recp_ind = rec_absdiff == rec_absdiff.min() pfres['best_recovered_period'] = ( pfres['recovery_periods'][best_recp_ind] ) pfres['best_recovered_pfmethod'] = ( pfres['recovery_pfmethods'][best_recp_ind] ) pfres['best_recovered_magcol'] = ( pfres['recovery_magcols'][best_recp_ind] ) pfres['best_recovered_status'] = ( pfres['recovery_status'][best_recp_ind] ) pfres['best_recovered_pdiff'] = ( pfres['recovery_pdiff'][best_recp_ind] ) else: LOGWARNING( 'no finite periods recovered from period-finding for %s' % fakepfpkl ) pfres['recovery_status'] = np.array(['no_finite_periods_recovered']) pfres['recovery_pdiff'] = np.array([np.nan]) pfres['best_recovered_period'] = np.array([np.nan]) pfres['best_recovered_pfmethod'] = np.array([],dtype=np.unicode_) pfres['best_recovered_magcol'] = np.array([],dtype=np.unicode_) pfres['best_recovered_status'] = np.array([],dtype=np.unicode_) pfres['best_recovered_pdiff'] = np.array([np.nan]) # if this is not actually a variable, get the recovered period, # etc. anyway. this way, we can see what we need to look out for and avoid # when getting these values for actual objects else: pfres['recovery_status'] = np.array( ['not_variable']*pfres['recovery_periods'].size ) pfres['recovery_pdiff'] = np.zeros(pfres['recovery_periods'].size) pfres['best_recovered_period'] = np.array([np.nan]) pfres['best_recovered_pfmethod'] = np.array([],dtype=np.unicode_) pfres['best_recovered_magcol'] = np.array([],dtype=np.unicode_) pfres['best_recovered_status'] = np.array(['not_variable']) pfres['best_recovered_pdiff'] = np.array([np.nan]) return pfres
[docs]def periodrec_worker(task): '''This is a parallel worker for running period-recovery. Parameters ---------- task : tuple This is used to pass args to the `periodicvar_recovery` function:: task[0] = period-finding result pickle to work on task[1] = simbasedir task[2] = period_tolerance Returns ------- dict This is the dict produced by the `periodicvar_recovery` function for the input period-finding result pickle. ''' pfpkl, simbasedir, period_tolerance = task try: return periodicvar_recovery(pfpkl, simbasedir, period_tolerance=period_tolerance) except Exception: LOGEXCEPTION('periodic var recovery failed for %s' % repr(task)) return None
[docs]def parallel_periodicvar_recovery(simbasedir, period_tolerance=1.0e-3, liststartind=None, listmaxobjects=None, nworkers=None): '''This is a parallel driver for `periodicvar_recovery`. Parameters ---------- simbasedir : str The base directory where all of the fake LCs and period-finding results are. period_tolerance : float The maximum difference that this function will consider between an actual period (or its aliases) and a recovered period to consider it as as a 'recovered' period. liststartindex : int The starting index of processing. This refers to the filename list generated by running `glob.glob` on the period-finding result pickles in `simbasedir/periodfinding`. listmaxobjects : int The maximum number of objects to process in this run. Use this with `liststartindex` to effectively distribute working on a large list of input period-finding result pickles over several sessions or machines. nperiodworkers : int This is the number of parallel period-finding worker processes to use. Returns ------- str Returns the filename of the pickle produced containing all of the period recovery results. ''' # figure out the periodfinding pickles directory pfpkldir = os.path.join(simbasedir,'periodfinding') if not os.path.exists(pfpkldir): LOGERROR('no "periodfinding" subdirectory in %s, can\'t continue' % simbasedir) return None # find all the periodfinding pickles pfpkl_list = glob.glob(os.path.join(pfpkldir,'*periodfinding*pkl*')) if len(pfpkl_list) > 0: if liststartind: pfpkl_list = pfpkl_list[liststartind:] if listmaxobjects: pfpkl_list = pfpkl_list[:listmaxobjects] tasks = [(x, simbasedir, period_tolerance) for x in pfpkl_list] pool = mp.Pool(nworkers) results = pool.map(periodrec_worker, tasks) pool.close() pool.join() resdict = {x['objectid']:x for x in results if x is not None} actual_periodicvars = np.array( [x['objectid'] for x in results if (x is not None and x['actual_vartype'] in PERIODIC_VARTYPES)], dtype=np.unicode_ ) recovered_periodicvars = np.array( [x['objectid'] for x in results if (x is not None and 'actual' in x['best_recovered_status'])], dtype=np.unicode_ ) alias_twice_periodicvars = np.array( [x['objectid'] for x in results if (x is not None and 'twice' in x['best_recovered_status'])], dtype=np.unicode_ ) alias_half_periodicvars = np.array( [x['objectid'] for x in results if (x is not None and 'half' in x['best_recovered_status'])], dtype=np.unicode_ ) all_objectids = [x['objectid'] for x in results] outdict = {'simbasedir':os.path.abspath(simbasedir), 'objectids':all_objectids, 'period_tolerance':period_tolerance, 'actual_periodicvars':actual_periodicvars, 'recovered_periodicvars':recovered_periodicvars, 'alias_twice_periodicvars':alias_twice_periodicvars, 'alias_half_periodicvars':alias_half_periodicvars, 'details':resdict} outfile = os.path.join(simbasedir,'periodicvar-recovery.pkl') with open(outfile, 'wb') as outfd: pickle.dump(outdict, outfd, pickle.HIGHEST_PROTOCOL) return outdict else: LOGERROR( 'no periodfinding result pickles found in %s, can\'t continue' % pfpkldir ) return None
PERIODREC_DEFAULT_MAGBINS = np.arange(8.0,16.25,0.25) PERIODREC_DEFAULT_PERIODBINS = np.arange(0.0,500.0,0.5) PERIODREC_DEFAULT_AMPBINS = np.arange(0.0,2.0,0.05) PERIODREC_DEFAULT_NDETBINS = np.arange(0.0,60000.0,1000.0)
[docs]def plot_periodicvar_recovery_results( precvar_results, aliases_count_as_recovered=None, magbins=None, periodbins=None, amplitudebins=None, ndetbins=None, minbinsize=1, plotfile_ext='png', ): '''This plots the results of periodic var recovery. This function makes plots for periodicvar recovered fraction as a function of: - magbin - periodbin - amplitude of variability - ndet with plot lines broken down by: - magcol - periodfinder - vartype - recovery status The kwargs `magbins`, `periodbins`, `amplitudebins`, and `ndetbins` can be used to set the bin lists as needed. The kwarg `minbinsize` controls how many elements per bin are required to accept a bin in processing its recovery characteristics for mags, periods, amplitudes, and ndets. Parameters ---------- precvar_results : dict or str This is either a dict returned by parallel_periodicvar_recovery or the pickle created by that function. aliases_count_as_recovered : list of str or 'all' This is used to set which kinds of aliases this function considers as 'recovered' objects. Normally, we require that recovered objects have a recovery status of 'actual' to indicate the actual period was recovered. To change this default behavior, aliases_count_as_recovered can be set to a list of alias status strings that should be considered as 'recovered' objects as well. Choose from the following alias types:: 'twice' recovered_p = 2.0*actual_p 'half' recovered_p = 0.5*actual_p 'ratio_over_1plus' recovered_p = actual_p/(1.0+actual_p) 'ratio_over_1minus' recovered_p = actual_p/(1.0-actual_p) 'ratio_over_1plus_twice' recovered_p = actual_p/(1.0+2.0*actual_p) 'ratio_over_1minus_twice' recovered_p = actual_p/(1.0-2.0*actual_p) 'ratio_over_1plus_thrice' recovered_p = actual_p/(1.0+3.0*actual_p) 'ratio_over_1minus_thrice' recovered_p = actual_p/(1.0-3.0*actual_p) 'ratio_over_minus1' recovered_p = actual_p/(actual_p - 1.0) 'ratio_over_twice_minus1' recovered_p = actual_p/(2.0*actual_p - 1.0) or set `aliases_count_as_recovered='all'` to include all of the above in the 'recovered' periodic var list. magbins : np.array The magnitude bins to plot the recovery rate results over. If None, the default mag bins will be used: `np.arange(8.0,16.25,0.25)`. periodbins : np.array The period bins to plot the recovery rate results over. If None, the default period bins will be used: `np.arange(0.0,500.0,0.5)`. amplitudebins : np.array The variability amplitude bins to plot the recovery rate results over. If None, the default amplitude bins will be used: `np.arange(0.0,2.0,0.05)`. ndetbins : np.array The ndet bins to plot the recovery rate results over. If None, the default ndet bins will be used: `np.arange(0.0,60000.0,1000.0)`. minbinsize : int The minimum number of objects per bin required to plot a bin and its recovery fraction on the plot. plotfile_ext : {'png','pdf'} Sets the plot output files' extension. Returns ------- dict A dict containing recovery fraction statistics and the paths to each of the plots made. ''' # get the result pickle/dict if isinstance(precvar_results, str) and os.path.exists(precvar_results): with open(precvar_results,'rb') as infd: precvar = pickle.load(infd) elif isinstance(precvar_results, dict): precvar = precvar_results else: LOGERROR('could not understand the input ' 'periodic var recovery dict/pickle') return None # get the simbasedir and open the fakelc-info.pkl. we'll need the magbins # definition from here. simbasedir = precvar['simbasedir'] lcinfof = os.path.join(simbasedir,'fakelcs-info.pkl') if not os.path.exists(lcinfof): LOGERROR('fakelcs-info.pkl does not exist in %s, can\'t continue' % simbasedir) return None with open(lcinfof,'rb') as infd: lcinfo = pickle.load(infd) # get the magcols, vartypes, sdssr, isvariable flags magcols = lcinfo['magcols'] objectid = lcinfo['objectid'] ndet = lcinfo['ndet'] sdssr = lcinfo['sdssr'] # get the actual periodic vars actual_periodicvars = precvar['actual_periodicvars'] # generate lists of objects binned by magbins and periodbins LOGINFO('getting sdssr and ndet for actual periodic vars...') # get the sdssr and ndet for all periodic vars periodicvar_sdssr = [] periodicvar_ndet = [] periodicvar_objectids = [] for pobj in actual_periodicvars: pobjind = objectid == pobj periodicvar_objectids.append(pobj) periodicvar_sdssr.append(sdssr[pobjind]) periodicvar_ndet.append(ndet[pobjind]) periodicvar_sdssr = np.array(periodicvar_sdssr) periodicvar_objectids = np.array(periodicvar_objectids) periodicvar_ndet = np.array(periodicvar_ndet) LOGINFO('getting periods, vartypes, ' 'amplitudes, ndet for actual periodic vars...') # get the periods, vartypes, amplitudes for the actual periodic vars periodicvar_periods = [ np.asscalar(precvar['details'][x]['actual_varperiod']) for x in periodicvar_objectids ] periodicvar_amplitudes = [ np.asscalar(precvar['details'][x]['actual_varamplitude']) for x in periodicvar_objectids ] # # do the binning # # bin by mag LOGINFO('binning actual periodic vars by magnitude...') magbinned_sdssr = [] magbinned_periodicvars = [] if not magbins: magbins = PERIODREC_DEFAULT_MAGBINS magbininds = np.digitize(np.ravel(periodicvar_sdssr), magbins) for mbinind, magi in zip(np.unique(magbininds), range(len(magbins)-1)): thisbin_periodicvars = periodicvar_objectids[magbininds == mbinind] if (thisbin_periodicvars.size > (minbinsize-1)): magbinned_sdssr.append((magbins[magi] + magbins[magi+1])/2.0) magbinned_periodicvars.append(thisbin_periodicvars) # bin by period LOGINFO('binning actual periodic vars by period...') periodbinned_periods = [] periodbinned_periodicvars = [] if not periodbins: periodbins = PERIODREC_DEFAULT_PERIODBINS periodbininds = np.digitize(np.ravel(periodicvar_periods), periodbins) for pbinind, peri in zip(np.unique(periodbininds), range(len(periodbins)-1)): thisbin_periodicvars = periodicvar_objectids[periodbininds == pbinind] if (thisbin_periodicvars.size > (minbinsize-1)): periodbinned_periods.append((periodbins[peri] + periodbins[peri+1])/2.0) periodbinned_periodicvars.append(thisbin_periodicvars) # bin by amplitude of variability LOGINFO('binning actual periodic vars by variability amplitude...') amplitudebinned_amplitudes = [] amplitudebinned_periodicvars = [] if not amplitudebins: amplitudebins = PERIODREC_DEFAULT_AMPBINS amplitudebininds = np.digitize(np.ravel(np.abs(periodicvar_amplitudes)), amplitudebins) for abinind, ampi in zip(np.unique(amplitudebininds), range(len(amplitudebins)-1)): thisbin_periodicvars = periodicvar_objectids[ amplitudebininds == abinind ] if (thisbin_periodicvars.size > (minbinsize-1)): amplitudebinned_amplitudes.append( (amplitudebins[ampi] + amplitudebins[ampi+1])/2.0 ) amplitudebinned_periodicvars.append(thisbin_periodicvars) # bin by ndet LOGINFO('binning actual periodic vars by ndet...') ndetbinned_ndets = [] ndetbinned_periodicvars = [] if not ndetbins: ndetbins = PERIODREC_DEFAULT_NDETBINS ndetbininds = np.digitize(np.ravel(periodicvar_ndet), ndetbins) for nbinind, ndeti in zip(np.unique(ndetbininds), range(len(ndetbins)-1)): thisbin_periodicvars = periodicvar_objectids[ndetbininds == nbinind] if (thisbin_periodicvars.size > (minbinsize-1)): ndetbinned_ndets.append( (ndetbins[ndeti] + ndetbins[ndeti+1])/2.0 ) ndetbinned_periodicvars.append(thisbin_periodicvars) # now figure out what 'recovered' means using the provided # aliases_count_as_recovered kwarg recovered_status = ['actual'] if isinstance(aliases_count_as_recovered, list): for atype in aliases_count_as_recovered: if atype in ALIAS_TYPES: recovered_status.append(atype) else: LOGWARNING('unknown alias type: %s, skipping' % atype) elif aliases_count_as_recovered and aliases_count_as_recovered == 'all': for atype in ALIAS_TYPES[1:]: recovered_status.append(atype) # find all the matching objects for these recovered statuses recovered_periodicvars = np.array( [precvar['details'][x]['objectid'] for x in precvar['details'] if (precvar['details'][x] is not None and precvar['details'][x]['best_recovered_status'] in recovered_status)], dtype=np.unicode_ ) LOGINFO('recovered %s/%s periodic variables (frac: %.3f) with ' 'period recovery status: %s' % (recovered_periodicvars.size, actual_periodicvars.size, float(recovered_periodicvars.size/actual_periodicvars.size), ', '.join(recovered_status))) # get the objects recovered per bin and overall recovery fractions per bin magbinned_recovered_objects = [ np.intersect1d(x,recovered_periodicvars) for x in magbinned_periodicvars ] magbinned_recfrac = np.array([float(x.size/y.size) for x,y in zip(magbinned_recovered_objects, magbinned_periodicvars)]) periodbinned_recovered_objects = [ np.intersect1d(x,recovered_periodicvars) for x in periodbinned_periodicvars ] periodbinned_recfrac = np.array([float(x.size/y.size) for x,y in zip(periodbinned_recovered_objects, periodbinned_periodicvars)]) amplitudebinned_recovered_objects = [ np.intersect1d(x,recovered_periodicvars) for x in amplitudebinned_periodicvars ] amplitudebinned_recfrac = np.array( [float(x.size/y.size) for x,y in zip(amplitudebinned_recovered_objects, amplitudebinned_periodicvars)] ) ndetbinned_recovered_objects = [ np.intersect1d(x,recovered_periodicvars) for x in ndetbinned_periodicvars ] ndetbinned_recfrac = np.array([float(x.size/y.size) for x,y in zip(ndetbinned_recovered_objects, ndetbinned_periodicvars)]) # convert the bin medians to arrays magbinned_sdssr = np.array(magbinned_sdssr) periodbinned_periods = np.array(periodbinned_periods) amplitudebinned_amplitudes = np.array(amplitudebinned_amplitudes) ndetbinned_ndets = np.array(ndetbinned_ndets) # this is the initial output dict outdict = { 'simbasedir':simbasedir, 'precvar_results':precvar, 'magcols':magcols, 'objectids':objectid, 'ndet':ndet, 'sdssr':sdssr, 'actual_periodicvars':actual_periodicvars, 'recovered_periodicvars':recovered_periodicvars, 'recovery_definition':recovered_status, # mag binned actual periodicvars # note that only bins with nobjects > minbinsize are included 'magbins':magbins, 'magbinned_mags':magbinned_sdssr, 'magbinned_periodicvars':magbinned_periodicvars, 'magbinned_recoveredvars':magbinned_recovered_objects, 'magbinned_recfrac':magbinned_recfrac, # period binned actual periodicvars # note that only bins with nobjects > minbinsize are included 'periodbins':periodbins, 'periodbinned_periods':periodbinned_periods, 'periodbinned_periodicvars':periodbinned_periodicvars, 'periodbinned_recoveredvars':periodbinned_recovered_objects, 'periodbinned_recfrac':periodbinned_recfrac, # amplitude binned actual periodicvars # note that only bins with nobjects > minbinsize are included 'amplitudebins':amplitudebins, 'amplitudebinned_amplitudes':amplitudebinned_amplitudes, 'amplitudebinned_periodicvars':amplitudebinned_periodicvars, 'amplitudebinned_recoveredvars':amplitudebinned_recovered_objects, 'amplitudebinned_recfrac':amplitudebinned_recfrac, # ndet binned actual periodicvars # note that only bins with nobjects > minbinsize are included 'ndetbins':ndetbins, 'ndetbinned_ndets':ndetbinned_ndets, 'ndetbinned_periodicvars':ndetbinned_periodicvars, 'ndetbinned_recoveredvars':ndetbinned_recovered_objects, 'ndetbinned_recfrac':ndetbinned_recfrac, } # figure out which pfmethods were used all_pfmethods = np.unique( np.concatenate( [np.unique(precvar['details'][x]['recovery_pfmethods']) for x in precvar['details']] ) ) # figure out all vartypes all_vartypes = np.unique( [(precvar['details'][x]['actual_vartype']) for x in precvar['details'] if (precvar['details'][x]['actual_vartype'] is not None)] ) # figure out all alias types all_aliastypes = recovered_status # add these to the outdict outdict['aliastypes'] = all_aliastypes outdict['pfmethods'] = all_pfmethods outdict['vartypes'] = all_vartypes # these are recfracs per-magcol, -vartype, -periodfinder, -aliastype # binned appropriately by mags, periods, amplitudes, and ndet # all of these have the shape as the magcols, aliastypes, pfmethods, and # vartypes lists above. magbinned_per_magcol_recfracs = [] magbinned_per_vartype_recfracs = [] magbinned_per_pfmethod_recfracs = [] magbinned_per_aliastype_recfracs = [] periodbinned_per_magcol_recfracs = [] periodbinned_per_vartype_recfracs = [] periodbinned_per_pfmethod_recfracs = [] periodbinned_per_aliastype_recfracs = [] amplitudebinned_per_magcol_recfracs = [] amplitudebinned_per_vartype_recfracs = [] amplitudebinned_per_pfmethod_recfracs = [] amplitudebinned_per_aliastype_recfracs = [] ndetbinned_per_magcol_recfracs = [] ndetbinned_per_vartype_recfracs = [] ndetbinned_per_pfmethod_recfracs = [] ndetbinned_per_aliastype_recfracs = [] # # finally, we do stuff for the plots! # recplotdir = os.path.join(simbasedir, 'periodic-variable-recovery-plots') if not os.path.exists(recplotdir): os.mkdir(recplotdir) # 1. recovery-rate by magbin # 1a. plot of overall recovery rate per magbin fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) plt.plot(magbinned_sdssr, magbinned_recfrac,marker='.',ms=0.0) plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('overall recovery fraction by periodic var magnitudes') plt.ylim((0,1)) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-magnitudes-overall.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 1b. plot of recovery rate per magbin per magcol fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) for magcol in magcols: thismagcol_recfracs = [] for magbin_pv, magbin_rv in zip(magbinned_periodicvars, magbinned_recovered_objects): thisbin_thismagcol_recvars = [ x for x in magbin_rv if (precvar['details'][x]['best_recovered_magcol'] == magcol) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thismagcol_recvars).size / magbin_pv.size ) thismagcol_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(magbinned_sdssr, np.array(thismagcol_recfracs), marker='.', label='magcol: %s' % magcol, ms=0.0) # add this to the outdict array magbinned_per_magcol_recfracs.append(np.array(thismagcol_recfracs)) # finish up the plot plt.plot(magbinned_sdssr, magbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per magcol recovery fraction by periodic var magnitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-magnitudes-magcols.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 1c. plot of recovery rate per magbin per periodfinder fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out which pfmethods were used all_pfmethods = np.unique( np.concatenate( [np.unique(precvar['details'][x]['recovery_pfmethods']) for x in precvar['details']] ) ) for pfm in all_pfmethods: thispf_recfracs = [] for magbin_pv, magbin_rv in zip(magbinned_periodicvars, magbinned_recovered_objects): thisbin_thispf_recvars = [ x for x in magbin_rv if (precvar['details'][x]['best_recovered_pfmethod'] == pfm) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thispf_recvars).size / magbin_pv.size ) thispf_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(magbinned_sdssr, np.array(thispf_recfracs), marker='.', label='%s' % pfm.upper(), ms=0.0) # add this to the outdict array magbinned_per_pfmethod_recfracs.append(np.array(thispf_recfracs)) # finish up the plot plt.plot(magbinned_sdssr, magbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per period-finder recovery fraction by periodic var magnitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-magnitudes-pfmethod.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 1d. plot of recovery rate per magbin per variable type fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out all vartypes all_vartypes = np.unique( [(precvar['details'][x]['actual_vartype']) for x in precvar['details'] if (precvar['details'][x]['actual_vartype'] is not None)] ) for vt in all_vartypes: thisvt_recfracs = [] for magbin_pv, magbin_rv in zip(magbinned_periodicvars, magbinned_recovered_objects): thisbin_thisvt_recvars = [ x for x in magbin_rv if (precvar['details'][x]['actual_vartype'] == vt) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thisvt_recvars).size / magbin_pv.size ) thisvt_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(magbinned_sdssr, np.array(thisvt_recfracs), marker='.', label='%s' % vt, ms=0.0) # add this to the outdict array magbinned_per_vartype_recfracs.append(np.array(thisvt_recfracs)) # finish up the plot plt.plot(magbinned_sdssr, magbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per vartype recovery fraction by periodic var magnitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-magnitudes-vartype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 1e. plot of recovery rate per magbin per alias type fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out all alias types all_aliastypes = recovered_status for at in all_aliastypes: thisat_recfracs = [] for magbin_pv, magbin_rv in zip(magbinned_periodicvars, magbinned_recovered_objects): thisbin_thisat_recvars = [ x for x in magbin_rv if (precvar['details'][x]['best_recovered_status'][0] == at) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thisat_recvars).size / magbin_pv.size ) thisat_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(magbinned_sdssr, np.array(thisat_recfracs), marker='.', label='%s' % at, ms=0.0) # add this to the outdict array magbinned_per_aliastype_recfracs.append(np.array(thisat_recfracs)) # finish up the plot plt.plot(magbinned_sdssr, magbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per alias-type recovery fraction by periodic var magnitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-magnitudes-aliastype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 2. recovery-rate by periodbin # 2a. plot of overall recovery rate per periodbin fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) plt.plot(periodbinned_periods, periodbinned_recfrac, marker='.',ms=0.0) plt.xlabel('periodic variable period [days]') plt.ylabel('recovered fraction of periodic variables') plt.title('overall recovery fraction by periodic var periods') plt.ylim((0,1)) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-periods-overall.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 2b. plot of recovery rate per periodbin per magcol fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) for magcol in magcols: thismagcol_recfracs = [] for periodbin_pv, periodbin_rv in zip(periodbinned_periodicvars, periodbinned_recovered_objects): thisbin_thismagcol_recvars = [ x for x in periodbin_rv if (precvar['details'][x]['best_recovered_magcol'] == magcol) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thismagcol_recvars).size / periodbin_pv.size ) thismagcol_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(periodbinned_periods, np.array(thismagcol_recfracs), marker='.', label='magcol: %s' % magcol, ms=0.0) # add this to the outdict array periodbinned_per_magcol_recfracs.append(np.array(thismagcol_recfracs)) # finish up the plot plt.plot(periodbinned_periods, periodbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per magcol recovery fraction by periodic var periods') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-periods-magcols.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 2c. plot of recovery rate per periodbin per periodfinder fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out which pfmethods were used all_pfmethods = np.unique( np.concatenate( [np.unique(precvar['details'][x]['recovery_pfmethods']) for x in precvar['details']] ) ) for pfm in all_pfmethods: thispf_recfracs = [] for periodbin_pv, periodbin_rv in zip(periodbinned_periodicvars, periodbinned_recovered_objects): thisbin_thispf_recvars = [ x for x in periodbin_rv if (precvar['details'][x]['best_recovered_pfmethod'] == pfm) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thispf_recvars).size / periodbin_pv.size ) thispf_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(periodbinned_periods, np.array(thispf_recfracs), marker='.', label='%s' % pfm.upper(), ms=0.0) # add this to the outdict array periodbinned_per_pfmethod_recfracs.append(np.array(thispf_recfracs)) # finish up the plot plt.plot(periodbinned_periods, periodbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per period-finder recovery fraction by periodic var periods') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-periods-pfmethod.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 2d. plot of recovery rate per periodbin per variable type fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out all vartypes all_vartypes = np.unique( [(precvar['details'][x]['actual_vartype']) for x in precvar['details'] if (precvar['details'][x]['actual_vartype'] is not None)] ) for vt in all_vartypes: thisvt_recfracs = [] for periodbin_pv, periodbin_rv in zip(periodbinned_periodicvars, periodbinned_recovered_objects): thisbin_thisvt_recvars = [ x for x in periodbin_rv if (precvar['details'][x]['actual_vartype'] == vt) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thisvt_recvars).size / periodbin_pv.size ) thisvt_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(periodbinned_periods, np.array(thisvt_recfracs), marker='.', label='%s' % vt, ms=0.0) # add this to the outdict array periodbinned_per_vartype_recfracs.append(np.array(thisvt_recfracs)) # finish up the plot plt.plot(periodbinned_periods, periodbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per vartype recovery fraction by periodic var magnitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-periods-vartype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 2e. plot of recovery rate per periodbin per alias type fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out all vartypes all_aliastypes = recovered_status for at in all_aliastypes: thisat_recfracs = [] for periodbin_pv, periodbin_rv in zip( periodbinned_periodicvars, periodbinned_recovered_objects ): thisbin_thisat_recvars = [ x for x in periodbin_rv if (precvar['details'][x]['best_recovered_status'][0] == at) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thisat_recvars).size / periodbin_pv.size ) thisat_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(periodbinned_periods, np.array(thisat_recfracs), marker='.', label='%s' % at, ms=0.0) # add this to the outdict array periodbinned_per_aliastype_recfracs.append(np.array(thisat_recfracs)) # finish up the plot plt.plot(periodbinned_periods, periodbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per alias-type recovery fraction by periodic var magnitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-periods-aliastype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 3. recovery-rate by amplitude bin # 3a. plot of overall recovery rate per amplitude bin fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac, marker='.',ms=0.0) plt.xlabel('periodic variable amplitude [mag]') plt.ylabel('recovered fraction of periodic variables') plt.title('overall recovery fraction by periodic var amplitudes') plt.ylim((0,1)) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-amplitudes-overall.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 3b. plot of recovery rate per amplitude bin per magcol fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) for magcol in magcols: thismagcol_recfracs = [] for amplitudebin_pv, amplitudebin_rv in zip( amplitudebinned_periodicvars, amplitudebinned_recovered_objects ): thisbin_thismagcol_recvars = [ x for x in amplitudebin_rv if (precvar['details'][x]['best_recovered_magcol'] == magcol) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thismagcol_recvars).size / amplitudebin_pv.size ) thismagcol_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(amplitudebinned_amplitudes, np.array(thismagcol_recfracs), marker='.', label='magcol: %s' % magcol, ms=0.0) # add this to the outdict array amplitudebinned_per_magcol_recfracs.append( np.array(thismagcol_recfracs) ) # finish up the plot plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per magcol recovery fraction by periodic var amplitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-amplitudes-magcols.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 3c. plot of recovery rate per amplitude bin per periodfinder fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out which pfmethods were used all_pfmethods = np.unique( np.concatenate( [np.unique(precvar['details'][x]['recovery_pfmethods']) for x in precvar['details']] ) ) for pfm in all_pfmethods: thispf_recfracs = [] for amplitudebin_pv, amplitudebin_rv in zip( amplitudebinned_periodicvars, amplitudebinned_recovered_objects ): thisbin_thispf_recvars = [ x for x in amplitudebin_rv if (precvar['details'][x]['best_recovered_pfmethod'] == pfm) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thispf_recvars).size / amplitudebin_pv.size ) thispf_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(amplitudebinned_amplitudes, np.array(thispf_recfracs), marker='.', label='%s' % pfm.upper(), ms=0.0) # add this to the outdict array amplitudebinned_per_pfmethod_recfracs.append( np.array(thispf_recfracs) ) # finish up the plot plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per period-finder recovery fraction by periodic var amplitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-amplitudes-pfmethod.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 3d. plot of recovery rate per amplitude bin per variable type fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out all vartypes all_vartypes = np.unique( [(precvar['details'][x]['actual_vartype']) for x in precvar['details'] if (precvar['details'][x]['actual_vartype'] is not None)] ) for vt in all_vartypes: thisvt_recfracs = [] for amplitudebin_pv, amplitudebin_rv in zip( amplitudebinned_periodicvars, amplitudebinned_recovered_objects ): thisbin_thisvt_recvars = [ x for x in amplitudebin_rv if (precvar['details'][x]['actual_vartype'] == vt) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thisvt_recvars).size / amplitudebin_pv.size ) thisvt_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(amplitudebinned_amplitudes, np.array(thisvt_recfracs), marker='.', label='%s' % vt, ms=0.0) # add this to the outdict array amplitudebinned_per_vartype_recfracs.append( np.array(thisvt_recfracs) ) # finish up the plot plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per vartype recovery fraction by periodic var amplitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-amplitudes-vartype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 3e. plot of recovery rate per amplitude bin per alias type fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out all vartypes all_aliastypes = recovered_status for at in all_aliastypes: thisat_recfracs = [] for amplitudebin_pv, amplitudebin_rv in zip( amplitudebinned_periodicvars, amplitudebinned_recovered_objects ): thisbin_thisat_recvars = [ x for x in amplitudebin_rv if (precvar['details'][x]['best_recovered_status'][0] == at) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thisat_recvars).size / amplitudebin_pv.size ) thisat_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(amplitudebinned_amplitudes, np.array(thisat_recfracs), marker='.', label='%s' % at, ms=0.0) # add this to the outdict array amplitudebinned_per_aliastype_recfracs.append( np.array(thisat_recfracs) ) # finish up the plot plt.plot(amplitudebinned_amplitudes, amplitudebinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per alias-type recovery fraction by periodic var amplitudes') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-amplitudes-aliastype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 4. recovery-rate by ndet bin # 4a. plot of overall recovery rate per ndet bin fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) plt.plot(ndetbinned_ndets, ndetbinned_recfrac, marker='.',ms=0.0) plt.xlabel('periodic variable light curve points') plt.ylabel('recovered fraction of periodic variables') plt.title('overall recovery fraction by periodic var ndet') plt.ylim((0,1)) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-ndet-overall.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 4b. plot of recovery rate per ndet bin per magcol fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) for magcol in magcols: thismagcol_recfracs = [] for ndetbin_pv, ndetbin_rv in zip(ndetbinned_periodicvars, ndetbinned_recovered_objects): thisbin_thismagcol_recvars = [ x for x in ndetbin_rv if (precvar['details'][x]['best_recovered_magcol'] == magcol) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thismagcol_recvars).size / ndetbin_pv.size ) thismagcol_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(ndetbinned_ndets, np.array(thismagcol_recfracs), marker='.', label='magcol: %s' % magcol, ms=0.0) # add this to the outdict array ndetbinned_per_magcol_recfracs.append( np.array(thismagcol_recfracs) ) # finish up the plot plt.plot(ndetbinned_ndets, ndetbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per magcol recovery fraction by periodic var ndets') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-ndet-magcols.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 4c. plot of recovery rate per ndet bin per periodfinder fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out which pfmethods were used all_pfmethods = np.unique( np.concatenate( [np.unique(precvar['details'][x]['recovery_pfmethods']) for x in precvar['details']] ) ) for pfm in all_pfmethods: thispf_recfracs = [] for ndetbin_pv, ndetbin_rv in zip(ndetbinned_periodicvars, ndetbinned_recovered_objects): thisbin_thispf_recvars = [ x for x in ndetbin_rv if (precvar['details'][x]['best_recovered_pfmethod'] == pfm) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thispf_recvars).size / ndetbin_pv.size ) thispf_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(ndetbinned_ndets, np.array(thispf_recfracs), marker='.', label='%s' % pfm.upper(), ms=0.0) # add this to the outdict array ndetbinned_per_pfmethod_recfracs.append( np.array(thispf_recfracs) ) # finish up the plot plt.plot(ndetbinned_ndets, ndetbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per period-finder recovery fraction by periodic var ndets') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-ndet-pfmethod.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 4d. plot of recovery rate per ndet bin per variable type fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out all vartypes all_vartypes = np.unique( [(precvar['details'][x]['actual_vartype']) for x in precvar['details'] if (precvar['details'][x]['actual_vartype'] in PERIODIC_VARTYPES)] ) for vt in all_vartypes: thisvt_recfracs = [] for ndetbin_pv, ndetbin_rv in zip(ndetbinned_periodicvars, ndetbinned_recovered_objects): thisbin_thisvt_recvars = [ x for x in ndetbin_rv if (precvar['details'][x]['actual_vartype'] == vt) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thisvt_recvars).size / ndetbin_pv.size ) thisvt_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(ndetbinned_ndets, np.array(thisvt_recfracs), marker='.', label='%s' % vt, ms=0.0) # add this to the outdict array ndetbinned_per_vartype_recfracs.append( np.array(thisvt_recfracs) ) # finish up the plot plt.plot(ndetbinned_ndets, ndetbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per vartype recovery fraction by periodic var ndets') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-ndet-vartype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 4e. plot of recovery rate per ndet bin per alias type fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) # figure out all vartypes all_aliastypes = recovered_status for at in all_aliastypes: thisat_recfracs = [] for ndetbin_pv, ndetbin_rv in zip(ndetbinned_periodicvars, ndetbinned_recovered_objects): thisbin_thisat_recvars = [ x for x in ndetbin_rv if (precvar['details'][x]['best_recovered_status'][0] == at) ] thisbin_thismagcol_recfrac = ( np.array(thisbin_thisat_recvars).size / ndetbin_pv.size ) thisat_recfracs.append(thisbin_thismagcol_recfrac) # now that we have per magcol recfracs, plot them plt.plot(ndetbinned_ndets, np.array(thisat_recfracs), marker='.', label='%s' % at, ms=0.0) # add this to the outdict array ndetbinned_per_aliastype_recfracs.append( np.array(thisat_recfracs) ) # finish up the plot plt.plot(ndetbinned_ndets, ndetbinned_recfrac, marker='.',ms=0.0, label='overall', color='k') plt.xlabel(r'SDSS $r$ magnitude') plt.ylabel('recovered fraction of periodic variables') plt.title('per alias-type recovery fraction by periodic var ndets') plt.ylim((0,1)) plt.legend(markerscale=10.0) plt.savefig( os.path.join(recplotdir, 'recfrac-binned-ndet-aliastype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # update the lists in the outdict outdict['magbinned_per_magcol_recfracs'] = ( magbinned_per_magcol_recfracs ) outdict['magbinned_per_pfmethod_recfracs'] = ( magbinned_per_pfmethod_recfracs ) outdict['magbinned_per_vartype_recfracs'] = ( magbinned_per_vartype_recfracs ) outdict['magbinned_per_aliastype_recfracs'] = ( magbinned_per_aliastype_recfracs ) outdict['periodbinned_per_magcol_recfracs'] = ( periodbinned_per_magcol_recfracs ) outdict['periodbinned_per_pfmethod_recfracs'] = ( periodbinned_per_pfmethod_recfracs ) outdict['periodbinned_per_vartype_recfracs'] = ( periodbinned_per_vartype_recfracs ) outdict['periodbinned_per_aliastype_recfracs'] = ( periodbinned_per_aliastype_recfracs ) outdict['amplitudebinned_per_magcol_recfracs'] = ( amplitudebinned_per_magcol_recfracs ) outdict['amplitudebinned_per_pfmethod_recfracs'] = ( amplitudebinned_per_pfmethod_recfracs ) outdict['amplitudebinned_per_vartype_recfracs'] = ( amplitudebinned_per_vartype_recfracs ) outdict['amplitudebinned_per_aliastype_recfracs'] = ( amplitudebinned_per_aliastype_recfracs ) outdict['ndetbinned_per_magcol_recfracs'] = ( ndetbinned_per_magcol_recfracs ) outdict['ndetbinned_per_pfmethod_recfracs'] = ( ndetbinned_per_pfmethod_recfracs ) outdict['ndetbinned_per_vartype_recfracs'] = ( ndetbinned_per_vartype_recfracs ) outdict['ndetbinned_per_aliastype_recfracs'] = ( ndetbinned_per_aliastype_recfracs ) # get the overall recovered vars per pfmethod overall_recvars_per_pfmethod = [] for pfm in all_pfmethods: thispfm_recvars = np.array([ x for x in precvar['details'] if ((x in recovered_periodicvars) and (precvar['details'][x]['best_recovered_pfmethod'] == pfm)) ]) overall_recvars_per_pfmethod.append(thispfm_recvars) # get the overall recovered vars per vartype overall_recvars_per_vartype = [] for vt in all_vartypes: thisvt_recvars = np.array([ x for x in precvar['details'] if ((x in recovered_periodicvars) and (precvar['details'][x]['actual_vartype'] == vt)) ]) overall_recvars_per_vartype.append(thisvt_recvars) # get the overall recovered vars per magcol overall_recvars_per_magcol = [] for mc in magcols: thismc_recvars = np.array([ x for x in precvar['details'] if ((x in recovered_periodicvars) and (precvar['details'][x]['best_recovered_magcol'] == mc)) ]) overall_recvars_per_magcol.append(thismc_recvars) # get the overall recovered vars per aliastype overall_recvars_per_aliastype = [] for at in all_aliastypes: thisat_recvars = np.array([ x for x in precvar['details'] if ((x in recovered_periodicvars) and (precvar['details'][x]['best_recovered_status'] == at)) ]) overall_recvars_per_aliastype.append(thisat_recvars) # update the outdict with these outdict['overall_recfrac_per_pfmethod'] = np.array([ x.size/actual_periodicvars.size for x in overall_recvars_per_pfmethod ]) outdict['overall_recfrac_per_vartype'] = np.array([ x.size/actual_periodicvars.size for x in overall_recvars_per_vartype ]) outdict['overall_recfrac_per_magcol'] = np.array([ x.size/actual_periodicvars.size for x in overall_recvars_per_magcol ]) outdict['overall_recfrac_per_aliastype'] = np.array([ x.size/actual_periodicvars.size for x in overall_recvars_per_aliastype ]) # 5. bar plot of overall recovery rate per pfmethod fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) xt = np.arange(len(all_pfmethods)) xl = all_pfmethods plt.barh(xt, outdict['overall_recfrac_per_pfmethod'], 0.50) plt.yticks(xt, xl) plt.xlabel('period-finding method') plt.ylabel('overall recovery rate') plt.title('overall recovery rate per period-finding method') plt.savefig( os.path.join(recplotdir, 'recfrac-overall-pfmethod.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 6. bar plot of overall recovery rate per magcol fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) xt = np.arange(len(magcols)) xl = magcols plt.barh(xt, outdict['overall_recfrac_per_magcol'], 0.50) plt.yticks(xt, xl) plt.xlabel('light curve magnitude column') plt.ylabel('overall recovery rate') plt.title('overall recovery rate per light curve magcol') plt.savefig( os.path.join(recplotdir, 'recfrac-overall-magcol.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 7. bar plot of overall recovery rate per aliastype fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) xt = np.arange(len(all_aliastypes)) xl = all_aliastypes plt.barh(xt, outdict['overall_recfrac_per_aliastype'], 0.50) plt.yticks(xt, xl) plt.xlabel('period recovery status') plt.ylabel('overall recovery rate') plt.title('overall recovery rate per period recovery status') plt.savefig( os.path.join(recplotdir, 'recfrac-overall-aliastype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 8. bar plot of overall recovery rate per vartype fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) xt = np.arange(len(all_vartypes)) xl = all_vartypes plt.barh(xt, outdict['overall_recfrac_per_vartype'], 0.50) plt.yticks(xt, xl) plt.xlabel('periodic variable type') plt.ylabel('overall recovery rate') plt.title('overall recovery rate per periodic variable type') plt.savefig( os.path.join(recplotdir, 'recfrac-overall-vartype.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 9. overall recovered period periodogram for objects that aren't actual # periodic variables. this effectively should give us the window function of # the observations notvariable_recovered_periods = np.concatenate([ precvar['details'][x]['recovery_periods'] for x in precvar['details'] if (precvar['details'][x]['actual_vartype'] is None) ]) notvariable_recovered_lspvals = np.concatenate([ precvar['details'][x]['recovery_lspvals'] for x in precvar['details'] if (precvar['details'][x]['actual_vartype'] is None) ]) sortind = np.argsort(notvariable_recovered_periods) notvariable_recovered_periods = notvariable_recovered_periods[sortind] notvariable_recovered_lspvals = notvariable_recovered_lspvals[sortind] outdict['notvariable_recovered_periods'] = notvariable_recovered_periods outdict['notvariable_recovered_lspvals'] = notvariable_recovered_lspvals fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) plt.plot(notvariable_recovered_periods, notvariable_recovered_lspvals, ms=1.0,linestyle='none',marker='.') plt.xscale('log') plt.xlabel('recovered periods [days]') plt.ylabel('recovered normalized periodogram power') plt.title('periodogram for actual not-variable objects') plt.savefig( os.path.join(recplotdir, 'recovered-periodogram-nonvariables.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # 10. overall recovered period histogram for objects marked # not-variable. this gives us the most common periods fig = plt.figure(figsize=(6.4*1.5,4.8*1.5)) plt.hist(notvariable_recovered_periods,bins=np.arange(0.02,300.0,1.0e-3), histtype='step') plt.xscale('log') plt.xlabel('recovered periods [days]') plt.ylabel('number of times periods recovered') plt.title('recovered period histogram for non-variable objects') plt.savefig( os.path.join(recplotdir, 'recovered-period-hist-nonvariables.%s' % plotfile_ext), dpi=100, bbox_inches='tight' ) plt.close('all') # at the end, write the outdict to a pickle and return it outfile = os.path.join(simbasedir, 'periodicvar-recovery-plotresults.pkl') with open(outfile,'wb') as outfd: pickle.dump(outdict, outfd, pickle.HIGHEST_PROTOCOL) return outdict