Source code for astrobase.periodbase.utils

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# utils.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Apr 2019
# License: MIT - see the LICENSE file for the full text.

'''This contains some utilities for periodbase functions.

- :py:func:`.independent_freq_count`: gets the number of independent frequencies
  when calculating false alarm probabilities.

- :py:func:`.get_frequency_grid`: generates frequency grids automatically.

- :py:func:`.make_combined_periodogram`: makes a combined periodogram from the
  results of several period-finders

FIXME: add an iterative peak-removal and refit mode to all period-finders here.

'''

#############
## LOGGING ##
#############

import logging
from astrobase import log_sub, log_fmt, log_date_fmt

DEBUG = False
if DEBUG:
    level = logging.DEBUG
else:
    level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception


#############
## IMPORTS ##
#############

import numpy as np


#######################
## UTILITY FUNCTIONS ##
#######################

[docs]def resort_by_time(times, mags, errs): ''' Resorts the input arrays so they're in time order. NOTE: the input arrays must not have nans in them. Parameters ---------- times,mags,errs : np.arrays The times, mags, and errs arrays to resort by time. The times array is assumed to be the first one in the input args. Returns ------- times,mags,errs : np.arrays The resorted times, mags, errs arrays. ''' sort_order = np.argsort(times) times, mags, errs = times[sort_order], mags[sort_order], errs[sort_order] return times, mags, errs
[docs]def independent_freq_count(frequencies, times, conservative=True): '''This estimates the number of independent frequencies in a periodogram. This follows the terminology on page 3 of Zechmeister & Kurster (2009):: M = DELTA_f / delta_f where:: DELTA_f = freq.max() - freq.min() delta_f = 1.0/(times.max() - times.min()) Parameters ---------- frequencies : np.array The frequencies array used for the calculation of the GLS periodogram. times : np.array The array of input times used for the calculation of the GLS periodogram. conservative : bool If True, will follow the prescription given in Schwarzenberg-Czerny (2003): http://adsabs.harvard.edu/abs/2003ASPC..292..383S and estimate the number of independent frequences as:: min(N_obs, N_freq, DELTA_f/delta_f) Returns ------- M : int The number of independent frequencies. ''' M = frequencies.ptp()*times.ptp() if conservative: M_eff = min([times.size, frequencies.size, M]) else: M_eff = M return M_eff
[docs]def get_frequency_grid(times, samplesperpeak=5, nyquistfactor=5, minfreq=None, maxfreq=None, returnf0dfnf=False): '''This calculates a frequency grid for the period finding functions in this module. Based on the autofrequency function in astropy.stats.lombscargle. http://docs.astropy.org/en/stable/_modules/astropy/stats/lombscargle/core.html#LombScargle.autofrequency Parameters ---------- times : np.array The times to use to generate the frequency grid over. samplesperpeak : int The minimum sample coverage each frequency point in the grid will get. nyquistfactor : int The multiplier over the Nyquist rate to use. minfreq,maxfreq : float or None If not None, these will be the limits of the frequency grid generated. returnf0dfnf : bool If this is True, will return the values of `f0`, `df`, and `Nf` generated for this grid. Returns ------- np.array A grid of frequencies. ''' baseline = times.max() - times.min() nsamples = times.size df = 1. / baseline / samplesperpeak if minfreq is not None: f0 = minfreq else: f0 = 0.5 * df if maxfreq is not None: Nf = int(np.ceil((maxfreq - f0) / df)) else: Nf = int(0.5 * samplesperpeak * nyquistfactor * nsamples) if returnf0dfnf: return f0, df, Nf, f0 + df * np.arange(Nf) else: return f0 + df * np.arange(Nf)
############################################ ## FUNCTIONS FOR COMPARING PERIOD-FINDERS ## ############################################
[docs]def make_combined_periodogram(pflist, outfile, addmethods=False): '''This just puts all of the period-finders on a single periodogram. This will renormalize all of the periodograms so their values lie between 0 and 1, with values lying closer to 1 being more significant. Periodograms that give the same best periods will have their peaks line up together. Parameters ---------- pflist : list of dict This is a list of result dicts from any of the period-finders in periodbase. To use your own period-finders' results here, make sure the result dict is of the form and has at least the keys below:: {'periods': np.array of all periods searched by the period-finder, 'lspvals': np.array of periodogram power value for each period, 'bestperiod': a float value that is the period with the highest peak in the periodogram, i.e. the most-likely actual period, 'method': a three-letter code naming the period-finder used; must be one of the keys in the `astrobase.periodbase.METHODLABELS` dict, 'nbestperiods': a list of the periods corresponding to periodogram peaks (`nbestlspvals` below) to annotate on the periodogram plot so they can be called out visually, 'nbestlspvals': a list of the power values associated with periodogram peaks to annotate on the periodogram plot so they can be called out visually; should be the same length as `nbestperiods` above, 'kwargs': dict of kwargs passed to your own period-finder function} outfile : str This is the output file to write the output to. NOTE: EPS/PS won't work because we use alpha transparency to better distinguish between the various periodograms. addmethods : bool If this is True, will add all of the normalized periodograms together, then renormalize them to between 0 and 1. In this way, if all of the period-finders agree on something, it'll stand out easily. FIXME: implement this kwarg. Returns ------- str The name of the generated plot file. ''' import matplotlib.pyplot as plt for pf in pflist: if pf['method'] == 'pdm': plt.plot(pf['periods'], np.max(pf['lspvals'])/pf['lspvals'] - 1.0, label='%s P=%.5f' % (pf['method'], pf['bestperiod']), alpha=0.5) else: plt.plot(pf['periods'], pf['lspvals']/np.max(pf['lspvals']), label='%s P=%.5f' % (pf['method'], pf['bestperiod']), alpha=0.5) plt.xlabel('period [days]') plt.ylabel('normalized periodogram power') plt.xscale('log') plt.legend() plt.tight_layout() plt.savefig(outfile) plt.close('all') return outfile