#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# mast - Waqas Bhatti (wbhatti@astro.princeton.edu) - Oct 2018
# License: MIT. See the LICENSE file for more details.
'''
This interfaces with the MAST API. The main use for this (for now) is to fill in
TIC information for checkplots.
The MAST API service documentation is at:
https://mast.stsci.edu/api/v0/index.html
For a more general and useful interface to MAST, see the astroquery
package by A. Ginsburg, B. Sipocz, et al.:
http://astroquery.readthedocs.io
'''
#############
## LOGGING ##
#############
import logging
from astrobase import log_sub, log_fmt, log_date_fmt
# DEBUG selects the level that is handed to logging.basicConfig below
DEBUG = False
level = logging.DEBUG if DEBUG else logging.INFO

LOGGER = logging.getLogger(__name__)
logging.basicConfig(level=level,
                    style=log_sub,
                    format=log_fmt,
                    datefmt=log_date_fmt)

# module-level shortcuts bound to this module's logger methods
LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception
#############
## IMPORTS ##
#############
import os
import os.path
import hashlib
import time
import json
import random
# to do the queries
import requests
import requests.exceptions
###################
## FORM SETTINGS ##
###################
# maps an API version string to a dict holding the 'url' of that version's
# endpoint; looked up via the `apiversion` argument of the query functions
MAST_URLS = {
    'v0': {
        'url': 'https://mast.stsci.edu/api/v0/invoke',
    },
}
#####################
## QUERY FUNCTIONS ##
#####################
def mast_query(service,
               params,
               data=None,
               apiversion='v0',
               forcefetch=False,
               cachedir='~/.astrobase/mast-cache',
               verbose=True,
               timeout=10.0,
               refresh=5.0,
               maxtimeout=90.0,
               maxtries=3,
               raiseonfail=False,
               jitter=5.0):
    '''This queries the STScI MAST service for catalog data.

    The full JSON response of a successful query is written to a file in
    `cachedir`. The file name is derived from a SHA-256 hash of the request
    inputs, so repeating an identical request returns the cached file unless
    `forcefetch` is True.

    Parameters
    ----------

    service : str
        This is the name of the service to use. See
        https://mast.stsci.edu/api/v0/_services.html for a list of all available
        services.

    params : dict
        This is a dict containing the input params to the service as described
        on its details page linked in the `service description page on MAST
        <https://mast.stsci.edu/api/v0/_services.html>`_.

    data : dict or None
        This contains optional data to upload to the service.

    apiversion : str
        The API version of the MAST service to use. This sets the URL that this
        function will call, using `apiversion` as key into the `MAST_URLS` dict
        above.

    forcefetch : bool
        If this is True, the query will be retried even if cached results for
        it exist.

    cachedir : str
        This points to the directory where results will be downloaded. It is
        created if it does not exist.

    verbose : bool
        If True, will indicate progress and warn of any issues.

    timeout : float
        This is sent to the service as the 'timeout' input of the request. The
        HTTP request itself is given one extra second so the service gets the
        chance to time us out first.

    refresh : float
        This sets the amount of time in seconds to wait before asking again if
        the service reports that the query is still executing, or if the
        request timed out.

    maxtimeout : float
        The maximum total amount of `refresh` waiting time in seconds to
        accumulate before giving up. Only the `refresh` waits are counted
        here, not the time spent inside each HTTP request.

    maxtries : int
        The maximum number of request attempts that are allowed to time out
        before giving up. Polls answered with an 'EXECUTING' status are not
        counted against this; they are limited by `maxtimeout` only.

    raiseonfail : bool
        If this is True, an unexpected exception raised while making the
        request or handling its response is re-raised instead of being logged
        and turned into a None return value.

    jitter : float
        This is used to control the scale of the random wait in seconds before
        starting the query. Useful in parallelized situations. The wait is a
        random whole number of seconds between 1 and `int(jitter)`; values
        below 1 disable the wait.

    Returns
    -------

    dict or None
        On success, this returns a dict of the following form::

            {'params':dict of the input params used for the query,
             'provenance':'cache' or 'new download',
             'cachefname':path to the JSON file on disk with the response}

        Returns None if the query failed, timed out, was interrupted, or
        matched no objects.

    '''

    # this matches:
    # https://mast.stsci.edu/api/v0/class_mashup_1_1_mashup_request.html
    inputparams = {
        'format':'json',
        'params':params,
        'service':service,
        'timeout':timeout,
    }

    if data is not None:
        inputparams['data'] = data

    # make sure the cachedir exists
    if '~' in cachedir:
        cachedir = os.path.expanduser(cachedir)
    if not os.path.exists(cachedir):
        # exist_ok guards against another worker creating the directory
        # between the check above and this call
        os.makedirs(cachedir, exist_ok=True)

    # generate the cachefname from a hash of the request inputs
    xcachekey = '-'.join([repr(inputparams[x])
                          for x in sorted(inputparams.keys())])
    cachekey = hashlib.sha256(xcachekey.encode()).hexdigest()
    cachefname = os.path.join(
        cachedir,
        '%s.json' % (cachekey,)
    )

    ###########################
    ## USE THE CACHED RESULT ##
    ###########################

    if (not forcefetch) and os.path.exists(cachefname):

        if verbose:
            LOGINFO('getting cached MAST query result for '
                    'request: %s' %
                    (repr(inputparams)))

        return {
            'params':inputparams,
            'provenance':'cache',
            'cachefname':cachefname
        }

    #####################
    ## RUN A NEW QUERY ##
    #####################

    # random.randint only accepts integers (a float argument is a TypeError
    # on Python >= 3.12), so truncate the jitter scale before using it
    maxjitter = int(jitter) if jitter else 0
    if maxjitter >= 1:
        time.sleep(random.randint(1, maxjitter))

    provenance = 'new download'
    url = MAST_URLS[apiversion]['url']
    formdata = {'request':json.dumps(inputparams)}

    retdict = None
    timeelapsed = 0.0
    ntimeouts = 0

    while timeelapsed <= maxtimeout:

        try:

            resp = requests.post(url,
                                 data=formdata,
                                 # we'll let the service time us out first
                                 # if that fails, we'll timeout ourselves
                                 timeout=timeout+1.0)
            resp.raise_for_status()

            respjson = resp.json()
            status = respjson['status']

            if status == 'COMPLETE':

                nrows = len(respjson['data'])

                if nrows > 0:

                    with open(cachefname, 'w') as outfd:
                        json.dump(respjson, outfd)

                    retdict = {
                        'params':inputparams,
                        'provenance':provenance,
                        'cachefname':cachefname
                    }

                    if verbose:
                        LOGINFO('query successful. nmatches: %s' % nrows)

                else:

                    LOGERROR(
                        'no matching objects found for inputparams: %r' %
                        inputparams
                    )

                break

            # if we're still executing after the initial timeout is done
            elif status == 'EXECUTING':

                if verbose:
                    LOGINFO('query is still executing, '
                            'waiting %s seconds to retry...' % refresh)

                time.sleep(refresh)
                timeelapsed = timeelapsed + refresh

            else:

                LOGERROR('Query failed! Message from service: %s' %
                         respjson['msg'])
                break

        except requests.exceptions.Timeout:

            # only attempts that time out count against maxtries; don't
            # bother waiting again if this was the last allowed attempt
            ntimeouts = ntimeouts + 1
            if ntimeouts >= maxtries:
                break

            if verbose:
                LOGWARNING('MAST query try timed out, '
                           'site is probably down. '
                           'Waiting for %s seconds to try again...' %
                           refresh)

            time.sleep(refresh)
            timeelapsed = timeelapsed + refresh

        except KeyboardInterrupt:

            LOGERROR('MAST request wait aborted for '
                     '%s' % repr(inputparams))
            return None

        except Exception:

            LOGEXCEPTION('MAST query failed!')

            if raiseonfail:
                raise

            return None

    #
    # done with waiting for completion
    #
    if retdict is None:
        LOGERROR('Timed out, errored out, or reached maximum number '
                 'of tries with no response. Query was: %r' % inputparams)

    return retdict
def tic_conesearch(
        ra,
        decl,
        radius_arcmin=5.0,
        apiversion='v0',
        forcefetch=False,
        cachedir='~/.astrobase/mast-cache',
        verbose=True,
        timeout=10.0,
        refresh=5.0,
        maxtimeout=90.0,
        maxtries=3,
        jitter=5.0,
        raiseonfail=False
):
    '''This runs a TESS Input Catalog cone search on MAST.

    This is a thin wrapper that builds the inputs for the
    'Mast.Catalogs.Tic.Cone' service and hands them to `mast_query`.

    If you use this, please cite the TIC paper (Stassun et al 2018;
    http://adsabs.harvard.edu/abs/2018AJ....156..102S). Also see the "living"
    TESS input catalog docs:

    https://docs.google.com/document/d/1zdiKMs4Ld4cXZ2DW4lMX-fuxAF6hPHTjqjIwGqnfjqI

    Also see: https://mast.stsci.edu/api/v0/_t_i_cfields.html for the fields
    returned by the service and present in the result JSON file.

    Parameters
    ----------

    ra,decl : float
        The center coordinates of the cone-search in decimal degrees.

    radius_arcmin : float
        The cone-search radius in arcminutes. This is divided by 60 before
        being sent to the service.

    apiversion : str
        The API version of the MAST service to use. This sets the URL that
        will be called, using `apiversion` as key into the `MAST_URLS` dict
        above.

    forcefetch : bool
        If this is True, the query will be retried even if cached results for
        it exist.

    cachedir : str
        This points to the directory where results will be downloaded.

    verbose : bool
        If True, will indicate progress and warn of any issues.

    timeout, refresh, maxtimeout : float
        Timing controls in seconds, passed unchanged to `mast_query`; see that
        function for details.

    maxtries : int
        Passed unchanged to `mast_query`; see that function for details.

    jitter : float
        This is used to control the scale of the random wait in seconds before
        starting the query. Useful in parallelized situations.

    raiseonfail : bool
        Passed unchanged to `mast_query`; see that function for details.

    Returns
    -------

    dict or None
        Whatever `mast_query` returns for this request. On success, that is a
        dict of the following form::

            {'params':dict of the input params used for the query,
             'provenance':'cache' or 'new download',
             'cachefname':path to the JSON file on disk with the response}

    '''

    cone_params = {
        'ra':ra,
        'dec':decl,
        # arcminutes -> degrees
        'radius':radius_arcmin/60.0,
    }

    # everything else is handed to mast_query as-is
    query_options = {
        'jitter':jitter,
        'apiversion':apiversion,
        'forcefetch':forcefetch,
        'cachedir':cachedir,
        'verbose':verbose,
        'timeout':timeout,
        'refresh':refresh,
        'maxtimeout':maxtimeout,
        'maxtries':maxtries,
        'raiseonfail':raiseonfail,
    }

    return mast_query('Mast.Catalogs.Tic.Cone', cone_params, **query_options)
def tic_xmatch(
        ra,
        decl,
        radius_arcsec=5.0,
        apiversion='v0',
        forcefetch=False,
        cachedir='~/.astrobase/mast-cache',
        verbose=True,
        timeout=90.0,
        refresh=5.0,
        maxtimeout=180.0,
        maxtries=3,
        jitter=5.0,
        raiseonfail=False
):
    '''This does a cross-match with TIC.

    This is a thin wrapper that builds the upload table and inputs for the
    'Mast.Tic.Crossmatch' service and hands them to `mast_query`.

    Parameters
    ----------

    ra,decl : np.arrays or lists of floats
        The coordinates that will be cross-matched against the TIC. These are
        paired up element by element; every value is converted to a plain
        Python float before being uploaded.

    radius_arcsec : float
        The cross-match radius in arcseconds. This is divided by 3600 before
        being sent to the service.

    apiversion : str
        The API version of the MAST service to use. This sets the URL that
        will be called, using `apiversion` as key into the `MAST_URLS` dict
        above.

    forcefetch : bool
        If this is True, the query will be retried even if cached results for
        it exist.

    cachedir : str
        This points to the directory where results will be downloaded.

    verbose : bool
        If True, will indicate progress and warn of any issues.

    timeout, refresh, maxtimeout : float
        Timing controls in seconds, passed unchanged to `mast_query`; see that
        function for details.

    maxtries : int
        Passed unchanged to `mast_query`; see that function for details.

    jitter : float
        This is used to control the scale of the random wait in seconds before
        starting the query. Useful in parallelized situations.

    raiseonfail : bool
        Passed unchanged to `mast_query`; see that function for details.

    Returns
    -------

    dict or None
        Whatever `mast_query` returns for this request. On success, that is a
        dict of the following form::

            {'params':dict of the input params used for the query,
             'provenance':'cache' or 'new download',
             'cachefname':path to the JSON file on disk with the response}

    '''

    service = 'Mast.Tic.Crossmatch'

    # The upload table is serialized with json.dumps downstream, which does not
    # accept every numpy scalar type (e.g. float32), and its repr feeds the
    # cache key. Coercing to plain floats keeps both well-defined no matter
    # what kind of sequence the coordinates came in as.
    xmatch_input = {
        'fields':[{'name':'ra','type':'float'},
                  {'name':'dec','type':'float'}],
        'data':[{'ra':float(x), 'dec':float(y)}
                for (x, y) in zip(ra, decl)],
    }

    params = {'raColumn':'ra',
              'decColumn':'dec',
              # arcseconds -> degrees
              'radius':radius_arcsec/3600.0}

    return mast_query(service,
                      params,
                      data=xmatch_input,
                      jitter=jitter,
                      apiversion=apiversion,
                      forcefetch=forcefetch,
                      cachedir=cachedir,
                      verbose=verbose,
                      timeout=timeout,
                      refresh=refresh,
                      maxtimeout=maxtimeout,
                      maxtries=maxtries,
                      raiseonfail=raiseonfail)
def tic_objectsearch(
        objectid,
        idcol_to_use="ID",
        apiversion='v0',
        forcefetch=False,
        cachedir='~/.astrobase/mast-cache',
        verbose=True,
        timeout=90.0,
        refresh=5.0,
        maxtimeout=180.0,
        maxtries=3,
        jitter=5.0,
        raiseonfail=False
):
    '''This runs a TIC search for a specified object ID.

    This is a thin wrapper that builds a single-filter request for the
    'Mast.Catalogs.Filtered.Tic' service and hands it to `mast_query`.

    Parameters
    ----------

    objectid : str
        The object ID to look up information for. This is converted with
        `str()` before being sent.

    idcol_to_use : str
        This is the name of the object ID column to use when looking up the
        provided `objectid`. This is one of {'ID', 'HIP', 'TYC', 'UCAC',
        'TWOMASS', 'ALLWISE', 'SDSS', 'GAIA', 'APASS', 'KIC'}.

    apiversion : str
        The API version of the MAST service to use. This sets the URL that
        will be called, using `apiversion` as key into the `MAST_URLS` dict
        above.

    forcefetch : bool
        If this is True, the query will be retried even if cached results for
        it exist.

    cachedir : str
        This points to the directory where results will be downloaded.

    verbose : bool
        If True, will indicate progress and warn of any issues.

    timeout, refresh, maxtimeout : float
        Timing controls in seconds, passed unchanged to `mast_query`; see that
        function for details.

    maxtries : int
        Passed unchanged to `mast_query`; see that function for details.

    jitter : float
        This is used to control the scale of the random wait in seconds before
        starting the query. Useful in parallelized situations.

    raiseonfail : bool
        Passed unchanged to `mast_query`; see that function for details.

    Returns
    -------

    dict or None
        Whatever `mast_query` returns for this request. On success, that is a
        dict of the following form::

            {'params':dict of the input params used for the query,
             'provenance':'cache' or 'new download',
             'cachefname':path to the JSON file on disk with the response}

    '''

    # one filter: match the requested ID column against the given object ID
    id_filter = {"paramName": idcol_to_use,
                 "values":[str(objectid)]}
    search_params = {'columns':'*', 'filters':[id_filter]}

    # everything else is handed to mast_query as-is
    query_options = {
        'jitter':jitter,
        'apiversion':apiversion,
        'forcefetch':forcefetch,
        'cachedir':cachedir,
        'verbose':verbose,
        'timeout':timeout,
        'refresh':refresh,
        'maxtimeout':maxtimeout,
        'maxtries':maxtries,
        'raiseonfail':raiseonfail,
    }

    return mast_query('Mast.Catalogs.Filtered.Tic',
                      search_params,
                      **query_options)