#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# convert_identifiers.py - Luke Bouma (bouma.luke@gmail.com) - Oct 2019
# License: MIT - see the LICENSE file for the full text.
'''
Easy conversion between survey identifiers. Works best on bright and/or famous
objects, particularly when SIMBAD is involved.
``simbad_to_gaiadr2()``: given simbad name, attempt to get GAIA DR2 source_id
``gaiadr2_to_tic()``: given GAIA DR2 source_id, attempt to get TIC ID
``simbad_to_tic()``: given simbad name, get TIC ID
``tic_to_gaiadr2()``: given TIC ID, get GAIA DR2 source_id
'''
#############
## LOGGING ##
#############
import logging
from astrobase import log_sub, log_fmt, log_date_fmt
# Flip to True to get debug-level log output.
DEBUG = False
level = logging.DEBUG if DEBUG else logging.INFO

LOGGER = logging.getLogger(__name__)

# NOTE: this configures the root logger as an import-time side effect, using
# the style/format/date-format values imported from astrobase.
logging.basicConfig(
    level=level,
    style=log_sub,
    format=log_fmt,
    datefmt=log_date_fmt,
)

# Short module-level aliases for this module's logger methods.
LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception
#############
## IMPORTS ##
#############
import json
import numpy as np
from astropy.table import Table
from astrobase.services.simbad import tap_query as simbad_tap_query
from astrobase.services.gaia import objectid_search as gaia_objectid_search
from astrobase.services.mast import tic_conesearch, tic_objectsearch
###############
## FUNCTIONS ##
###############
def simbad_to_gaiadr2(
        simbad_name,
        simbad_mirror='simbad',
        returnformat='csv',
        forcefetch=False,
        cachedir='~/.astrobase/simbad-cache',
        verbose=True,
        timeout=10.0,
        refresh=2.0,
        maxtimeout=90.0,
        maxtries=1,
        complete_query_later=True
):
    """
    Convenience function that, given a SIMBAD object name, returns string of
    the Gaia-DR2 identifier.

    The name is looked up with a SIMBAD TAP query; the Gaia DR2 source_id is
    then pulled out of the "|"-separated list of cross-matched identifiers
    that SIMBAD returns for the object.

    Parameters
    ----------

    simbad_name : str
        The SIMBAD object name to search for. Non-string values are converted
        with `str()` (a warning is logged).

    simbad_mirror : str
        This is the key used to select a SIMBAD mirror. It is passed through
        unchanged to the SIMBAD TAP query function. If set, the specified
        mirror will be used. If None, a random mirror will be used.

    returnformat : {'csv','votable','json'}
        The returned file format to request from the SIMBAD TAP service.
        NOTE: the result file is always parsed as CSV by this function, so
        formats other than 'csv' will presumably fail to parse and yield None.

    forcefetch : bool
        If this is True, the query will be retried even if cached results for
        it exist.

    cachedir : str
        This points to the directory where results will be downloaded.

    verbose : bool
        If True, will indicate progress and warn of any issues.

    timeout : float
        This sets the amount of time in seconds to wait for the service to
        respond to our initial request.

    refresh : float
        This sets the amount of time in seconds to wait before checking if the
        result file is available. If the results file isn't available after
        `refresh` seconds have elapsed, the function will wait for `refresh`
        seconds continuously, until `maxtimeout` is reached or the results file
        becomes available.

    maxtimeout : float
        The maximum amount of time in seconds to wait for a result to become
        available after submitting our query request.

    maxtries : int
        The maximum number of tries (across all mirrors tried) to make to either
        submit the request or download the results, before giving up.

    complete_query_later : bool
        If set to True, a submitted query that does not return a result before
        `maxtimeout` has passed will be cancelled but its input request
        parameters and the result URL provided by the service will be saved. If
        this function is then called later with these same input request
        parameters, it will check if the query finally finished and a result is
        available. If so, will download the results instead of submitting a new
        query. If it's not done yet, will start waiting for results again. To
        force launch a new query with the same request parameters, set the
        `forcefetch` kwarg to True.

    Returns
    -------

    gaiadr2_id : str or None
        Returns the GAIA DR2 ID as a string. Returns None (after logging an
        error) if the query failed, if the name did not resolve to exactly one
        SIMBAD row, or if that row has no Gaia DR2 cross-match.

    """

    if not isinstance(simbad_name, str):
        LOGWARNING("The given simbad_name must be a string, "
                   "converting automatically...")
        use_simbad_name = str(simbad_name)
    else:
        use_simbad_name = simbad_name

    # ADQL string literals are single-quoted; an embedded single quote must be
    # doubled, otherwise names like "Barnard's star" produce a broken query.
    escaped_name = use_simbad_name.replace("'", "''")

    # TAP table list is here:
    # http://simbad.u-strasbg.fr/simbad/tap/tapsearch.html
    query = (
        "SELECT basic.OID, basic.RA, basic.DEC, "
        "ident.id, ident.oidref, ids.ids "
        "FROM basic "
        "LEFT OUTER JOIN ident ON ident.oidref = basic.oid "
        "LEFT OUTER JOIN ids ON ids.oidref = ident.oidref "
        "WHERE ident.id = '{use_simbad_name}'; "
    )
    formatted_query = query.format(use_simbad_name=escaped_name)

    # astroquery.simbad would have been fine here too. Sometimes pure astrobase
    # solutions are nice though ;-).
    r = simbad_tap_query(
        formatted_query,
        simbad_mirror=simbad_mirror,
        returnformat=returnformat,
        forcefetch=forcefetch,
        cachedir=cachedir,
        verbose=verbose,
        timeout=timeout,
        refresh=refresh,
        maxtimeout=maxtimeout,
        maxtries=maxtries,
        complete_query_later=complete_query_later
    )

    # A failed query (e.g. no result returned) or an unreadable result file
    # ends up here; handled the same way gaiadr2_to_tic handles its queries.
    try:
        df = Table.read(r['result'], format='csv')
    except Exception:
        LOGEXCEPTION(
            "Could not fetch SIMBAD info for name = %s", use_simbad_name
        )
        return None

    if len(df) != 1:
        errmsg = (
            'Expected 1 result from name {}; got {} results.'.format(
                use_simbad_name, len(df)
            )
        )
        LOGERROR(errmsg)
        return None

    # str() so that a masked/empty `ids` cell cannot break the substring test
    ids = str(df['ids'][0])

    # simbad returns a "|"-separated list of cross-matched names
    gaia_names = [n for n in ids.split('|') if 'Gaia DR2' in n]

    if not gaia_names:
        errmsg = (
            'Failed to retrieve Gaia DR2 identifier for {}'.format(
                use_simbad_name
            )
        )
        LOGERROR(errmsg)
        return None

    # the identifier looks like "Gaia DR2 <source_id>"; whitespace-split so
    # stray padding around the name cannot yield an empty last token
    gaia_id = gaia_names[0].split()[-1]

    return gaia_id
def gaiadr2_to_tic(
        source_id,
        gaia_mirror='heidelberg',
        gaia_data_release='dr2',
        returnformat='csv',
        forcefetch=False,
        cachedir='~/.astrobase/simbad-cache',
        verbose=True,
        timeout=10.0,
        refresh=2.0,
        maxtimeout=90.0,
        maxtries=1,
        complete_query_later=True
):
    """
    First, gets RA/dec from Gaia DR2, given source_id. Then searches TICv8
    spatially, and returns matches with the correct DR2 source_id.

    Parameters
    ----------

    source_id : str
        The GAIA DR2 source identifier. Must be convertible with `int()`.

    gaia_mirror : {'gaia','heidelberg','vizier'} or None
        This is the key used to select a GAIA catalog mirror. It is passed
        through unchanged to the Gaia object-ID search. If set, the specified
        mirror will be used. If None, a random mirror will be used.

    gaia_data_release: {'dr2', 'edr3'}
        The Gaia data release to use for the query. This provides hints for
        which table to use for the GAIA mirror being queried.

    returnformat : {'csv','votable','json'}
        The returned file format to request from the GAIA catalog service.
        NOTE: the result file is always parsed as CSV by this function.

    forcefetch : bool
        If this is True, the query will be retried even if cached results for
        it exist.

    cachedir : str
        This points to the directory where results will be downloaded.
        NOTE(review): the default points at the SIMBAD cache directory even
        though this is a Gaia query -- presumably a copy-paste leftover; kept
        unchanged for backward compatibility.

    verbose : bool
        If True, will indicate progress and warn of any issues.

    timeout : float
        This sets the amount of time in seconds to wait for the service to
        respond to our initial request.

    refresh : float
        This sets the amount of time in seconds to wait before checking if the
        result file is available. If the results file isn't available after
        `refresh` seconds have elapsed, the function will wait for `refresh`
        seconds continuously, until `maxtimeout` is reached or the results file
        becomes available.

    maxtimeout : float
        The maximum amount of time in seconds to wait for a result to become
        available after submitting our query request.

    maxtries : int
        The maximum number of tries (across all mirrors tried) to make to either
        submit the request or download the results, before giving up.

    complete_query_later : bool
        If set to True, a submitted query that does not return a result before
        `maxtimeout` has passed will be cancelled but its input request
        parameters and the result URL provided by the service will be saved. If
        this function is then called later with these same input request
        parameters, it will check if the query finally finished and a result is
        available. If so, will download the results instead of submitting a new
        query. If it's not done yet, will start waiting for results again. To
        force launch a new query with the same request parameters, set the
        `forcefetch` kwarg to True.

    Returns
    -------

    tic_id : str or None
        Returns the TIC ID of the object as a string. Returns None (after
        logging an error) if either lookup fails, if no TIC row carries this
        Gaia source_id, or if several rows do and they cannot be reduced to a
        single non-DUPLICATE entry.

    """

    r = gaia_objectid_search(source_id, gaia_mirror=gaia_mirror,
                             data_release=gaia_data_release,
                             returnformat=returnformat, forcefetch=forcefetch,
                             cachedir=cachedir, verbose=verbose,
                             timeout=timeout, refresh=refresh,
                             maxtimeout=maxtimeout, maxtries=maxtries,
                             complete_query_later=complete_query_later)

    # covers both a failed query (nothing subscriptable returned) and an
    # unreadable result file
    try:
        df = Table.read(r['result'], format='csv')
    except Exception:
        LOGEXCEPTION("Could not fetch GAIA info for source_id = %s", source_id)
        return None

    if len(df) != 1:
        errmsg = (
            'Expected 1 Gaia result from source_id {}; got {} results.'.
            format(source_id, len(df))
        )
        LOGERROR(errmsg)
        return None

    ra, dec = df['ra'][0], df['dec'][0]

    # use mast.tic_conesearch to find the closest match to the GAIA object
    tic_res = tic_conesearch(ra, dec, radius_arcmin=0.5,
                             timeout=timeout, refresh=refresh,
                             maxtimeout=maxtimeout, maxtries=maxtries)

    try:
        with open(tic_res['cachefname'], 'r') as infd:
            tic_info = json.load(infd)
        tic_rows = tic_info['data']
    except Exception:
        LOGEXCEPTION("Could not fetch TIC info for source_id = %s", source_id)
        return None

    if not tic_rows:
        errmsg = (
            'Expected 1 TIC result from source_id {}; got {} results.'.
            format(source_id, 0)
        )
        LOGERROR(errmsg)
        return None

    #
    # now, select the appropriate row in the returned matches
    #
    target_gaia_id = int(source_id)

    # rows without a Gaia cross-match get -1 so they can never match
    gaia_ids = np.array([
        (int(x['GAIA']) if x['GAIA'] is not None else -1)
        for x in tic_rows
    ])
    tic_ids = np.array([x['ID'] for x in tic_rows])
    # object dtype keeps the comparison below element-wise even when some
    # dispositions are None
    dispositions = np.array(
        [x['disposition'] for x in tic_rows], dtype=object
    )

    is_match = gaia_ids == target_gaia_id
    matched_tic_id = tic_ids[is_match]
    matched_dispositions = dispositions[is_match]

    if matched_tic_id.size == 0:
        LOGERROR("Could not find TIC ID for "
                 "source ID: %s in TIC (version: %s)" %
                 (source_id, tic_rows[0].get('version')))
        return None

    if matched_tic_id.size == 1:
        return str(matched_tic_id.item())

    # Several TIC rows point at the same Gaia source. Possible dispositions:
    # see https://outerspace.stsci.edu/display/TESS/TIC+v8.2+and+CTL+v8.xx+Data+Release+Notes
    # NULL, DUPLICATE (6), ARTIFACT (7), or SPLIT (8)
    # Drop the DUPLICATE rows and accept the result only if exactly one row
    # is left; otherwise the match is ambiguous (calling .item() on an array
    # of any other size would raise).
    non_duplicates = matched_tic_id[matched_dispositions != 'DUPLICATE']

    if non_duplicates.size == 1:
        return str(non_duplicates.item())

    LOGERROR("Got multiple unresolved matches in TIC IDs for "
             "source ID: %s in TIC (version: %s)" %
             (source_id, tic_rows[0].get('version')))
    return None
def simbad_to_tic(simbad_name):
    """
    Resolve a SIMBAD object name to a TIC ID, going through Gaia DR2.

    The name is first converted to a Gaia DR2 source_id with
    `simbad_to_gaiadr2`, and that source_id is then handed to
    `gaiadr2_to_tic`. Both calls use their default keyword arguments.

    Parameters
    ----------

    simbad_name : str
        The SIMBAD name of the object to look up the TIC ID for.

    Returns
    -------

    tic_id : str or None
        Whatever `gaiadr2_to_tic` returns for the resolved source_id, or None
        (with an error logged) if no Gaia DR2 source_id could be found for
        the name.

    """

    gaia_source_id = simbad_to_gaiadr2(simbad_name)

    # bail out early when the SIMBAD -> Gaia step came back empty
    if gaia_source_id is None:
        LOGERROR("Could not find TIC ID for SIMBAD name: %s" % simbad_name)
        return None

    return gaiadr2_to_tic(gaia_source_id)
def tic_to_gaiadr2(tic_id, raiseonfail=False):
    """
    This goes from a TIC name to a GAIA DR2 source_id.

    Parameters
    ----------

    tic_id : str
        The TIC ID to look for, e.g., "260265964".

    raiseonfail : bool
        If True, failures raise instead of returning None: errors while
        loading the TIC search result propagate unchanged, and a result that
        does not contain exactly one row with a Gaia DR2 ID raises ValueError.
        The flag is also passed through to the TIC object search. If False,
        failures are logged and None is returned.

    Returns
    -------

    source_id : str or None
        Returns the GAIA DR2 ID of the object as a string, or None if it could
        not be determined and `raiseonfail` is False.

    Raises
    ------

    ValueError
        If `raiseonfail` is True and the search did not return exactly one
        TIC row, or that row has no Gaia DR2 ID.

    """

    tic_res = tic_objectsearch(tic_id, raiseonfail=raiseonfail)

    # covers a failed search (nothing subscriptable returned), a missing or
    # unreadable cache file, and a result without a 'data' entry
    try:
        with open(tic_res['cachefname'], 'r') as infd:
            tic_info = json.load(infd)
        tic_rows = tic_info['data']
    except Exception:
        if raiseonfail:
            raise
        LOGEXCEPTION("Could not fetch TIC info for tic_id = %s", tic_id)
        return None

    if len(tic_rows) != 1:
        errmsg = (
            'Expected exactly 1 TIC result from tic_id {}; got {}.'.
            format(tic_id, len(tic_rows))
        )
        if raiseonfail:
            raise ValueError(errmsg)
        # plain error log: there is no active exception to attach here
        LOGERROR(errmsg)
        return None

    gaia_id = tic_rows[0]['GAIA']

    # a TIC row may have no Gaia cross-match; don't hand back the string 'None'
    if gaia_id is None:
        errmsg = 'TIC ID {} has no GAIA DR2 source_id.'.format(tic_id)
        if raiseonfail:
            raise ValueError(errmsg)
        LOGERROR(errmsg)
        return None

    return str(gaia_id)