# Source code for astrobase.services.lccs

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# lccs.py - Waqas Bhatti (wbhatti@astro.princeton.edu) - Aug 2018
# License: MIT - see LICENSE for the full text.

'''This contains functions to search for objects and get light curves from a
Light Curve Collection server (https://github.com/waqasbhatti/lcc-server) using
its HTTP API.

The LCC-Server requires an API key to access most services. The service
functions in this module will automatically acquire an anonymous user API key on
first use (and upon API key expiry afterwards). If you sign up for an LCC-Server
user account, you can import the API key generated for that account on the user
home page. To do this, use the import_apikey function in this module.

This currently supports the following LCC-Server services::

    conesearch   : cone_search(lcc_server_url, center_ra, center_decl, ...)
    ftsquery     : fulltext_search(lcc_server_url, searchterm,
                                   sesame_lookup=False, ...)
    columnsearch : column_search(lcc_server_url, filters, ...)
    xmatch       : xmatch_search(lcc_server_url, file_to_upload, ...)

The functions above will download the data products (data table CSVs, light
curve ZIP files) of the search results automatically, or in case the query takes
too long, will return within a configurable timeout. The query information is
cached to `~/.astrobase/lccs`, and can be used to download data products for
long-running queries later.

The functions below support various auxiliary LCC services::

    get-dataset  : get_dataset(lcc_server_url, dataset_id)
    objectinfo   : object_info(lcc_server_url, objectid, collection, ...)
    dataset-list : list_recent_datasets(lcc_server_url, nrecent=25, ...)
    collections  : list_lc_collections(lcc_server_url)

'''

# put this in here because lccs can be used as a standalone module
__version__ = '0.5.3'


#############
## LOGGING ##
#############

import logging

# Log record layout shared by all astrobase modules. Records use
# str.format-style ('{') placeholders: a one-character level name, the
# timestamp, then the emitting module and line number.
log_sub = '{'
log_fmt = '[{levelname:1.1} {asctime} {module}:{lineno}] {message}'
log_date_fmt = '%y%m%d %H:%M:%S'

# DEBUG selects the level that is handed to logging.basicConfig below
DEBUG = False
level = logging.DEBUG if DEBUG else logging.INFO

LOGGER = logging.getLogger(__name__)
logging.basicConfig(level=level,
                    style=log_sub,
                    format=log_fmt,
                    datefmt=log_date_fmt)

# short aliases for this module's logger methods
LOGDEBUG, LOGINFO, LOGWARNING, LOGERROR, LOGEXCEPTION = (
    LOGGER.debug,
    LOGGER.info,
    LOGGER.warning,
    LOGGER.error,
    LOGGER.exception,
)


# Provide `datetime` and a tzinfo object `utc` representing UTC. The fallback
# is only meant for interpreters whose datetime module has no `timezone`, so
# only a failed import (and nothing else) should trigger it.
try:
    from datetime import datetime, timezone
    utc = timezone.utc
except ImportError:
    from datetime import datetime, timedelta, tzinfo

    # we'll need to instantiate a tzinfo object because py2.7's datetime
    # doesn't have the super convenient timezone object (seriously)
    # https://docs.python.org/2/library/datetime.html#datetime.tzinfo.fromutc
    ZERO = timedelta(0)

    class UTC(tzinfo):
        """Minimal tzinfo implementation with a fixed zero offset (UTC)."""

        def utcoffset(self, dt):
            # UTC is never offset from itself
            return ZERO

        def tzname(self, dt):
            return "UTC"

        def dst(self, dt):
            # no daylight saving time in UTC
            return ZERO

    utc = UTC()


####################
## SYSTEM IMPORTS ##
####################

import os
import os.path
import stat
import json
import sys
import time

try:
    import cPickle as pickle
except Exception:
    import pickle


# import url methods here.  we use built-ins because we want this module to be
# usable as a single file. otherwise, we'd use something sane like Requests.
from urllib.request import urlopen, Request
from urllib.error import HTTPError
from urllib.parse import urlencode, urlparse


####################
## API KEY CONFIG ##
####################

def check_existing_apikey(lcc_server):
    '''This validates if an API key for the specified LCC-Server is available.

    API keys are stored using the following file scheme (the URL scheme
    separator `://` is replaced with a dash)::

        ~/.astrobase/lccs/apikey-[http|https]-domain.of.lccserver.org

    e.g. for the HAT LCC-Server at https://data.hatsurveys.org::

        ~/.astrobase/lccs/apikey-https-data.hatsurveys.org

    The key file is only read if it is a regular file with permissions set to
    exactly 600 (readable/writeable by the user only). It must contain the API
    key and its expiry date separated by whitespace.

    Parameters
    ----------

    lcc_server : str
        The base URL of the LCC-Server for which the existence of API keys will
        be checked.

    Returns
    -------

    (apikey_ok, apikey_str, expiry) : tuple
        The returned tuple contains the status of the API key, the API key
        itself if present, and its expiry date (as the str stored in the key
        file) if present. If the key file is missing, has bad permissions, or
        can't be parsed, this is `(False, None, None)`. If the key has expired,
        this is `(False, apikey_str, expiry)`.

    '''

    APIKEYFILE = os.path.join(
        os.path.expanduser('~'),
        '.astrobase',
        'lccs',
        'apikey-%s' % lcc_server.replace(
            'https://', 'https-'
        ).replace(
            'http://', 'http-'
        )
    )

    if not os.path.exists(APIKEYFILE):

        LOGWARNING('No LCC-Server API key '
                   'found in: {apikeyfile}'.format(apikeyfile=APIKEYFILE))
        return False, None, None

    # check if this file is a regular file readable/writeable by user only
    filemode = os.stat(APIKEYFILE)[stat.ST_MODE]

    if not (stat.S_ISREG(filemode) and stat.S_IMODE(filemode) == 0o600):

        LOGWARNING('The API key file %s has bad permissions '
                   'and is insecure, not reading it.\n'
                   '(you need to chmod 600 this file)'
                   % APIKEYFILE)
        return False, None, None

    with open(APIKEYFILE) as infd:
        contents = infd.read().split()

    # we expect exactly two items: the key and its expiry timestamp
    if len(contents) != 2:

        LOGERROR('The API key file %s is malformed, not using it.'
                 % APIKEYFILE)
        return False, None, None

    apikey, expires = contents

    # get today's datetime
    now = datetime.now(utc)

    try:

        if sys.version_info[:2] < (3,7):
            # this hideous incantation is required for lesser Pythons. the
            # fractional seconds are optional in ISO timestamps, so pick the
            # format based on what's actually there
            stamp = expires.replace('Z','')
            if '.' in stamp:
                stampfmt = '%Y-%m-%dT%H:%M:%S.%f'
            else:
                stampfmt = '%Y-%m-%dT%H:%M:%S'
            expdt = datetime.strptime(stamp, stampfmt).replace(tzinfo=utc)
        else:
            expdt = datetime.fromisoformat(expires.replace('Z','+00:00'))
            # a timestamp without a UTC offset parses to a naive datetime,
            # which can't be compared against the aware `now`. treat it as
            # UTC, just like the branch above does.
            if expdt.tzinfo is None:
                expdt = expdt.replace(tzinfo=utc)

    except ValueError:

        LOGERROR('could not parse the expiry date: %s in API key file: %s'
                 % (expires, APIKEYFILE))
        return False, None, None

    if now > expdt:
        LOGERROR('API key has expired. expiry was on: %s' % expires)
        return False, apikey, expires

    return True, apikey, expires


def get_new_apikey(lcc_server):
    '''This gets a new API key from the specified LCC-Server.

    NOTE: this only gets an anonymous API key. To get an API key tied to a user
    account (and associated privilege level), see the `import_apikey` function
    below.

    The key and its expiry date are written to the key file for this server
    under `~/.astrobase/lccs`, which is then chmod-ed so only the user can
    read/write it.

    Parameters
    ----------

    lcc_server : str
        The base URL of the LCC-Server from where the API key will be fetched.

    Returns
    -------

    (apikey, expiry) : tuple
        This returns a tuple with the API key and its expiry date. If the
        server responds with an HTTP status code other than 200, this returns
        None instead.

    '''

    APIKEYFILE = os.path.join(
        os.path.expanduser('~'),
        '.astrobase',
        'lccs',
        'apikey-%s' % lcc_server.replace(
            'https://', 'https-'
        ).replace(
            'http://', 'http-'
        )
    )

    # url for getting an API key
    url = '%s/api/key' % lcc_server

    # get the API key; the context manager makes sure the connection is closed
    with urlopen(url) as resp:

        if resp.code != 200:

            LOGERROR('could not fetch the API key from LCC-Server at: %s' %
                     lcc_server)
            # urllib responses have a .code attribute (not .status_code)
            LOGERROR('the HTTP status code was: %s' % resp.code)
            return None

        respdict = json.loads(resp.read())

    #
    # now that we have an API key dict, get the API key out of it and write it
    # to the APIKEYFILE
    #
    apikey = respdict['result']['apikey']
    expires = respdict['result']['expires']

    # write this to the apikey file. exist_ok avoids failing if the directory
    # shows up between a check and the makedirs call
    os.makedirs(os.path.dirname(APIKEYFILE), exist_ok=True)

    with open(APIKEYFILE,'w') as outfd:
        outfd.write('%s %s\n' % (apikey, expires))

    # chmod it to the correct value
    os.chmod(APIKEYFILE, 0o100600)

    LOGINFO('key fetched successfully from: %s. expires on: %s' % (lcc_server,
                                                                   expires))
    LOGINFO('written to: %s' % APIKEYFILE)

    return apikey, expires


def import_apikey(lcc_server, apikey_json):
    '''This imports an API key from a JSON file and writes it to the cache dir.

    Use this with the JSON file downloaded from API key download link on your
    LCC-Server user home page. The API key will thus be tied to the privileges
    of that user account and can then access objects, datasets, and collections
    marked as private for the user only or shared with that user.

    Parameters
    ----------

    lcc_server : str
        The base URL of the LCC-Server to get the API key for.

    apikey_json : str
        The path to the JSON file containing the API key. This file must
        contain a JSON object with the keys `apikey` and `expires`.

    Returns
    -------

    (apikey, expiry) : tuple
        This returns a tuple with the API key and its expiry date.

    '''

    APIKEYFILE = os.path.join(
        os.path.expanduser('~'),
        '.astrobase',
        'lccs',
        'apikey-%s' % lcc_server.replace(
            'https://', 'https-'
        ).replace(
            'http://', 'http-'
        )
    )

    # get the JSON
    with open(apikey_json,'r') as infd:
        respdict = json.load(infd)

    #
    # now that we have an API key dict, get the API key out of it and write it
    # to the APIKEYFILE
    #
    apikey = respdict['apikey']
    expires = respdict['expires']

    # write this to the apikey file. exist_ok avoids failing if the directory
    # shows up between a check and the makedirs call
    os.makedirs(os.path.dirname(APIKEYFILE), exist_ok=True)

    with open(APIKEYFILE,'w') as outfd:
        outfd.write('%s %s\n' % (apikey, expires))

    # chmod it to the correct value
    os.chmod(APIKEYFILE, 0o100600)

    # nothing was fetched from the server here, so say 'imported'
    LOGINFO('key imported successfully for: %s. expires on: %s' % (lcc_server,
                                                                   expires))
    LOGINFO('written to: %s' % APIKEYFILE)

    return apikey, expires


##############################
## QUERY HANDLING FUNCTIONS ##
##############################

def submit_post_searchquery(url, data, apikey):
    '''This submits a POST query to an LCC-Server search API endpoint.

    Handles streaming of the results, and returns the final JSON stream. Also
    handles results that time out.

    When the query completes or is sent to the background, the final result
    dict is also pickled to `~/.astrobase/lccs/query-[setid].pkl` so the data
    products can be downloaded later with `retrieve_dataset_files`.

    Parameters
    ----------

    url : str
        The URL of the search API endpoint to hit. This is something like
        `https://data.hatsurveys.org/api/conesearch`

    data : dict
        A dict of the search query parameters to pass to the search service.
        The `columns` and `collections` keys are sent as `columns[]` and
        `collections[]` respectively.

    apikey : str
        The API key to use to access the search service. API keys are required
        for all POST request made to an LCC-Server's API endpoints.

    Returns
    -------

    (status_flag, data_dict, dataset_id) : tuple
        This returns a tuple containing the status of the request: ('ok',
        'failed', 'background'), a dict parsed from the JSON result of the
        request, and a dataset ID, which can be used to reconstruct the URL on
        the LCC-Server where the results can be browsed. If the request itself
        fails, or if the server stops sending results before reporting a final
        status, this is `('failed', None, None)`.

    '''

    # first, we need to convert any columns and collections items to broken out
    # params
    renamed_keys = {'columns':'columns[]',
                    'collections':'collections[]'}
    postdata = {renamed_keys.get(key, key): data[key] for key in data}

    # do the urlencode with doseq=True
    # we also need to encode to bytes
    encoded_postdata = urlencode(postdata, doseq=True).encode()

    # if apikey is not None, add it in as an Authorization header
    if apikey:
        headers = {'Authorization':'Bearer: %s' % apikey}
    else:
        headers = {}

    LOGINFO('submitting search query to LCC-Server API URL: %s' % url)

    try:

        # hit the server with a POST request
        req = Request(url, data=encoded_postdata, headers=headers)

        # the context manager closes the connection on every exit path
        with urlopen(req) as resp:

            # if the response was not OK, then we probably failed
            if resp.code != 200:

                try:
                    errdict = json.load(resp)
                    LOGERROR(errdict['message'])
                except Exception:
                    LOGEXCEPTION('failed to submit query to %s' % url)

                return 'failed', None, None

            # we'll iterate over the lines in the response
            # this works super-well for ND-JSON!
            for line in resp:

                # skip blank lines instead of choking on them in json.loads
                if not line.strip():
                    continue

                resultdict = json.loads(line)
                msg = resultdict['message']
                status = resultdict['status']

                if status != 'failed':
                    LOGINFO('status: %s, %s' % (status, msg))
                else:
                    LOGERROR('status: %s, %s' % (status, msg))

                # here, we'll decide what to do about the query

                # completed query or query sent to background...
                if status in ('ok','background'):

                    setid = resultdict['result']['setid']

                    # save the data pickle to astrobase lccs directory
                    outpickle = os.path.join(os.path.expanduser('~'),
                                             '.astrobase',
                                             'lccs',
                                             'query-%s.pkl' % setid)
                    os.makedirs(os.path.dirname(outpickle), exist_ok=True)

                    with open(outpickle,'wb') as outfd:
                        pickle.dump(resultdict, outfd, pickle.HIGHEST_PROTOCOL)
                        LOGINFO('saved query info to %s, use this to '
                                'download results later with '
                                'retrieve_dataset_files' % outpickle)

                    # we're done at this point, return
                    return status, resultdict, setid

                # the query probably failed...
                elif status == 'failed':

                    # we're done at this point, return
                    return status, resultdict, None

            # if we get here, the stream ended without a final status. callers
            # index into the returned tuple, so never fall through to an
            # implicit None return
            LOGERROR('the response from %s ended before '
                     'a final query status was received' % url)
            return 'failed', None, None

    except HTTPError as e:

        LOGERROR('could not submit query to LCC API at: %s' % url)
        LOGERROR('HTTP status code was %s, reason: %s' % (e.code, e.reason))
        return 'failed', None, None


def _download_dataset_product(link, filename, localdir, apikey, exists_msg):
    '''This downloads a single dataset product file to `localdir`.

    If the file already exists in `localdir`, logs `exists_msg` as a warning
    and doesn't download it again.

    Returns the path to the local file, or None if the server responded with an
    HTTP error.

    '''

    localf = os.path.join(localdir, filename)

    LOGINFO('getting %s...' % link)

    if os.path.exists(localf):
        LOGWARNING(exists_msg)
        return localf

    # if apikey is not None, add it in as an Authorization header
    if apikey:
        headers = {'Authorization':'Bearer: %s' % apikey}
    else:
        headers = {}

    try:

        req = Request(link, data=None, headers=headers)

        # read the response fully before creating the local file, so a failed
        # download never leaves behind an empty file that would later be
        # mistaken for an already-downloaded product
        with urlopen(req) as resp:
            payload = resp.read()

    except HTTPError as e:

        LOGERROR('could not download %s, '
                 'HTTP status code was: %s, reason: %s' %
                 (link, e.code, e.reason))
        return None

    # save the file
    LOGINFO('saving %s' % filename)
    with open(localf, 'wb') as outfd:
        outfd.write(payload)

    LOGINFO('OK -> %s' % localf)
    return localf


def retrieve_dataset_files(searchresult,
                           getpickle=False,
                           outdir=None,
                           apikey=None):
    '''This retrieves a search result dataset's CSV and any LC zip files.

    Takes the output from the `submit_post_searchquery` function above or a
    pickle file generated from that function's output if the query timed out.

    Files that already exist in the output directory are not downloaded again.

    Parameters
    ----------

    searchresult : str or tuple
        If provided as a str, points to the pickle file created using the output
        from the `submit_post_searchquery` function. If provided as a tuple,
        this is the result tuple from the `submit_post_searchquery` function.

    getpickle : False
        If this is True, will also download the dataset's pickle. Note that
        LCC-Server is a Python 3.6+ package (while lccs.py still works with
        Python 2.7) and it saves its pickles in pickle.HIGHEST_PROTOCOL for
        efficiency, so these pickles may be unreadable in lower Pythons. As an
        alternative, the dataset CSV contains the full data table and all the
        information about the dataset in its header, which is JSON
        parseable. You can also use the function `get_dataset` below to get the
        dataset pickle information in JSON form.

    outdir : None or str
        If this is a str, points to the output directory where the results will
        be placed (it will be created if it doesn't exist). If it's None, they
        will be placed in the current directory.

    apikey : str or None
        If this is a str, uses the given API key to authenticate the download
        request. This is useful when you have a private dataset you want to get
        products for.

    Returns
    -------

    (local_dataset_csv, local_dataset_lczip, local_dataset_pickle) : tuple
        This returns a tuple containing paths to the dataset CSV, LC zipfile,
        and the dataset pickle if getpickle was set to True (None otherwise).
        An item is None if its download failed with an HTTP error. If the input
        can't be understood or contains no dataset info, all items are None.

    '''

    # this handles the direct result case from submit_*_query functions
    if isinstance(searchresult, tuple) and len(searchresult) == 3:

        info, setid = searchresult[1:]

    # handles the case where we give the function a existing query pickle
    elif isinstance(searchresult, str) and os.path.exists(searchresult):

        # NOTE: unpickling can execute arbitrary code; only pass in query
        # pickles that were written by this module
        with open(searchresult,'rb') as infd:
            info = pickle.load(infd)
        setid = info['result']['setid']

    else:

        LOGERROR('could not understand input, '
                 'we need a searchresult from the '
                 'lccs.submit_post_searchquery function or '
                 'the path to an existing query pickle')
        return None, None, None

    # a failed query gives us a ('failed', None, None) tuple, which has nothing
    # we can use to build the download links
    if not info or setid is None:

        LOGERROR('the search result contains no dataset info, '
                 'nothing to download')
        return None, None, None

    # now that we have everything, let's download some files!

    dataset_pickle = 'dataset-%s.pkl.gz' % setid
    dataset_csv = 'dataset-%s.csv' % setid
    dataset_lczip = 'lightcurves-%s.zip' % setid

    if outdir is None:
        localdir = os.getcwd()
    else:
        localdir = outdir
        os.makedirs(localdir, exist_ok=True)

    # the dataset files live at /d/ and the LC ZIPs at /p/ on the same server
    # that hosts the dataset page
    server_scheme, server_netloc = urlparse(info['result']['seturl'])[:2]
    server_base = '%s://%s' % (server_scheme, server_netloc)

    dataset_pickle_link = '%s/d/%s' % (server_base, dataset_pickle)
    dataset_csv_link = '%s/d/%s' % (server_base, dataset_csv)
    dataset_lczip_link = '%s/p/%s' % (server_base, dataset_lczip)

    # get the dataset pickle
    if getpickle:
        local_dataset_pickle = _download_dataset_product(
            dataset_pickle_link,
            dataset_pickle,
            localdir,
            apikey,
            'dataset pickle already exists, '
            'not downloading again..'
        )
    else:
        local_dataset_pickle = None

    # get the dataset CSV
    local_dataset_csv = _download_dataset_product(
        dataset_csv_link,
        dataset_csv,
        localdir,
        apikey,
        'dataset CSV already exists, not downloading again...'
    )

    # get the dataset LC zip
    local_dataset_lczip = _download_dataset_product(
        dataset_lczip_link,
        dataset_lczip,
        localdir,
        apikey,
        'dataset LC ZIP already exists, '
        'not downloading again...'
    )

    return local_dataset_csv, local_dataset_lczip, local_dataset_pickle


###########################
## MAIN SEARCH FUNCTIONS ##
###########################

def cone_search(lcc_server,
                center_ra,
                center_decl,
                radiusarcmin=5.0,
                result_visibility='unlisted',
                email_when_done=False,
                collections=None,
                columns=None,
                filters=None,
                sortspec=None,
                samplespec=None,
                limitspec=None,
                download_data=True,
                outdir=None,
                maxtimeout=300.0,
                refresh=15.0):

    '''This runs a cone-search query.

    Parameters
    ----------

    lcc_server : str
        This is the base URL of the LCC-Server to talk to.  (e.g. for HAT, use:
        https://data.hatsurveys.org)

    center_ra,center_decl : float or str
        These are the central coordinates of the search to conduct. These can be
        either decimal degrees of type float, or sexagesimal coordinates of type
        str:

        - OK: 290.0, 45.0
        - OK: 15:00:00 +45:00:00
        - OK: 15 00 00.0 -45 00 00.0
        - NOT OK: 290.0 +45:00:00
        - NOT OK: 15:00:00 45.0

    radiusarcmin : float
        This is the search radius to use for the cone-search. This is in
        arcminutes. The maximum radius you can use is 60 arcminutes = 1 degree.

    result_visibility : {'private', 'unlisted', 'public'}
        This sets the visibility of the dataset produced from the search
        result::

               'private' -> the dataset and its products are not visible or
                            accessible by any user other than the one that
                            created the dataset.

               'unlisted' -> the dataset and its products are not visible in the
                             list of public datasets, but can be accessed if the
                             dataset URL is known

               'public' -> the dataset and its products are visible in the list
                           of public datasets and can be accessed by anyone.

    email_when_done : bool
        If True, the LCC-Server will email you when the search is complete. This
        will also set `download_data` to False. Using this requires an
        LCC-Server account and an API key tied to that account.

    collections : list of str or None
        This is a list of LC collections to search in. If this is None, all
        collections will be searched.

    columns : list of str or None
        This is a list of columns to return in the results. Matching objects'
        object IDs, RAs, DECs, and links to light curve files will always be
        returned so there is no need to specify these columns. If None, only
        these columns will be returned: 'objectid', 'ra', 'decl', 'lcfname'

    filters : str or None
        This is an SQL-like string to use to filter on database columns in the
        LCC-Server's collections. To see the columns available for a search,
        visit the Collections tab in the LCC-Server's browser UI. The filter
        operators allowed are::

            lt      -> less than
            gt      -> greater than
            ge      -> greater than or equal to
            le      -> less than or equal to
            eq      -> equal to
            ne      -> not equal to
            ct      -> contains text
            isnull  -> column value is null
            notnull -> column value is not null

        You may use the `and` and `or` operators between filter specifications
        to chain them together logically.

        Example filter strings::

            "(propermotion gt 200.0) and (sdssr lt 11.0)"
            "(dered_jmag_kmag gt 2.0) and (aep_000_stetsonj gt 10.0)"
            "(gaia_status ct 'ok') and (propermotion gt 300.0)"
            "(simbad_best_objtype ct 'RR') and (dered_sdssu_sdssg lt 0.5)"

    sortspec : tuple of two strs or None
        If not None, this should be a tuple of two items::

            ('column to sort by', 'asc|desc')

        This sets the column to sort the results by. For cone_search, the
        default column and sort order are 'dist_arcsec' and 'asc', meaning the
        distance from the search center in ascending order.

    samplespec : int or None
        If this is an int, will indicate how many rows from the initial search
        result will be uniformly random sampled and returned.

    limitspec : int or None
        If this is an int, will indicate how many rows from the initial search
        result to return in total.

        `sortspec`, `samplespec`, and `limitspec` are applied in this order:

            sample -> sort -> limit

    download_data : bool
        This sets if the accompanying data from the search results will be
        downloaded automatically. This includes the data table CSV and a light
        curve ZIP file. Note that if the search service indicates that your
        query is still in progress, this function will block until the light
        curve ZIP file becomes available. The maximum wait time in seconds is
        set by maxtimeout and the refresh interval is set by refresh.

        To avoid the wait block, set download_data to False and the function
        will write a pickle file to `~/.astrobase/lccs/query-[setid].pkl`
        containing all the information necessary to retrieve these data files
        later when the query is done. To do so, call the
        `retrieve_dataset_files` with the path to this pickle file.

    outdir : str or None
        If this is provided, sets the output directory of the downloaded dataset
        files. If None, they will be downloaded to the current directory.

    maxtimeout : float
        The maximum time in seconds to wait for the LCC-Server to respond with a
        result before timing out. You can use the `retrieve_dataset_files`
        function to get results later as needed.

    refresh : float
        The time to wait in seconds before pinging the LCC-Server to see if a
        search query has completed and dataset result files can be downloaded.

    Returns
    -------

    tuple
        Returns a tuple with the following elements::

            (search result status dict,
             search result CSV file path,
             search result LC ZIP path)

        The file paths are None if the data was not downloaded. If no API key
        could be obtained or the search service gave no usable response, all
        elements are None.

    '''

    # turn the input into a param dict

    # the coordinates can be sexagesimal strs, which can't go through a float
    # format spec, so pass those along as they are
    # NOTE(review): this assumes the conesearch endpoint parses sexagesimal
    # coordinates in the 'coords' param, as the docstring above promises --
    # confirm against the LCC-Server
    if isinstance(center_ra, str) and isinstance(center_decl, str):
        coords = '%s %s %.1f' % (center_ra.strip(),
                                 center_decl.strip(),
                                 radiusarcmin)
    else:
        coords = '%.5f %.5f %.1f' % (center_ra, center_decl, radiusarcmin)

    params = {
        'coords':coords
    }

    if collections:
        params['collections'] = collections
    if columns:
        params['columns'] = columns
    if filters:
        params['filters'] = filters
    if sortspec:
        params['sortspec'] = json.dumps([sortspec])
    if samplespec:
        params['samplespec'] = int(samplespec)
    if limitspec:
        params['limitspec'] = int(limitspec)

    params['visibility'] = result_visibility
    params['emailwhendone'] = email_when_done

    # we won't wait for the LC ZIP to complete if email_when_done = True
    if email_when_done:
        download_data = False

    # check if we have an API key already
    have_apikey, apikey, _ = check_existing_apikey(lcc_server)

    # if not, get a new one
    if not have_apikey:

        newkey = get_new_apikey(lcc_server)

        # get_new_apikey returns None if the server didn't hand over a key
        if newkey is None:
            LOGERROR('could not get an API key from %s, '
                     'not running the search query' % lcc_server)
            return None, None, None

        apikey, _ = newkey

    # hit the server
    api_url = '%s/api/conesearch' % lcc_server

    searchresult = submit_post_searchquery(api_url, params, apikey)

    # guard against a missing result so we don't fail on indexing into it below
    if not searchresult:
        LOGERROR('no usable response from the search service at: %s' % api_url)
        return None, None, None

    # check the status of the search
    status = searchresult[0]

    # now we'll check if we want to download the data
    if not download_data:
        return searchresult[1], None, None

    if status == 'ok':

        LOGINFO('query complete, downloading associated data...')
        csv, lczip, pkl = retrieve_dataset_files(searchresult,
                                                 outdir=outdir,
                                                 apikey=apikey)

        if pkl:
            return searchresult[1], csv, lczip, pkl
        else:
            return searchresult[1], csv, lczip

    elif status == 'background':

        LOGINFO('query is not yet complete, '
                'waiting up to %.1f minutes, '
                'updates every %s seconds (hit Ctrl+C to cancel)...' %
                (maxtimeout/60.0, refresh))

        timewaited = 0.0

        while timewaited < maxtimeout:

            try:

                time.sleep(refresh)
                csv, lczip, pkl = retrieve_dataset_files(searchresult,
                                                         outdir=outdir,
                                                         apikey=apikey)

                # we're done once both products have made it to disk
                if (csv and os.path.exists(csv) and
                    lczip and os.path.exists(lczip)):

                    LOGINFO('all dataset products collected')
                    return searchresult[1], csv, lczip

                timewaited = timewaited + refresh

            except KeyboardInterrupt:

                LOGWARNING('abandoned wait for downloading data')
                return searchresult[1], None, None

        LOGERROR('wait timed out.')
        return searchresult[1], None, None

    else:

        LOGERROR('could not download the data for this query result')
        return searchresult[1], None, None


def fulltext_search(lcc_server,
                    searchterm,
                    sesame_lookup=False,
                    result_visibility='unlisted',
                    email_when_done=False,
                    collections=None,
                    columns=None,
                    filters=None,
                    sortspec=None,
                    samplespec=None,
                    limitspec=None,
                    download_data=True,
                    outdir=None,
                    maxtimeout=300.0,
                    refresh=15.0):

    '''Runs a full-text search query against an LCC-Server.

    The query is POSTed to the server's `/api/ftsquery` endpoint using an API
    key (an existing one if available, otherwise a newly acquired one). If
    requested, the dataset products for the result are then downloaded,
    polling the server while the query is still running in the background.

    Parameters
    ----------

    lcc_server : str
        The base URL of the LCC-Server to talk to (e.g. for HAT, use:
        https://data.hatsurveys.org).

    searchterm : str
        The term to look for in a full-text search of the LCC-Server's
        collections: an object name, tag, description, etc., as noted in the
        full-text search tab of the LCC-Server's browser UI. To require an
        exact match to a string (like an object name), wrap it in double
        quotes, e.g. searchterm = '"exact match to me needed"'.

    sesame_lookup : bool
        If True, the LCC-Server will treat the search term as a single
        object's name, resolve its coordinates using the CDS SIMBAD SESAME
        name resolution service, and then search for matching objects. The
        name may be a star name known to SIMBAD (a search radius of 5
        arcseconds is used) or an extended source name such as an open
        cluster or nebula (a search radius of 1 degree is used to find all
        nearby database objects associated with that source).

    result_visibility : {'private', 'unlisted', 'public'}
        The visibility of the dataset produced from the search result::

               'private' -> the dataset and its products are not visible or
                            accessible by any user other than the one that
                            created the dataset.

               'unlisted' -> the dataset and its products are not visible in the
                             list of public datasets, but can be accessed if the
                             dataset URL is known

               'public' -> the dataset and its products are visible in the list
                           of public datasets and can be accessed by anyone.

    email_when_done : bool
        If True, the LCC-Server will email you when the search is complete.
        This also forces `download_data` to False. Using this requires an
        LCC-Server account and an API key tied to that account.

    collections : list of str or None
        The LC collections to search in. If None, all collections will be
        searched.

    columns : list of str or None
        The columns to return in the results. Matching objects' object IDs,
        RAs, DECs, and links to light curve files are always returned, so
        these need not be listed. If None, only these columns are returned:
        'objectid', 'ra', 'decl', 'lcfname'

    filters : str or None
        An SQL-like string used to filter on database columns in the
        LCC-Server's collections. The available columns are listed in the
        Collections tab of the LCC-Server's browser UI. The allowed filter
        operators are::

            lt      -> less than
            gt      -> greater than
            ge      -> greater than or equal to
            le      -> less than or equal to
            eq      -> equal to
            ne      -> not equal to
            ct      -> contains text
            isnull  -> column value is null
            notnull -> column value is not null

        Filter specifications can be chained logically with the `and` and
        `or` operators.

        Example filter strings::

            "(propermotion gt 200.0) and (sdssr lt 11.0)"
            "(dered_jmag_kmag gt 2.0) and (aep_000_stetsonj gt 10.0)"
            "(gaia_status ct 'ok') and (propermotion gt 300.0)"
            "(simbad_best_objtype ct 'RR') and (dered_sdssu_sdssg lt 0.5)"

    sortspec : tuple of two strs or None
        If not None, a tuple of two items::

            ('column to sort by', 'asc|desc')

        This sets the column to sort the results by. (For cone_search, the
        default column and sort order are 'dist_arcsec' and 'asc', i.e. the
        distance from the search center in ascending order.) This function
        does not send a sort specification when `sortspec` is None.

    samplespec : int or None
        If given, the number of rows to draw by uniform random sampling from
        the initial search result.

    limitspec : int or None
        If given, the total number of rows from the initial search result to
        return.

        `sortspec`, `samplespec`, and `limitspec` are applied in this order:

            sample -> sort -> limit

    download_data : bool
        If True, the data accompanying the search result (the data table CSV,
        the dataset pickle file, and a light curve ZIP file) is downloaded
        automatically. If the search service reports that the query is still
        in progress, this function blocks, re-checking every `refresh`
        seconds, until the CSV and light curve ZIP are available or
        `maxtimeout` is exceeded.

        To avoid blocking, set this to False; the file path elements of the
        returned tuple are then None. Query information is cached under
        `~/.astrobase/lccs` so the data products can be fetched later with
        `retrieve_dataset_files`.

    outdir : str or None
        The output directory for the downloaded dataset files. If None, they
        are downloaded to the current directory.

    maxtimeout : float
        The maximum time in seconds to wait for a backgrounded query's data
        products before giving up. Only the `refresh` sleep intervals are
        counted toward this limit. Use `retrieve_dataset_files` to get the
        results later as needed.

    refresh : float
        The time in seconds to wait between checks of whether the query has
        completed and its dataset files can be downloaded.

    Returns
    -------

    tuple
        Returns a tuple with the following elements::

            (search result status dict,
             search result CSV file path,
             search result LC ZIP path)

        If the query completes immediately and `retrieve_dataset_files`
        returns a non-empty third item (named `pkl` here, presumably the
        dataset pickle path), that item is appended as a fourth element. The
        two file paths are None if nothing was downloaded: `download_data` is
        False, the wait timed out or was interrupted with Ctrl+C, or the
        query status was neither 'ok' nor 'background'.

    '''

    # assemble the query parameters; optional items are only sent when set
    params = {'ftstext':searchterm}

    for key, val in (('collections', collections),
                     ('columns', columns),
                     ('filters', filters)):
        if val:
            params[key] = val

    if sortspec:
        # the sort spec goes over the wire JSON-encoded inside a list
        params['sortspec'] = json.dumps([sortspec])

    for key, val in (('samplespec', samplespec),
                     ('limitspec', limitspec)):
        if val:
            params[key] = int(val)

    params['visibility'] = result_visibility
    params['emailwhendone'] = email_when_done
    params['sesame'] = sesame_lookup

    # asking for an email notification means we don't wait for the LC ZIP
    if email_when_done:
        download_data = False

    # reuse a stored API key if there is one, otherwise acquire a new one
    have_apikey, apikey, _expires = check_existing_apikey(lcc_server)
    if not have_apikey:
        apikey, _expires = get_new_apikey(lcc_server)

    # submit the query
    api_url = '%s/api/ftsquery' % lcc_server
    searchresult = submit_post_searchquery(api_url, params, apikey)
    status = searchresult[0]

    # nothing more to do if the caller doesn't want the data products
    if not download_data:
        return searchresult[1], None, None

    # the query finished right away: get its products immediately
    if status == 'ok':

        LOGINFO('query complete, downloading associated data...')
        csv_path, lczip_path, pkl_path = retrieve_dataset_files(
            searchresult,
            outdir=outdir,
            apikey=apikey
        )

        if pkl_path:
            return searchresult[1], csv_path, lczip_path, pkl_path
        return searchresult[1], csv_path, lczip_path

    # any status other than 'background' means there's nothing to wait for
    if status != 'background':
        LOGERROR('could not download the data for this query result')
        return searchresult[1], None, None

    LOGINFO('query is not yet complete, '
            'waiting up to %.1f minutes, '
            'updates every %s seconds (hit Ctrl+C to cancel)...' %
            (maxtimeout/60.0, refresh))

    # poll for the products until they show up or we run out of time
    elapsed = 0.0

    while elapsed < maxtimeout:

        try:

            time.sleep(refresh)
            csv_path, lczip_path, pkl_path = retrieve_dataset_files(
                searchresult,
                outdir=outdir,
                apikey=apikey
            )

            # done only when both the CSV and the LC ZIP exist on disk
            products_ready = all(
                fpath and os.path.exists(fpath)
                for fpath in (csv_path, lczip_path)
            )

            if products_ready:
                LOGINFO('all dataset products collected')
                return searchresult[1], csv_path, lczip_path

            elapsed += refresh

        except KeyboardInterrupt:

            LOGWARNING('abandoned wait for downloading data')
            return searchresult[1], None, None

    LOGERROR('wait timed out.')
    return searchresult[1], None, None


def column_search(lcc_server,
                  filters,
                  result_visibility='unlisted',
                  email_when_done=False,
                  collections=None,
                  columns=None,
                  sortspec=('sdssr','asc'),
                  samplespec=None,
                  limitspec=None,
                  download_data=True,
                  outdir=None,
                  maxtimeout=300.0,
                  refresh=15.0):

    '''Runs a column search query against an LCC-Server.

    The query is POSTed to the server's `/api/columnsearch` endpoint using an
    API key (an existing one if available, otherwise a newly acquired one). If
    requested, the dataset products for the result are then downloaded,
    polling the server while the query is still running in the background.

    Parameters
    ----------

    lcc_server : str
        The base URL of the LCC-Server to talk to (e.g. for HAT, use:
        https://data.hatsurveys.org).

    filters : str or None
        An SQL-like string used to filter on database columns in the
        LCC-Server's collections. The available columns are listed in the
        Collections tab of the LCC-Server's browser UI. The allowed filter
        operators are::

            lt      -> less than
            gt      -> greater than
            ge      -> greater than or equal to
            le      -> less than or equal to
            eq      -> equal to
            ne      -> not equal to
            ct      -> contains text
            isnull  -> column value is null
            notnull -> column value is not null

        Filter specifications can be chained logically with the `and` and
        `or` operators.

        Example filter strings::

            "(propermotion gt 200.0) and (sdssr lt 11.0)"
            "(dered_jmag_kmag gt 2.0) and (aep_000_stetsonj gt 10.0)"
            "(gaia_status ct 'ok') and (propermotion gt 300.0)"
            "(simbad_best_objtype ct 'RR') and (dered_sdssu_sdssg lt 0.5)"

        This value is always sent to the server as given, even if it is None.

    result_visibility : {'private', 'unlisted', 'public'}
        The visibility of the dataset produced from the search result::

               'private' -> the dataset and its products are not visible or
                            accessible by any user other than the one that
                            created the dataset.

               'unlisted' -> the dataset and its products are not visible in the
                             list of public datasets, but can be accessed if the
                             dataset URL is known

               'public' -> the dataset and its products are visible in the list
                           of public datasets and can be accessed by anyone.

    email_when_done : bool
        If True, the LCC-Server will email you when the search is complete.
        This also forces `download_data` to False. Using this requires an
        LCC-Server account and an API key tied to that account.

    collections : list of str or None
        The LC collections to search in. If None, all collections will be
        searched.

    columns : list of str or None
        The columns to return in the results. Matching objects' object IDs,
        RAs, DECs, and links to light curve files are always returned, so
        these need not be listed. If None, only these columns are returned:
        'objectid', 'ra', 'decl', 'lcfname'

    sortspec : tuple of two strs or None
        If not None, a tuple of two items::

            ('column to sort by', 'asc|desc')

        This sets the column to sort the results by. The default for this
        function is ('sdssr', 'asc'). (For cone_search, the default column and
        sort order are 'dist_arcsec' and 'asc', i.e. the distance from the
        search center in ascending order.)

    samplespec : int or None
        If given, the number of rows to draw by uniform random sampling from
        the initial search result.

    limitspec : int or None
        If given, the total number of rows from the initial search result to
        return.

        `sortspec`, `samplespec`, and `limitspec` are applied in this order:

            sample -> sort -> limit

    download_data : bool
        If True, the data accompanying the search result (the data table CSV,
        the dataset pickle file, and a light curve ZIP file) is downloaded
        automatically. If the search service reports that the query is still
        in progress, this function blocks, re-checking every `refresh`
        seconds, until the CSV and light curve ZIP are available or
        `maxtimeout` is exceeded.

        To avoid blocking, set this to False; the file path elements of the
        returned tuple are then None. Query information is cached under
        `~/.astrobase/lccs` so the data products can be fetched later with
        `retrieve_dataset_files`.

    outdir : str or None
        The output directory for the downloaded dataset files. If None, they
        are downloaded to the current directory.

    maxtimeout : float
        The maximum time in seconds to wait for a backgrounded query's data
        products before giving up. Only the `refresh` sleep intervals are
        counted toward this limit. Use `retrieve_dataset_files` to get the
        results later as needed.

    refresh : float
        The time in seconds to wait between checks of whether the query has
        completed and its dataset files can be downloaded.

    Returns
    -------

    tuple
        Returns a tuple with the following elements::

            (search result status dict,
             search result CSV file path,
             search result LC ZIP path)

        If the query completes immediately and `retrieve_dataset_files`
        returns a non-empty third item (named `pkl` here, presumably the
        dataset pickle path), that item is appended as a fourth element. The
        two file paths are None if nothing was downloaded: `download_data` is
        False, the wait timed out or was interrupted with Ctrl+C, or the
        query status was neither 'ok' nor 'background'.

    '''

    # assemble the query parameters; optional items are only sent when set
    params = {'filters':filters}

    for key, val in (('collections', collections),
                     ('columns', columns)):
        if val:
            params[key] = val

    if sortspec:
        # the sort spec goes over the wire JSON-encoded inside a list
        params['sortspec'] = json.dumps([sortspec])

    for key, val in (('samplespec', samplespec),
                     ('limitspec', limitspec)):
        if val:
            params[key] = int(val)

    params['visibility'] = result_visibility
    params['emailwhendone'] = email_when_done

    # asking for an email notification means we don't wait for the LC ZIP
    if email_when_done:
        download_data = False

    # reuse a stored API key if there is one, otherwise acquire a new one
    have_apikey, apikey, _expires = check_existing_apikey(lcc_server)
    if not have_apikey:
        apikey, _expires = get_new_apikey(lcc_server)

    # submit the query along with the API key
    api_url = '%s/api/columnsearch' % lcc_server
    searchresult = submit_post_searchquery(api_url, params, apikey)
    status = searchresult[0]

    # nothing more to do if the caller doesn't want the data products
    if not download_data:
        return searchresult[1], None, None

    # the query finished right away: get its products immediately
    if status == 'ok':

        LOGINFO('query complete, downloading associated data...')
        csv_path, lczip_path, pkl_path = retrieve_dataset_files(
            searchresult,
            outdir=outdir,
            apikey=apikey
        )

        if pkl_path:
            return searchresult[1], csv_path, lczip_path, pkl_path
        return searchresult[1], csv_path, lczip_path

    # any status other than 'background' means there's nothing to wait for
    if status != 'background':
        LOGERROR('could not download the data for this query result')
        return searchresult[1], None, None

    LOGINFO('query is not yet complete, '
            'waiting up to %.1f minutes, '
            'updates every %s seconds (hit Ctrl+C to cancel)...' %
            (maxtimeout/60.0, refresh))

    # poll for the products until they show up or we run out of time
    elapsed = 0.0

    while elapsed < maxtimeout:

        try:

            time.sleep(refresh)
            csv_path, lczip_path, pkl_path = retrieve_dataset_files(
                searchresult,
                outdir=outdir,
                apikey=apikey
            )

            # done only when both the CSV and the LC ZIP exist on disk
            products_ready = all(
                fpath and os.path.exists(fpath)
                for fpath in (csv_path, lczip_path)
            )

            if products_ready:
                LOGINFO('all dataset products collected')
                return searchresult[1], csv_path, lczip_path

            elapsed += refresh

        except KeyboardInterrupt:

            LOGWARNING('abandoned wait for downloading data')
            return searchresult[1], None, None

    LOGERROR('wait timed out.')
    return searchresult[1], None, None


def xmatch_search(lcc_server,
                  file_to_upload,
                  xmatch_dist_arcsec=3.0,
                  result_visibility='unlisted',
                  email_when_done=False,
                  collections=None,
                  columns=None,
                  filters=None,
                  sortspec=None,
                  limitspec=None,
                  samplespec=None,
                  download_data=True,
                  outdir=None,
                  maxtimeout=300.0,
                  refresh=15.0):

    '''Runs a cross-match search query against an LCC-Server.

    The contents of `file_to_upload` are POSTed to the server's `/api/xmatch`
    endpoint using an API key (an existing one if available, otherwise a newly
    acquired one). If requested, the dataset products for the result are then
    downloaded, polling the server while the query is still running in the
    background.

    Parameters
    ----------

    lcc_server : str
        The base URL of the LCC-Server to talk to (e.g. for HAT, use:
        https://data.hatsurveys.org).

    file_to_upload : str
        The path to a text file containing objectid, RA, declination rows for
        the objects to cross-match against the LCC-Server collections. It
        should follow the format of this example::

            # example object and coordinate list
            # objectid ra dec
            aaa 289.99698 44.99839
            bbb 293.358 -23.206
            ccc 294.197 +23.181
            ddd 19 25 27.9129 +42 47 03.693
            eee 19:25:27 -42:47:03.21
            # .
            # .
            # .
            # etc. lines starting with '#' will be ignored
            # (max 5000 objects)

        The file is rejected locally, without contacting the server, if it
        contains more than 5000 newline characters. Comment lines count
        toward this limit.

    xmatch_dist_arcsec : float
        The maximum distance in arcseconds to consider when cross-matching
        objects in the uploaded file to the LCC-Server's collections. The
        maximum allowed distance is 30 arcseconds. Multiple matches to an
        uploaded object are possible and will be returned in order of
        increasing distance grouped by input `objectid`.

    result_visibility : {'private', 'unlisted', 'public'}
        The visibility of the dataset produced from the search result::

               'private' -> the dataset and its products are not visible or
                            accessible by any user other than the one that
                            created the dataset.

               'unlisted' -> the dataset and its products are not visible in the
                             list of public datasets, but can be accessed if the
                             dataset URL is known

               'public' -> the dataset and its products are visible in the list
                           of public datasets and can be accessed by anyone.

    email_when_done : bool
        If True, the LCC-Server will email you when the search is complete.
        This also forces `download_data` to False. Using this requires an
        LCC-Server account and an API key tied to that account.

    collections : list of str or None
        The LC collections to search in. If None, all collections will be
        searched.

    columns : list of str or None
        The columns to return in the results. Matching objects' object IDs,
        RAs, DECs, and links to light curve files are always returned, so
        these need not be listed. If None, only these columns are returned:
        'objectid', 'ra', 'decl', 'lcfname'

    filters : str or None
        An SQL-like string used to filter on database columns in the
        LCC-Server's collections. The available columns are listed in the
        Collections tab of the LCC-Server's browser UI. The allowed filter
        operators are::

            lt      -> less than
            gt      -> greater than
            ge      -> greater than or equal to
            le      -> less than or equal to
            eq      -> equal to
            ne      -> not equal to
            ct      -> contains text
            isnull  -> column value is null
            notnull -> column value is not null

        Filter specifications can be chained logically with the `and` and
        `or` operators.

        Example filter strings::

            "(propermotion gt 200.0) and (sdssr lt 11.0)"
            "(dered_jmag_kmag gt 2.0) and (aep_000_stetsonj gt 10.0)"
            "(gaia_status ct 'ok') and (propermotion gt 300.0)"
            "(simbad_best_objtype ct 'RR') and (dered_sdssu_sdssg lt 0.5)"

    sortspec : tuple of two strs or None
        If not None, a tuple of two items::

            ('column to sort by', 'asc|desc')

        This sets the column to sort the results by. (For cone_search, the
        default column and sort order are 'dist_arcsec' and 'asc', i.e. the
        distance from the search center in ascending order.) This function
        does not send a sort specification when `sortspec` is None.

    samplespec : int or None
        If given, the number of rows to draw by uniform random sampling from
        the initial search result.

    limitspec : int or None
        If given, the total number of rows from the initial search result to
        return.

        `sortspec`, `samplespec`, and `limitspec` are applied in this order:

            sample -> sort -> limit

    download_data : bool
        If True, the data accompanying the search result (the data table CSV,
        the dataset pickle file, and a light curve ZIP file) is downloaded
        automatically. If the search service reports that the query is still
        in progress, this function blocks, re-checking every `refresh`
        seconds, until the CSV and light curve ZIP are available or
        `maxtimeout` is exceeded.

        To avoid blocking, set this to False; the file path elements of the
        returned tuple are then None. Query information is cached under
        `~/.astrobase/lccs` so the data products can be fetched later with
        `retrieve_dataset_files`.

    outdir : str or None
        The output directory for the downloaded dataset files. If None, they
        are downloaded to the current directory.

    maxtimeout : float
        The maximum time in seconds to wait for a backgrounded query's data
        products before giving up. Only the `refresh` sleep intervals are
        counted toward this limit. Use `retrieve_dataset_files` to get the
        results later as needed.

    refresh : float
        The time in seconds to wait between checks of whether the query has
        completed and its dataset files can be downloaded.

    Returns
    -------

    tuple
        Returns a tuple with the following elements::

            (search result status dict,
             search result CSV file path,
             search result LC ZIP path)

        If the query completes immediately and `retrieve_dataset_files`
        returns a non-empty third item (named `pkl` here, presumably the
        dataset pickle path), that item is appended as a fourth element. The
        two file paths are None if nothing was downloaded: `download_data` is
        False, the wait timed out or was interrupted with Ctrl+C, or the
        query status was neither 'ok' nor 'background'. If the input file is
        over the line limit, (None, None, None) is returned.

    '''

    # the entire file is uploaded as the cross-match query text
    with open(file_to_upload) as infd:
        xmq = infd.read()

    # each newline-terminated line counts toward the upload limit
    if xmq.count('\n') > 5000:

        LOGERROR('you have more than 5000 lines in the file to upload: %s' %
                 file_to_upload)
        return None, None, None

    # assemble the query parameters; optional items are only sent when set
    params = {'xmq':xmq,
              'xmd':xmatch_dist_arcsec}

    for key, val in (('collections', collections),
                     ('columns', columns),
                     ('filters', filters)):
        if val:
            params[key] = val

    if sortspec:
        # the sort spec goes over the wire JSON-encoded inside a list
        params['sortspec'] = json.dumps([sortspec])

    for key, val in (('samplespec', samplespec),
                     ('limitspec', limitspec)):
        if val:
            params[key] = int(val)

    params['visibility'] = result_visibility
    params['emailwhendone'] = email_when_done

    # asking for an email notification means we don't wait for the LC ZIP
    if email_when_done:
        download_data = False

    # reuse a stored API key if there is one, otherwise acquire a new one
    have_apikey, apikey, _expires = check_existing_apikey(lcc_server)
    if not have_apikey:
        apikey, _expires = get_new_apikey(lcc_server)

    # submit the query
    api_url = '%s/api/xmatch' % lcc_server
    searchresult = submit_post_searchquery(api_url, params, apikey)
    status = searchresult[0]

    # nothing more to do if the caller doesn't want the data products
    if not download_data:
        return searchresult[1], None, None

    # the query finished right away: get its products immediately
    if status == 'ok':

        LOGINFO('query complete, downloading associated data...')
        csv_path, lczip_path, pkl_path = retrieve_dataset_files(
            searchresult,
            outdir=outdir,
            apikey=apikey
        )

        if pkl_path:
            return searchresult[1], csv_path, lczip_path, pkl_path
        return searchresult[1], csv_path, lczip_path

    # any status other than 'background' means there's nothing to wait for
    if status != 'background':
        LOGERROR('could not download the data for this query result')
        return searchresult[1], None, None

    LOGINFO('query is not yet complete, '
            'waiting up to %.1f minutes, '
            'updates every %s seconds (hit Ctrl+C to cancel)...' %
            (maxtimeout/60.0, refresh))

    # poll for the products until they show up or we run out of time
    elapsed = 0.0

    while elapsed < maxtimeout:

        try:

            time.sleep(refresh)
            csv_path, lczip_path, pkl_path = retrieve_dataset_files(
                searchresult,
                outdir=outdir,
                apikey=apikey
            )

            # done only when both the CSV and the LC ZIP exist on disk
            products_ready = all(
                fpath and os.path.exists(fpath)
                for fpath in (csv_path, lczip_path)
            )

            if products_ready:
                LOGINFO('all dataset products collected')
                return searchresult[1], csv_path, lczip_path

            elapsed += refresh

        except KeyboardInterrupt:

            LOGWARNING('abandoned wait for downloading data')
            return searchresult[1], None, None

    LOGERROR('wait timed out.')
    return searchresult[1], None, None


#######################################
## DATASET AND OBJECT INFO FUNCTIONS ##
#######################################

def get_dataset(lcc_server,
                dataset_id,
                strformat=False,
                page=1):
    '''This downloads a JSON form of a dataset from the specified lcc_server.

    If the dataset contains more than 1000 rows, it will be paginated, so you
    must use the `page` kwarg to get the page you want. The dataset JSON will
    contain the keys 'npages', 'currpage', and 'rows_per_page' to help with
    this. The 'rows' key contains the actual data rows as a list of tuples.

    The JSON contains metadata about the query that produced the dataset,
    information about the data table's columns, and links to download the
    dataset's products including the light curve ZIP and the dataset CSV.

    Parameters
    ----------

    lcc_server : str
        This is the base URL of the LCC-Server to talk to.

    dataset_id : str
        This is the unique setid of the dataset you want to get. In the results
        from the `*_search` functions above, this is the value of the
        `infodict['result']['setid']` key in the first item (the infodict) in
        the returned tuple.

    strformat : bool
        This sets if you want the returned data rows to be formatted in their
        string representations already. This can be useful if you're piping the
        returned JSON straight into some sort of UI and you don't want to deal
        with formatting floats, etc. To do this manually when strformat is set
        to False, look at the `coldesc` item in the returned dict, which gives
        the Python and Numpy string format specifiers for each column in the
        data table.

    page : int
        This sets which page of the dataset should be retrieved.

    Returns
    -------

    dict or None
        This returns the dataset JSON loaded into a dict. If anything goes
        wrong while getting an API key, contacting the server, or parsing the
        response, the exception is logged and None is returned instead.

    '''

    # the dataset page is requested in JSON form via URL query parameters
    urlparams = {'strformat':1 if strformat else 0,
                 'page':page,
                 'json':1}
    urlqs = urlencode(urlparams)

    dataset_url = '%s/set/%s?%s' % (lcc_server, dataset_id, urlqs)

    # the dataset ID goes first and the server second, to match the message
    LOGINFO('retrieving dataset %s from %s, using URL: %s ...' % (dataset_id,
                                                                  lcc_server,
                                                                  dataset_url))

    try:

        # check if we have an API key already
        have_apikey, apikey, expires = check_existing_apikey(lcc_server)

        # if not, get a new one
        if not have_apikey:
            apikey, expires = get_new_apikey(lcc_server)

        # if apikey is not None, add it in as an Authorization header
        # NOTE(review): the header value is 'Bearer: <key>' with a colon,
        # unlike the usual 'Bearer <key>' form -- presumably this is what the
        # LCC-Server expects; confirm before changing
        if apikey:
            headers = {'Authorization':'Bearer: %s' % apikey}
        else:
            headers = {}

        # hit the server
        req = Request(dataset_url, data=None, headers=headers)
        resp = urlopen(req)

        # always release the connection, even if reading or parsing fails
        try:
            dataset = json.loads(resp.read())
        finally:
            resp.close()

        return dataset

    except Exception:

        LOGEXCEPTION('could not retrieve the dataset JSON!')
        return None


def object_info(lcc_server, objectid, db_collection_id):
    '''This gets information on a single object from the LCC-Server.

    Returns a dict with all of the available information on an object, including
    finding charts, comments, object type and variability tags, and
    period-search results (if available).

    If you have an LCC-Server API key present in `~/.astrobase/lccs/` that is
    associated with an LCC-Server user account, objects that are visible to this
    user will be returned, even if they are not visible to the public. Use this
    to look up objects that have been marked as 'private' or 'shared'.

    NOTE: you can pass the result dict returned by this function directly into
    the `astrobase.checkplot.checkplot_pickle_to_png` function, e.g.::

        astrobase.checkplot.checkplot_pickle_to_png(result_dict,
                                                    'object-%s-info.png' %
                                                    result_dict['objectid'])

    to generate a quick PNG overview of the object information.

    Parameters
    ----------

    lcc_server : str
        This is the base URL of the LCC-Server to talk to.

    objectid : str
        This is the unique database ID of the object to retrieve info for. This
        is always returned as the `db_oid` column in LCC-Server search results.

    db_collection_id : str
        This is the collection ID which will be searched for the object. This is
        always returned as the `collection` column in LCC-Server search results.

    Returns
    -------

    dict or None
        A dict containing the object info is returned. If the server responds
        with an HTTP error (including a 404 for an unknown object), the error
        is logged and None is returned instead. Only `HTTPError` is handled
        here; any other exception raised while talking to the server or
        decoding its response propagates to the caller.

        Some important items in the result dict:

        - `objectinfo`: all object magnitude, color, GAIA cross-match, and
          object type information available for this object

        - `objectcomments`: comments on the object's variability if available

        - `varinfo`: variability comments, variability features, type tags,
          period and epoch information if available

        - `neighbors`: information on the neighboring objects of this object in
          its parent light curve collection

        - `xmatch`: information on any cross-matches to external catalogs
          (e.g. KIC, EPIC, TIC, APOGEE, etc.)

        - `finderchart`: a base-64 encoded PNG image of the object's DSS2 RED
          finder chart. To convert this to an actual PNG, try the function:
          `astrobase.checkplot.pkl_io._b64_to_file`.

        - `magseries`: a base-64 encoded PNG image of the object's light
          curve. To convert this to an actual PNG, try the function:
          `astrobase.checkplot.pkl_io._b64_to_file`.

        - `pfmethods`: a list of period-finding methods applied to the object if
          any. If this list is present, use the keys in it to get to the actual
          period-finding results for each method. These will contain base-64
          encoded PNGs of the periodogram and phased light curves using the best
          three peaks in the periodogram, as well as period and epoch
          information.

    '''

    urlparams = {
        'objectid':objectid,
        'collection':db_collection_id
    }

    urlqs = urlencode(urlparams)
    url = '%s/api/object?%s' % (lcc_server, urlqs)

    try:

        LOGINFO(
            'getting info for %s in collection %s from %s' % (
                objectid,
                db_collection_id,
                lcc_server
            )
        )

        # check if we have an API key already (the expiry is not needed here)
        have_apikey, apikey, _ = check_existing_apikey(lcc_server)

        # if not, get a new one
        if not have_apikey:
            apikey, _ = get_new_apikey(lcc_server)

        # if apikey is not None, add it in as an Authorization header.
        # NOTE(review): the value is sent as 'Bearer: <key>' (with a colon);
        # left untouched because the server-side parsing is not visible here.
        if apikey:
            headers = {'Authorization':'Bearer: %s' % apikey}
        else:
            headers = {}

        # hit the server
        req = Request(url, data=None, headers=headers)
        resp = urlopen(req)

        # always release the HTTP response, even if reading or JSON decoding
        # fails; try/finally is used instead of `with` so this also works for
        # response objects that don't implement the context manager protocol
        try:
            objectinfo = json.loads(resp.read())['result']
        finally:
            resp.close()

        return objectinfo

    except HTTPError as e:

        if e.code == 404:

            LOGERROR(
                'additional info for object %s not '
                'found in collection: %s' % (objectid,
                                             db_collection_id)
            )

        else:

            LOGERROR('could not retrieve object info, '
                     'URL used: %s, error code: %s, reason: %s' %
                     (url, e.code, e.reason))

        return None


def list_recent_datasets(lcc_server, nrecent=25):
    '''This lists recent publicly visible datasets available on the LCC-Server.

    If you have an LCC-Server API key present in `~/.astrobase/lccs/` that is
    associated with an LCC-Server user account, datasets that belong to this
    user will be returned as well, even if they are not visible to the public.

    Parameters
    ----------

    lcc_server : str
        This is the base URL of the LCC-Server to talk to.

    nrecent : int
        This indicates how many recent public datasets you want to list. This is
        always capped at 1000. The value is passed through unchanged as the
        `nsets` URL parameter; no cap is applied in this function, so the limit
        is presumably enforced by the server.

    Returns
    -------

    list of dicts or None
        Returns a list of dicts, with each dict containing info on each dataset.
        If the server responds with an HTTP error, the error is logged and None
        is returned instead. Only `HTTPError` is handled here; any other
        exception raised while talking to the server or decoding its response
        propagates to the caller.

    '''

    urlparams = {'nsets':nrecent}
    urlqs = urlencode(urlparams)

    url = '%s/api/datasets?%s' % (lcc_server, urlqs)

    try:

        LOGINFO(
            'getting list of recent publicly '
            'visible and owned datasets from %s' % (
                lcc_server,
            )
        )

        # check if we have an API key already (the expiry is not needed here)
        have_apikey, apikey, _ = check_existing_apikey(lcc_server)

        # if not, get a new one
        if not have_apikey:
            apikey, _ = get_new_apikey(lcc_server)

        # if apikey is not None, add it in as an Authorization header.
        # NOTE(review): the value is sent as 'Bearer: <key>' (with a colon);
        # left untouched because the server-side parsing is not visible here.
        if apikey:
            headers = {'Authorization':'Bearer: %s' % apikey}
        else:
            headers = {}

        # hit the server
        req = Request(url, data=None, headers=headers)
        resp = urlopen(req)

        # always release the HTTP response, even if reading or JSON decoding
        # fails; try/finally is used instead of `with` so this also works for
        # response objects that don't implement the context manager protocol
        try:
            recent_datasets = json.loads(resp.read())['result']
        finally:
            resp.close()

        return recent_datasets

    except HTTPError as e:

        LOGERROR('could not retrieve recent datasets list, '
                 'URL used: %s, error code: %s, reason: %s' %
                 (url, e.code, e.reason))

        return None


def list_lc_collections(lcc_server):
    '''This lists all light curve collections made available on the LCC-Server.

    If you have an LCC-Server API key present in `~/.astrobase/lccs/` that is
    associated with an LCC-Server user account, light curve collections visible
    to this user will be returned as well, even if they are not visible to the
    public.

    Parameters
    ----------

    lcc_server : str
        The base URL of the LCC-Server to talk to.

    Returns
    -------

    dict or None
        Returns a dict containing lists of info items per collection. This
        includes collection_ids, lists of columns, lists of indexed columns,
        lists of full-text indexed columns, detailed column descriptions, number
        of objects in each collection, collection sky coverage, etc. This is
        the `collections` item of the `result` item in the server's JSON
        response.

        If the server responds with an HTTP error, the error is logged and None
        is returned instead. Only `HTTPError` is handled here; any other
        exception raised while talking to the server or decoding its response
        propagates to the caller.

    '''

    url = '%s/api/collections' % lcc_server

    try:

        LOGINFO(
            'getting list of recent publicly visible '
            'and owned LC collections from %s' % (
                lcc_server,
            )
        )

        # check if we have an API key already (the expiry is not needed here)
        have_apikey, apikey, _ = check_existing_apikey(lcc_server)

        # if not, get a new one
        if not have_apikey:
            apikey, _ = get_new_apikey(lcc_server)

        # if apikey is not None, add it in as an Authorization header.
        # NOTE(review): the value is sent as 'Bearer: <key>' (with a colon);
        # left untouched because the server-side parsing is not visible here.
        if apikey:
            headers = {'Authorization':'Bearer: %s' % apikey}
        else:
            headers = {}

        # hit the server
        req = Request(url, data=None, headers=headers)
        resp = urlopen(req)

        # always release the HTTP response, even if reading or JSON decoding
        # fails; try/finally is used instead of `with` so this also works for
        # response objects that don't implement the context manager protocol
        try:
            lcc_list = json.loads(resp.read())['result']['collections']
        finally:
            resp.close()

        return lcc_list

    except HTTPError as e:

        LOGERROR('could not retrieve list of collections, '
                 'URL used: %s, error code: %s, reason: %s' %
                 (url, e.code, e.reason))

        return None