Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
881f383
create a new _query_execute that is used by query_region and query_pa…
zoghbi-a Sep 4, 2025
75612b8
add unit tests and increase coverage
zoghbi-a Sep 5, 2025
e5ccbc1
add TAP limit if maxrec is high
zoghbi-a Sep 5, 2025
5ff18aa
add uploads to query_tap that gets passed to pyvo
zoghbi-a Sep 5, 2025
1f62576
renamed query_by_parameters to query_by_column; add docs
zoghbi-a Sep 8, 2025
9388249
fix the maxrec fix
zoghbi-a Sep 8, 2025
0967e86
update changelog
zoghbi-a Sep 8, 2025
5cbdf3f
fix styles
zoghbi-a Sep 8, 2025
4ef56c4
fix changelog
zoghbi-a Sep 8, 2025
cfb0884
fix docstring
zoghbi-a Sep 8, 2025
fe1995c
another attempt to fix docstring
zoghbi-a Sep 8, 2025
27d0277
add automatic host guess in download_data
zoghbi-a Sep 8, 2025
8be084d
fix _guess_host in windows
zoghbi-a Sep 9, 2025
d0ecb45
update the host info in download_data docstring
zoghbi-a Sep 26, 2025
30d90a0
rename query_by_columns to query_constraints
zoghbi-a Oct 20, 2025
69d37f2
remove the maxrec fix for now
zoghbi-a Oct 20, 2025
74eaa15
move the changelog entries to 0.4.12
zoghbi-a Oct 20, 2025
9366c3c
move column_filter to query_region and remove query_constraints. Upda…
zoghbi-a Oct 21, 2025
d58cad5
add extra check in locate_data; fix #3340
zoghbi-a Oct 22, 2025
6abfc19
update datalink url to enable fast queries
zoghbi-a Oct 22, 2025
8606f4f
document add_offset in query_region; addresses #3315
zoghbi-a Oct 22, 2025
5c2ee14
default to all-sky search when column_filters is given and position i…
zoghbi-a Oct 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ API changes
Service fixes and enhancements
------------------------------

heasarc
^^^^^^^
- Add ``query_constraints`` to allow querying of different catalog columns. [#3403]
- Add support for uploading tables when using TAP directly through ``query_tap``. [#3403]
- Add automatic guessing for the data host in ``download_data``. [#3403]


esa.hubble
^^^^^^^^^^

Expand Down
263 changes: 225 additions & 38 deletions astroquery/heasarc/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@

import os

import shutil
import requests
import tarfile
Expand Down Expand Up @@ -261,7 +259,7 @@ def query_mission_cols(self, mission, *, cache=True,
cols = [col.upper() for col in cols['name'] if '__' not in col]
return cols

def query_tap(self, query, *, maxrec=None):
def query_tap(self, query, *, maxrec=None, uploads=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would think propagating the table uploads could be super useful for the other methods calling query_tap internally, so please consider adding it to query_region and the new query_by_column, too.

Copy link
Contributor Author

@zoghbi-a zoghbi-a Oct 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uploads make sense only if the user writes a TAP query because they need to reference the uploaded columns in the query. I am not sure it can be done in a generic way in query_region or query_by_column

"""
Send query to HEASARC's Xamin TAP using ADQL.
Results in `~pyvo.dal.TAPResults` format.
Expand All @@ -273,6 +271,10 @@ def query_tap(self, query, *, maxrec=None):
ADQL query to be executed
maxrec : int
maximum number of records to return
uploads : dict
a mapping from table names used in the query to file like
objects containing a votable
(e.g. a file path or `~astropy.table.Table`).

Returns
-------
Expand All @@ -286,7 +288,130 @@ def query_tap(self, query, *, maxrec=None):
"""
log.debug(f'TAP query: {query}')
self._saved_query = query
return self.tap.search(query, language='ADQL', maxrec=maxrec)
return self.tap.search(
query, language='ADQL', maxrec=maxrec, uploads=uploads)
Comment on lines +291 to +292
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick as it doesn't matter really, but just a FYI: we allow long (120) lines here, no need to do these kind of line breaks


def _query_execute(self, catalog=None, where=None, *,
get_query_payload=False, columns=None,
verbose=False, maxrec=None):
"""Queries some catalog using the HEASARC TAP server based on the
'where' condition and returns an `~astropy.table.Table`.

Parameters
----------
catalog : str
The catalog to query. To list the available catalogs, use
:meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`.
where : str
The WHERE condition to be used in the query. It must
include the 'WHERE' keyword or be empty.
get_query_payload : bool, optional
If `True` then returns the generated ADQL query as str.
Defaults to `False`.
columns : str, optional
Target column list with value separated by a comma(,).
Use * for all the columns. The default is to return a subset
of the columns that are generally the most useful.
verbose : bool, optional
If False, suppress vo warnings.
maxrec : int, optional
Maximum number of records


Returns
-------
table : A `~astropy.table.Table` object.
"""
# if verbose is False then suppress any VOTable related warnings
if not verbose:
commons.suppress_vo_warnings()

if catalog is None:
raise InvalidQueryError("catalog name is required! Use 'xray' "
"to search the master X-ray catalog")

if where is None:
where = ''

# __row is needed for locate_data; we add it if not already present
# and remove it afterwards only if the user requested specific
# columns. keep_row tracks that.
keep_row = (
columns in (None, '*')
or isinstance(columns, str) and '__row' in columns
)

if columns is None:
columns = ', '.join(self._get_default_columns(catalog))

if '__row' not in columns and columns != '*':
columns += ', __row'

if where != '' and not where.startswith(' '):
where = ' ' + where.strip()
adql = f'SELECT {columns} FROM {catalog}{where}'

if get_query_payload:
return adql
response = self.query_tap(query=adql, maxrec=maxrec)

# save the response in case we want to use it later
self._last_result = response
self._last_catalog_name = catalog

table = response.to_table()
if not keep_row and '__row' in table.colnames:
table.remove_column('__row')
return table

def _parse_constraints(self, column_filters):
"""Convert constraints dictionary to ADQL WHERE clause

Parameters
----------
column_filters : dict
A dictionary of column constraint filters to include in the query.
Each key-value pair will be translated into an ADQL condition.
See `query_region` for details.

Returns
-------
conditions : list
a list of ADQL conditions as str

"""
conditions = []
if column_filters is None:
return conditions
for key, value in column_filters.items():
if isinstance(value, tuple):
if (
len(value) == 2
and all(isinstance(v, (int, float)) for v in value)
):
conditions.append(
f"{key} BETWEEN {value[0]} AND {value[1]}"
)
elif (
len(value) == 2
and value[0] in (">", "<", ">=", "<=")
):
conditions.append(f"{key} {value[0]} {value[1]}")
elif isinstance(value, list):
# handle list values: key IN (...)
formatted = []
for v in value:
if isinstance(v, str):
formatted.append(f"'{v}'")
else:
formatted.append(str(v))
conditions.append(f"{key} IN ({', '.join(formatted)})")
else:
conditions.append(
f"{key} = '{value}'"
if isinstance(value, str) else f"{key} = {value}"
)
return conditions

@deprecated_renamed_argument(
('mission', 'fields', 'resultmax', 'entry', 'coordsys', 'equinox',
Expand All @@ -298,8 +423,8 @@ def query_tap(self, query, *, maxrec=None):
True, True, True, False)
)
def query_region(self, position=None, catalog=None, radius=None, *,
spatial='cone', width=None, polygon=None, add_offset=False,
get_query_payload=False, columns=None, cache=False,
spatial='cone', width=None, polygon=None, column_filters=None,
add_offset=False, get_query_payload=False, columns=None, cache=False,
verbose=False, maxrec=None,
**kwargs):
"""Queries the HEASARC TAP server around a coordinate and returns a
Expand Down Expand Up @@ -335,6 +460,23 @@ def query_region(self, position=None, catalog=None, radius=None, *,
outlining the polygon to search in. It can also be a list of
`astropy.coordinates` object or strings that can be parsed by
`astropy.coordinates.ICRS`.
column_filters : dict
A dictionary of column constraint filters to include in the query.
Each key-value pair will be translated into an ADQL condition.
- For a range query, use a tuple of two values (min, max).
e.g. ``{'flux': (1e-12, 1e-10)}`` translates to
``flux BETWEEN 1e-12 AND 1e-10``.
- For list values, use a list of values.
e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to
``object_type IN ('QSO', 'GALAXY')``.
- For comparison queries, use a tuple of (operator, value),
where operator is one of '=', '!=', '<', '>', '<=', '>='.
e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``.
- For exact matches, use a single value (str, int, float).
e.g. ``{'object_type': 'QSO'}`` translates to
``object_type = 'QSO'``.
The keys should correspond to valid column names in the catalog.
Use `list_columns` to see the available columns.
add_offset: bool
If True and spatial=='cone', add a search_offset column that
indicates the separation (in arcmin) between the requested
Expand All @@ -356,18 +498,10 @@ def query_region(self, position=None, catalog=None, radius=None, *,
-------
table : A `~astropy.table.Table` object.
"""
# if verbose is False then suppress any VOTable related warnings
if not verbose:
commons.suppress_vo_warnings()

if catalog is None:
raise InvalidQueryError("catalog name is required! Use 'xray' "
"to search the master X-ray catalog")

if columns is None:
columns = ', '.join(self._get_default_columns(catalog))
if '__row' not in columns:
columns += ',__row'
# if we have column_filters and no position, assume all-sky search
if position is None and column_filters is not None:
spatial = 'all-sky'

if spatial.lower() == 'all-sky':
where = ''
Expand All @@ -390,9 +524,14 @@ def query_region(self, position=None, catalog=None, radius=None, *,

coords_str = [f'{coord.ra.deg},{coord.dec.deg}'
for coord in coords_list]
where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),"
where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),"
f"POLYGON('ICRS',{','.join(coords_str)}))=1")
else:
if position is None:
raise InvalidQueryError(
"position is required to for spatial='cone' (default). "
"Use spatial='all-sky' For all-sky searches."
)
coords_icrs = parse_coordinates(position).icrs
ra, dec = coords_icrs.ra.deg, coords_icrs.dec.deg

Expand All @@ -401,7 +540,7 @@ def query_region(self, position=None, catalog=None, radius=None, *,
radius = self.get_default_radius(catalog)
elif isinstance(radius, str):
radius = coordinates.Angle(radius)
where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE("
where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE("
f"'ICRS',{ra},{dec},{radius.to(u.deg).value}))=1")
# add search_offset for the case of cone
if add_offset:
Expand All @@ -410,24 +549,33 @@ def query_region(self, position=None, catalog=None, radius=None, *,
elif spatial.lower() == 'box':
if isinstance(width, str):
width = coordinates.Angle(width)
where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),"
where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),"
f"BOX('ICRS',{ra},{dec},{width.to(u.deg).value},"
f"{width.to(u.deg).value}))=1")
else:
raise ValueError("Unrecognized spatial query type. Must be one"
" of 'cone', 'box', 'polygon', or 'all-sky'.")

adql = f'SELECT {columns} FROM {catalog}{where}'

# handle column filters
if column_filters is not None:
conditions = self._parse_constraints(column_filters)
if len(conditions) > 0:
constraints_str = ' AND '.join(conditions)
if where == '':
where = 'WHERE ' + constraints_str
else:
where += ' AND ' + constraints_str

table_or_query = self._query_execute(
catalog=catalog, where=where,
get_query_payload=get_query_payload,
columns=columns, verbose=verbose,
maxrec=maxrec
)
if get_query_payload:
return adql
response = self.query_tap(query=adql, maxrec=maxrec)

# save the response in case we want to use it later
self._last_result = response
self._last_catalog_name = catalog
return table_or_query
table = table_or_query

table = response.to_table()
if add_offset:
table['search_offset'].unit = u.arcmin
if len(table) == 0:
Expand Down Expand Up @@ -505,18 +653,22 @@ def locate_data(self, query_result=None, catalog_name=None):
if '__row' not in query_result.colnames:
raise ValueError('No __row column found in query_result. '
'query_result needs to be the output of '
'query_region or a subset.')
'query_region or a subset. try adding '
'__row to the requested columns')

if catalog_name is None:
if not hasattr(self, '_last_catalog_name'):
raise ValueError('locate_data needs a catalog_name, and none '
'found from a previous search. Please provide one.')
catalog_name = self._last_catalog_name
if not (
isinstance(catalog_name, str)
and catalog_name in self.tap.tables.keys()
):
raise ValueError(f'Unknown catalog name: {catalog_name}')

# datalink url
dlink_url = f'{self.VO_URL}/datalink/{catalog_name}'
# datalink url; use sizefiles=false to speed up the response
dlink_url = f'{self.VO_URL}/datalink/{catalog_name}?sizefiles=false&'
query = pyvo.dal.adhoc.DatalinkQuery(
baseurl=dlink_url,
id=query_result['__row'],
Expand Down Expand Up @@ -592,17 +744,52 @@ def enable_cloud(self, provider='aws', profile=None):

self.s3_client = self.s3_resource.meta.client

def download_data(self, links, host='heasarc', location='.'):
def _guess_host(self, host):
"""Guess the host to use for downloading data

Parameters
----------
host : str
The host provided by the user

Returns
-------
host : str
The guessed host

"""
if host in ['heasarc', 'sciserver', 'aws']:
return host
elif host is not None:
raise ValueError(
'host has to be one of heasarc, sciserver, aws or None')

# host is None, so we guess
if (
'HOME' in os.environ
and os.environ['HOME'] == '/home/idies'
and os.path.exists('/FTP/')
):
# we are on idies, so we can use sciserver
return 'sciserver'

for var in ['AWS_REGION', 'AWS_DEFAULT_REGION', 'AWS_ROLE_ARN']:
if var in os.environ:
return 'aws'
return 'heasarc'

def download_data(self, links, *, host=None, location='.'):
"""Download data products in links with a choice of getting the
data from either the heasarc server, sciserver, or the cloud in AWS.


Parameters
----------
links : `astropy.table.Table` or `astropy.table.Row`
The result from locate_data
host : str
The data host. The options are: heasarc (default), sciserver, aws.
A table (or row) of data links, typically the result of locate_data.
host : str or None
The data host. The options are: None (default), heasarc, sciserver, aws.
If None, the host is guessed based on the environment.
If host == 'sciserver', data is copied from the local mounted
data drive.
If host == 'aws', data is downloaded from Amazon S3 Open
Expand All @@ -623,8 +810,8 @@ def download_data(self, links, host='heasarc', location='.'):
if isinstance(links, Row):
links = links.table[[links.index]]

if host not in ['heasarc', 'sciserver', 'aws']:
raise ValueError('host has to be one of heasarc, sciserver, aws')
# guess the host if not provided
host = self._guess_host(host)

host_column = 'access_url' if host == 'heasarc' else host
if host_column not in links.colnames:
Expand Down
Loading
Loading