From 881f3833a6d6257a3761a4b379fde1c822b9be8f Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Thu, 4 Sep 2025 08:17:54 -0400 Subject: [PATCH 01/22] create a new _query_execute that is used by query_region and query_parameters --- astroquery/heasarc/core.py | 202 +++++++++++++++++++---- astroquery/heasarc/tests/test_heasarc.py | 5 +- docs/heasarc/heasarc.rst | 8 +- 3 files changed, 182 insertions(+), 33 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index eaa29809d5..6290397bdd 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -1,6 +1,4 @@ - import os - import shutil import requests import tarfile @@ -288,6 +286,76 @@ def query_tap(self, query, *, maxrec=None): self._saved_query = query return self.tap.search(query, language='ADQL', maxrec=maxrec) + def _query_execute(self, catalog=None, where=None, *, + get_query_payload=False, columns=None, + verbose=False, maxrec=None): + """Queries some catalog using the HEASARC TAP server based on the + where condition and returns an `~astropy.table.Table`. + + Parameters + ---------- + catalog : str + The catalog to query. To list the available catalogs, use + :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`. + where : str + The WHERE condition to be used in the query. It must + include the 'WHERE' keyword or be empty. + get_query_payload : bool, optional + If `True` then returns the generated ADQL query as str. + Defaults to `False`. + columns : str, optional + Target column list with value separated by a comma(,). + Use * for all the columns. The default is to return a subset + of the columns that are generally the most useful. + verbose : bool, optional + If False, suppress vo warnings. + maxrec : int, optional + Maximum number of records + + + Returns + ------- + table : A `~astropy.table.Table` object. 
+ """ + # if verbose is False then suppress any VOTable related warnings + if not verbose: + commons.suppress_vo_warnings() + + if catalog is None: + raise InvalidQueryError("catalog name is required! Use 'xray' " + "to search the master X-ray catalog") + + # __row is needed for locate_data; we add it if not already present + # and remove it afterwards only if the user requested specific + # columns. keep_row tracks that. + keep_row = ( + columns in (None, '*') or + isinstance(columns, str) and '__row' in columns + ) + + if columns is None: + columns = ', '.join(self._get_default_columns(catalog)) + + if '__row' not in columns and columns != '*': + columns += ',__row' + + if where != '' and not where.startswith(' '): + where = ' ' + where.strip() + adql = f'SELECT {columns} FROM {catalog}{where}' + + if get_query_payload: + return adql + response = self.query_tap(query=adql, maxrec=maxrec) + + # save the response in case we want to use it later + self._last_result = response + self._last_catalog_name = catalog + + table = response.to_table() + if not keep_row and '__row' in table.colnames: + table.remove_column('__row') + return table + @deprecated_renamed_argument( ('mission', 'fields', 'resultmax', 'entry', 'coordsys', 'equinox', 'displaymode', 'action', 'sortvar', 'cache'), @@ -356,18 +424,6 @@ def query_region(self, position=None, catalog=None, radius=None, *, ------- table : A `~astropy.table.Table` object. """ - # if verbose is False then suppress any VOTable related warnings - if not verbose: - commons.suppress_vo_warnings() - - if catalog is None: - raise InvalidQueryError("catalog name is required! 
Use 'xray' " - "to search the master X-ray catalog") - - if columns is None: - columns = ', '.join(self._get_default_columns(catalog)) - if '__row' not in columns: - columns += ',__row' if spatial.lower() == 'all-sky': where = '' @@ -390,7 +446,7 @@ def query_region(self, position=None, catalog=None, radius=None, *, coords_str = [f'{coord.ra.deg},{coord.dec.deg}' for coord in coords_list] - where = (" WHERE CONTAINS(POINT('ICRS',ra,dec)," + where = ("WHERE CONTAINS(POINT('ICRS',ra,dec)," f"POLYGON('ICRS',{','.join(coords_str)}))=1") else: coords_icrs = parse_coordinates(position).icrs @@ -401,7 +457,7 @@ def query_region(self, position=None, catalog=None, radius=None, *, radius = self.get_default_radius(catalog) elif isinstance(radius, str): radius = coordinates.Angle(radius) - where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE(" + where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE(" f"'ICRS',{ra},{dec},{radius.to(u.deg).value}))=1") # add search_offset for the case of cone if add_offset: @@ -410,24 +466,23 @@ def query_region(self, position=None, catalog=None, radius=None, *, elif spatial.lower() == 'box': if isinstance(width, str): width = coordinates.Angle(width) - where = (" WHERE CONTAINS(POINT('ICRS',ra,dec)," + where = ("WHERE CONTAINS(POINT('ICRS',ra,dec)," f"BOX('ICRS',{ra},{dec},{width.to(u.deg).value}," f"{width.to(u.deg).value}))=1") else: raise ValueError("Unrecognized spatial query type. 
Must be one" " of 'cone', 'box', 'polygon', or 'all-sky'.") - - adql = f'SELECT {columns} FROM {catalog}{where}' - + + table_or_query = self._query_execute( + catalog=catalog, where=where, + get_query_payload=get_query_payload, + columns=columns, verbose=verbose, + maxrec=maxrec + ) if get_query_payload: - return adql - response = self.query_tap(query=adql, maxrec=maxrec) + return table_or_query + table = table_or_query - # save the response in case we want to use it later - self._last_result = response - self._last_catalog_name = catalog - - table = response.to_table() if add_offset: table['search_offset'].unit = u.arcmin if len(table) == 0: @@ -464,6 +519,96 @@ def query_object(self, object_name, mission, *, return self.query_region(pos, catalog=mission, spatial='cone', get_query_payload=get_query_payload) + + def query_parameters(self, catalog, params, *, + get_query_payload=False, columns=None, + verbose=False, maxrec=None): + """Query the HEASARC TAP server using a set of parameters. + + This is a simple wrapper around + `~astroquery.heasarc.HeasarcClass.query_tap` + that constructs an ADQL query from a dictionary of parameters. + + Parameters + ---------- + catalog : str + The catalog to query. To list the available catalogs, use + :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`. + params : dict + A dictionary of parameters to include in the query. + Each key-value pair will be translated into an ADQL condition. + - For a range query, use a tuple of two values (min, max). + e.g. `{'flux': (1e-12, 1e-10)}` translates to + `flux BETWEEN 1e-12 AND 1e-10`. + - For list values, use a list of values. + e.g. `{'object_type': ['QSO', 'GALAXY']}` translates to + `object_type IN ('QSO', 'GALAXY')`. + - For comparison queries, use a tuple of (operator, value), + where operator is one of '=', '!=', '<', '>', '<=', '>='. + e.g. `{'magnitude': ('<', 15)}` translates to `magnitude < 15`. + - For exact matches, use a single value (str, int, float). + e.g. 
`{'object_type': 'QSO'}` translates to + `object_type = 'QSO'`. + The keys should correspond to valid column names in the catalog. + Use `list_columns` to see available columns. + get_query_payload : bool, optional + If `True` then returns the generated ADQL query as str. + Defaults to `False`. + columns : str, optional + Target column list with value separated by a comma(,). + Use * for all the columns. The default is to return a subset + of the columns that are generally the most useful. + verbose : bool, optional + If False, suppress vo warnings. + maxrec : int, optional + Maximum number of records + + """ + + conditions = [] + for key, value in params.items(): + if isinstance(value, tuple): + if ( + len(value) == 2 and + all(isinstance(v, (int, float)) for v in value) + ): + conditions.append( + f"{key} BETWEEN {value[0]} AND {value[1]}" + ) + elif ( + len(value) == 2 and + value[0] in (">", "<", ">=", "<=") + ): + conditions.append(f"{key} {value[0]} {value[1]}") + elif isinstance(value, list): + # handle list values: key IN (...) + formatted = [] + for v in value: + if isinstance(v, str): + formatted.append(f"'{v}'") + else: + formatted.append(str(v)) + conditions.append(f"{key} IN ({', '.join(formatted)})") + else: + conditions.append( + f"{key} = '{value}'" + if isinstance(value, str) else f"{key} = {value}" + ) + if len(conditions) == 0: + where = "" + else: + where = "WHERE " + (" AND ".join(conditions)) + + + + table_or_query = self._query_execute( + catalog=catalog, where=where, + get_query_payload=get_query_payload, + columns=columns, verbose=verbose, + maxrec=maxrec + ) + return table_or_query + def locate_data(self, query_result=None, catalog_name=None): """Get links to data products Use vo/datalinks to query the data products for some query_results. @@ -505,7 +650,8 @@ def locate_data(self, query_result=None, catalog_name=None): if '__row' not in query_result.colnames: raise ValueError('No __row column found in query_result. 
' 'query_result needs to be the output of ' - 'query_region or a subset.') + 'query_region or a subset. try adding ' + '__row to the requested columns') if catalog_name is None: catalog_name = self._last_catalog_name diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index a2f51b5c67..091a33313b 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -195,7 +195,10 @@ def test_spatial_invalid(spatial): def test_no_catalog(): with pytest.raises(InvalidQueryError): - Heasarc.query_region("m31", spatial="cone", columns="*") + # OBJ_LIST[0] and radius added to avoid a remote call + Heasarc.query_region( + OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin" + ) def test_tap_def(): diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index 0812bf8477..22a888b24e 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -111,8 +111,8 @@ catalogs. name description ---------- ------------------------------------------------------------- ascamaster ASCA Master Catalog + burcbmastr BurstCube Master Observation Catalog chanmaster Chandra Observations - cmbmaster LAMBDA Cosmic Microwave Background Experiments Master Catalog ... If you do not know the name of the catalog you are looking for, you can use the ``keywords`` @@ -191,10 +191,10 @@ with those results. >>> tab = tab[tab['exposure'] > 0] >>> links = Heasarc.locate_data(tab[:2]) >>> links['access_url'].pprint() - access_url + access_url --------------------------------------------------------------------- - https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2018_08//1100120101/ - https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2018_08//1100120102/ + https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2025_01//7100120102/ + https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2025_01//7100120101/ The ``links`` table has three relevant columns: ``access_url``, ``sciserver`` and ``aws``. 
The first gives the url to the data from the main heasarc server. The second gives From 75612b87bbad433d4e3f579ee7ce7c7daca6ab47 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Fri, 5 Sep 2025 08:56:24 -0400 Subject: [PATCH 02/22] add unit tests and increase coverage --- astroquery/heasarc/core.py | 10 +- astroquery/heasarc/tests/test_heasarc.py | 196 ++++++++++++++++++++++- 2 files changed, 200 insertions(+), 6 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 6290397bdd..1f653daa68 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -325,6 +325,9 @@ def _query_execute(self, catalog=None, where=None, *, raise InvalidQueryError("catalog name is required! Use 'xray' " "to search the master X-ray catalog") + if where is None: + where = '' + # __row is needed for locate_data; we add it if not already present # and remove it afterwards only if the user requested specific # columns. keep_row tracks that. @@ -337,7 +340,7 @@ def _query_execute(self, catalog=None, where=None, *, columns = ', '.join(self._get_default_columns(catalog)) if '__row' not in columns and columns != '*': - columns += ',__row' + columns += ', __row' if where != '' and not where.startswith(' '): where = ' ' + where.strip() @@ -520,7 +523,7 @@ def query_object(self, object_name, mission, *, get_query_payload=get_query_payload) - def query_parameters(self, catalog, params, *, + def query_by_parameters(self, catalog, params, *, get_query_payload=False, columns=None, verbose=False, maxrec=None): """Query the HEASARC TAP server using a set of parameters. 
@@ -565,6 +568,9 @@ def query_parameters(self, catalog, params, *, """ + if not isinstance(params, dict): + raise ValueError('params must be a dictionary of key-value pairs') + conditions = [] for key, value in params.items(): if isinstance(value, tuple): diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index 091a33313b..2f491c9c70 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -53,7 +53,13 @@ def __init__(self, desc, cols=[]): 'name-2': vTable('description-2 chandra', cols), 'TAPname': None } + + def search(self, query, language='ADQL', maxrec=1000): + return MockResult() +class MockResult: + def to_table(self): + return Table({'value': ['1.5', '1.2', '-0.3']}) @pytest.fixture def mock_tap(): @@ -92,7 +98,7 @@ def test_query_region_cone(coordinates, radius, offset): radius=radius, columns="*", get_query_payload=True, - add_offset=True, + add_offset=offset, ) # We don't fully float compare in this string, there are slight @@ -168,6 +174,27 @@ def test_query_region_polygon(polygon): "10.1,10.1,10.0,10.1,10.0,10.0))=1" ) +def test_query_region_polygon_no_unit(): + # position is not used for polygon + poly = [ + (10.1, 10.1), + (10.0, 10.1), + (10.0, 10.0), + ] + with pytest.warns(UserWarning, match="Polygon endpoints are being interpreted as"): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="polygon", + polygon=poly, + columns="*", + get_query_payload=True, + ) + + assert query == ( + "SELECT * FROM suzamaster " + "WHERE CONTAINS(POINT('ICRS',ra,dec),POLYGON('ICRS'," + "10.1,10.1,10.0,10.1,10.0,10.0))=1" + ) def test_query_allsky(): query1 = Heasarc.query_region( @@ -200,6 +227,164 @@ def test_no_catalog(): OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin" ) +def test_by_params_no_catalog(): + with pytest.raises(InvalidQueryError): + # OBJ_LIST[0] and radius added to avoid a remote call + Heasarc.query_by_parameters( + None, 
params={"flux": (1e-12, 1e-10)} + ) + + +def test__query_execute_no_catalog(): + with pytest.raises(InvalidQueryError): + # OBJ_LIST[0] and radius added to avoid a remote call + Heasarc._query_execute(None) + + +def test_by_params_none_params(): + with pytest.raises(ValueError): + Heasarc.query_by_parameters('testcatalog', params=None) + + +def test_by_params_no_params(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster" + + +def test_by_params_range(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": (1e-12, 1e-10)}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux BETWEEN 1e-12 AND 1e-10" + + +def test_by_params_eq_float(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": 1.2}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux = 1.2" + + +def test_by_params_eq_str(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": "1.2"}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux = '1.2'" + + +def test_by_params_cmp_float(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": ('>', 1.2)}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux > 1.2" + + +def test_by_params_cmp_float_2(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": ('>', 1.2), "magnitude": ('<=', 15)}, + columns="*", + get_query_payload=True, + ) + assert query == ("SELECT * FROM suzamaster WHERE flux > 1.2 " + "AND magnitude <= 15") + + +def test_by_params_list(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={"flux": [1.2, 2.3, 3.4]}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM 
suzamaster WHERE flux IN (1.2, 2.3, 3.4)" + + +def test__query_execute_none_where(): + query = Heasarc._query_execute( + catalog="suzamaster", + columns="*", + get_query_payload=True, + ) + assert query == ("SELECT * FROM suzamaster") + + +def test__query_execute_none_where(): + query = Heasarc._query_execute( + catalog="suzamaster", + where=" EXTRA", + columns="*", + get_query_payload=True, + ) + assert query == ("SELECT * FROM suzamaster EXTRA") + + +def test__query_execute_add_row(): + query1 = Heasarc._query_execute( + catalog="suzamaster", + where="", + columns="col1, col2", + get_query_payload=True, + ) + query2 = Heasarc._query_execute( + catalog="suzamaster", + where=None, + columns="col1, col2", + get_query_payload=True, + ) + assert query1 == query2 == ("SELECT col1, col2, __row FROM suzamaster") + +def test__query_execute_extra_space(): + query1 = Heasarc._query_execute( + catalog="suzamaster", + where="WHERE EXTRA", + columns="*", + get_query_payload=True, + ) + + query2 = Heasarc._query_execute( + catalog="suzamaster", + where=" WHERE EXTRA", + columns="*", + get_query_payload=True, + ) + assert query1 == query2 == ("SELECT * FROM suzamaster WHERE EXTRA") + +def test_query_execute_columns(mock_tap, mock_default_cols): + query = Heasarc._query_execute( + catalog="suzamaster", + where="WHERE EXTRA", + columns=None, + get_query_payload=True, + ) + assert query == ("SELECT col-3, col-2, __row FROM suzamaster WHERE EXTRA") + +def test_query_execute_columns(mock_tap, mock_default_cols): + res = Heasarc._query_execute( + catalog="suzamaster", + where="WHERE EXTRA", + columns='*' + ) + assert Heasarc._last_catalog_name == "suzamaster" + # reset last result to avoid interference with other tests + Heasarc._last_result = None def test_tap_def(): # Use a new HeasarcClass object @@ -210,9 +395,6 @@ def test_tap_def(): def test_meta_def(): - class MockResult: - def to_table(self): - return Table({'value': ['1.5', '1.2', '-0.3']}) # Use a new HeasarcClass object 
Heasarc = HeasarcClass() assert Heasarc._meta_info is None @@ -261,6 +443,12 @@ def test_list_catalogs_keywords_list_non_str(): Heasarc.list_catalogs(keywords=['x-ray', 12]) +def test__list_catalogs_keywords(mock_tap): + catalogs = Heasarc.list_catalogs(keywords=['xmm']) + assert list(catalogs['name']) == [ + lab for lab, desc in MockTap().tables.items() if 'TAP' not in lab and 'xmm' in desc.description.lower() + ] + def test__list_columns__missing_table(mock_tap): with pytest.raises(ValueError, match="not available as a public catalog"): Heasarc.list_columns(catalog_name='missing-table') From e5ccbc174bb2d0197c18f96c5ac13a91e8a5a565 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Fri, 5 Sep 2025 09:01:15 -0400 Subject: [PATCH 03/22] add TAP limit if maxrec is high --- astroquery/heasarc/core.py | 4 ++++ astroquery/heasarc/tests/test_heasarc.py | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 1f653daa68..83125d996d 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -346,6 +346,10 @@ def _query_execute(self, catalog=None, where=None, *, where = ' ' + where.strip() adql = f'SELECT {columns} FROM {catalog}{where}' + # if maxrec is more than the server limit, we set a higher limit + if maxrec is not None and maxrec > 100000: + adql += f' LIMIT {maxrec*4}' + if get_query_payload: return adql response = self.query_tap(query=adql, maxrec=maxrec) diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index 2f491c9c70..eaaa7f6ad0 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -255,6 +255,17 @@ def test_by_params_no_params(): ) assert query == "SELECT * FROM suzamaster" +def test_by_params_limit(): + query = Heasarc.query_by_parameters( + catalog="suzamaster", + params={}, + columns="*", + get_query_payload=True, + maxrec=500000, + ) + assert query == "SELECT * FROM suzamaster 
LIMIT 2000000" + + def test_by_params_range(): query = Heasarc.query_by_parameters( From 5ff18aad0499f926037f16af41c149b2a21d776a Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Fri, 5 Sep 2025 12:16:47 -0400 Subject: [PATCH 04/22] add uploads to query_tap that gets passed to pyvo --- astroquery/heasarc/core.py | 9 +++++++-- astroquery/heasarc/tests/test_heasarc.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 83125d996d..e42f419fbf 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -259,7 +259,7 @@ def query_mission_cols(self, mission, *, cache=True, cols = [col.upper() for col in cols['name'] if '__' not in col] return cols - def query_tap(self, query, *, maxrec=None): + def query_tap(self, query, *, maxrec=None, uploads=None): """ Send query to HEASARC's Xamin TAP using ADQL. Results in `~pyvo.dal.TAPResults` format. @@ -271,6 +271,10 @@ def query_tap(self, query, *, maxrec=None): ADQL query to be executed maxrec : int maximum number of records to return + uploads : dict + a mapping from table names used in the query to file like + objects containing a votable + (e.g. a file path or `~astropy.table.Table`). 
Returns ------- @@ -284,7 +288,8 @@ def query_tap(self, query, *, maxrec=None): """ log.debug(f'TAP query: {query}') self._saved_query = query - return self.tap.search(query, language='ADQL', maxrec=maxrec) + return self.tap.search( + query, language='ADQL', maxrec=maxrec, uploads=uploads) def _query_execute(self, catalog=None, where=None, *, get_query_payload=False, columns=None, diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index eaaa7f6ad0..a7bf434122 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -54,7 +54,7 @@ def __init__(self, desc, cols=[]): 'TAPname': None } - def search(self, query, language='ADQL', maxrec=1000): + def search(self, query, language='ADQL', maxrec=1000, uploads=None): return MockResult() class MockResult: From 1f6257603775dc7a8a2d8b2e73162e86bc52ba0a Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 11:08:14 -0400 Subject: [PATCH 05/22] renamed query_by_parameters to query_by_column; add docs --- astroquery/heasarc/core.py | 8 +-- astroquery/heasarc/tests/test_heasarc.py | 40 +++++------ docs/heasarc/heasarc.rst | 90 ++++++++++++++++++++++-- 3 files changed, 110 insertions(+), 28 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index e42f419fbf..5b1034e319 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -532,10 +532,10 @@ def query_object(self, object_name, mission, *, get_query_payload=get_query_payload) - def query_by_parameters(self, catalog, params, *, + def query_by_column(self, catalog, params, *, get_query_payload=False, columns=None, verbose=False, maxrec=None): - """Query the HEASARC TAP server using a set of parameters. + """Query the HEASARC TAP server using constraints on the columns.
This is a simple wrapper around `~astroquery.heasarc.HeasarcClass.query_tap` @@ -547,7 +547,7 @@ def query_by_parameters(self, catalog, params, *, The catalog to query. To list the available catalogs, use :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`. params : dict - A dictionary of parameters to include in the query. + A dictionary of column constraint parameters to include in the query. Each key-value pair will be translated into an ADQL condition. - For a range query, use a tuple of two values (min, max). e.g. `{'flux': (1e-12, 1e-10)}` translates to @@ -562,7 +562,7 @@ def query_by_parameters(self, catalog, params, *, e.g. `{'object_type': 'QSO'}` translates to `object_type = 'QSO'`. The keys should correspond to valid column names in the catalog. - Use `list_columns` to see available columns. + Use `list_columns` to see the available columns. get_query_payload : bool, optional If `True` then returns the generated ADQL query as str. Defaults to `False`. diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index a7bf434122..c5ea3a8c8a 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -227,10 +227,10 @@ def test_no_catalog(): OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin" ) -def test_by_params_no_catalog(): +def test_by_columns_no_catalog(): with pytest.raises(InvalidQueryError): # OBJ_LIST[0] and radius added to avoid a remote call - Heasarc.query_by_parameters( + Heasarc.query_by_column( None, params={"flux": (1e-12, 1e-10)} ) @@ -241,13 +241,13 @@ def test__query_execute_no_catalog(): Heasarc._query_execute(None) -def test_by_params_none_params(): +def test_by_columns_none_params(): with pytest.raises(ValueError): - Heasarc.query_by_parameters('testcatalog', params=None) + Heasarc.query_by_column('testcatalog', params=None) -def test_by_params_no_params(): - query = Heasarc.query_by_parameters( +def test_by_columns_no_params(): + query = 
Heasarc.query_by_column( catalog="suzamaster", params={}, columns="*", @@ -255,8 +255,8 @@ def test_by_params_no_params(): ) assert query == "SELECT * FROM suzamaster" -def test_by_params_limit(): - query = Heasarc.query_by_parameters( +def test_by_columns_limit(): + query = Heasarc.query_by_column( catalog="suzamaster", params={}, columns="*", @@ -267,8 +267,8 @@ def test_by_params_limit(): -def test_by_params_range(): - query = Heasarc.query_by_parameters( +def test_by_columns_range(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": (1e-12, 1e-10)}, columns="*", @@ -277,8 +277,8 @@ def test_by_params_range(): assert query == "SELECT * FROM suzamaster WHERE flux BETWEEN 1e-12 AND 1e-10" -def test_by_params_eq_float(): - query = Heasarc.query_by_parameters( +def test_by_columns_eq_float(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": 1.2}, columns="*", @@ -287,8 +287,8 @@ def test_by_params_eq_float(): assert query == "SELECT * FROM suzamaster WHERE flux = 1.2" -def test_by_params_eq_str(): - query = Heasarc.query_by_parameters( +def test_by_columns_eq_str(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": "1.2"}, columns="*", @@ -297,8 +297,8 @@ def test_by_params_eq_str(): assert query == "SELECT * FROM suzamaster WHERE flux = '1.2'" -def test_by_params_cmp_float(): - query = Heasarc.query_by_parameters( +def test_by_columns_cmp_float(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": ('>', 1.2)}, columns="*", @@ -307,8 +307,8 @@ def test_by_params_cmp_float(): assert query == "SELECT * FROM suzamaster WHERE flux > 1.2" -def test_by_params_cmp_float_2(): - query = Heasarc.query_by_parameters( +def test_by_columns_cmp_float_2(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": ('>', 1.2), "magnitude": ('<=', 15)}, columns="*", @@ -318,8 +318,8 @@ def test_by_params_cmp_float_2(): "AND magnitude <= 15") -def test_by_params_list(): - query = 
Heasarc.query_by_parameters( +def test_by_columns_list(): + query = Heasarc.query_by_column( catalog="suzamaster", params={"flux": [1.2, 2.3, 3.4]}, columns="*", diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index 22a888b24e..b1b21bcd1e 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -26,7 +26,7 @@ Query a Catalog The basic use case is one where we want to query a catalog from some position in the sky. In this example, we query the NuSTAR master catalog ``numaster`` for all observations of the AGN ``NGC 3783``. We use `~astropy.coordinates.SkyCoord` to obtain the coordinates -and then pass them to `~astroquery.heasarc.HeasarcClass.query_region`. In following, we +and then pass them to `~astroquery.heasarc.HeasarcClass.query_region`. In the following, we also select only columns with ``time > 0``. Zero values are typically used for observations that have been approved but not observed. @@ -98,8 +98,11 @@ If you want all the columns returned, use ``columns='*'`` List Available Catalogs ----------------------- The collection of available catalogs can be obtained by calling the `~astroquery.heasarc.HeasarcClass.list_catalogs` -method. In this example, we query the master catalogs only by passing ``master=True``. -which is ``False`` by default (i.e. return all catalogs). `~astroquery.heasarc.HeasarcClass.list_catalogs` returns an +method. In this example, we request the master catalogs only by passing ``master=True``. +Master catalogs are catalogs that contain one entry per observation, as opposed to +other catalogs that may record other information. There is typically one master catalog +per mission. The ``master`` parameter is a boolean flag, which is ``False`` by default +(i.e. return all catalogs). `~astroquery.heasarc.HeasarcClass.list_catalogs` returns an `~astropy.table.Table` with two columns containing the names and description of the available catalogs. 
@@ -138,7 +141,7 @@ are related to Chandra, you can do: cargm31cxo Carina Nebula Gum 31 Chandra X-Ray Point Source Catalog carinaclas Carina Nebula Chandra X-Ray Point Source Classes -If you are interested only finding the master catalogs, you can also set ``master`` to ``True``. +If you are interested only finding the master catalogs only, you can set ``master`` to ``True``. .. doctest-remote-data:: @@ -177,6 +180,47 @@ following for instance will find master catalogs that have keywords 'nicer' or ' nicermastr NICER Master Catalog swiftmastr Swift Master Catalog + +Other non-region queries +---------------------------------------- +In addition to `~astroquery.heasarc.HeasarcClass.query_region`, `~astroquery.heasarc.HeasarcClass.query_by_column` +is also available. This method allows you to query a catalog by specifying +various column constraints. For example, the following query searches the ``chanmaster`` +catalog for all observations with exposure time greater than 190 ks. + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> tab = Heasarc.query_by_column( + ... catalog='chanmaster', params={'exposure': ('>', '190000')} + ... ) + >>> tab['name', 'obsid', 'ra', 'dec', 'exposure'][:3].pprint() + name obsid ra dec exposure + deg deg s + --------------- ----- --------- --------- -------- + Sgr A* 13842 266.41667 -29.00781 191760 + IGR J17480-2446 30481 267.02013 -24.78024 200000 + IGR J17480-2446 31425 267.02013 -24.78024 200000 + +Another example may be to search the ``xmmmaster`` for a observation in some time range: + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> tab = Heasarc.query_by_column( + ... catalog='xmmmaster', params={'time': (52300, 52310)} + ... 
) + >>> tab['name', 'obsid', 'ra', 'dec', 'time', 'duration'][:3].pprint() + name obsid ra dec time duration + deg deg d s + ------------- ---------- -------- --------- ---------------- -------- + NGC 1316 0091770101 50.95833 -37.28333 52308.6872337963 60362 + NGC 1316 0091770201 50.67296 -37.20928 52308.642974537 3462 + Fei 16 offset 0154150101 28.64374 -6.86667 52305.2210416667 24619 + +To see the available columns that can be queried for a given catalog and their units, +use `~astroquery.heasarc.HeasarcClass.list_columns` (see below). + Links to Data Products ---------------------- Once the query result is obtained, you can query any data products associated @@ -250,6 +294,44 @@ returns the constructed ADQL query. 121.92084 39.00417 UGC4229 0138951401 121.92099 39.00422 MRK 622 0852180501 +Table Uploads +----------------- +You can also upload a table of positions to be queried. The table can be an +`~astropy.table.Table` or a path to a file in VOtable format. The following example +shows how to use the upload feature to do a cross-match between the +``chanmaster`` catalog and a list of known source positions: + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> from astropy.table import Table + >>> sample = Table({ + ... 'ra': [1.58, 188.90], + ... 'dec': [20.20, -39.90] + ... }) + >>> query = """ + ... SELECT cat.name, cat.ra, cat.dec, cat.obsid + ... FROM chanmaster cat, tap_upload.mytable mt + ... WHERE 1=CONTAINS(POINT('ICRS', mt.ra, mt.dec), CIRCLE('ICRS',cat.ra, cat.dec, 0.1)) + ... 
""" + >>> result = Heasarc.query_tap(query, uploads={'mytable': sample}).to_table() + >>> result.pprint() + name ra dec obsid + deg deg + ----------- --------- --------- ----- + NGC 4507 188.90250 -39.90928 12292 + NGC 4507 188.90208 -39.90925 2150 + HR4796 189.00417 -39.86950 7414 + KUG0003+199 1.58134 20.20291 23709 + Mrk 335 1.58142 20.20295 23292 + Mrk 335 1.58142 20.20295 23297 + Mrk 335 1.58142 20.20295 23298 + Mrk 335 1.58142 20.20295 23299 + Mrk 335 1.58142 20.20295 23300 + Mrk 335 1.58142 20.20295 23301 + Mrk 335 1.58142 20.20295 23302 + + Complex Regions --------------- In addition to a cone search (some position and search radius), ```Heasarc.query_region``` accepts From 938824920400227a89177ff79546157756ed26c6 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 11:29:08 -0400 Subject: [PATCH 06/22] fix the maxrec fix --- astroquery/heasarc/core.py | 2 +- astroquery/heasarc/tests/test_heasarc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 5b1034e319..c0ff2bdea1 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -353,7 +353,7 @@ def _query_execute(self, catalog=None, where=None, *, # if maxrec is more than the server limit, we set a higher limit if maxrec is not None and maxrec > 100000: - adql += f' LIMIT {maxrec*4}' + adql = adql.replace('SELECT ', f'SELECT TOP {maxrec*4} ') if get_query_payload: return adql diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index c5ea3a8c8a..51714d6731 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -263,7 +263,7 @@ def test_by_columns_limit(): get_query_payload=True, maxrec=500000, ) - assert query == "SELECT * FROM suzamaster LIMIT 2000000" + assert query == "SELECT TOP 2000000 * FROM suzamaster" From 0967e8616c9cece4c6317d95c43d7812b42f9a03 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 
8 Sep 2025 11:35:11 -0400 Subject: [PATCH 07/22] update changelog --- CHANGES.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 6b7a418315..cd0b36c8d1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -70,6 +70,13 @@ mast Service fixes and enhancements ------------------------------ +heasarc +^^^^^^^ + +- Add ``query_by_column`` to allow querying of different catalog columns.The user passes a dict that is parsed into a TAP WHERE statements. +- Add support for uploading tables when using TAP directly through ``query_tap``. +- Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. + alma ^^^^ From 5cbdf3f36f57f6cc128faaf91e313471395e71d9 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 11:44:22 -0400 Subject: [PATCH 08/22] fix styles --- astroquery/heasarc/core.py | 41 +++++++++++------------- astroquery/heasarc/tests/test_heasarc.py | 32 +++++++++++------- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index c0ff2bdea1..8bf006372e 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -292,8 +292,8 @@ def query_tap(self, query, *, maxrec=None, uploads=None): query, language='ADQL', maxrec=maxrec, uploads=uploads) def _query_execute(self, catalog=None, where=None, *, - get_query_payload=False, columns=None, - verbose=False, maxrec=None): + get_query_payload=False, columns=None, + verbose=False, maxrec=None): """Queries some catalog using the HEASARC TAP server based on the where condition and returns an `~astropy.table.Table`. @@ -334,16 +334,16 @@ def _query_execute(self, catalog=None, where=None, *, where = '' # __row is needed for locate_data; we add it if not already present - # and remove it afterwards only if the user requested specific + # and remove it afterwards only if the user requested specific # columns. keep_row tracks that. 
keep_row = ( - columns in (None, '*') or - isinstance(columns, str) and '__row' in columns + columns in (None, '*') + or isinstance(columns, str) and '__row' in columns ) if columns is None: columns = ', '.join(self._get_default_columns(catalog)) - + if '__row' not in columns and columns != '*': columns += ', __row' @@ -484,7 +484,7 @@ def query_region(self, position=None, catalog=None, radius=None, *, else: raise ValueError("Unrecognized spatial query type. Must be one" " of 'cone', 'box', 'polygon', or 'all-sky'.") - + table_or_query = self._query_execute( catalog=catalog, where=where, get_query_payload=get_query_payload, @@ -531,16 +531,15 @@ def query_object(self, object_name, mission, *, return self.query_region(pos, catalog=mission, spatial='cone', get_query_payload=get_query_payload) - def query_by_column(self, catalog, params, *, - get_query_payload=False, columns=None, - verbose=False, maxrec=None): + get_query_payload=False, columns=None, + verbose=False, maxrec=None): """Query the HEASARC TAP server using a constraints on the columns. - - This is a simple wrapper around + + This is a simple wrapper around `~astroquery.heasarc.HeasarcClass.query_tap` that constructs an ADQL query from a dictionary of parameters. - + Parameters ---------- catalog : str @@ -550,16 +549,16 @@ def query_by_column(self, catalog, params, *, A dictionary of column constraint parameters to include in the query. Each key-value pair will be translated into an ADQL condition. - For a range query, use a tuple of two values (min, max). - e.g. `{'flux': (1e-12, 1e-10)}` translates to + e.g. `{'flux': (1e-12, 1e-10)}` translates to `flux BETWEEN 1e-12 AND 1e-10`. - For list values, use a list of values. - e.g. `{'object_type': ['QSO', 'GALAXY']}` translates to + e.g. `{'object_type': ['QSO', 'GALAXY']}` translates to `object_type IN ('QSO', 'GALAXY')`. - For comparison queries, use a tuple of (operator, value), where operator is one of '=', '!=', '<', '>', '<=', '>='. e.g. 
`{'magnitude': ('<', 15)}` translates to `magnitude < 15`. - For exact matches, use a single value (str, int, float). - e.g. `{'object_type': 'QSO'}` translates to + e.g. `{'object_type': 'QSO'}` translates to `object_type = 'QSO'`. The keys should correspond to valid column names in the catalog. Use `list_columns` to see the available columns. @@ -584,15 +583,15 @@ def query_by_column(self, catalog, params, *, for key, value in params.items(): if isinstance(value, tuple): if ( - len(value) == 2 and - all(isinstance(v, (int, float)) for v in value) + len(value) == 2 + and all(isinstance(v, (int, float)) for v in value) ): conditions.append( f"{key} BETWEEN {value[0]} AND {value[1]}" ) elif ( - len(value) == 2 and - value[0] in (">", "<", ">=", "<=") + len(value) == 2 + and value[0] in (">", "<", ">=", "<=") ): conditions.append(f"{key} {value[0]} {value[1]}") elif isinstance(value, list): @@ -613,8 +612,6 @@ def query_by_column(self, catalog, params, *, where = "" else: where = "WHERE " + (" AND ".join(conditions)) - - table_or_query = self._query_execute( catalog=catalog, where=where, diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index 51714d6731..aaec5ec390 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -53,14 +53,16 @@ def __init__(self, desc, cols=[]): 'name-2': vTable('description-2 chandra', cols), 'TAPname': None } - + def search(self, query, language='ADQL', maxrec=1000, uploads=None): return MockResult() + class MockResult: def to_table(self): return Table({'value': ['1.5', '1.2', '-0.3']}) + @pytest.fixture def mock_tap(): with patch('astroquery.heasarc.core.HeasarcClass.tap', new_callable=PropertyMock) as tap: @@ -174,6 +176,7 @@ def test_query_region_polygon(polygon): "10.1,10.1,10.0,10.1,10.0,10.0))=1" ) + def test_query_region_polygon_no_unit(): # position is not used for polygon poly = [ @@ -187,8 +190,8 @@ def 
test_query_region_polygon_no_unit(): spatial="polygon", polygon=poly, columns="*", - get_query_payload=True, - ) + get_query_payload=True, + ) assert query == ( "SELECT * FROM suzamaster " @@ -196,6 +199,7 @@ def test_query_region_polygon_no_unit(): "10.1,10.1,10.0,10.1,10.0,10.0))=1" ) + def test_query_allsky(): query1 = Heasarc.query_region( catalog="suzamaster", spatial="all-sky", columns="*", @@ -224,15 +228,14 @@ def test_no_catalog(): with pytest.raises(InvalidQueryError): # OBJ_LIST[0] and radius added to avoid a remote call Heasarc.query_region( - OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin" - ) + OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin") + def test_by_columns_no_catalog(): with pytest.raises(InvalidQueryError): # OBJ_LIST[0] and radius added to avoid a remote call Heasarc.query_by_column( - None, params={"flux": (1e-12, 1e-10)} - ) + None, params={"flux": (1e-12, 1e-10)}) def test__query_execute_no_catalog(): @@ -255,6 +258,7 @@ def test_by_columns_no_params(): ) assert query == "SELECT * FROM suzamaster" + def test_by_columns_limit(): query = Heasarc.query_by_column( catalog="suzamaster", @@ -266,7 +270,6 @@ def test_by_columns_limit(): assert query == "SELECT TOP 2000000 * FROM suzamaster" - def test_by_columns_range(): query = Heasarc.query_by_column( catalog="suzamaster", @@ -337,7 +340,7 @@ def test__query_execute_none_where(): assert query == ("SELECT * FROM suzamaster") -def test__query_execute_none_where(): +def test__query_execute_extra_where(): query = Heasarc._query_execute( catalog="suzamaster", where=" EXTRA", @@ -362,6 +365,7 @@ def test__query_execute_add_row(): ) assert query1 == query2 == ("SELECT col1, col2, __row FROM suzamaster") + def test__query_execute_extra_space(): query1 = Heasarc._query_execute( catalog="suzamaster", @@ -378,7 +382,8 @@ def test__query_execute_extra_space(): ) assert query1 == query2 == ("SELECT * FROM suzamaster WHERE EXTRA") -def test_query_execute_columns(mock_tap, 
mock_default_cols): + +def test_query_execute_columns1(mock_tap, mock_default_cols): query = Heasarc._query_execute( catalog="suzamaster", where="WHERE EXTRA", @@ -387,8 +392,9 @@ def test_query_execute_columns(mock_tap, mock_default_cols): ) assert query == ("SELECT col-3, col-2, __row FROM suzamaster WHERE EXTRA") -def test_query_execute_columns(mock_tap, mock_default_cols): - res = Heasarc._query_execute( + +def test_query_execute_columns2(mock_tap, mock_default_cols): + _ = Heasarc._query_execute( catalog="suzamaster", where="WHERE EXTRA", columns='*' @@ -397,6 +403,7 @@ def test_query_execute_columns(mock_tap, mock_default_cols): # reset last result to avoid interference with other tests Heasarc._last_result = None + def test_tap_def(): # Use a new HeasarcClass object Heasarc = HeasarcClass() @@ -460,6 +467,7 @@ def test__list_catalogs_keywords(mock_tap): lab for lab, desc in MockTap().tables.items() if 'TAP' not in lab and 'xmm' in desc.description.lower() ] + def test__list_columns__missing_table(mock_tap): with pytest.raises(ValueError, match="not available as a public catalog"): Heasarc.list_columns(catalog_name='missing-table') From 4ef56c4c8e23c89e689ca17edc8dea54e40c0db1 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 12:01:04 -0400 Subject: [PATCH 09/22] fix changelog --- CHANGES.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index cd0b36c8d1..480a663c06 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -72,10 +72,9 @@ Service fixes and enhancements heasarc ^^^^^^^ - -- Add ``query_by_column`` to allow querying of different catalog columns.The user passes a dict that is parsed into a TAP WHERE statements. -- Add support for uploading tables when using TAP directly through ``query_tap``. -- Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. +- Add ``query_by_column`` to allow querying of different catalog columns. 
[#3403] +- Add support for uploading tables when using TAP directly through ``query_tap``. [#3403] +- Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. [#3403] alma ^^^^ From cfb0884d50c1af8cc4036b07b3d7546d683d1f21 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 12:08:03 -0400 Subject: [PATCH 10/22] fix docstring --- astroquery/heasarc/core.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 8bf006372e..8282bf5cc5 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -549,17 +549,17 @@ def query_by_column(self, catalog, params, *, A dictionary of column constraint parameters to include in the query. Each key-value pair will be translated into an ADQL condition. - For a range query, use a tuple of two values (min, max). - e.g. `{'flux': (1e-12, 1e-10)}` translates to - `flux BETWEEN 1e-12 AND 1e-10`. + e.g. ``{'flux': (1e-12, 1e-10)}`` translates to + ``flux BETWEEN 1e-12 AND 1e-10``. - For list values, use a list of values. - e.g. `{'object_type': ['QSO', 'GALAXY']}` translates to - `object_type IN ('QSO', 'GALAXY')`. + e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to + ``object_type IN ('QSO', 'GALAXY')``. - For comparison queries, use a tuple of (operator, value), where operator is one of '=', '!=', '<', '>', '<=', '>='. - e.g. `{'magnitude': ('<', 15)}` translates to `magnitude < 15`. + e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``. - For exact matches, use a single value (str, int, float). - e.g. `{'object_type': 'QSO'}` translates to - `object_type = 'QSO'`. + e.g. ``{'object_type': 'QSO'}`` translates to + ``object_type = 'QSO'``. The keys should correspond to valid column names in the catalog. Use `list_columns` to see the available columns. 
get_query_payload : bool, optional From fe1995c0078b8c44506964a7b5d8c4c80f539eec Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 12:12:36 -0400 Subject: [PATCH 11/22] another attempt to fix docstring --- astroquery/heasarc/core.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 8282bf5cc5..81c6dd9e91 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -549,17 +549,17 @@ def query_by_column(self, catalog, params, *, A dictionary of column constraint parameters to include in the query. Each key-value pair will be translated into an ADQL condition. - For a range query, use a tuple of two values (min, max). - e.g. ``{'flux': (1e-12, 1e-10)}`` translates to - ``flux BETWEEN 1e-12 AND 1e-10``. + e.g. ``{'flux': (1e-12, 1e-10)}`` translates to + ``flux BETWEEN 1e-12 AND 1e-10``. - For list values, use a list of values. - e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to - ``object_type IN ('QSO', 'GALAXY')``. + e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to + ``object_type IN ('QSO', 'GALAXY')``. - For comparison queries, use a tuple of (operator, value), - where operator is one of '=', '!=', '<', '>', '<=', '>='. - e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``. + where operator is one of '=', '!=', '<', '>', '<=', '>='. + e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``. - For exact matches, use a single value (str, int, float). - e.g. ``{'object_type': 'QSO'}`` translates to - ``object_type = 'QSO'``. + e.g. ``{'object_type': 'QSO'}`` translates to + ``object_type = 'QSO'``. The keys should correspond to valid column names in the catalog. Use `list_columns` to see the available columns. 
get_query_payload : bool, optional From 27d02779b301426bee766e961a33ef2a03cb7f7e Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 8 Sep 2025 17:48:31 -0400 Subject: [PATCH 12/22] add automatic host guess in download_data --- CHANGES.rst | 1 + astroquery/heasarc/core.py | 34 ++++++++++++++++++++++-- astroquery/heasarc/tests/test_heasarc.py | 23 ++++++++++++++++ docs/heasarc/heasarc.rst | 1 + 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 480a663c06..eb8680974e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -75,6 +75,7 @@ heasarc - Add ``query_by_column`` to allow querying of different catalog columns. [#3403] - Add support for uploading tables when using TAP directly through ``query_tap``. [#3403] - Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. [#3403] +- Add automatic guessing for the data host in ``download_data``. [#3403] alma ^^^^ diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 81c6dd9e91..ce658ddd50 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -750,6 +750,36 @@ def enable_cloud(self, provider='aws', profile=None): self.s3_client = self.s3_resource.meta.client + def _guess_host(self, host): + """Guess the host to use for downloading data + + Parameters + ---------- + host : str + The host provided by the user + + Returns + ------- + host : str + The guessed host + + """ + if host in ['heasarc', 'sciserver', 'aws']: + return host + elif host is not None: + raise ValueError( + 'host has to be one of heasarc, sciserver, aws or None') + + # host is None, so we guess + if os.environ['HOME'] == '/home/idies' and os.path.exists('/FTP/'): + # we are on idies, so we can use sciserver + return 'sciserver' + + for var in ['AWS_REGION', 'AWS_DEFAULT_REGION', 'AWS_ROLE_ARN']: + if var in os.environ: + return 'aws' + return 'heasarc' + def download_data(self, links, host='heasarc', location='.'): """Download data 
products in links with a choice of getting the data from either the heasarc server, sciserver, or the cloud in AWS. @@ -781,8 +811,8 @@ def download_data(self, links, host='heasarc', location='.'): if isinstance(links, Row): links = links.table[[links.index]] - if host not in ['heasarc', 'sciserver', 'aws']: - raise ValueError('host has to be one of heasarc, sciserver, aws') + # guess the host if not provided + host = self._guess_host(host) host_column = 'access_url' if host == 'heasarc' else host if host_column not in links.colnames: diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index aaec5ec390..e0eba65438 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -506,6 +506,29 @@ def test_locate_data_row(): Heasarc.locate_data(table[0:2], catalog_name="xray") +def test__guess_host_default(): + # Use a new HeasarcClass object + assert Heasarc._guess_host(host=None) == 'heasarc' + + +@pytest.mark.parametrize("host", ["heasarc", "sciserver", "aws"]) +def test__guess_host_know(host): + # Use a new HeasarcClass object + assert Heasarc._guess_host(host=host) == host + + +def test__guess_host_sciserver(monkeypatch): + monkeypatch.setenv("HOME", "/home/idies") + monkeypatch.setattr("os.path.exists", lambda path: path.startswith('/FTP')) + assert Heasarc._guess_host(host=None) == 'sciserver' + + +@pytest.mark.parametrize("var", ["AWS_REGION", "AWS_REGION_DEFAULT", "AWS_ROLE_ARN"]) +def test__guess_host_aws(monkeypatch, var): + monkeypatch.setenv("AWS_REGION", var) + assert Heasarc._guess_host(host=None) == 'aws' + + def test_download_data__empty(): with pytest.raises(ValueError, match="Input links table is empty"): Heasarc.download_data(Table()) diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index b1b21bcd1e..773cf1d411 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -247,6 +247,7 @@ You can specify where the data are to be downloaded 
using the ``location`` param To download the data, you can pass ``links`` table (or row) to `~astroquery.heasarc.HeasarcClass.download_data`, specifying from where you want the data to be fetched by specifying the ``host`` parameter. By default, +the function will try to guess the best host based on your environment. If it cannot guess, then the data is fetched from the main HEASARC servers. The recommendation is to use different hosts depending on where your code is running: * ``host='sciserver'``: Use this option if you running you analysis on Sciserver. Because From 8be084d58c49ae6078e5d3188fa0f5b94c6e1023 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Tue, 9 Sep 2025 10:18:35 -0400 Subject: [PATCH 13/22] fix _guess_host in windows --- astroquery/heasarc/core.py | 6 +++++- docs/heasarc/heasarc.rst | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index ce658ddd50..df58ccc44f 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -771,7 +771,11 @@ def _guess_host(self, host): 'host has to be one of heasarc, sciserver, aws or None') # host is None, so we guess - if os.environ['HOME'] == '/home/idies' and os.path.exists('/FTP/'): + if ( + 'HOME' in os.environ + and os.environ['HOME'] == '/home/idies' + and os.path.exists('/FTP/') + ): # we are on idies, so we can use sciserver return 'sciserver' diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index 773cf1d411..c0eb069bcc 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -198,9 +198,9 @@ catalog for all observations with exposure time greater than 190 ks. 
name obsid ra dec exposure deg deg s --------------- ----- --------- --------- -------- - Sgr A* 13842 266.41667 -29.00781 191760 + GW Transient 29852 -- -- 300000 + Sgr A* 13842 266.41667 -29.00781 191760 IGR J17480-2446 30481 267.02013 -24.78024 200000 - IGR J17480-2446 31425 267.02013 -24.78024 200000 Another example may be to search the ``xmmmaster`` for a observation in some time range: From d0ecb4521d95f70e0dbed15bd4b606f45146267f Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Fri, 26 Sep 2025 11:55:54 -0400 Subject: [PATCH 14/22] update the host info in download_data docstring --- astroquery/heasarc/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index df58ccc44f..1caa974091 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -784,7 +784,7 @@ def _guess_host(self, host): return 'aws' return 'heasarc' - def download_data(self, links, host='heasarc', location='.'): + def download_data(self, links, host=None, location='.'): """Download data products in links with a choice of getting the data from either the heasarc server, sciserver, or the cloud in AWS. @@ -793,8 +793,9 @@ def download_data(self, links, host='heasarc', location='.'): ---------- links : `astropy.table.Table` or `astropy.table.Row` The result from locate_data - host : str - The data host. The options are: heasarc (default), sciserver, aws. + host : str or None + The data host. The options are: None (default), heasarc, sciserver, aws. + If None, the host is guessed based on the environment. If host == 'sciserver', data is copied from the local mounted data drive. 
If host == 'aws', data is downloaded from Amazon S3 Open From 30d90a0ec7af78df45e33dae4015bf9aa476bccd Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 20 Oct 2025 16:08:12 -0400 Subject: [PATCH 15/22] rename query_by_columns to query_constraints --- astroquery/heasarc/core.py | 22 +++---- astroquery/heasarc/tests/test_heasarc.py | 58 +++++++++---------- .../heasarc/tests/test_heasarc_remote.py | 8 +-- docs/heasarc/heasarc.rst | 10 ++-- 4 files changed, 49 insertions(+), 49 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 1caa974091..4d25b2cc5c 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -295,7 +295,7 @@ def _query_execute(self, catalog=None, where=None, *, get_query_payload=False, columns=None, verbose=False, maxrec=None): """Queries some catalog using the HEASARC TAP server based on the - where condition and returns an `~astropy.table.Table`. + 'where' condition and returns an `~astropy.table.Table`. Parameters ---------- @@ -531,22 +531,22 @@ def query_object(self, object_name, mission, *, return self.query_region(pos, catalog=mission, spatial='cone', get_query_payload=get_query_payload) - def query_by_column(self, catalog, params, *, - get_query_payload=False, columns=None, - verbose=False, maxrec=None): + def query_constraints(self, catalog, column_filters, *, + get_query_payload=False, columns=None, + verbose=False, maxrec=None): """Query the HEASARC TAP server using a constraints on the columns. This is a simple wrapper around `~astroquery.heasarc.HeasarcClass.query_tap` - that constructs an ADQL query from a dictionary of parameters. + that constructs an ADQL query from a dictionary of filters. Parameters ---------- catalog : str The catalog to query. To list the available catalogs, use :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`. - params : dict - A dictionary of column constraint parameters to include in the query. 
+ column_filters : dict + A dictionary of column constraint filters to include in the query. Each key-value pair will be translated into an ADQL condition. - For a range query, use a tuple of two values (min, max). e.g. ``{'flux': (1e-12, 1e-10)}`` translates to @@ -576,11 +576,11 @@ def query_by_column(self, catalog, params, *, """ - if not isinstance(params, dict): + if not isinstance(column_filters, dict): raise ValueError('params must be a dictionary of key-value pairs') conditions = [] - for key, value in params.items(): + for key, value in column_filters.items(): if isinstance(value, tuple): if ( len(value) == 2 @@ -784,7 +784,7 @@ def _guess_host(self, host): return 'aws' return 'heasarc' - def download_data(self, links, host=None, location='.'): + def download_data(self, links, *, host=None, location='.'): """Download data products in links with a choice of getting the data from either the heasarc server, sciserver, or the cloud in AWS. @@ -792,7 +792,7 @@ def download_data(self, links, host=None, location='.'): Parameters ---------- links : `astropy.table.Table` or `astropy.table.Row` - The result from locate_data + A table (or row) of data links, typically the result of locate_data. host : str or None The data host. The options are: None (default), heasarc, sciserver, aws. If None, the host is guessed based on the environment. 
diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index e0eba65438..5831e81ca4 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -231,11 +231,11 @@ def test_no_catalog(): OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin") -def test_by_columns_no_catalog(): +def test_query_constraints_no_catalog(): with pytest.raises(InvalidQueryError): # OBJ_LIST[0] and radius added to avoid a remote call - Heasarc.query_by_column( - None, params={"flux": (1e-12, 1e-10)}) + Heasarc.query_constraints( + None, column_filters={"flux": (1e-12, 1e-10)}) def test__query_execute_no_catalog(): @@ -244,25 +244,25 @@ def test__query_execute_no_catalog(): Heasarc._query_execute(None) -def test_by_columns_none_params(): +def test_query_constraints_none_params(): with pytest.raises(ValueError): - Heasarc.query_by_column('testcatalog', params=None) + Heasarc.query_constraints('testcatalog', column_filters=None) -def test_by_columns_no_params(): - query = Heasarc.query_by_column( +def test_query_constraints_no_params(): + query = Heasarc.query_constraints( catalog="suzamaster", - params={}, + column_filters={}, columns="*", get_query_payload=True, ) assert query == "SELECT * FROM suzamaster" -def test_by_columns_limit(): - query = Heasarc.query_by_column( +def test_query_constraints_limit(): + query = Heasarc.query_constraints( catalog="suzamaster", - params={}, + column_filters={}, columns="*", get_query_payload=True, maxrec=500000, @@ -270,50 +270,50 @@ def test_by_columns_limit(): assert query == "SELECT TOP 2000000 * FROM suzamaster" -def test_by_columns_range(): - query = Heasarc.query_by_column( +def test_query_constraints_range(): + query = Heasarc.query_constraints( catalog="suzamaster", - params={"flux": (1e-12, 1e-10)}, + column_filters={"flux": (1e-12, 1e-10)}, columns="*", get_query_payload=True, ) assert query == "SELECT * FROM suzamaster WHERE flux BETWEEN 1e-12 AND 1e-10" 
-def test_by_columns_eq_float(): - query = Heasarc.query_by_column( +def test_query_constraints_eq_float(): + query = Heasarc.query_constraints( catalog="suzamaster", - params={"flux": 1.2}, + column_filters={"flux": 1.2}, columns="*", get_query_payload=True, ) assert query == "SELECT * FROM suzamaster WHERE flux = 1.2" -def test_by_columns_eq_str(): - query = Heasarc.query_by_column( +def test_query_constraints_eq_str(): + query = Heasarc.query_constraints( catalog="suzamaster", - params={"flux": "1.2"}, + column_filters={"flux": "1.2"}, columns="*", get_query_payload=True, ) assert query == "SELECT * FROM suzamaster WHERE flux = '1.2'" -def test_by_columns_cmp_float(): - query = Heasarc.query_by_column( +def test_query_constraints_cmp_float(): + query = Heasarc.query_constraints( catalog="suzamaster", - params={"flux": ('>', 1.2)}, + column_filters={"flux": ('>', 1.2)}, columns="*", get_query_payload=True, ) assert query == "SELECT * FROM suzamaster WHERE flux > 1.2" -def test_by_columns_cmp_float_2(): - query = Heasarc.query_by_column( +def test_query_constraints_cmp_float_2(): + query = Heasarc.query_constraints( catalog="suzamaster", - params={"flux": ('>', 1.2), "magnitude": ('<=', 15)}, + column_filters={"flux": ('>', 1.2), "magnitude": ('<=', 15)}, columns="*", get_query_payload=True, ) @@ -321,10 +321,10 @@ def test_by_columns_cmp_float_2(): "AND magnitude <= 15") -def test_by_columns_list(): - query = Heasarc.query_by_column( +def test_query_constraints_by_columns_list(): + query = Heasarc.query_constraints( catalog="suzamaster", - params={"flux": [1.2, 2.3, 3.4]}, + column_filters={"flux": [1.2, 2.3, 3.4]}, columns="*", get_query_payload=True, ) diff --git a/astroquery/heasarc/tests/test_heasarc_remote.py b/astroquery/heasarc/tests/test_heasarc_remote.py index 1d46770172..4d765c7583 100644 --- a/astroquery/heasarc/tests/test_heasarc_remote.py +++ b/astroquery/heasarc/tests/test_heasarc_remote.py @@ -188,7 +188,7 @@ def 
test_download_data__heasarc_file(self): filename = "00README" tab = Table({ "access_url": [ - ("https://heasarc.gsfc.nasa.gov/FTP/rxte/" + ("https://heasarc.gsfc.nasa.gov/FTP/xte/" f"data/archive/{filename}") ] }) @@ -199,7 +199,7 @@ def test_download_data__heasarc_file(self): def test_download_data__heasarc_folder(self): tab = Table({ "access_url": [ - ("https://heasarc.gsfc.nasa.gov/FTP/rxte/data/archive/" + ("https://heasarc.gsfc.nasa.gov/FTP/xte/data/archive/" "AO10/P91129/91129-01-68-00A/stdprod") ] }) @@ -213,7 +213,7 @@ def test_download_data__heasarc_folder(self): def test_download_data__s3_file(self): filename = "00README" tab = Table( - {"aws": [f"s3://nasa-heasarc/rxte/data/archive/{filename}"]} + {"aws": [f"s3://nasa-heasarc/xte/data/archive/{filename}"]} ) with tempfile.TemporaryDirectory() as tmpdir: Heasarc.enable_cloud(provider='aws', profile=None) @@ -225,7 +225,7 @@ def test_download_data__s3_folder(self, slash): tab = Table( { "aws": [ - (f"s3://nasa-heasarc/rxte/data/archive/AO10/" + (f"s3://nasa-heasarc/xte/data/archive/AO10/" f"P91129/91129-01-68-00A/stdprod{slash}") ] } diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index c0eb069bcc..e42a459cd5 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -183,7 +183,7 @@ following for instance will find master catalogs that have keywords 'nicer' or ' Other non-region queries ---------------------------------------- -In addition to `~astroquery.heasarc.HeasarcClass.query_region`, `~astroquery.heasarc.HeasarcClass.query_by_column` +In addition to `~astroquery.heasarc.HeasarcClass.query_region`, `~astroquery.heasarc.HeasarcClass.query_constraints` is also available. This method allows you to query a catalog by specifying various column constraints. For example, the following query searches the ``chanmaster`` catalog for all observations with exposure time greater than 190 ks. @@ -191,8 +191,8 @@ catalog for all observations with exposure time greater than 190 ks. .. 
doctest-remote-data:: >>> from astroquery.heasarc import Heasarc - >>> tab = Heasarc.query_by_column( - ... catalog='chanmaster', params={'exposure': ('>', '190000')} + >>> tab = Heasarc.query_constraints( + ... catalog='chanmaster', column_filters={'exposure': ('>', '190000')} ... ) >>> tab['name', 'obsid', 'ra', 'dec', 'exposure'][:3].pprint() name obsid ra dec exposure @@ -207,8 +207,8 @@ Another example may be to search the ``xmmmaster`` for a observation in some tim .. doctest-remote-data:: >>> from astroquery.heasarc import Heasarc - >>> tab = Heasarc.query_by_column( - ... catalog='xmmmaster', params={'time': (52300, 52310)} + >>> tab = Heasarc.query_constraints( + ... catalog='xmmmaster', column_filters={'time': (52300, 52310)} ... ) >>> tab['name', 'obsid', 'ra', 'dec', 'time', 'duration'][:3].pprint() name obsid ra dec time duration From 69d37f247f675ce084dd31a314f038f2d7ba221a Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 20 Oct 2025 16:13:59 -0400 Subject: [PATCH 16/22] remove the maxrec fix for now --- astroquery/heasarc/core.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 4d25b2cc5c..baaa89c2ae 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -351,10 +351,6 @@ def _query_execute(self, catalog=None, where=None, *, where = ' ' + where.strip() adql = f'SELECT {columns} FROM {catalog}{where}' - # if maxrec is more than the server limit, we set a higher limit - if maxrec is not None and maxrec > 100000: - adql = adql.replace('SELECT ', f'SELECT TOP {maxrec*4} ') - if get_query_payload: return adql response = self.query_tap(query=adql, maxrec=maxrec) From 74eaa1567fdb0498166b822d59e0150f1020c1d9 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Mon, 20 Oct 2025 16:18:10 -0400 Subject: [PATCH 17/22] move the changelog entries to 0.4.12 --- CHANGES.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst 
index eb8680974e..42db0ece00 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,6 +14,13 @@ API changes Service fixes and enhancements ------------------------------ +heasarc +^^^^^^^ +- Add ``query_constraints`` to allow querying of different catalog columns. [#3403] +- Add support for uploading tables when using TAP directly through ``query_tap``. [#3403] +- Add automatic guessing for the data host in ``download_data``. [#3403] + + esa.hubble ^^^^^^^^^^ @@ -70,13 +77,6 @@ mast Service fixes and enhancements ------------------------------ -heasarc -^^^^^^^ -- Add ``query_by_column`` to allow querying of different catalog columns. [#3403] -- Add support for uploading tables when using TAP directly through ``query_tap``. [#3403] -- Improve how maxrec works. If it is bigger than the default server limit, add a TOP statement. [#3403] -- Add automatic guessing for the data host in ``download_data``. [#3403] - alma ^^^^ From 9366c3cda45060bcb6bc976388067e881750d37d Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Tue, 21 Oct 2025 16:13:14 -0400 Subject: [PATCH 18/22] move column_filter to query_region and remove query_constraints. Update tests and docs accordingly --- astroquery/heasarc/core.py | 175 +++++++++++------------ astroquery/heasarc/tests/test_heasarc.py | 114 ++++++++++----- docs/heasarc/heasarc.rst | 37 ++++- 3 files changed, 195 insertions(+), 131 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index baaa89c2ae..46d44ea8b6 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -364,6 +364,55 @@ def _query_execute(self, catalog=None, where=None, *, table.remove_column('__row') return table + def _parse_constraints(self, column_filters): + """Convert constraints dictionary to ADQL WHERE clause + + Parameters + ---------- + column_filters : dict + A dictionary of column constraint filters to include in the query. + Each key-value pair will be translated into an ADQL condition. 
+ See `query_region` for details. + + Returns + ------- + conditions : list + a list of ADQL conditions as str + + """ + conditions = [] + if column_filters is None: + return conditions + for key, value in column_filters.items(): + if isinstance(value, tuple): + if ( + len(value) == 2 + and all(isinstance(v, (int, float)) for v in value) + ): + conditions.append( + f"{key} BETWEEN {value[0]} AND {value[1]}" + ) + elif ( + len(value) == 2 + and value[0] in (">", "<", ">=", "<=") + ): + conditions.append(f"{key} {value[0]} {value[1]}") + elif isinstance(value, list): + # handle list values: key IN (...) + formatted = [] + for v in value: + if isinstance(v, str): + formatted.append(f"'{v}'") + else: + formatted.append(str(v)) + conditions.append(f"{key} IN ({', '.join(formatted)})") + else: + conditions.append( + f"{key} = '{value}'" + if isinstance(value, str) else f"{key} = {value}" + ) + return conditions + @deprecated_renamed_argument( ('mission', 'fields', 'resultmax', 'entry', 'coordsys', 'equinox', 'displaymode', 'action', 'sortvar', 'cache'), @@ -374,8 +423,8 @@ def _query_execute(self, catalog=None, where=None, *, True, True, True, False) ) def query_region(self, position=None, catalog=None, radius=None, *, - spatial='cone', width=None, polygon=None, add_offset=False, - get_query_payload=False, columns=None, cache=False, + spatial='cone', width=None, polygon=None, column_filters=None, + add_offset=False, get_query_payload=False, columns=None, cache=False, verbose=False, maxrec=None, **kwargs): """Queries the HEASARC TAP server around a coordinate and returns a @@ -411,6 +460,23 @@ def query_region(self, position=None, catalog=None, radius=None, *, outlining the polygon to search in. It can also be a list of `astropy.coordinates` object or strings that can be parsed by `astropy.coordinates.ICRS`. + column_filters : dict + A dictionary of column constraint filters to include in the query. + Each key-value pair will be translated into an ADQL condition. 
+ - For a range query, use a tuple of two values (min, max). + e.g. ``{'flux': (1e-12, 1e-10)}`` translates to + ``flux BETWEEN 1e-12 AND 1e-10``. + - For list values, use a list of values. + e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to + ``object_type IN ('QSO', 'GALAXY')``. + - For comparison queries, use a tuple of (operator, value), + where operator is one of '=', '!=', '<', '>', '<=', '>='. + e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``. + - For exact matches, use a single value (str, int, float). + e.g. ``{'object_type': 'QSO'}`` translates to + ``object_type = 'QSO'``. + The keys should correspond to valid column names in the catalog. + Use `list_columns` to see the available columns. add_offset: bool If True and spatial=='cone', add a search_offset column that indicates the separation (in arcmin) between the requested @@ -457,6 +523,11 @@ def query_region(self, position=None, catalog=None, radius=None, *, where = ("WHERE CONTAINS(POINT('ICRS',ra,dec)," f"POLYGON('ICRS',{','.join(coords_str)}))=1") else: + if position is None: + raise InvalidQueryError( + "position is required to for spatial='cone' (default). " + "Use spatial='all-sky' For all-sky searches." + ) coords_icrs = parse_coordinates(position).icrs ra, dec = coords_icrs.ra.deg, coords_icrs.dec.deg @@ -481,6 +552,16 @@ def query_region(self, position=None, catalog=None, radius=None, *, raise ValueError("Unrecognized spatial query type. 
Must be one" " of 'cone', 'box', 'polygon', or 'all-sky'.") + # handle column filters + if column_filters is not None: + conditions = self._parse_constraints(column_filters) + if len(conditions) > 0: + constraints_str = ' AND '.join(conditions) + if where == '': + where = 'WHERE ' + constraints_str + else: + where += ' AND ' + constraints_str + table_or_query = self._query_execute( catalog=catalog, where=where, get_query_payload=get_query_payload, @@ -527,96 +608,6 @@ def query_object(self, object_name, mission, *, return self.query_region(pos, catalog=mission, spatial='cone', get_query_payload=get_query_payload) - def query_constraints(self, catalog, column_filters, *, - get_query_payload=False, columns=None, - verbose=False, maxrec=None): - """Query the HEASARC TAP server using a constraints on the columns. - - This is a simple wrapper around - `~astroquery.heasarc.HeasarcClass.query_tap` - that constructs an ADQL query from a dictionary of filters. - - Parameters - ---------- - catalog : str - The catalog to query. To list the available catalogs, use - :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`. - column_filters : dict - A dictionary of column constraint filters to include in the query. - Each key-value pair will be translated into an ADQL condition. - - For a range query, use a tuple of two values (min, max). - e.g. ``{'flux': (1e-12, 1e-10)}`` translates to - ``flux BETWEEN 1e-12 AND 1e-10``. - - For list values, use a list of values. - e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to - ``object_type IN ('QSO', 'GALAXY')``. - - For comparison queries, use a tuple of (operator, value), - where operator is one of '=', '!=', '<', '>', '<=', '>='. - e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``. - - For exact matches, use a single value (str, int, float). - e.g. ``{'object_type': 'QSO'}`` translates to - ``object_type = 'QSO'``. - The keys should correspond to valid column names in the catalog. 
- Use `list_columns` to see the available columns. - get_query_payload : bool, optional - If `True` then returns the generated ADQL query as str. - Defaults to `False`. - columns : str, optional - Target column list with value separated by a comma(,). - Use * for all the columns. The default is to return a subset - of the columns that are generally the most useful. - verbose : bool, optional - If False, suppress vo warnings. - maxrec : int, optional - Maximum number of records - - """ - - if not isinstance(column_filters, dict): - raise ValueError('params must be a dictionary of key-value pairs') - - conditions = [] - for key, value in column_filters.items(): - if isinstance(value, tuple): - if ( - len(value) == 2 - and all(isinstance(v, (int, float)) for v in value) - ): - conditions.append( - f"{key} BETWEEN {value[0]} AND {value[1]}" - ) - elif ( - len(value) == 2 - and value[0] in (">", "<", ">=", "<=") - ): - conditions.append(f"{key} {value[0]} {value[1]}") - elif isinstance(value, list): - # handle list values: key IN (...) - formatted = [] - for v in value: - if isinstance(v, str): - formatted.append(f"'{v}'") - else: - formatted.append(str(v)) - conditions.append(f"{key} IN ({', '.join(formatted)})") - else: - conditions.append( - f"{key} = '{value}'" - if isinstance(value, str) else f"{key} = {value}" - ) - if len(conditions) == 0: - where = "" - else: - where = "WHERE " + (" AND ".join(conditions)) - - table_or_query = self._query_execute( - catalog=catalog, where=where, - get_query_payload=get_query_payload, - columns=columns, verbose=verbose, - maxrec=maxrec - ) - return table_or_query - def locate_data(self, query_result=None, catalog_name=None): """Get links to data products Use vo/datalinks to query the data products for some query_results. 
diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index 5831e81ca4..0091190f06 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -224,18 +224,16 @@ def test_spatial_invalid(spatial): ) -def test_no_catalog(): +def test_spatial_cone_no_position(): with pytest.raises(InvalidQueryError): - # OBJ_LIST[0] and radius added to avoid a remote call - Heasarc.query_region( - OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin") + Heasarc.query_region(catalog="xmmmaster", columns="*", spatial="cone") -def test_query_constraints_no_catalog(): +def test_no_catalog(): with pytest.raises(InvalidQueryError): # OBJ_LIST[0] and radius added to avoid a remote call - Heasarc.query_constraints( - None, column_filters={"flux": (1e-12, 1e-10)}) + Heasarc.query_region( + OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin") def test__query_execute_no_catalog(): @@ -244,35 +242,56 @@ def test__query_execute_no_catalog(): Heasarc._query_execute(None) -def test_query_constraints_none_params(): - with pytest.raises(ValueError): - Heasarc.query_constraints('testcatalog', column_filters=None) +def test_parse_constraints_no_filter(): + assert Heasarc._parse_constraints(column_filters=None) == [] + assert Heasarc._parse_constraints(column_filters={}) == [] -def test_query_constraints_no_params(): - query = Heasarc.query_constraints( - catalog="suzamaster", - column_filters={}, - columns="*", - get_query_payload=True, - ) - assert query == "SELECT * FROM suzamaster" +def test_parse_constraints_range(): + constraints = Heasarc._parse_constraints(column_filters={"flux": (1e-12, 1e-10)}) + assert constraints == ["flux BETWEEN 1e-12 AND 1e-10"] + +def test_parse_constraints_eq_float(): + constraints = Heasarc._parse_constraints(column_filters={"flux": 1.2}) + assert constraints == ["flux = 1.2"] -def test_query_constraints_limit(): - query = Heasarc.query_constraints( + +def 
test_parse_constraints_eq_str(): + constraints = Heasarc._parse_constraints(column_filters={"flux": "1.2"}) + assert constraints == ["flux = '1.2'"] + + +def test_parse_constraints_cmp_float(): + constraints = Heasarc._parse_constraints(column_filters={"flux": ('>', 1.2)}) + assert constraints == ["flux > 1.2"] + + +def test_parse_constraints_cmp_float_2(): + constraints = Heasarc._parse_constraints(column_filters={"flux": ('>', 1.2), "magnitude": ('<=', 15)}) + assert constraints == ["flux > 1.2", "magnitude <= 15"] + + +def test_parse_constraints_list(): + constraints = Heasarc._parse_constraints(column_filters={"flux": [1.2, 2.3, 3.4]}) + assert constraints == ["flux IN (1.2, 2.3, 3.4)"] + + +def test_query_region_no_filter(): + query = Heasarc.query_region( catalog="suzamaster", + spatial="all-sky", column_filters={}, columns="*", get_query_payload=True, - maxrec=500000, ) - assert query == "SELECT TOP 2000000 * FROM suzamaster" + assert query == "SELECT * FROM suzamaster" -def test_query_constraints_range(): - query = Heasarc.query_constraints( +def test_query_region_filter_range(): + query = Heasarc.query_region( catalog="suzamaster", + spatial="all-sky", column_filters={"flux": (1e-12, 1e-10)}, columns="*", get_query_payload=True, @@ -280,9 +299,10 @@ def test_query_constraints_range(): assert query == "SELECT * FROM suzamaster WHERE flux BETWEEN 1e-12 AND 1e-10" -def test_query_constraints_eq_float(): - query = Heasarc.query_constraints( +def test_query_region_filter_eq_float(): + query = Heasarc.query_region( catalog="suzamaster", + spatial="all-sky", column_filters={"flux": 1.2}, columns="*", get_query_payload=True, @@ -290,9 +310,10 @@ def test_query_constraints_eq_float(): assert query == "SELECT * FROM suzamaster WHERE flux = 1.2" -def test_query_constraints_eq_str(): - query = Heasarc.query_constraints( +def test_query_region_filter_eq_str(): + query = Heasarc.query_region( catalog="suzamaster", + spatial="all-sky", column_filters={"flux": "1.2"}, 
columns="*", get_query_payload=True, @@ -300,9 +321,10 @@ def test_query_constraints_eq_str(): assert query == "SELECT * FROM suzamaster WHERE flux = '1.2'" -def test_query_constraints_cmp_float(): - query = Heasarc.query_constraints( +def test_query_region_filter_cmp_float(): + query = Heasarc.query_region( catalog="suzamaster", + spatial="all-sky", column_filters={"flux": ('>', 1.2)}, columns="*", get_query_payload=True, @@ -310,9 +332,10 @@ def test_query_constraints_cmp_float(): assert query == "SELECT * FROM suzamaster WHERE flux > 1.2" -def test_query_constraints_cmp_float_2(): - query = Heasarc.query_constraints( +def test_query_region_filter_cmp_float_2(): + query = Heasarc.query_region( catalog="suzamaster", + spatial="all-sky", column_filters={"flux": ('>', 1.2), "magnitude": ('<=', 15)}, columns="*", get_query_payload=True, @@ -321,9 +344,10 @@ def test_query_constraints_cmp_float_2(): "AND magnitude <= 15") -def test_query_constraints_by_columns_list(): - query = Heasarc.query_constraints( +def test_query_region_filter_list(): + query = Heasarc.query_region( catalog="suzamaster", + spatial="all-sky", column_filters={"flux": [1.2, 2.3, 3.4]}, columns="*", get_query_payload=True, @@ -331,6 +355,28 @@ def test_query_constraints_by_columns_list(): assert query == "SELECT * FROM suzamaster WHERE flux IN (1.2, 2.3, 3.4)" +@pytest.mark.parametrize("coordinates", OBJ_LIST) +def test_query_region_cone_with_filter(coordinates): + # use columns='*' to avoid remote call to obtain the default columns + query = Heasarc.query_region( + coordinates, + catalog="suzamaster", + spatial="cone", + radius=2 * u.arcmin, + columns="*", + get_query_payload=True, + column_filters={"flux": (1e-12, 1e-10)}, + ) + assert ("SELECT *") in query + assert ( + "FROM suzamaster WHERE CONTAINS(POINT('ICRS',ra,dec)," + "CIRCLE('ICRS',182.63" in query + ) + assert ",39.40" in query + assert ",0.0333" in query + assert "AND flux BETWEEN 1e-12 AND 1e-10" in query + + def 
test__query_execute_none_where(): query = Heasarc._query_execute( catalog="suzamaster", diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index e42a459cd5..9da0ce8480 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -95,6 +95,23 @@ The list of returned columns can also be given as a comma-separated string to If no columns are given, the call will return a set of default columns. If you want all the columns returned, use ``columns='*'`` +To do a full sky search, use ``spatial='all-sky'``: + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> tab = Heasarc.query_region(catalog='chanmaster', spatial='all-sky', + ... columns='name, obsid, ra, dec') + >>> tab[:5].pprint() + name obsid ra dec + deg deg + -------------------- ----- --------- --------- + ESO005-G004 21421 91.42333 -86.63194 + 1RXSJ200924.1-853911 10143 302.30417 -85.64633 + RE J0317-853 22326 49.31604 -85.54043 + ACO 4023 15124 354.93333 -85.17583 + GRB020321 3477 242.76000 -83.70000 + List Available Catalogs ----------------------- The collection of available catalogs can be obtained by calling the `~astroquery.heasarc.HeasarcClass.list_catalogs` @@ -181,17 +198,26 @@ following for instance will find master catalogs that have keywords 'nicer' or ' swiftmastr Swift Master Catalog -Other non-region queries +Adding Column Constraints ---------------------------------------- -In addition to `~astroquery.heasarc.HeasarcClass.query_region`, `~astroquery.heasarc.HeasarcClass.query_constraints` -is also available. This method allows you to query a catalog by specifying +In addition to region search in `~astroquery.heasarc.HeasarcClass.query_region`, +you can also pass other column constraints. This is done by passing a dictionary +to the ``column_filters`` parameter. The keys of the dictionary are the column names +and the values are the constraints. Examples include: +- ``{'flux': (1e-12, 1e-10)}`` translates to a flux range. 
+- ``{'exposure': ('>', 10000)}`` translates to exposure greater than 10000. +- ``{'instrument': ['ACIS', 'HRC']}`` translates to a value in a list. +- ``{'obsid': '12345'}`` translates to obsid equal to 12345. + +This allows you to query a catalog by specifying various column constraints. For example, the following query searches the ``chanmaster`` catalog for all observations with exposure time greater than 190 ks. .. doctest-remote-data:: >>> from astroquery.heasarc import Heasarc - >>> tab = Heasarc.query_constraints( + >>> tab = Heasarc.query_region( + ... spatial='all-sky', ... catalog='chanmaster', column_filters={'exposure': ('>', '190000')} ... ) >>> tab['name', 'obsid', 'ra', 'dec', 'exposure'][:3].pprint() @@ -207,7 +233,8 @@ Another example may be to search the ``xmmmaster`` for a observation in some tim .. doctest-remote-data:: >>> from astroquery.heasarc import Heasarc - >>> tab = Heasarc.query_constraints( + >>> tab = Heasarc.query_region( + ... spatial='all-sky', ... catalog='xmmmaster', column_filters={'time': (52300, 52310)} ... ) >>> tab['name', 'obsid', 'ra', 'dec', 'time', 'duration'][:3].pprint() From d58cad54eaa189323f8d03bfdc10985e516cdc26 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Wed, 22 Oct 2025 11:40:37 -0400 Subject: [PATCH 19/22] add extra check in locate_data; fix #3340 --- astroquery/heasarc/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 46d44ea8b6..cfa77dc3e2 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -653,6 +653,9 @@ def locate_data(self, query_result=None, catalog_name=None): '__row to the requested columns') if catalog_name is None: + if not hasattr(self, '_last_catalog_name'): + raise ValueError('locate_data needs a catalog_name, and none ' + 'found from a previous search. 
Please provide one.') catalog_name = self._last_catalog_name if not ( isinstance(catalog_name, str) From 6abfc1929d9f327ef76b549cb7233145f002c663 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Wed, 22 Oct 2025 11:46:59 -0400 Subject: [PATCH 20/22] update datalink url to enable fast queries --- astroquery/heasarc/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index cfa77dc3e2..9c443c2d38 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -663,8 +663,8 @@ def locate_data(self, query_result=None, catalog_name=None): ): raise ValueError(f'Unknown catalog name: {catalog_name}') - # datalink url - dlink_url = f'{self.VO_URL}/datalink/{catalog_name}' + # datalink url; use sizefiles=false to speed up the response + dlink_url = f'{self.VO_URL}/datalink/{catalog_name}?sizefiles=false&' query = pyvo.dal.adhoc.DatalinkQuery( baseurl=dlink_url, id=query_result['__row'], From 8606f4f1f8786eeb6aac195ffbb80a8d291164b5 Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Wed, 22 Oct 2025 11:47:32 -0400 Subject: [PATCH 21/22] document add_offset in query_region; addresses #3315 --- docs/heasarc/heasarc.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index 9da0ce8480..dd6f362657 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -95,6 +95,10 @@ The list of returned columns can also be given as a comma-separated string to If no columns are given, the call will return a set of default columns. If you want all the columns returned, use ``columns='*'`` +To add a search offset column that gives the angular distance in arcminutes +between the query position and the positions in the catalog, +pass ``add_offset=True`` (cone searches only). + To do a full sky search, use ``spatial='all-sky'``: .. 
doctest-remote-data:: From 5c2ee142d02ca333a2a77dce67ac8dd58da1bb7d Mon Sep 17 00:00:00 2001 From: Abdu Zoghbi Date: Thu, 23 Oct 2025 08:10:47 -0400 Subject: [PATCH 22/22] default to all-sky search when column_filters is given and position is None --- astroquery/heasarc/core.py | 4 ++++ docs/heasarc/heasarc.rst | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index 9c443c2d38..5e2190acda 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -499,6 +499,10 @@ def query_region(self, position=None, catalog=None, radius=None, *, table : A `~astropy.table.Table` object. """ + # if we have column_filters and no position, assume all-sky search + if position is None and column_filters is not None: + spatial = 'all-sky' + if spatial.lower() == 'all-sky': where = '' elif spatial.lower() == 'polygon': diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index dd6f362657..5aa7a28b36 100644 --- a/docs/heasarc/heasarc.rst +++ b/docs/heasarc/heasarc.rst @@ -217,11 +217,13 @@ This allows you to query a catalog by specifying various column constraints. For example, the following query searches the ``chanmaster`` catalog for all observations with exposure time greater than 190 ks. +Note that when column filters are given and no position is specified, +the search defaults to an all-sky search. + .. doctest-remote-data:: >>> from astroquery.heasarc import Heasarc >>> tab = Heasarc.query_region( - ... spatial='all-sky', ... catalog='chanmaster', column_filters={'exposure': ('>', '190000')} ... ) >>> tab['name', 'obsid', 'ra', 'dec', 'exposure'][:3].pprint() @@ -238,7 +240,6 @@ Another example may be to search the ``xmmmaster`` for a observation in some tim >>> from astroquery.heasarc import Heasarc >>> tab = Heasarc.query_region( - ... spatial='all-sky', ... catalog='xmmmaster', column_filters={'time': (52300, 52310)} ... 
) >>> tab['name', 'obsid', 'ra', 'dec', 'time', 'duration'][:3].pprint()