diff --git a/CHANGES.rst b/CHANGES.rst index 6b7a418315..42db0ece00 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,6 +14,13 @@ API changes Service fixes and enhancements ------------------------------ +heasarc +^^^^^^^ +- Add ``column_filters`` to ``query_region`` to allow filtering on different catalog columns. [#3403] +- Add support for uploading tables when using TAP directly through ``query_tap``. [#3403] +- Add automatic guessing for the data host in ``download_data``. [#3403] + + esa.hubble ^^^^^^^^^^ diff --git a/astroquery/heasarc/core.py b/astroquery/heasarc/core.py index eaa29809d5..5e2190acda 100644 --- a/astroquery/heasarc/core.py +++ b/astroquery/heasarc/core.py @@ -1,6 +1,4 @@ - import os - import shutil import requests import tarfile @@ -261,7 +259,7 @@ def query_mission_cols(self, mission, *, cache=True, cols = [col.upper() for col in cols['name'] if '__' not in col] return cols - def query_tap(self, query, *, maxrec=None): + def query_tap(self, query, *, maxrec=None, uploads=None): """ Send query to HEASARC's Xamin TAP using ADQL. Results in `~pyvo.dal.TAPResults` format. @@ -273,6 +271,10 @@ def query_tap(self, query, *, maxrec=None): ADQL query to be executed maxrec : int maximum number of records to return + uploads : dict + a mapping from table names used in the query to file like + objects containing a votable + (e.g. a file path or `~astropy.table.Table`). Returns ------- @@ -286,7 +288,130 @@ def query_tap(self, query, *, maxrec=None): """ log.debug(f'TAP query: {query}') self._saved_query = query - return self.tap.search(query, language='ADQL', maxrec=maxrec) + return self.tap.search( + query, language='ADQL', maxrec=maxrec, uploads=uploads) + + def _query_execute(self, catalog=None, where=None, *, + get_query_payload=False, columns=None, + verbose=False, maxrec=None): + """Queries some catalog using the HEASARC TAP server based on the + 'where' condition and returns an `~astropy.table.Table`. 
+ + Parameters + ---------- + catalog : str + The catalog to query. To list the available catalogs, use + :meth:`~astroquery.heasarc.HeasarcClass.list_catalogs`. + where : str + The WHERE condition to be used in the query. It must + include the 'WHERE' keyword or be empty. + get_query_payload : bool, optional + If `True` then returns the generated ADQL query as str. + Defaults to `False`. + columns : str, optional + Target column list with value separated by a comma(,). + Use * for all the columns. The default is to return a subset + of the columns that are generally the most useful. + verbose : bool, optional + If False, suppress vo warnings. + maxrec : int, optional + Maximum number of records + + + Returns + ------- + table : A `~astropy.table.Table` object. + """ + # if verbose is False then suppress any VOTable related warnings + if not verbose: + commons.suppress_vo_warnings() + + if catalog is None: + raise InvalidQueryError("catalog name is required! Use 'xray' " + "to search the master X-ray catalog") + + if where is None: + where = '' + + # __row is needed for locate_data; we add it if not already present + # and remove it afterwards only if the user requested specific + # columns. keep_row tracks that. 
+ keep_row = ( + columns in (None, '*') + or isinstance(columns, str) and '__row' in columns + ) + + if columns is None: + columns = ', '.join(self._get_default_columns(catalog)) + + if '__row' not in columns and columns != '*': + columns += ', __row' + + if where != '' and not where.startswith(' '): + where = ' ' + where.strip() + adql = f'SELECT {columns} FROM {catalog}{where}' + + if get_query_payload: + return adql + response = self.query_tap(query=adql, maxrec=maxrec) + + # save the response in case we want to use it later + self._last_result = response + self._last_catalog_name = catalog + + table = response.to_table() + if not keep_row and '__row' in table.colnames: + table.remove_column('__row') + return table + + def _parse_constraints(self, column_filters): + """Convert constraints dictionary to ADQL WHERE clause + + Parameters + ---------- + column_filters : dict + A dictionary of column constraint filters to include in the query. + Each key-value pair will be translated into an ADQL condition. + See `query_region` for details. + + Returns + ------- + conditions : list + a list of ADQL conditions as str + + """ + conditions = [] + if column_filters is None: + return conditions + for key, value in column_filters.items(): + if isinstance(value, tuple): + if ( + len(value) == 2 + and all(isinstance(v, (int, float)) for v in value) + ): + conditions.append( + f"{key} BETWEEN {value[0]} AND {value[1]}" + ) + elif ( + len(value) == 2 + and value[0] in ("=", "!=", ">", "<", ">=", "<=") + ): + conditions.append(f"{key} {value[0]} {value[1]}") + elif isinstance(value, list): + # handle list values: key IN (...) 
+ formatted = [] + for v in value: + if isinstance(v, str): + formatted.append(f"'{v}'") + else: + formatted.append(str(v)) + conditions.append(f"{key} IN ({', '.join(formatted)})") + else: + conditions.append( + f"{key} = '{value}'" + if isinstance(value, str) else f"{key} = {value}" + ) + return conditions @deprecated_renamed_argument( ('mission', 'fields', 'resultmax', 'entry', 'coordsys', 'equinox', @@ -298,8 +423,8 @@ def query_tap(self, query, *, maxrec=None): True, True, True, False) ) def query_region(self, position=None, catalog=None, radius=None, *, - spatial='cone', width=None, polygon=None, add_offset=False, - get_query_payload=False, columns=None, cache=False, + spatial='cone', width=None, polygon=None, column_filters=None, + add_offset=False, get_query_payload=False, columns=None, cache=False, verbose=False, maxrec=None, **kwargs): """Queries the HEASARC TAP server around a coordinate and returns a @@ -335,6 +460,23 @@ def query_region(self, position=None, catalog=None, radius=None, *, outlining the polygon to search in. It can also be a list of `astropy.coordinates` object or strings that can be parsed by `astropy.coordinates.ICRS`. + column_filters : dict + A dictionary of column constraint filters to include in the query. + Each key-value pair will be translated into an ADQL condition. + - For a range query, use a tuple of two values (min, max). + e.g. ``{'flux': (1e-12, 1e-10)}`` translates to + ``flux BETWEEN 1e-12 AND 1e-10``. + - For list values, use a list of values. + e.g. ``{'object_type': ['QSO', 'GALAXY']}`` translates to + ``object_type IN ('QSO', 'GALAXY')``. + - For comparison queries, use a tuple of (operator, value), + where operator is one of '=', '!=', '<', '>', '<=', '>='. + e.g. ``{'magnitude': ('<', 15)}`` translates to ``magnitude < 15``. + - For exact matches, use a single value (str, int, float). + e.g. ``{'object_type': 'QSO'}`` translates to + ``object_type = 'QSO'``. 
+ The keys should correspond to valid column names in the catalog. + Use `list_columns` to see the available columns. add_offset: bool If True and spatial=='cone', add a search_offset column that indicates the separation (in arcmin) between the requested @@ -356,18 +498,10 @@ def query_region(self, position=None, catalog=None, radius=None, *, ------- table : A `~astropy.table.Table` object. """ - # if verbose is False then suppress any VOTable related warnings - if not verbose: - commons.suppress_vo_warnings() - if catalog is None: - raise InvalidQueryError("catalog name is required! Use 'xray' " - "to search the master X-ray catalog") - - if columns is None: - columns = ', '.join(self._get_default_columns(catalog)) - if '__row' not in columns: - columns += ',__row' + # column_filters without a position means all-sky; polygon searches never use position, so keep them + if position is None and column_filters is not None and spatial.lower() != 'polygon': + spatial = 'all-sky' if spatial.lower() == 'all-sky': where = '' @@ -390,9 +524,14 @@ def query_region(self, position=None, catalog=None, radius=None, *, coords_str = [f'{coord.ra.deg},{coord.dec.deg}' for coord in coords_list] - where = (" WHERE CONTAINS(POINT('ICRS',ra,dec)," + where = ("WHERE CONTAINS(POINT('ICRS',ra,dec)," f"POLYGON('ICRS',{','.join(coords_str)}))=1") else: + if position is None: + raise InvalidQueryError( + "position is required for spatial='cone' (default). " + "Use spatial='all-sky' for all-sky searches." 
+ ) coords_icrs = parse_coordinates(position).icrs ra, dec = coords_icrs.ra.deg, coords_icrs.dec.deg @@ -401,7 +540,7 @@ def query_region(self, position=None, catalog=None, radius=None, *, radius = self.get_default_radius(catalog) elif isinstance(radius, str): radius = coordinates.Angle(radius) - where = (" WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE(" + where = ("WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE(" f"'ICRS',{ra},{dec},{radius.to(u.deg).value}))=1") # add search_offset for the case of cone if add_offset: @@ -410,24 +549,33 @@ def query_region(self, position=None, catalog=None, radius=None, *, elif spatial.lower() == 'box': if isinstance(width, str): width = coordinates.Angle(width) - where = (" WHERE CONTAINS(POINT('ICRS',ra,dec)," + where = ("WHERE CONTAINS(POINT('ICRS',ra,dec)," f"BOX('ICRS',{ra},{dec},{width.to(u.deg).value}," f"{width.to(u.deg).value}))=1") else: raise ValueError("Unrecognized spatial query type. Must be one" " of 'cone', 'box', 'polygon', or 'all-sky'.") - adql = f'SELECT {columns} FROM {catalog}{where}' - + # handle column filters + if column_filters is not None: + conditions = self._parse_constraints(column_filters) + if len(conditions) > 0: + constraints_str = ' AND '.join(conditions) + if where == '': + where = 'WHERE ' + constraints_str + else: + where += ' AND ' + constraints_str + + table_or_query = self._query_execute( + catalog=catalog, where=where, + get_query_payload=get_query_payload, + columns=columns, verbose=verbose, + maxrec=maxrec + ) if get_query_payload: - return adql - response = self.query_tap(query=adql, maxrec=maxrec) - - # save the response in case we want to use it later - self._last_result = response - self._last_catalog_name = catalog + return table_or_query + table = table_or_query - table = response.to_table() if add_offset: table['search_offset'].unit = u.arcmin if len(table) == 0: @@ -505,9 +653,13 @@ def locate_data(self, query_result=None, catalog_name=None): if '__row' not in query_result.colnames: 
raise ValueError('No __row column found in query_result. ' 'query_result needs to be the output of ' - 'query_region or a subset.') + 'query_region or a subset. try adding ' + '__row to the requested columns') if catalog_name is None: + if not hasattr(self, '_last_catalog_name'): + raise ValueError('locate_data needs a catalog_name, and none ' + 'found from a previous search. Please provide one.') catalog_name = self._last_catalog_name if not ( isinstance(catalog_name, str) @@ -515,8 +667,8 @@ def locate_data(self, query_result=None, catalog_name=None): ): raise ValueError(f'Unknown catalog name: {catalog_name}') - # datalink url - dlink_url = f'{self.VO_URL}/datalink/{catalog_name}' + # datalink url; use sizefiles=false to speed up the response + dlink_url = f'{self.VO_URL}/datalink/{catalog_name}?sizefiles=false&' query = pyvo.dal.adhoc.DatalinkQuery( baseurl=dlink_url, id=query_result['__row'], @@ -592,7 +744,41 @@ def enable_cloud(self, provider='aws', profile=None): self.s3_client = self.s3_resource.meta.client - def download_data(self, links, host='heasarc', location='.'): + def _guess_host(self, host): + """Guess the host to use for downloading data + + Parameters + ---------- + host : str + The host provided by the user + + Returns + ------- + host : str + The guessed host + + """ + if host in ['heasarc', 'sciserver', 'aws']: + return host + elif host is not None: + raise ValueError( + 'host has to be one of heasarc, sciserver, aws or None') + + # host is None, so we guess + if ( + 'HOME' in os.environ + and os.environ['HOME'] == '/home/idies' + and os.path.exists('/FTP/') + ): + # we are on idies, so we can use sciserver + return 'sciserver' + + for var in ['AWS_REGION', 'AWS_DEFAULT_REGION', 'AWS_ROLE_ARN']: + if var in os.environ: + return 'aws' + return 'heasarc' + + def download_data(self, links, *, host=None, location='.'): """Download data products in links with a choice of getting the data from either the heasarc server, sciserver, or the cloud in 
AWS. @@ -600,9 +786,10 @@ def download_data(self, links, host='heasarc', location='.'): Parameters ---------- links : `astropy.table.Table` or `astropy.table.Row` - The result from locate_data - host : str - The data host. The options are: heasarc (default), sciserver, aws. + A table (or row) of data links, typically the result of locate_data. + host : str or None + The data host. The options are: None (default), heasarc, sciserver, aws. + If None, the host is guessed based on the environment. If host == 'sciserver', data is copied from the local mounted data drive. If host == 'aws', data is downloaded from Amazon S3 Open @@ -623,8 +810,8 @@ def download_data(self, links, host='heasarc', location='.'): if isinstance(links, Row): links = links.table[[links.index]] - if host not in ['heasarc', 'sciserver', 'aws']: - raise ValueError('host has to be one of heasarc, sciserver, aws') + # guess the host if not provided + host = self._guess_host(host) host_column = 'access_url' if host == 'heasarc' else host if host_column not in links.colnames: diff --git a/astroquery/heasarc/tests/test_heasarc.py b/astroquery/heasarc/tests/test_heasarc.py index a2f51b5c67..0091190f06 100644 --- a/astroquery/heasarc/tests/test_heasarc.py +++ b/astroquery/heasarc/tests/test_heasarc.py @@ -54,6 +54,14 @@ def __init__(self, desc, cols=[]): 'TAPname': None } + def search(self, query, language='ADQL', maxrec=1000, uploads=None): + return MockResult() + + +class MockResult: + def to_table(self): + return Table({'value': ['1.5', '1.2', '-0.3']}) + @pytest.fixture def mock_tap(): @@ -92,7 +100,7 @@ def test_query_region_cone(coordinates, radius, offset): radius=radius, columns="*", get_query_payload=True, - add_offset=True, + add_offset=offset, ) # We don't fully float compare in this string, there are slight @@ -169,6 +177,29 @@ def test_query_region_polygon(polygon): ) +def test_query_region_polygon_no_unit(): + # position is not used for polygon + poly = [ + (10.1, 10.1), + (10.0, 10.1), + 
(10.0, 10.0), + ] + with pytest.warns(UserWarning, match="Polygon endpoints are being interpreted as"): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="polygon", + polygon=poly, + columns="*", + get_query_payload=True, + ) + + assert query == ( + "SELECT * FROM suzamaster " + "WHERE CONTAINS(POINT('ICRS',ra,dec),POLYGON('ICRS'," + "10.1,10.1,10.0,10.1,10.0,10.0))=1" + ) + + def test_query_allsky(): query1 = Heasarc.query_region( catalog="suzamaster", spatial="all-sky", columns="*", @@ -193,9 +224,230 @@ def test_spatial_invalid(spatial): ) +def test_spatial_cone_no_position(): + with pytest.raises(InvalidQueryError): + Heasarc.query_region(catalog="xmmmaster", columns="*", spatial="cone") + + def test_no_catalog(): with pytest.raises(InvalidQueryError): - Heasarc.query_region("m31", spatial="cone", columns="*") + # OBJ_LIST[0] and radius added to avoid a remote call + Heasarc.query_region( + OBJ_LIST[0], spatial="cone", columns="*", radius="2arcmin") + + +def test__query_execute_no_catalog(): + with pytest.raises(InvalidQueryError): + # OBJ_LIST[0] and radius added to avoid a remote call + Heasarc._query_execute(None) + + +def test_parse_constraints_no_filter(): + assert Heasarc._parse_constraints(column_filters=None) == [] + assert Heasarc._parse_constraints(column_filters={}) == [] + + +def test_parse_constraints_range(): + constraints = Heasarc._parse_constraints(column_filters={"flux": (1e-12, 1e-10)}) + assert constraints == ["flux BETWEEN 1e-12 AND 1e-10"] + + +def test_parse_constraints_eq_float(): + constraints = Heasarc._parse_constraints(column_filters={"flux": 1.2}) + assert constraints == ["flux = 1.2"] + + +def test_parse_constraints_eq_str(): + constraints = Heasarc._parse_constraints(column_filters={"flux": "1.2"}) + assert constraints == ["flux = '1.2'"] + + +def test_parse_constraints_cmp_float(): + constraints = Heasarc._parse_constraints(column_filters={"flux": ('>', 1.2)}) + assert constraints == ["flux > 1.2"] + + +def 
test_parse_constraints_cmp_float_2(): + constraints = Heasarc._parse_constraints(column_filters={"flux": ('>', 1.2), "magnitude": ('<=', 15)}) + assert constraints == ["flux > 1.2", "magnitude <= 15"] + + +def test_parse_constraints_list(): + constraints = Heasarc._parse_constraints(column_filters={"flux": [1.2, 2.3, 3.4]}) + assert constraints == ["flux IN (1.2, 2.3, 3.4)"] + + +def test_query_region_no_filter(): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="all-sky", + column_filters={}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster" + + +def test_query_region_filter_range(): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="all-sky", + column_filters={"flux": (1e-12, 1e-10)}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux BETWEEN 1e-12 AND 1e-10" + + +def test_query_region_filter_eq_float(): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="all-sky", + column_filters={"flux": 1.2}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux = 1.2" + + +def test_query_region_filter_eq_str(): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="all-sky", + column_filters={"flux": "1.2"}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux = '1.2'" + + +def test_query_region_filter_cmp_float(): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="all-sky", + column_filters={"flux": ('>', 1.2)}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux > 1.2" + + +def test_query_region_filter_cmp_float_2(): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="all-sky", + column_filters={"flux": ('>', 1.2), "magnitude": ('<=', 15)}, + columns="*", + get_query_payload=True, + ) + assert query == ("SELECT * FROM suzamaster WHERE flux > 1.2 " + "AND 
magnitude <= 15") + + +def test_query_region_filter_list(): + query = Heasarc.query_region( + catalog="suzamaster", + spatial="all-sky", + column_filters={"flux": [1.2, 2.3, 3.4]}, + columns="*", + get_query_payload=True, + ) + assert query == "SELECT * FROM suzamaster WHERE flux IN (1.2, 2.3, 3.4)" + + +@pytest.mark.parametrize("coordinates", OBJ_LIST) +def test_query_region_cone_with_filter(coordinates): + # use columns='*' to avoid remote call to obtain the default columns + query = Heasarc.query_region( + coordinates, + catalog="suzamaster", + spatial="cone", + radius=2 * u.arcmin, + columns="*", + get_query_payload=True, + column_filters={"flux": (1e-12, 1e-10)}, + ) + assert ("SELECT *") in query + assert ( + "FROM suzamaster WHERE CONTAINS(POINT('ICRS',ra,dec)," + "CIRCLE('ICRS',182.63" in query + ) + assert ",39.40" in query + assert ",0.0333" in query + assert "AND flux BETWEEN 1e-12 AND 1e-10" in query + + +def test__query_execute_none_where(): + query = Heasarc._query_execute( + catalog="suzamaster", + columns="*", + get_query_payload=True, + ) + assert query == ("SELECT * FROM suzamaster") + + +def test__query_execute_extra_where(): + query = Heasarc._query_execute( + catalog="suzamaster", + where=" EXTRA", + columns="*", + get_query_payload=True, + ) + assert query == ("SELECT * FROM suzamaster EXTRA") + + +def test__query_execute_add_row(): + query1 = Heasarc._query_execute( + catalog="suzamaster", + where="", + columns="col1, col2", + get_query_payload=True, + ) + query2 = Heasarc._query_execute( + catalog="suzamaster", + where=None, + columns="col1, col2", + get_query_payload=True, + ) + assert query1 == query2 == ("SELECT col1, col2, __row FROM suzamaster") + + +def test__query_execute_extra_space(): + query1 = Heasarc._query_execute( + catalog="suzamaster", + where="WHERE EXTRA", + columns="*", + get_query_payload=True, + ) + + query2 = Heasarc._query_execute( + catalog="suzamaster", + where=" WHERE EXTRA", + columns="*", + get_query_payload=True, 
+ ) + assert query1 == query2 == ("SELECT * FROM suzamaster WHERE EXTRA") + + +def test_query_execute_columns1(mock_tap, mock_default_cols): + query = Heasarc._query_execute( + catalog="suzamaster", + where="WHERE EXTRA", + columns=None, + get_query_payload=True, + ) + assert query == ("SELECT col-3, col-2, __row FROM suzamaster WHERE EXTRA") + + +def test_query_execute_columns2(mock_tap, mock_default_cols): + _ = Heasarc._query_execute( + catalog="suzamaster", + where="WHERE EXTRA", + columns='*' + ) + assert Heasarc._last_catalog_name == "suzamaster" + # reset last result to avoid interference with other tests + Heasarc._last_result = None def test_tap_def(): @@ -207,9 +459,6 @@ def test_tap_def(): def test_meta_def(): - class MockResult: - def to_table(self): - return Table({'value': ['1.5', '1.2', '-0.3']}) # Use a new HeasarcClass object Heasarc = HeasarcClass() assert Heasarc._meta_info is None @@ -258,6 +507,13 @@ def test_list_catalogs_keywords_list_non_str(): Heasarc.list_catalogs(keywords=['x-ray', 12]) +def test__list_catalogs_keywords(mock_tap): + catalogs = Heasarc.list_catalogs(keywords=['xmm']) + assert list(catalogs['name']) == [ + lab for lab, desc in MockTap().tables.items() if 'TAP' not in lab and 'xmm' in desc.description.lower() + ] + + def test__list_columns__missing_table(mock_tap): with pytest.raises(ValueError, match="not available as a public catalog"): Heasarc.list_columns(catalog_name='missing-table') @@ -296,6 +552,29 @@ def test_locate_data_row(): Heasarc.locate_data(table[0:2], catalog_name="xray") +def test__guess_host_default(): + # Use a new HeasarcClass object + assert Heasarc._guess_host(host=None) == 'heasarc' + + +@pytest.mark.parametrize("host", ["heasarc", "sciserver", "aws"]) +def test__guess_host_know(host): + # Use a new HeasarcClass object + assert Heasarc._guess_host(host=host) == host + + +def test__guess_host_sciserver(monkeypatch): + monkeypatch.setenv("HOME", "/home/idies") + monkeypatch.setattr("os.path.exists", 
lambda path: path.startswith('/FTP')) + assert Heasarc._guess_host(host=None) == 'sciserver' + + +@pytest.mark.parametrize("var", ["AWS_REGION", "AWS_DEFAULT_REGION", "AWS_ROLE_ARN"]) +def test__guess_host_aws(monkeypatch, var): + monkeypatch.setenv(var, "1") + assert Heasarc._guess_host(host=None) == 'aws' + + def test_download_data__empty(): with pytest.raises(ValueError, match="Input links table is empty"): Heasarc.download_data(Table()) diff --git a/astroquery/heasarc/tests/test_heasarc_remote.py b/astroquery/heasarc/tests/test_heasarc_remote.py index 1d46770172..4d765c7583 100644 --- a/astroquery/heasarc/tests/test_heasarc_remote.py +++ b/astroquery/heasarc/tests/test_heasarc_remote.py @@ -188,7 +188,7 @@ def test_download_data__heasarc_file(self): filename = "00README" tab = Table({ "access_url": [ - ("https://heasarc.gsfc.nasa.gov/FTP/rxte/" + ("https://heasarc.gsfc.nasa.gov/FTP/xte/" f"data/archive/{filename}") ] }) @@ -199,7 +199,7 @@ def test_download_data__heasarc_file(self): def test_download_data__heasarc_folder(self): tab = Table({ "access_url": [ - ("https://heasarc.gsfc.nasa.gov/FTP/rxte/data/archive/" + ("https://heasarc.gsfc.nasa.gov/FTP/xte/data/archive/" "AO10/P91129/91129-01-68-00A/stdprod") ] }) @@ -213,7 +213,7 @@ def test_download_data__heasarc_folder(self): def test_download_data__s3_file(self): filename = "00README" tab = Table( - {"aws": [f"s3://nasa-heasarc/rxte/data/archive/{filename}"]} + {"aws": [f"s3://nasa-heasarc/xte/data/archive/{filename}"]} ) with tempfile.TemporaryDirectory() as tmpdir: Heasarc.enable_cloud(provider='aws', profile=None) @@ -225,7 +225,7 @@ def test_download_data__s3_folder(self, slash): tab = Table( { "aws": [ - (f"s3://nasa-heasarc/rxte/data/archive/AO10/" + (f"s3://nasa-heasarc/xte/data/archive/AO10/" f"P91129/91129-01-68-00A/stdprod{slash}") ] } diff --git a/docs/heasarc/heasarc.rst b/docs/heasarc/heasarc.rst index 0812bf8477..5aa7a28b36 100644 --- a/docs/heasarc/heasarc.rst +++ 
b/docs/heasarc/heasarc.rst @@ -26,7 +26,7 @@ Query a Catalog The basic use case is one where we want to query a catalog from some position in the sky. In this example, we query the NuSTAR master catalog ``numaster`` for all observations of the AGN ``NGC 3783``. We use `~astropy.coordinates.SkyCoord` to obtain the coordinates -and then pass them to `~astroquery.heasarc.HeasarcClass.query_region`. In following, we +and then pass them to `~astroquery.heasarc.HeasarcClass.query_region`. In the following, we also select only columns with ``time > 0``. Zero values are typically used for observations that have been approved but not observed. @@ -95,11 +95,35 @@ The list of returned columns can also be given as a comma-separated string to If no columns are given, the call will return a set of default columns. If you want all the columns returned, use ``columns='*'`` +To add a search offset column that gives the angular distance in arcminutes +between the query position and the positions in the catalog, +use the ``add_offset=True``: + +To do a full sky search, use ``spatial='all-sky'``: + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> tab = Heasarc.query_region(catalog='chanmaster', spatial='all-sky', + ... columns='name, obsid, ra, dec') + >>> tab[:5].pprint() + name obsid ra dec + deg deg + -------------------- ----- --------- --------- + ESO005-G004 21421 91.42333 -86.63194 + 1RXSJ200924.1-853911 10143 302.30417 -85.64633 + RE J0317-853 22326 49.31604 -85.54043 + ACO 4023 15124 354.93333 -85.17583 + GRB020321 3477 242.76000 -83.70000 + List Available Catalogs ----------------------- The collection of available catalogs can be obtained by calling the `~astroquery.heasarc.HeasarcClass.list_catalogs` -method. In this example, we query the master catalogs only by passing ``master=True``. -which is ``False`` by default (i.e. return all catalogs). `~astroquery.heasarc.HeasarcClass.list_catalogs` returns an +method. 
In this example, we request the master catalogs only by passing ``master=True``. +Master catalogs are catalogs that contain one entry per observation, as opposed to +other catalogs that may record other information. There is typically one master catalog +per mission. The ``master`` parameter is a boolean flag, which is ``False`` by default +(i.e. return all catalogs). `~astroquery.heasarc.HeasarcClass.list_catalogs` returns an `~astropy.table.Table` with two columns containing the names and description of the available catalogs. @@ -111,8 +135,8 @@ catalogs. name description ---------- ------------------------------------------------------------- ascamaster ASCA Master Catalog + burcbmastr BurstCube Master Observation Catalog chanmaster Chandra Observations - cmbmaster LAMBDA Cosmic Microwave Background Experiments Master Catalog ... If you do not know the name of the catalog you are looking for, you can use the ``keywords`` @@ -138,7 +162,7 @@ are related to Chandra, you can do: cargm31cxo Carina Nebula Gum 31 Chandra X-Ray Point Source Catalog carinaclas Carina Nebula Chandra X-Ray Point Source Classes -If you are interested only finding the master catalogs, you can also set ``master`` to ``True``. +If you are interested in finding the master catalogs only, you can set ``master`` to ``True``. .. doctest-remote-data:: @@ -177,6 +201,58 @@ following for instance will find master catalogs that have keywords 'nicer' or ' nicermastr NICER Master Catalog swiftmastr Swift Master Catalog + +Adding Column Constraints +---------------------------------------- +In addition to region search in `~astroquery.heasarc.HeasarcClass.query_region`, +you can also pass other column constraints. This is done by passing a dictionary +to the ``column_filters`` parameter. The keys of the dictionary are the column names +and the values are the constraints. Examples include: +- ``{'flux': (1e-12, 1e-10)}`` translates to a flux range. 
+- ``{'exposure': ('>', 10000)}`` translates to exposure greater than 10000. +- ``{'instrument': ['ACIS', 'HRC']}`` translates to a value in a list. +- ``{'obsid': '12345'}`` translates to obsid equal to 12345. + +This allows you to query a catalog by specifying +various column constraints. For example, the following query searches the ``chanmaster`` +catalog for all observations with exposure time greater than 190 ks. + +Note that when column filters are given and no position is specified, +the search defaults to an all-sky search. + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> tab = Heasarc.query_region( + ... catalog='chanmaster', column_filters={'exposure': ('>', '190000')} + ... ) + >>> tab['name', 'obsid', 'ra', 'dec', 'exposure'][:3].pprint() + name obsid ra dec exposure + deg deg s + --------------- ----- --------- --------- -------- + GW Transient 29852 -- -- 300000 + Sgr A* 13842 266.41667 -29.00781 191760 + IGR J17480-2446 30481 267.02013 -24.78024 200000 + +Another example is to search the ``xmmmaster`` catalog for observations in some time range: + +.. doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> tab = Heasarc.query_region( + ... catalog='xmmmaster', column_filters={'time': (52300, 52310)} + ... ) + >>> tab['name', 'obsid', 'ra', 'dec', 'time', 'duration'][:3].pprint() + name obsid ra dec time duration + deg deg d s + ------------- ---------- -------- --------- ---------------- -------- + NGC 1316 0091770101 50.95833 -37.28333 52308.6872337963 60362 + NGC 1316 0091770201 50.67296 -37.20928 52308.642974537 3462 + Fei 16 offset 0154150101 28.64374 -6.86667 52305.2210416667 24619 + +To see the available columns that can be queried for a given catalog and their units, +use `~astroquery.heasarc.HeasarcClass.list_columns` (see below). 
+ Links to Data Products ---------------------- Once the query result is obtained, you can query any data products associated @@ -191,10 +267,10 @@ with those results. >>> tab = tab[tab['exposure'] > 0] >>> links = Heasarc.locate_data(tab[:2]) >>> links['access_url'].pprint() - access_url + access_url --------------------------------------------------------------------- - https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2018_08//1100120101/ - https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2018_08//1100120102/ + https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2025_01//7100120102/ + https://heasarc.gsfc.nasa.gov/FTP/nicer/data/obs/2025_01//7100120101/ The ``links`` table has three relevant columns: ``access_url``, ``sciserver`` and ``aws``. The first gives the url to the data from the main heasarc server. The second gives @@ -203,6 +279,7 @@ You can specify where the data are to be downloaded using the ``location`` param To download the data, you can pass ``links`` table (or row) to `~astroquery.heasarc.HeasarcClass.download_data`, specifying from where you want the data to be fetched by specifying the ``host`` parameter. By default, +the function will try to guess the best host based on your environment. If it cannot guess, then the data is fetched from the main HEASARC servers. The recommendation is to use different hosts depending on where your code is running: * ``host='sciserver'``: Use this option if you running you analysis on Sciserver. Because @@ -250,6 +327,44 @@ returns the constructed ADQL query. 121.92084 39.00417 UGC4229 0138951401 121.92099 39.00422 MRK 622 0852180501 +Table Uploads +----------------- +You can also upload a table of positions to be queried. The table can be an +`~astropy.table.Table` or a path to a file in VOtable format. The following example +shows how to use the upload feature to do a cross-match between the +``chanmaster`` catalog and a list of known source positions: + +.. 
doctest-remote-data:: + + >>> from astroquery.heasarc import Heasarc + >>> from astropy.table import Table + >>> sample = Table({ + ... 'ra': [1.58, 188.90], + ... 'dec': [20.20, -39.90] + ... }) + >>> query = """ + ... SELECT cat.name, cat.ra, cat.dec, cat.obsid + ... FROM chanmaster cat, tap_upload.mytable mt + ... WHERE 1=CONTAINS(POINT('ICRS', mt.ra, mt.dec), CIRCLE('ICRS',cat.ra, cat.dec, 0.1)) + ... """ + >>> result = Heasarc.query_tap(query, uploads={'mytable': sample}).to_table() + >>> result.pprint() + name ra dec obsid + deg deg + ----------- --------- --------- ----- + NGC 4507 188.90250 -39.90928 12292 + NGC 4507 188.90208 -39.90925 2150 + HR4796 189.00417 -39.86950 7414 + KUG0003+199 1.58134 20.20291 23709 + Mrk 335 1.58142 20.20295 23292 + Mrk 335 1.58142 20.20295 23297 + Mrk 335 1.58142 20.20295 23298 + Mrk 335 1.58142 20.20295 23299 + Mrk 335 1.58142 20.20295 23300 + Mrk 335 1.58142 20.20295 23301 + Mrk 335 1.58142 20.20295 23302 + + Complex Regions --------------- In addition to a cone search (some position and search radius), ```Heasarc.query_region``` accepts