diff --git a/intake_esm/_search.py b/intake_esm/_search.py index 8a7a60ca..1ca07364 100644 --- a/intake_esm/_search.py +++ b/intake_esm/_search.py @@ -45,7 +45,10 @@ def search( column_has_iterables = column in columns_with_iterables for value in values: if column_has_iterables: - mask = df[column].str.contains(value, regex=False) + try: + mask = df[column].str.contains(value, regex=False) + except AttributeError: + mask = df[column].apply(tuple).str.contains(value, regex=False) elif column_is_stringtype and is_pattern(value): mask = df[column].str.contains(value, regex=True, case=True, flags=0) elif pd.isna(value): diff --git a/tests/sample-catalogs/access-columns-with-iterables.csv.gz b/tests/sample-catalogs/access-columns-with-iterables.csv.gz new file mode 100644 index 00000000..737f18b7 Binary files /dev/null and b/tests/sample-catalogs/access-columns-with-iterables.csv.gz differ diff --git a/tests/sample-catalogs/access-columns-with-iterables.json b/tests/sample-catalogs/access-columns-with-iterables.json new file mode 100644 index 00000000..0a648007 --- /dev/null +++ b/tests/sample-catalogs/access-columns-with-iterables.json @@ -0,0 +1,81 @@ +{ + "esmcat_version": "0.0.1", + "attributes": [ + { + "column_name": "filename", + "vocabulary": "" + }, + { + "column_name": "file_id", + "vocabulary": "" + }, + { + "column_name": "path", + "vocabulary": "" + }, + { + "column_name": "filename_timestamp", + "vocabulary": "" + }, + { + "column_name": "frequency", + "vocabulary": "" + }, + { + "column_name": "start_date", + "vocabulary": "" + }, + { + "column_name": "end_date", + "vocabulary": "" + }, + { + "column_name": "variable", + "vocabulary": "" + }, + { + "column_name": "variable_long_name", + "vocabulary": "" + }, + { + "column_name": "variable_standard_name", + "vocabulary": "" + }, + { + "column_name": "variable_cell_methods", + "vocabulary": "" + }, + { + "column_name": "variable_units", + "vocabulary": "" + }, + { + "column_name": "realm", + "vocabulary": "" + } + ], + "assets": { + "column_name": "path", + "format": "netcdf", + "format_column_name": null + }, + "aggregation_control": { + "variable_column_name": "variable", + "groupby_attrs": ["file_id", "frequency"], + "aggregations": [ + { + "type": "join_existing", + "attribute_name": "start_date", + "options": { + "dim": "time", + "combine": "by_coords" + } + } + ] + }, + "id": "01deg_jra55v13_ryf9091", + "description": "0.1 degree ACCESS-OM2 global model configuration with JRA55-do v1.3 RYF9091 repeat year forcing (May 1990 to Apr 1991)", + "title": null, + "last_updated": "2025-03-04T01:25:35Z", + "catalog_file": "access-columns-with-iterables.csv.gz" +} diff --git a/tests/test_core.py b/tests/test_core.py index bb5aa3aa..3cc3215b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -19,6 +19,7 @@ import intake_esm from .utils import ( + access_columns_with_iterables_cat, catalog_dict_records, cdf_cat_sample_cesmle, cdf_cat_sample_cmip5, @@ -201,6 +202,18 @@ def test_catalog_search(path, query, expected_size): assert len(new_cat) == expected_size +@pytest.mark.parametrize( + 'path, columns_with_iterables, query, expected_size', + [ + (access_columns_with_iterables_cat, ['variable'], {'variable': ['aice_m']}, 1), + ], +) +def test_catalog_search_columns_with_iterables(path, columns_with_iterables, query, expected_size): + cat = intake.open_esm_datastore(path, columns_with_iterables=columns_with_iterables) + new_cat = cat.search(**query) + assert len(new_cat) == expected_size + + def test_catalog_with_registry_search(): cat = intake.open_esm_datastore(zarr_cat_aws_cesm, registry=registry) new_cat = cat.search(variable='FOO') diff --git a/tests/utils.py b/tests/utils.py index 51f01307..ea284fa1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -16,6 +16,9 @@ 'https://raw.githubusercontent.com/NCAR/cesm-lens-aws/master/intake-catalogs/aws-cesm1-le.json' ) mixed_cat_sample_cmip6 = os.path.join(here, 'sample-catalogs/cmip6-bcc-mixed-formats.json') +access_columns_with_iterables_cat = os.path.join( + here, 'sample-catalogs/access-columns-with-iterables.json' +) sample_df = pd.DataFrame(