Skip to content

Commit c18d1b3

Browse files
CodyCBakerPhD, pre-commit-ci[bot], and rly
authored
Expose AWS Region to HDF5IO (#1040)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ryan Ly <[email protected]>
1 parent 6a0f9d8 commit c18d1b3

File tree

8 files changed

+159
-17
lines changed

8 files changed

+159
-17
lines changed

.github/workflows/run_all_tests.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ jobs:
197197
run: |
198198
tox -e wheelinstall --installpkg dist/*.tar.gz
199199
200-
run-gallery-ros3-tests:
200+
run-ros3-tests:
201201
name: ${{ matrix.name }}
202202
runs-on: ${{ matrix.os }}
203203
defaults:
@@ -210,9 +210,9 @@ jobs:
210210
fail-fast: false
211211
matrix:
212212
include:
213-
- { name: linux-gallery-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest }
214-
- { name: windows-gallery-python3.12-ros3 , python-ver: "3.12", os: windows-latest }
215-
- { name: macos-gallery-python3.12-ros3 , python-ver: "3.12", os: macos-latest }
213+
- { name: linux-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest }
214+
- { name: windows-python3.12-ros3 , python-ver: "3.12", os: windows-latest }
215+
- { name: macos-python3.12-ros3 , python-ver: "3.12", os: macos-latest }
216216
steps:
217217
- name: Checkout repo with submodules
218218
uses: actions/checkout@v4

.github/workflows/run_coverage.yml

+55
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,58 @@ jobs:
7070
file: ./coverage.xml
7171
env:
7272
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
73+
74+
run-ros3-coverage:
75+
name: ${{ matrix.name }}
76+
runs-on: ${{ matrix.os }}
77+
defaults:
78+
run:
79+
shell: bash -l {0} # necessary for conda
80+
concurrency:
81+
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }}
82+
cancel-in-progress: true
83+
strategy:
84+
fail-fast: false
85+
matrix:
86+
include:
87+
- { name: linux-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest }
88+
steps:
89+
- name: Checkout repo with submodules
90+
uses: actions/checkout@v4
91+
with:
92+
submodules: 'recursive'
93+
fetch-depth: 0 # tags are required to determine the version
94+
95+
- name: Set up Conda
96+
uses: conda-incubator/setup-miniconda@v3
97+
with:
98+
auto-update-conda: true
99+
activate-environment: ros3
100+
environment-file: environment-ros3.yml
101+
python-version: ${{ matrix.python-ver }}
102+
channels: conda-forge
103+
auto-activate-base: false
104+
mamba-version: "*"
105+
106+
- name: Install run dependencies
107+
run: |
108+
pip install .
109+
pip list
110+
111+
- name: Conda reporting
112+
run: |
113+
conda info
114+
conda config --show-sources
115+
conda list --show-channel-urls
116+
117+
- name: Run ros3 tests # TODO include gallery tests after they are written
118+
run: |
119+
pytest --cov --cov-report=xml --cov-report=term tests/unit/test_io_hdf5_streaming.py
120+
121+
- name: Upload coverage to Codecov
122+
uses: codecov/codecov-action@v4
123+
with:
124+
fail_ci_if_error: true
125+
file: ./coverage.xml
126+
env:
127+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

.github/workflows/run_tests.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ jobs:
209209
--token ${{ secrets.BOT_GITHUB_TOKEN }} \
210210
--re-upload
211211
212-
run-gallery-ros3-tests:
212+
run-ros3-tests:
213213
name: ${{ matrix.name }}
214214
runs-on: ${{ matrix.os }}
215215
defaults:
@@ -222,7 +222,7 @@ jobs:
222222
fail-fast: false
223223
matrix:
224224
include:
225-
- { name: linux-gallery-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest }
225+
- { name: linux-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest }
226226
steps:
227227
- name: Checkout repo with submodules
228228
uses: actions/checkout@v4

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
- Updated `TermSetWrapper` to support validating a single field within a compound array. @mavaylon1 [#1061](https://github.com/hdmf-dev/hdmf/pull/1061)
99
- Updated testing to not install in editable mode and not run `coverage` by default. @rly [#1107](https://github.com/hdmf-dev/hdmf/pull/1107)
1010
- Add `post_init_method` parameter when generating classes to perform post-init functionality, i.e., validation. @mavaylon1 [#1089](https://github.com/hdmf-dev/hdmf/pull/1089)
11+
- Exposed `aws_region` on `HDF5IO` and passed it downstream to `h5py.File`. @codycbakerphd [#1040](https://github.com/hdmf-dev/hdmf/pull/1040)
1112
- Exposed `progress_bar_class` to the `GenericDataChunkIterator` for more custom control over display of progress while iterating. @codycbakerphd [#1110](https://github.com/hdmf-dev/hdmf/pull/1110)
1213
- Updated loading, unloading, and getting the `TypeConfigurator` to support a `TypeMap` parameter. @mavaylon1 [#1117](https://github.com/hdmf-dev/hdmf/pull/1117)
1314

src/hdmf/backends/hdf5/h5tools.py

+30-8
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,21 @@ def can_read(path):
6262
{'name': 'file', 'type': [File, "S3File", "RemFile"],
6363
'doc': 'a pre-existing h5py.File, S3File, or RemFile object', 'default': None},
6464
{'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None},
65+
{
66+
'name': 'aws_region',
67+
'type': str,
68+
'doc': 'If driver is ros3, then specify the aws region of the url.',
69+
'default': None
70+
},
6571
{'name': 'herd_path', 'type': str,
6672
'doc': 'The path to read/write the HERD file', 'default': None},)
6773
def __init__(self, **kwargs):
6874
"""Open an HDF5 file for IO.
6975
"""
7076
self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__))
71-
path, manager, mode, comm, file_obj, driver, herd_path = popargs('path', 'manager', 'mode',
77+
path, manager, mode, comm, file_obj, driver, aws_region, herd_path = popargs('path', 'manager', 'mode',
7278
'comm', 'file', 'driver',
73-
'herd_path',
79+
'aws_region', 'herd_path',
7480
kwargs)
7581

7682
self.__open_links = [] # keep track of other files opened from links in this file
@@ -91,6 +97,7 @@ def __init__(self, **kwargs):
9197
elif isinstance(manager, TypeMap):
9298
manager = BuildManager(manager)
9399
self.__driver = driver
100+
self.__aws_region = aws_region
94101
self.__comm = comm
95102
self.__mode = mode
96103
self.__file = file_obj
@@ -116,6 +123,10 @@ def _file(self):
116123
def driver(self):
117124
return self.__driver
118125

126+
@property
127+
def aws_region(self):
128+
return self.__aws_region
129+
119130
@classmethod
120131
def __check_path_file_obj(cls, path, file_obj):
121132
if isinstance(path, Path):
@@ -133,13 +144,17 @@ def __check_path_file_obj(cls, path, file_obj):
133144
return path
134145

135146
@classmethod
136-
def __resolve_file_obj(cls, path, file_obj, driver):
147+
def __resolve_file_obj(cls, path, file_obj, driver, aws_region=None):
148+
"""Helper function to return a File when loading or getting namespaces from a file."""
137149
path = cls.__check_path_file_obj(path, file_obj)
138150

139151
if file_obj is None:
140152
file_kwargs = dict()
141153
if driver is not None:
142154
file_kwargs.update(driver=driver)
155+
156+
if aws_region is not None:
157+
file_kwargs.update(aws_region=bytes(aws_region, "ascii"))
143158
file_obj = File(path, 'r', **file_kwargs)
144159
return file_obj
145160

@@ -150,6 +165,8 @@ def __resolve_file_obj(cls, path, file_obj, driver):
150165
{'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None},
151166
{'name': 'file', 'type': File, 'doc': 'a pre-existing h5py.File object', 'default': None},
152167
{'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None},
168+
{'name': 'aws_region', 'type': str, 'doc': 'If driver is ros3, then specify the aws region of the url.',
169+
'default': None},
153170
returns=("dict mapping the names of the loaded namespaces to a dict mapping included namespace names and "
154171
"the included data types"),
155172
rtype=dict)
@@ -162,10 +179,10 @@ def load_namespaces(cls, **kwargs):
162179
163180
:raises ValueError: if both `path` and `file` are supplied but `path` is not the same as the path of `file`.
164181
"""
165-
namespace_catalog, path, namespaces, file_obj, driver = popargs(
166-
'namespace_catalog', 'path', 'namespaces', 'file', 'driver', kwargs)
182+
namespace_catalog, path, namespaces, file_obj, driver, aws_region = popargs(
183+
'namespace_catalog', 'path', 'namespaces', 'file', 'driver', 'aws_region', kwargs)
167184

168-
open_file_obj = cls.__resolve_file_obj(path, file_obj, driver)
185+
open_file_obj = cls.__resolve_file_obj(path, file_obj, driver, aws_region=aws_region)
169186
if file_obj is None: # need to close the file object that we just opened
170187
with open_file_obj:
171188
return cls.__load_namespaces(namespace_catalog, namespaces, open_file_obj)
@@ -214,6 +231,8 @@ def __check_specloc(cls, file_obj):
214231
@docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
215232
{'name': 'file', 'type': File, 'doc': 'a pre-existing h5py.File object', 'default': None},
216233
{'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None},
234+
{'name': 'aws_region', 'type': str, 'doc': 'If driver is ros3, then specify the aws region of the url.',
235+
'default': None},
217236
returns="dict mapping names to versions of the namespaces in the file", rtype=dict)
218237
def get_namespaces(cls, **kwargs):
219238
"""Get the names and versions of the cached namespaces from a file.
@@ -227,9 +246,9 @@ def get_namespaces(cls, **kwargs):
227246
228247
:raises ValueError: if both `path` and `file` are supplied but `path` is not the same as the path of `file`.
229248
"""
230-
path, file_obj, driver = popargs('path', 'file', 'driver', kwargs)
249+
path, file_obj, driver, aws_region = popargs('path', 'file', 'driver', 'aws_region', kwargs)
231250

232-
open_file_obj = cls.__resolve_file_obj(path, file_obj, driver)
251+
open_file_obj = cls.__resolve_file_obj(path, file_obj, driver, aws_region=aws_region)
233252
if file_obj is None: # need to close the file object that we just opened
234253
with open_file_obj:
235254
return cls.__get_namespaces(open_file_obj)
@@ -756,6 +775,9 @@ def open(self):
756775
if self.driver is not None:
757776
kwargs.update(driver=self.driver)
758777

778+
if self.driver == "ros3" and self.aws_region is not None:
779+
kwargs.update(aws_region=bytes(self.aws_region, "ascii"))
780+
759781
self.__file = File(self.source, open_flag, **kwargs)
760782

761783
def close(self, close_links=True):

tests/unit/test_io_hdf5_streaming.py

+56
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,17 @@
22
import os
33
import urllib.request
44
import h5py
5+
import warnings
56

7+
from hdmf.backends.hdf5.h5tools import HDF5IO
68
from hdmf.build import TypeMap, BuildManager
79
from hdmf.common import get_hdf5io, get_type_map
810
from hdmf.spec import GroupSpec, DatasetSpec, SpecNamespace, NamespaceBuilder, NamespaceCatalog
911
from hdmf.testing import TestCase
1012
from hdmf.utils import docval, get_docval
1113

1214

15+
1316
class TestRos3(TestCase):
1417
"""Test reading an HDMF file using HDF5 ROS3 streaming.
1518
@@ -77,6 +80,8 @@ def setUp(self):
7780

7881
self.manager = BuildManager(type_map)
7982

83+
warnings.filterwarnings(action="ignore", message="Ignoring cached namespace .*")
84+
8085
def tearDown(self):
8186
if os.path.exists(self.ns_filename):
8287
os.remove(self.ns_filename)
@@ -89,6 +94,57 @@ def test_basic_read(self):
8994
with get_hdf5io(s3_path, "r", manager=self.manager, driver="ros3") as io:
9095
io.read()
9196

97+
def test_basic_read_with_aws_region(self):
98+
s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"
99+
100+
with get_hdf5io(s3_path, "r", manager=self.manager, driver="ros3", aws_region="us-east-2") as io:
101+
io.read()
102+
103+
def test_basic_read_s3_with_aws_region(self):
104+
# NOTE: if an s3 path is used with ros3 driver, aws_region must be specified
105+
s3_path = "s3://dandiarchive/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"
106+
107+
with get_hdf5io(s3_path, "r", manager=self.manager, driver="ros3", aws_region="us-east-2") as io:
108+
io.read()
109+
assert io.aws_region == "us-east-2"
110+
111+
def test_get_namespaces(self):
112+
s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"
113+
114+
namespaces = HDF5IO.get_namespaces(s3_path, driver="ros3")
115+
self.assertEqual(namespaces, {'core': '2.3.0', 'hdmf-common': '1.5.0', 'hdmf-experimental': '0.1.0'})
116+
117+
def test_get_namespaces_with_aws_region(self):
118+
s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"
119+
120+
namespaces = HDF5IO.get_namespaces(s3_path, driver="ros3", aws_region="us-east-2")
121+
self.assertEqual(namespaces, {'core': '2.3.0', 'hdmf-common': '1.5.0', 'hdmf-experimental': '0.1.0'})
122+
123+
def test_get_namespaces_s3_with_aws_region(self):
124+
s3_path = "s3://dandiarchive/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"
125+
126+
namespaces = HDF5IO.get_namespaces(s3_path, driver="ros3", aws_region="us-east-2")
127+
self.assertEqual(namespaces, {'core': '2.3.0', 'hdmf-common': '1.5.0', 'hdmf-experimental': '0.1.0'})
128+
129+
def test_load_namespaces(self):
130+
s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"
131+
132+
HDF5IO.load_namespaces(self.manager.namespace_catalog, path=s3_path, driver="ros3")
133+
assert set(self.manager.namespace_catalog.namespaces) == set(["core", "hdmf-common", "hdmf-experimental"])
134+
135+
def test_load_namespaces_with_aws_region(self):
136+
s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"
137+
138+
HDF5IO.load_namespaces(self.manager.namespace_catalog, path=s3_path, driver="ros3", aws_region="us-east-2")
139+
assert set(self.manager.namespace_catalog.namespaces) == set(["core", "hdmf-common", "hdmf-experimental"])
140+
141+
def test_load_namespaces_s3_with_aws_region(self):
142+
s3_path = "s3://dandiarchive/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"
143+
144+
HDF5IO.load_namespaces(self.manager.namespace_catalog, path=s3_path, driver="ros3", aws_region="us-east-2")
145+
assert set(self.manager.namespace_catalog.namespaces) == set(["core", "hdmf-common", "hdmf-experimental"])
146+
147+
92148
# Util functions and classes to enable loading of the NWB namespace -- see pynwb/src/pynwb/spec.py
93149

94150

tests/unit/utils_test/test_core_DataIO.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from hdmf.container import Data
55
from hdmf.data_utils import DataIO
66
from hdmf.testing import TestCase
7+
import warnings
78

89

910
class DataIOTests(TestCase):
@@ -36,7 +37,9 @@ def test_set_dataio(self):
3637
dataio = DataIO()
3738
data = np.arange(30).reshape(5, 2, 3)
3839
container = Data('wrapped_data', data)
39-
container.set_dataio(dataio)
40+
msg = "Data.set_dataio() is deprecated. Please use Data.set_data_io() instead."
41+
with self.assertWarnsWith(DeprecationWarning, msg):
42+
container.set_dataio(dataio)
4043
self.assertIs(dataio.data, data)
4144
self.assertIs(dataio, container.data)
4245

@@ -48,7 +51,13 @@ def test_set_dataio_data_already_set(self):
4851
data = np.arange(30).reshape(5, 2, 3)
4952
container = Data('wrapped_data', data)
5053
with self.assertRaisesWith(ValueError, "cannot overwrite 'data' on DataIO"):
51-
container.set_dataio(dataio)
54+
with warnings.catch_warnings(record=True):
55+
warnings.filterwarnings(
56+
action='ignore',
57+
category=DeprecationWarning,
58+
message="Data.set_dataio() is deprecated. Please use Data.set_data_io() instead.",
59+
)
60+
container.set_dataio(dataio)
5261

5362
def test_dataio_options(self):
5463
"""

tests/unit/utils_test/test_core_GenericDataChunkIterator.py

-1
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,6 @@ def test_progress_bar(self):
410410

411411
@unittest.skipIf(not TQDM_INSTALLED, "optional tqdm module is not installed")
412412
def test_progress_bar_class(self):
413-
import tqdm
414413

415414
class MyCustomProgressBar(tqdm.tqdm):
416415
def update(self, n: int = 1) -> Union[bool, None]:

0 commit comments

Comments
 (0)