From 29640d56f33c79467b8c8a25d1694a73e52cc476 Mon Sep 17 00:00:00 2001 From: sblack-usu Date: Tue, 18 Nov 2025 08:08:22 -0700 Subject: [PATCH 1/2] update hydroshare download urls update the tests for updated hydroshare download location --- repo2docker/contentproviders/hydroshare.py | 9 ++--- .../unit/contentproviders/test_hydroshare.py | 33 +++++++++++-------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/repo2docker/contentproviders/hydroshare.py b/repo2docker/contentproviders/hydroshare.py index 4104d97a6..6a20dfa66 100755 --- a/repo2docker/contentproviders/hydroshare.py +++ b/repo2docker/contentproviders/hydroshare.py @@ -1,4 +1,3 @@ -import json import os import shutil import time @@ -33,8 +32,10 @@ def detect(self, doi, ref=None, extra_args=None): "hostname": [ "https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/", + "https://hydroshare.org/resource/", + "http://hydroshare.org/resource/", ], - "django_irods": "https://www.hydroshare.org/django_irods/download/bags/", + "django_s3": "https://www.hydroshare.org/django_s3/download/bags/", "version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements", } ] @@ -58,7 +59,7 @@ def fetch(self, spec, output_dir, yield_output=False, timeout=120): resource_id = spec["resource"] host = spec["host"] - bag_url = f'{host["django_irods"]}{resource_id}' + bag_url = f'{host["django_s3"]}{resource_id}.zip' yield f"Downloading {bag_url}.\n" @@ -67,7 +68,7 @@ def fetch(self, spec, output_dir, yield_output=False, timeout=120): total_wait_time = 0 while ( conn.status_code == 200 - and conn.headers["content-type"] != "application/zip" + and not conn.url.startswith(f"https://s3.hydroshare.org/bags/{resource_id}.zip") ): wait_time = 10 total_wait_time += wait_time diff --git a/tests/unit/contentproviders/test_hydroshare.py b/tests/unit/contentproviders/test_hydroshare.py index 41e234cd1..1f10aebf9 100755 --- a/tests/unit/contentproviders/test_hydroshare.py +++ b/tests/unit/contentproviders/test_hydroshare.py @@ -1,5 +1,4 @@ import os -import re from contextlib import contextmanager from tempfile import NamedTemporaryFile, TemporaryDirectory from unittest.mock import patch @@ -48,8 +47,10 @@ def test_detect_hydroshare(): "hostname": [ "https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/", + "https://hydroshare.org/resource/", + "http://hydroshare.org/resource/", ], - "django_irods": "https://www.hydroshare.org/django_irods/download/bags/", + "django_s3": "https://www.hydroshare.org/django_s3/download/bags/", "version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements", }, "resource": "b8f6eae9d89241cf8b5904033460af61", @@ -58,7 +59,7 @@ def test_detect_hydroshare(): assert ( Hydroshare().detect( - "https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61" + "https://hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61" ) == expected ) @@ -83,7 +84,7 @@ def test_detect_hydroshare(): @contextmanager -def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"): +def hydroshare_archive(prefix="123456789/data/contents"): with NamedTemporaryFile(suffix=".zip") as zfile: with ZipFile(zfile.name, mode="w") as zip: zip.writestr(f"{prefix}/some-file.txt", "some content") @@ -93,10 +94,9 @@ def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"): class MockResponse: - def __init__(self, content_type, status_code): + def __init__(self, url, status_code): self.status_code = status_code - self.headers = dict() - self.headers["content-type"] = content_type + self.url = url def test_fetch_bag(): @@ -106,22 +106,25 @@ def test_fetch_bag(): Hydroshare, "urlopen", side_effect=[ - MockResponse("application/html", 200), - MockResponse("application/zip", 200), + MockResponse("https://www.hydroshare.org/django_s3/download/bags/123456789.zip", + 200), + MockResponse("https://s3.hydroshare.org/bags/123456789.zip", 200), ], ): with patch.object( Hydroshare, "_urlretrieve", side_effect=[(hydro_path, None)] ): hydro = Hydroshare() - hydro.resource_id = "b8f6eae9d89241cf8b5904033460af61" + hydro.resource_id = "123456789" spec = { "host": { "hostname": [ "https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/", + "https://hydroshare.org/resource/", + "http://hydroshare.org/resource/", ], - "django_irods": "https://www.hydroshare.org/django_irods/download/bags/", + "django_s3": "https://www.hydroshare.org/django_s3/download/bags/", }, "resource": "123456789", } @@ -147,8 +150,10 @@ def test_fetch_bag_failure(): "hostname": [ "https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/", + "https://hydroshare.org/resource/", + "http://hydroshare.org/resource/", ], - "django_irods": "https://www.hydroshare.org/django_irods/download/bags/", + "django_s3": "https://www.hydroshare.org/django_s3/download/bags/", }, "resource": "123456789", } @@ -173,8 +178,10 @@ def test_fetch_bag_timeout(): "hostname": [ "https://www.hydroshare.org/resource/", "http://www.hydroshare.org/resource/", + "https://hydroshare.org/resource/", + "http://hydroshare.org/resource/", ], - "django_irods": "https://www.hydroshare.org/django_irods/download/bags/", + "django_s3": "https://www.hydroshare.org/django_s3/download/bags/", }, "resource": "123456789", } From 55a24021b698c1c5e0a4e693950430ce96fb3e29 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Nov 2025 19:03:29 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- repo2docker/contentproviders/hydroshare.py | 5 ++--- tests/unit/contentproviders/test_hydroshare.py | 6 ++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/repo2docker/contentproviders/hydroshare.py b/repo2docker/contentproviders/hydroshare.py index 6a20dfa66..c7f8e3e61 100755 --- a/repo2docker/contentproviders/hydroshare.py +++ b/repo2docker/contentproviders/hydroshare.py @@ -66,9 +66,8 @@ def fetch(self, spec, output_dir, yield_output=False, timeout=120): # bag downloads are prepared on demand and may need some time conn = self.urlopen(bag_url) total_wait_time = 0 - while ( - conn.status_code == 200 - and not conn.url.startswith(f"https://s3.hydroshare.org/bags/{resource_id}.zip") + while conn.status_code == 200 and not conn.url.startswith( + f"https://s3.hydroshare.org/bags/{resource_id}.zip" ): wait_time = 10 total_wait_time += wait_time diff --git a/tests/unit/contentproviders/test_hydroshare.py b/tests/unit/contentproviders/test_hydroshare.py index 1f10aebf9..cc11346ac 100755 --- a/tests/unit/contentproviders/test_hydroshare.py +++ b/tests/unit/contentproviders/test_hydroshare.py @@ -106,8 +106,10 @@ def test_fetch_bag(): Hydroshare, "urlopen", side_effect=[ - MockResponse("https://www.hydroshare.org/django_s3/download/bags/123456789.zip", - 200), + MockResponse( + "https://www.hydroshare.org/django_s3/download/bags/123456789.zip", + 200, + ), MockResponse("https://s3.hydroshare.org/bags/123456789.zip", 200), ], ):