Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions repo2docker/contentproviders/hydroshare.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import os
import shutil
import time
Expand Down Expand Up @@ -33,8 +32,10 @@ def detect(self, doi, ref=None, extra_args=None):
"hostname": [
"https://www.hydroshare.org/resource/",
"http://www.hydroshare.org/resource/",
"https://hydroshare.org/resource/",
"http://hydroshare.org/resource/",
],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"django_s3": "https://www.hydroshare.org/django_s3/download/bags/",
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
}
]
Expand All @@ -58,16 +59,15 @@ def fetch(self, spec, output_dir, yield_output=False, timeout=120):
resource_id = spec["resource"]
host = spec["host"]

bag_url = f'{host["django_irods"]}{resource_id}'
bag_url = f'{host["django_s3"]}{resource_id}.zip'

yield f"Downloading {bag_url}.\n"

# bag downloads are prepared on demand and may need some time
conn = self.urlopen(bag_url)
total_wait_time = 0
while (
conn.status_code == 200
and conn.headers["content-type"] != "application/zip"
while conn.status_code == 200 and not conn.url.startswith(
f"https://s3.hydroshare.org/bags/{resource_id}.zip"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to test

conn.url != f"https://s3.hydroshare.org/bags/{resource_id}.zip"

instead of

not conn.url.startswith(f"https://s3.hydroshare.org/bags/{resource_id}.zip")

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is a good question. It is not possible to compare against the whole URL, because it ends with a query parameter that includes a temporary authorization signature; only the prefix of the URL can be checked.

):
wait_time = 10
total_wait_time += wait_time
Expand Down
35 changes: 22 additions & 13 deletions tests/unit/contentproviders/test_hydroshare.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import re
from contextlib import contextmanager
from tempfile import NamedTemporaryFile, TemporaryDirectory
from unittest.mock import patch
Expand Down Expand Up @@ -48,8 +47,10 @@ def test_detect_hydroshare():
"hostname": [
"https://www.hydroshare.org/resource/",
"http://www.hydroshare.org/resource/",
"https://hydroshare.org/resource/",
"http://hydroshare.org/resource/",
],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"django_s3": "https://www.hydroshare.org/django_s3/download/bags/",
"version": "https://www.hydroshare.org/hsapi/resource/{}/scimeta/elements",
},
"resource": "b8f6eae9d89241cf8b5904033460af61",
Expand All @@ -58,7 +59,7 @@ def test_detect_hydroshare():

assert (
Hydroshare().detect(
"https://www.hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
"https://hydroshare.org/resource/b8f6eae9d89241cf8b5904033460af61"
)
== expected
)
Expand All @@ -83,7 +84,7 @@ def test_detect_hydroshare():


@contextmanager
def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):
def hydroshare_archive(prefix="123456789/data/contents"):
with NamedTemporaryFile(suffix=".zip") as zfile:
with ZipFile(zfile.name, mode="w") as zip:
zip.writestr(f"{prefix}/some-file.txt", "some content")
Expand All @@ -93,10 +94,9 @@ def hydroshare_archive(prefix="b8f6eae9d89241cf8b5904033460af61/data/contents"):


class MockResponse:
def __init__(self, content_type, status_code):
def __init__(self, url, status_code):
self.status_code = status_code
self.headers = dict()
self.headers["content-type"] = content_type
self.url = url


def test_fetch_bag():
Expand All @@ -106,22 +106,27 @@ def test_fetch_bag():
Hydroshare,
"urlopen",
side_effect=[
MockResponse("application/html", 200),
MockResponse("application/zip", 200),
MockResponse(
"https://www.hydroshare.org/django_s3/download/bags/123456789.zip",
200,
),
MockResponse("https://s3.hydroshare.org/bags/123456789.zip", 200),
],
):
with patch.object(
Hydroshare, "_urlretrieve", side_effect=[(hydro_path, None)]
):
hydro = Hydroshare()
hydro.resource_id = "b8f6eae9d89241cf8b5904033460af61"
hydro.resource_id = "123456789"
spec = {
"host": {
"hostname": [
"https://www.hydroshare.org/resource/",
"http://www.hydroshare.org/resource/",
"https://hydroshare.org/resource/",
"http://hydroshare.org/resource/",
],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"django_s3": "https://www.hydroshare.org/django_s3/download/bags/",
},
"resource": "123456789",
}
Expand All @@ -147,8 +152,10 @@ def test_fetch_bag_failure():
"hostname": [
"https://www.hydroshare.org/resource/",
"http://www.hydroshare.org/resource/",
"https://hydroshare.org/resource/",
"http://hydroshare.org/resource/",
],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"django_s3": "https://www.hydroshare.org/django_s3/download/bags/",
},
"resource": "123456789",
}
Expand All @@ -173,8 +180,10 @@ def test_fetch_bag_timeout():
"hostname": [
"https://www.hydroshare.org/resource/",
"http://www.hydroshare.org/resource/",
"https://hydroshare.org/resource/",
"http://hydroshare.org/resource/",
],
"django_irods": "https://www.hydroshare.org/django_irods/download/bags/",
"django_s3": "https://www.hydroshare.org/django_s3/download/bags/",
},
"resource": "123456789",
}
Expand Down