Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use is_opendap_url to avoid hitting non-existent DAP link #209

Merged
merged 1 commit into from
Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Change History
**************

0.8.1 (unreleased)
==================

Changes:

* Before trying to open a netCDF dataset, determine whether the link is a valid OPeNDAP endpoint to avoid unnecessarily raising the cryptic ``syntax error, unexpected WORD_WORD, expecting SCAN_ATTR or SCAN_DATASET or SCAN_ERROR``.


0.8.0 (2021-05-25)
==================

Expand Down
22 changes: 11 additions & 11 deletions birdy/client/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from importlib import import_module
from pathlib import Path
from typing import Sequence, Union

from birdy.utils import is_opendap_url
from owslib.wps import Output

from . import notebook as nb
Expand Down Expand Up @@ -174,12 +174,12 @@ def check_dependencies(self): # noqa: D102
def convert(self): # noqa: D102
import netCDF4

try:
# try OpenDAP url
# Try to access with OpenDAP url to avoid a download
if is_opendap_url(self.url):
return netCDF4.Dataset(self.url)
except IOError:
# download the file
return netCDF4.Dataset(self.file)

# Download the file and open the local copy
return netCDF4.Dataset(self.file)


class XarrayConverter(BaseConverter): # noqa: D101
Expand All @@ -194,12 +194,12 @@ def check_dependencies(self): # noqa: D102
def convert(self): # noqa: D102
import xarray as xr

try:
# try OpenDAP url
# Try to access with OpenDAP url to avoid a download
if is_opendap_url(self.url):
return xr.open_dataset(self.url)
except IOError:
# download the file
return xr.open_dataset(self.file)

# Download the file and open the local copy
return xr.open_dataset(self.file)


# TODO: Add test for this.
Expand Down
28 changes: 28 additions & 0 deletions birdy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,34 @@ def is_url(url):
return True


def is_opendap_url(url):
    """
    Check if a provided url is an OpenDAP url.

    The DAP Standard specifies that a specific tag must be included in the
    Content-Description header of every request. This tag is one of:
        "dods-dds" | "dods-das" | "dods-data" | "dods-error"

    So we can check if the header starts with `dods`.

    Note that this might not work with every DAP server implementation.

    Parameters
    ----------
    url : str
        Address probed with a single HTTP HEAD request (5 second timeout).

    Returns
    -------
    bool
        True if the response has a Content-Description header starting with
        "dods" (case-insensitive). False if the header is missing or empty,
        if the url has a missing or unsupported schema, or if the server
        cannot be reached or does not answer within the timeout.
    """
    import requests
    from requests.exceptions import (
        ConnectionError,
        InvalidSchema,
        MissingSchema,
        Timeout,
    )

    try:
        response = requests.head(url, timeout=5)
    except (ConnectionError, Timeout, MissingSchema, InvalidSchema):
        # Timeout is listed explicitly: a read timeout is not a subclass of
        # ConnectionError, so a slow server would otherwise make this probe
        # raise instead of answering False.
        return False

    # A missing header yields None; normalise to "" so the check stays boolean.
    content_description = response.headers.get("Content-Description") or ""
    return content_description.lower().startswith("dods")


def is_file(path):
"""Return True if `path` is a valid file."""
if not path:
Expand Down
29 changes: 29 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# noqa

import pytest
from pathlib import Path

from birdy import utils
Expand Down Expand Up @@ -114,3 +115,31 @@ def test_path(self): # noqa: D102
["application/x-netcdf", "application/x-ogc-dods"],
)
assert mime == "application/x-ogc-dods"


@pytest.mark.online
def test_is_opendap_url():
    # These checks hit live servers, which are not as stable as hoped.
    # Recording the requests would keep the test from breaking when a
    # server is down.

    dap_url = (
        "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/"
        "birdhouse/nrcan/nrcan_canada_daily_v2/tasmin/nrcan_canada_daily_tasmin_2017.nc"
    )
    assert utils.is_opendap_url(dap_url)

    # NOTE: http://test.opendap.org/opendap/netcdf/examples/tos_O1_2001-2002.nc
    # was noted as sending no Content-Description header, so it is not
    # asserted as a positive case here.

    not_dap_urls = (
        # Same dataset served through the plain file server instead of DAP.
        dap_url.replace("dodsC", "fileServer"),
        "invalid_schema://something",
        "https://www.example.com",
        "/missing_schema",
    )
    for candidate in not_dap_urls:
        assert not utils.is_opendap_url(candidate)