From 6082ef89ec4ce475fb48dba7594db1c5a4d54e08 Mon Sep 17 00:00:00 2001 From: Sylvain Brunato <61419125+sbrunato@users.noreply.github.com> Date: Fri, 17 Dec 2021 10:31:34 +0100 Subject: [PATCH 1/5] feat: facultative crs & resampling with get_data (#25) --- eodag_cube/api/product/_product.py | 93 +++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 28 deletions(-) diff --git a/eodag_cube/api/product/_product.py b/eodag_cube/api/product/_product.py index 4a6bd8f..b8cf6cb 100644 --- a/eodag_cube/api/product/_product.py +++ b/eodag_cube/api/product/_product.py @@ -16,19 +16,19 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from contextlib import contextmanager import numpy as np import rasterio import rioxarray import xarray as xr -from rasterio.enums import Resampling from rasterio.vrt import WarpedVRT from eodag.api.product._product import EOProduct as EOProduct_core from eodag.utils import get_geometry_from_various from eodag.utils.exceptions import DownloadError, UnsupportedDatasetAddressScheme -logger = logging.getLogger("eodag_cube.api.product") +logger = logging.getLogger("eodag.api.product") class EOProduct(EOProduct_core): @@ -59,19 +59,26 @@ class EOProduct(EOProduct_core): def __init__(self, *args, **kwargs): super(EOProduct, self).__init__(*args, **kwargs) - def get_data(self, band, crs, resolution=None, extent=None, **rioxr_kwargs): + def get_data( + self, + band, + crs=None, + resolution=None, + extent=None, + resampling=None, + **rioxr_kwargs, + ): """Retrieves all or part of the raster data abstracted by the :class:`EOProduct` :param band: The band of the dataset to retrieve (e.g.: 'B01') :type band: str - :param crs: The coordinate reference system in which the dataset should be - returned + :param crs: (optional) The coordinate reference system in which the dataset should be returned :type crs: str - :param resolution: The resolution in which the dataset should be 
returned + :param resolution: (optional) The resolution in which the dataset should be returned (given in the unit of the crs) :type resolution: float - :param extent: The coordinates on which to zoom, matching the given CRS. Can be defined in different ways - (its bounds will be used): + :param extent: (optional) The coordinates on which to zoom, matching the given CRS. Can be defined in + different ways (its bounds will be used): * with a Shapely geometry object: :class:`shapely.geometry.base.BaseGeometry` @@ -82,6 +89,8 @@ def get_data(self, band, crs, resolution=None, extent=None, **rioxr_kwargs): * with a WKT str :type extent: Union[str, dict, shapely.geometry.base.BaseGeometry] + :param resampling: (optional) Warp resampling algorithm passed to :class:`rasterio.vrt.WarpedVRT` + :type resampling: Resampling :param rioxr_kwargs: kwargs passed to ``rioxarray.open_rasterio()`` :type rioxr_kwargs: dict :returns: The numeric matrix corresponding to the sub dataset or an empty @@ -90,6 +99,7 @@ def get_data(self, band, crs, resolution=None, extent=None, **rioxr_kwargs): """ fail_value = xr.DataArray(np.empty(0)) try: + logger.debug("Getting data address") dataset_address = self.driver.get_data_address(self, band) except UnsupportedDatasetAddressScheme: logger.warning( @@ -98,7 +108,7 @@ def get_data(self, band, crs, resolution=None, extent=None, **rioxr_kwargs): "data..." 
) try: - path_of_downloaded_file = self.download() + path_of_downloaded_file = self.download(extract=True) except (RuntimeError, DownloadError): import traceback @@ -125,24 +135,51 @@ def get_data(self, band, crs, resolution=None, extent=None, **rioxr_kwargs): # rasterio/gdal needed env variables for auth gdal_env = self._get_rio_env(dataset_address) - with rasterio.Env(**gdal_env): - with rasterio.open(dataset_address) as src: - with WarpedVRT(src, crs=crs, resampling=Resampling.bilinear) as vrt: - - da = rioxarray.open_rasterio(vrt, **rioxr_kwargs) - if extent: - da = da.rio.clip_box(minx=minx, miny=miny, maxx=maxx, maxy=maxy) - if resolution: - height = int((maxy - miny) / resolution) - width = int((maxx - minx) / resolution) - out_shape = (height, width) - - da = da.rio.reproject( - dst_crs=crs, - shape=out_shape, - resampling=Resampling.bilinear, - ) - return da + warped_vrt_args = {} + if crs is not None: + warped_vrt_args["crs"] = crs + if resampling is not None: + warped_vrt_args["resampling"] = resampling + + @contextmanager + def pass_resource(resource, **kwargs): + yield resource + + if warped_vrt_args: + warped_vrt_class = WarpedVRT + else: + warped_vrt_class = pass_resource + + logger.debug(f"Getting data from {dataset_address}") + + try: + with rasterio.Env(**gdal_env): + with rasterio.open(dataset_address) as src: + with warped_vrt_class(src, **warped_vrt_args) as vrt: + da = rioxarray.open_rasterio(vrt, **rioxr_kwargs) + if extent: + da = da.rio.clip_box( + minx=minx, miny=miny, maxx=maxx, maxy=maxy + ) + if resolution: + height = int((maxy - miny) / resolution) + width = int((maxx - minx) / resolution) + out_shape = (height, width) + + reproject_args = {} + if crs is not None: + reproject_args["dst_crs"] = crs + if resampling is not None: + reproject_args["resampling"] = resampling + + da = da.rio.reproject( + shape=out_shape, + **reproject_args, + ) + return da + except Exception as e: + logger.error(e) + return fail_value def _get_rio_env(self, 
dataset_address): """Get rasterio environement variables needed for data access. @@ -153,7 +190,7 @@ def _get_rio_env(self, dataset_address): :return: The rasterio environement variables :rtype: dict """ - product_location_scheme = dataset_address.split("://")[0] + product_location_scheme = str(dataset_address).split("://")[0] if product_location_scheme == "s3" and hasattr( self.downloader, "get_bucket_name_and_prefix" ): From ae88bcfd35313c6d15db42553d705594356794ce Mon Sep 17 00:00:00 2001 From: Sylvain Brunato <61419125+sbrunato@users.noreply.github.com> Date: Fri, 17 Dec 2021 15:58:36 +0100 Subject: [PATCH 2/5] feat: generic driver (#26) --- eodag_cube/api/product/_product.py | 2 +- eodag_cube/api/product/drivers/__init__.py | 5 ++ eodag_cube/api/product/drivers/generic.py | 52 ++++++++++++++ tests/context.py | 1 + tests/units/test_eoproduct_driver_generic.py | 76 ++++++++++++++++++++ 5 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 eodag_cube/api/product/drivers/generic.py create mode 100644 tests/units/test_eoproduct_driver_generic.py diff --git a/eodag_cube/api/product/_product.py b/eodag_cube/api/product/_product.py index b8cf6cb..2c3e033 100644 --- a/eodag_cube/api/product/_product.py +++ b/eodag_cube/api/product/_product.py @@ -190,7 +190,7 @@ def _get_rio_env(self, dataset_address): :return: The rasterio environement variables :rtype: dict """ - product_location_scheme = str(dataset_address).split("://")[0] + product_location_scheme = dataset_address.split("://")[0] if product_location_scheme == "s3" and hasattr( self.downloader, "get_bucket_name_and_prefix" ): diff --git a/eodag_cube/api/product/drivers/__init__.py b/eodag_cube/api/product/drivers/__init__.py index 5a0b1bd..28d23dd 100644 --- a/eodag_cube/api/product/drivers/__init__.py +++ b/eodag_cube/api/product/drivers/__init__.py @@ -17,6 +17,7 @@ # limitations under the License. 
"""EODAG drivers package""" from eodag.api.product.drivers.base import NoDriver # noqa +from eodag_cube.api.product.drivers.generic import GenericDriver from eodag_cube.api.product.drivers.sentinel2_l1c import Sentinel2L1C from eodag_cube.api.product.drivers.stac_assets import StacAssets @@ -37,4 +38,8 @@ ], "driver": Sentinel2L1C(), }, + { + "criteria": [lambda prod: True], + "driver": GenericDriver(), + }, ] diff --git a/eodag_cube/api/product/drivers/generic.py b/eodag_cube/api/product/drivers/generic.py new file mode 100644 index 0000000..62e8608 --- /dev/null +++ b/eodag_cube/api/product/drivers/generic.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# Copyright 2021, CS GROUP - France, http://www.c-s.fr +# +# This file is part of EODAG project +# https://www.github.com/CS-SI/EODAG +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from pathlib import Path + +import rasterio + +from eodag.api.product.drivers.base import DatasetDriver +from eodag.utils import uri_to_path +from eodag.utils.exceptions import AddressNotFound, UnsupportedDatasetAddressScheme + + +class GenericDriver(DatasetDriver): + """Generic Driver for products that need to be downloaded""" + + def get_data_address(self, eo_product, band): + """Get the address of a product subdataset. + + See :func:`~eodag.api.product.drivers.base.DatasetDriver.get_data_address` to get help on the formal + parameters. 
+ """ + product_location_scheme = eo_product.location.split("://")[0] + if product_location_scheme == "file": + + filenames = Path(uri_to_path(eo_product.location)).glob(f"**/*{band}*") + + for filename in filenames: + try: + # return the first file readable by rasterio + rasterio.drivers.driver_from_extension(filename) + return str(filename) + except ValueError: + pass + raise AddressNotFound + raise UnsupportedDatasetAddressScheme( + "eo product {} is accessible through a location scheme that is not yet " + "supported by eodag: {}".format(eo_product, product_location_scheme) + ) diff --git a/tests/context.py b/tests/context.py index a604daa..dcf0779 100644 --- a/tests/context.py +++ b/tests/context.py @@ -29,6 +29,7 @@ from eodag.api.product import EOProduct from eodag.api.product.drivers import DRIVERS from eodag.api.product.drivers.base import NoDriver +from eodag_cube.api.product.drivers.generic import GenericDriver from eodag_cube.api.product.drivers.sentinel2_l1c import Sentinel2L1C from eodag_cube.api.product.drivers.stac_assets import StacAssets from eodag.api.search_result import SearchResult diff --git a/tests/units/test_eoproduct_driver_generic.py b/tests/units/test_eoproduct_driver_generic.py new file mode 100644 index 0000000..a900fbe --- /dev/null +++ b/tests/units/test_eoproduct_driver_generic.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +# Copyright 2021, CS GROUP - France, http://www.c-s.fr +# +# This file is part of EODAG project +# https://www.github.com/CS-SI/EODAG +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from contextlib import contextmanager + +from tests import TEST_RESOURCES_PATH, EODagTestCase +from tests.context import ( + AddressNotFound, + EOProduct, + GenericDriver, + UnsupportedDatasetAddressScheme, +) + + +class TestEOProductDriverGeneric(EODagTestCase): + def setUp(self): + super(TestEOProductDriverGeneric, self).setUp() + self.product = EOProduct( + self.provider, self.eoproduct_props, productType="FAKE_PRODUCT_TYPE" + ) + self.product.properties["title"] = os.path.join( + TEST_RESOURCES_PATH, + "products", + "S2A_MSIL1C_20180101T105441_N0206_R051_T31TDH_20180101T124911.SAFE", + ) + self.generic_driver = GenericDriver() + + def test_driver_get_local_dataset_address_bad_band(self): + """Driver must raise AddressNotFound if non existent band is requested""" + with self._filesystem_product() as product: + driver = GenericDriver() + band = "B02" + self.assertRaises(AddressNotFound, driver.get_data_address, product, band) + + def test_driver_get_local_dataset_address_ok(self): + """Driver returns a good address for an existing band""" + with self._filesystem_product() as product: + band = "B01" + address = self.generic_driver.get_data_address(product, band) + self.assertEqual(address, self.local_band_file) + + def test_driver_get_http_remote_dataset_address_fail(self): + """Driver must raise UnsupportedDatasetAddressScheme if location scheme is http or https""" + # Default value of self.product.location is 'https://...' 
+ band = "B01" + self.assertRaises( + UnsupportedDatasetAddressScheme, + self.generic_driver.get_data_address, + self.product, + band, + ) + + @contextmanager + def _filesystem_product(self): + original = self.product.location + try: + self.product.location = "file://{}".format(self.product.properties["title"]) + yield self.product + finally: + self.product.location = original From 5d414e75809a9051efccf7f81660dcf20ea55c81 Mon Sep 17 00:00:00 2001 From: Sylvain Brunato <61419125+sbrunato@users.noreply.github.com> Date: Fri, 17 Mar 2023 10:14:35 +0100 Subject: [PATCH 3/5] fix: update precommit and dependencies (#33) --- .github/workflows/test.yml | 2 +- .pre-commit-config.yaml | 14 +++++++------- eodag_cube/api/product/protobuf/__init__.py | 8 ++++++++ eodag_cube/rpc/protocol/__init__.py | 8 ++++++++ setup.py | 4 ++-- tox.ini | 4 ++-- 6 files changed, 28 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8873fbe..aa42eed 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.9] + python-version: [3.7, 3.9] os: [ubuntu-latest] steps: - name: Checkout the repo diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 930d596..bb0201a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ fail_fast: false repos: - repo: https://github.com/pre-commit/pre-commit-hooks.git - rev: v3.4.0 + rev: v4.4.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -17,23 +17,23 @@ repos: - id: debug-statements - id: check-merge-conflict -- repo: 'https://gitlab.com/pycqa/flake8' - rev: 3.9.0 +- repo: 'https://github.com/PyCQA/flake8' + rev: 5.0.4 # needed for py < 3.8.1 hooks: - id: flake8 - repo: 'https://github.com/ambv/black' - rev: 20.8b1 + rev: 22.12.0 hooks: - id: black args: ['--safe'] -- repo: 'https://github.com/chewse/pre-commit-mirrors-pydocstyle' - rev: v2.1.1 +- 
repo: https://github.com/pycqa/pydocstyle + rev: 6.1.1 hooks: - id: pydocstyle - repo: https://github.com/pre-commit/mirrors-isort - rev: v5.7.0 + rev: v5.10.1 hooks: - id: isort diff --git a/eodag_cube/api/product/protobuf/__init__.py b/eodag_cube/api/product/protobuf/__init__.py index 3e0a3dd..2787d5c 100644 --- a/eodag_cube/api/product/protobuf/__init__.py +++ b/eodag_cube/api/product/protobuf/__init__.py @@ -16,3 +16,11 @@ # See the License for the specific language governing permissions and # limitations under the License. """EODAG product protobuf package""" +import os + +# Fixes: descriptors cannot not be created directly. +# If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0. +# If you cannot immediately regenerate your protos, some other possible workarounds are: +# 1. Downgrade the protobuf package to 3.20.x or lower. +# 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower). +os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" diff --git a/eodag_cube/rpc/protocol/__init__.py b/eodag_cube/rpc/protocol/__init__.py index 135e6b5..cd5e628 100644 --- a/eodag_cube/rpc/protocol/__init__.py +++ b/eodag_cube/rpc/protocol/__init__.py @@ -16,3 +16,11 @@ # See the License for the specific language governing permissions and # limitations under the License. """EODAG rpc.protocol package""" +import os + +# Fixes: descriptors cannot not be created directly. +# If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0. +# If you cannot immediately regenerate your protos, some other possible workarounds are: +# 1. Downgrade the protobuf package to 3.20.x or lower. +# 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower). 
+os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" diff --git a/setup.py b/setup.py index 1cac45f..35d5810 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ "eodag >= 2.3.2", "numpy", "rasterio", - "protobuf", + "protobuf <= 3.20", "grpcio", "xarray", "rioxarray", @@ -37,7 +37,7 @@ "flake8", "isort", "pre-commit", - "pytest==5.0.1", # pytest pined to v5.0.1 to avoid issue when run from VSCode + "pytest", "pytest-cov", "tox", "nose", diff --git a/tox.ini b/tox.ini index ca0cff3..7106e89 100644 --- a/tox.ini +++ b/tox.ini @@ -19,10 +19,10 @@ commands = pytest --show-capture=no --cov --cov-fail-under=50 [testenv:pypi] -whitelist_externals = /bin/bash +allowlist_externals = /bin/bash commands = # Check that the long description is ready to be published on PyPI without errors - bash -c 'ERROR=$(\{ python setup.py --long-description | rst2html.py >/dev/null;\} 2>&1) && if [[ ! -z $ERROR ]];'\ + /bin/bash -c 'ERROR=$(\{ python setup.py --long-description | rst2html.py >/dev/null;\} 2>&1) && if [[ ! -z $ERROR ]];'\ 'then echo "README.rst is incompatible with PyPI reST renderer." 
&& echo $ERROR && exit 1; fi' [testenv:linters] From 449f081cc0f1dcec06cf94ab2764897a456a419e Mon Sep 17 00:00:00 2001 From: Sylvain Brunato <61419125+sbrunato@users.noreply.github.com> Date: Fri, 17 Mar 2023 10:46:56 +0100 Subject: [PATCH 4/5] feat: update supported versions from py37 to py311 (#34) --- .github/workflows/test.yml | 2 +- setup.py | 3 ++- tox.ini | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index aa42eed..10a1ef3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.7, 3.9] + python-version: [3.7, 3.11] os: [ubuntu-latest] steps: - name: Checkout the repo diff --git a/setup.py b/setup.py index 35d5810..e05def1 100644 --- a/setup.py +++ b/setup.py @@ -57,10 +57,11 @@ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", "Topic :: Scientific/Engineering :: GIS", ], diff --git a/tox.ini b/tox.ini index 7106e89..de8f2cf 100644 --- a/tox.ini +++ b/tox.ini @@ -1,14 +1,15 @@ [tox] -envlist = py36, py37, py38, py39, pypi, linters +envlist = py37, py38, py39, py310, py311, pypi, linters skipsdist = True # Mapping required by tox-gh-actions, only used in CI [gh-actions] python = - 3.6: py36 3.7: py37 3.8: py38 3.9: py39 + 3.10: py310 + 3.11: py311 [testenv] deps = From 5e91defb08a97fa7c9fb9be1372be8dd23e781fa Mon Sep 17 00:00:00 2001 From: Sylvain Brunato <61419125+sbrunato@users.noreply.github.com> Date: Fri, 17 Mar 2023 12:07:16 +0100 Subject: [PATCH 5/5] build: bump version (#35) --- 
CHANGES.rst | 8 ++++++++ eodag_cube/__init__.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index aa04c21..3a2ba6a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,14 @@ Release history --------------- +0.3.0 (2023-03-17) +++++++++++++++++++ + +- New Generic driver (#26) +- `get_data()` `crs` and `resampling` parameters are now optional (#25) +- Support python versions from `3.7` to `3.11` (#34) +- `pre-commit` and dependencies updates (#33) + 0.2.1 (2021-08-11) ++++++++++++++++++ diff --git a/eodag_cube/__init__.py b/eodag_cube/__init__.py index 0b9cb2c..b780744 100644 --- a/eodag_cube/__init__.py +++ b/eodag_cube/__init__.py @@ -19,9 +19,9 @@ __title__ = "eodag-cube" __description__ = "Data access for EODAG" -__version__ = "0.2.1" +__version__ = "0.3.0" __author__ = "CS GROUP - France (CSSI)" -__author_email__ = "admin@geostorm.eu" +__author_email__ = "eodag@csgroup.space" __url__ = "https://github.com/CS-SI/eodag-cube" __license__ = "Apache 2.0" __copyright__ = "Copyright 2021, CS GROUP - France, http://www.c-s.fr"