Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: keep Collection properties.created date when resupplying #1160

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions .github/workflows/format-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,28 @@ jobs:

- name: End to end test - Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff

- name: End to end test - Elevation
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/BK39_10000_0102.tiff" ./scripts/tests/data/output/BK39_10000_0102.tiff
cmp --silent "${{ runner.temp }}/BK39_10000_0101.tiff" ./scripts/tests/data/output/BK39_10000_0101.tiff

- name: End to end test - Historical Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/BQ31_5000_0608.tiff" ./scripts/tests/data/output/BQ31_5000_0608.tiff

- name: End to end test - Cutline (Aerial Imagery)
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/cutline/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829_cut.tiff

- name: End to end test - Footprint
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
jq 'select(.xy_coordinate_resolution == 1E-8) // error("Wrong or missing X/Y coordinate resolution")' "${{ runner.temp }}/BG35_1000_4829_footprint.geojson"
cmp --silent <(jq "del(.features[0].properties.location, .xy_coordinate_resolution)" "${{ runner.temp }}/BG35_1000_4829_footprint.geojson") <(jq "del(.features[0].properties.location, .xy_coordinate_resolution)" ./scripts/tests/data/output/BG35_1000_4829_footprint.geojson)

Expand All @@ -64,7 +64,7 @@ jobs:

- name: End to end test - Restandardise Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/restandardise/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff

- name: End to end test - Translate Ascii Files (Elevation)
Expand All @@ -74,10 +74,15 @@ jobs:

- name: End to end test - Remove empty files
run: |
docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60 --create-footprints=true
docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
empty_target_directory="$(find "${{ runner.temp }}/tmp-empty" -maxdepth 0 -type d -empty)"
[[ -n "$empty_target_directory" ]]

- name: End to end test - Resupply Item
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/resupply.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --gsd 10 --create-footprints=false --start-datetime=2023-01-01 --end-datetime=2023-01-01 --current-datetime=2025-01-03T08:24:00Z
cmp --silent <(jq . "${{ runner.temp }}/BQ25_10000_0305.json") <(jq . ./scripts/tests/data/output/BQ25_10000_0305.json)

- uses: actions/upload-artifact@v4
with:
name: gdal-output
Expand Down
17 changes: 17 additions & 0 deletions scripts/collection_from_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ def parse_args(args: List[str] | None) -> Namespace:
parser = argparse.ArgumentParser()
parser.add_argument("--uri", dest="uri", help="s3 path to items and collection.json write location", required=True)
parser.add_argument("--collection-id", dest="collection_id", help="Collection ID", required=True)
parser.add_argument(
"--published-path",
dest="published_path",
help=("The path of the published dataset. Example: 's3://nz-imagery/wellington/porirua_2024_0.1m/rgb/2193/'"),
required=False,
)
parser.add_argument(
"--category",
dest="category",
Expand Down Expand Up @@ -92,6 +98,15 @@ def parse_args(args: List[str] | None) -> Namespace:
help="Add a title suffix to the collection title based on the lifecycle. For example, '[TITLE] - Preview'",
required=False,
)
parser.add_argument(
"--current-datetime",
dest="current_datetime",
help=(
"The datetime that is used as current datetime in the metadata. "
"Format: RFC 3339 UTC datetime, `YYYY-MM-DDThh:mm:ssZ`."
),
required=True,
)

return parser.parse_args(args)

Expand Down Expand Up @@ -171,13 +186,15 @@ def main(args: List[str] | None = None) -> None:
collection = create_collection(
collection_id=collection_id,
collection_metadata=collection_metadata,
current_datetime=arguments.current_datetime,
producers=coalesce_multi_single(arguments.producer_list, arguments.producer),
licensors=coalesce_multi_single(arguments.licensor_list, arguments.licensor),
stac_items=items_to_add,
item_polygons=polygons,
add_capture_dates=arguments.capture_dates,
uri=uri,
add_title_suffix=arguments.add_title_suffix,
published_path=arguments.published_path,
)

destination = os.path.join(uri, "collection.json")
Expand Down
16 changes: 0 additions & 16 deletions scripts/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,6 @@ def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> st
return format_rfc_3339_datetime_string(datetime_utc)


def utc_now() -> datetime:
    """
    Return the present moment as a timezone-aware datetime in UTC.

    The result tracks the system clock:
    >>> reference = datetime.now().timestamp()
    >>> reference - 5 < utc_now().timestamp() < reference + 5
    True

    The result carries the UTC time zone:
    >>> utc_now().tzname()
    'UTC'
    """
    return datetime.now(timezone.utc)


class NaiveDatetimeError(Exception):
def __init__(self) -> None:
super().__init__("Can't convert naive datetime to UTC")
15 changes: 0 additions & 15 deletions scripts/files/fs.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import os
from concurrent.futures import Future, ThreadPoolExecutor
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING

from boto3 import resource
from linz_logger import get_log
Expand All @@ -11,11 +8,6 @@
from scripts.files import fs_local, fs_s3
from scripts.stac.util.checksum import multihash_as_hex

if TYPE_CHECKING:
from mypy_boto3_s3 import S3Client
else:
S3Client = dict


def write(destination: str, source: bytes, content_type: str | None = None) -> str:
"""Write a file from its source to a destination path.
Expand Down Expand Up @@ -87,13 +79,6 @@ def exists(path: str) -> bool:
return fs_local.exists(path)


def modified(path: str, s3_client: S3Client | None = None) -> datetime:
    """Return the modified datetime for an S3 URL or a local path.

    Args:
        path: an S3 URL or a local file path
        s3_client: optional client forwarded to the S3 lookup; not used for local paths

    Returns:
        the modified datetime reported by the matching backend
    """
    if not is_s3(path):
        return fs_local.modified(Path(path))

    bucket = fs_s3.bucket_name_from_path(path)
    object_key = fs_s3.prefix_from_path(path)
    return fs_s3.modified(bucket, object_key, s3_client)


def write_all(inputs: list[str], target: str, concurrency: int | None = 4, generate_name: bool | None = True) -> list[str]:
"""Writes list of files to target destination using multithreading.
Args:
Expand Down
8 changes: 0 additions & 8 deletions scripts/files/fs_local.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import os
from datetime import datetime, timezone
from pathlib import Path


def write(destination: str, source: bytes) -> None:
Expand Down Expand Up @@ -38,9 +36,3 @@ def exists(path: str) -> bool:
True if the path exists
"""
return os.path.exists(path)


def modified(path: Path) -> datetime:
    """Return the modification time of `path` as a UTC-aware datetime."""
    return datetime.fromtimestamp(os.path.getmtime(path), tz=timezone.utc)
6 changes: 0 additions & 6 deletions scripts/files/fs_s3.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from collections.abc import Generator
from concurrent import futures
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import TYPE_CHECKING, Any

from boto3 import client, resource
Expand Down Expand Up @@ -241,8 +240,3 @@ def get_object_parallel_multithreading(
yield key, future.result()
else:
yield key, exception


def modified(bucket_name: str, key: str, s3_client: S3Client | None) -> datetime:
    """Return the `LastModified` entry of the `_get_object` response for `bucket_name`/`key`.

    A default S3 client is created when `s3_client` is not supplied.
    """
    if not s3_client:
        s3_client = client("s3")
    response = _get_object(bucket_name, key, s3_client)
    return response["LastModified"]
12 changes: 1 addition & 11 deletions scripts/files/tests/fs_local_test.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import os
from pathlib import Path

import pytest

from scripts.files.fs_local import exists, modified, read, write
from scripts.tests.datetimes_test import any_epoch_datetime
from scripts.files.fs_local import exists, read, write


@pytest.mark.dependency(name="write")
Expand Down Expand Up @@ -45,11 +43,3 @@ def test_exists(setup: str) -> None:
def test_exists_file_not_found() -> None:
    """`exists` reports False for a path that is not present."""
    assert exists("/tmp/test.file") is False


def test_should_get_modified_datetime(setup: str) -> None:
    """`modified` returns the modification time that was set on a local file."""
    target = Path(setup) / "modified.file"
    target.touch()
    expected = any_epoch_datetime()
    # The access time is arbitrary; only the modification time is asserted on.
    access_timestamp = any_epoch_datetime().timestamp()
    os.utime(target, times=(access_timestamp, expected.timestamp()))
    assert modified(target) == expected
21 changes: 1 addition & 20 deletions scripts/files/tests/fs_s3_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,12 @@
from boto3 import client, resource
from botocore.exceptions import ClientError
from moto import mock_aws
from moto.core.models import DEFAULT_ACCOUNT_ID
from moto.s3.models import s3_backends
from moto.s3.responses import DEFAULT_REGION_NAME
from moto.wafv2.models import GLOBAL_REGION
from mypy_boto3_s3 import S3Client
from pytest import CaptureFixture, raises
from pytest_subtests import SubTests

from scripts.files.files_helper import ContentType
from scripts.files.fs_s3 import exists, list_files_in_uri, modified, read, write
from scripts.tests.datetimes_test import any_epoch_datetime
from scripts.files.fs_s3 import exists, list_files_in_uri, read, write


@mock_aws
Expand Down Expand Up @@ -174,17 +169,3 @@ def test_list_files_in_uri(subtests: SubTests) -> None:

with subtests.test():
assert "data/image.tiff" not in files


@mock_aws
def test_should_get_modified_datetime() -> None:
    """`modified` returns the last-modified value stored on the mocked S3 key."""
    bucket = "any-bucket-name"
    object_key = "any-key"
    expected = any_epoch_datetime()

    s3: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
    s3.create_bucket(Bucket=bucket)
    s3.put_object(Bucket=bucket, Key=object_key, Body=b"any body")
    # Overwrite the timestamp directly in the moto backend so the expected value is known.
    stored_key = s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket].keys[object_key]
    stored_key.last_modified = expected

    assert modified(bucket, object_key, s3) == expected
31 changes: 2 additions & 29 deletions scripts/files/tests/fs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,13 @@
from shutil import rmtree
from tempfile import mkdtemp

from boto3 import client, resource
from boto3 import resource
from moto import mock_aws
from moto.core.models import DEFAULT_ACCOUNT_ID
from moto.s3.models import s3_backends
from moto.s3.responses import DEFAULT_REGION_NAME
from moto.wafv2.models import GLOBAL_REGION
from mypy_boto3_s3 import S3Client
from pytest import CaptureFixture, raises
from pytest_subtests import SubTests

from scripts.files.fs import NoSuchFileError, modified, read, write, write_all, write_sidecars
from scripts.tests.datetimes_test import any_epoch_datetime
from scripts.files.fs import NoSuchFileError, read, write, write_all, write_sidecars


def test_read_key_not_found_local() -> None:
Expand Down Expand Up @@ -103,25 +98,3 @@ def test_write_all_in_order(setup: str) -> None:
i += 1
written_files = write_all(inputs=inputs, target=setup, generate_name=False)
assert written_files == inputs


@mock_aws
def test_should_get_s3_object_modified_datetime() -> None:
    """`modified` resolves an `s3://` URL to the last-modified value of the mocked key."""
    bucket = "any-bucket-name"
    object_key = "any-key"
    expected = any_epoch_datetime()

    s3: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
    s3.create_bucket(Bucket=bucket)
    s3.put_object(Bucket=bucket, Key=object_key, Body=b"any body")
    # Overwrite the timestamp directly in the moto backend so the expected value is known.
    stored_key = s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket].keys[object_key]
    stored_key.last_modified = expected

    s3_url = f"s3://{bucket}/{object_key}"
    assert modified(s3_url, s3) == expected


def test_should_get_local_file_modified_datetime(setup: str) -> None:
    """`modified` returns the modification time that was set on a local file path."""
    file_path = os.path.join(setup, "modified.file")
    Path(file_path).touch()
    expected = any_epoch_datetime()
    # The access time is arbitrary; only the modification time is asserted on.
    access_timestamp = any_epoch_datetime().timestamp()
    os.utime(file_path, times=(access_timestamp, expected.timestamp()))
    assert modified(file_path) == expected
10 changes: 4 additions & 6 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import os
from collections.abc import Callable
from datetime import datetime
from typing import Any

import ulid
Expand Down Expand Up @@ -41,7 +39,8 @@ class ImageryCollection:
def __init__(
self,
metadata: CollectionMetadata,
now: Callable[[], datetime],
created_datetime: str,
updated_datetime: str,
collection_id: str | None = None,
providers: list[Provider] | None = None,
add_title_suffix: bool = True,
Expand All @@ -51,7 +50,6 @@ def __init__(

self.metadata = metadata

now_string = format_rfc_3339_datetime_string(now())
self.stac = {
"type": "Collection",
"stac_version": STAC_VERSION,
Expand All @@ -65,8 +63,8 @@ def __init__(
"linz:geospatial_category": metadata["category"],
"linz:region": metadata["region"],
"linz:security_classification": "unclassified",
"created": now_string,
"updated": now_string,
"created": created_datetime,
"updated": updated_datetime,
}

# Optional metadata
Expand Down
Loading
Loading