Skip to content

Commit

Permalink
feat: keep asset created date when resupplying
Browse files Browse the repository at this point in the history
  • Loading branch information
l0b0 committed Oct 24, 2024
1 parent b1490d1 commit 2ec88dd
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 51 deletions.
28 changes: 22 additions & 6 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from linz_logger import get_log
from shapely.geometry.base import BaseGeometry

from scripts.datetimes import utc_now
from scripts.datetimes import format_rfc_3339_datetime_string, utc_now
from scripts.files.files_helper import get_file_name_from_path
from scripts.files.fs import NoSuchFileError, read
from scripts.files.fs import NoSuchFileError, modified, read
from scripts.files.geotiff import get_extents
from scripts.gdal.gdal_helper import gdal_info
from scripts.gdal.gdalinfo import GdalInfo
Expand All @@ -16,6 +16,7 @@
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.link import Link, Relation
from scripts.stac.util.checksum import multihash_as_hex
from scripts.stac.util.media_type import StacMediaType


Expand Down Expand Up @@ -111,18 +112,33 @@ def create_item(

geometry, bbox = get_extents(gdalinfo_result)
created_datetime = current_datetime
file_content = read(file)
multihash = multihash_as_hex(file_content)
file_created_datetime = file_updated_datetime = format_rfc_3339_datetime_string(modified(file))

if published_path:
# FIXME: make this try/catch nicer
try:
existing_item_content = read(path.join(published_path, f"{id_}.json"))
existing_item = json.loads(existing_item_content.decode("UTF-8"))
created_datetime = existing_item["properties"]["created"]

try:
created_datetime = existing_item["properties"]["created"]
except KeyError:
get_log().info(f"Existing Item {id_} does not have 'properties.created' attribute")

try:
file_created_datetime = existing_item["assets"]["visual"]["created"]
if multihash == existing_item["assets"]["visual"]["file:checksum"]:
file_updated_datetime = existing_item["assets"]["visual"]["updated"]
except KeyError:
get_log().info(f"Existing Item {id_} does not have 'assets.visual' attributes")
except NoSuchFileError:
get_log().info(f"No Item is published for ID: {id_}")
except KeyError:
get_log().info(f"Existing Item {id_} does not have 'properties.created' attribute")

item = ImageryItem(id_, file, gdal_version, created_datetime, current_datetime)
item = ImageryItem(
id_, file, gdal_version, created_datetime, current_datetime, multihash, file_created_datetime, file_updated_datetime
)

if derived_from is not None:
for derived in derived_from:
Expand Down
25 changes: 14 additions & 11 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@
from os import environ
from typing import Any

from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.files import fs
from scripts.files.fs import modified
from scripts.stac.link import Link, Relation
from scripts.stac.util import checksum
from scripts.stac.util.STAC_VERSION import STAC_VERSION
from scripts.stac.util.media_type import StacMediaType
from scripts.stac.util.stac_extensions import StacExtensions
Expand All @@ -15,10 +11,17 @@
class ImageryItem:
stac: dict[str, Any]

def __init__(self, id_: str, file: str, gdal_version: str, created_datetime: str, updated_datetime: str) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))

def __init__(
self,
id_: str,
file: str,
gdal_version: str,
created_datetime: str,
updated_datetime: str,
visual_asset_multihash: str,
visual_asset_created_datetime: str,
visual_asset_updated_datetime: str,
) -> None:
if (topo_imagery_hash := environ.get("GIT_HASH")) is not None:
commit_url = f"https://github.com/linz/topo-imagery/commit/{topo_imagery_hash}"
else:
Expand All @@ -33,9 +36,9 @@ def __init__(self, id_: str, file: str, gdal_version: str, created_datetime: str
"visual": {
"href": os.path.join(".", os.path.basename(file)),
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": checksum.multihash_as_hex(file_content),
"created": file_modified_datetime,
"updated": file_modified_datetime,
"file:checksum": visual_asset_multihash,
"created": visual_asset_created_datetime,
"updated": visual_asset_updated_datetime,
}
},
"stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value],
Expand Down
30 changes: 21 additions & 9 deletions scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.util.checksum import multihash_as_hex
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime

Expand Down Expand Up @@ -110,15 +111,32 @@ def func() -> datetime:
return func


def any_multihash_as_hex() -> str:
    """Return an arbitrary checksum string for tests.

    The value is `multihash_as_hex` applied to 64 bytes from `any_bytes`.
    """
    random_content = any_bytes(64)
    return multihash_as_hex(random_content)


def any_bytes(byte_count: int) -> bytes:
    """Return `byte_count` random bytes obtained from `os.urandom`."""
    random_bytes = os.urandom(byte_count)
    return random_bytes


def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTests) -> None:
now = any_epoch_datetime()
now_function = fixed_now_function(now)
current_datetime = format_rfc_3339_datetime_string(now)
created_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
collection = ImageryCollection(fake_collection_metadata, now_function)
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path, "any GDAL version", current_datetime, current_datetime)
item = ImageryItem(
"BR34_5000_0304",
item_file_path,
"any GDAL version",
created_datetime,
format_rfc_3339_datetime_string(any_epoch_datetime()),
any_multihash_as_hex(),
format_rfc_3339_datetime_string(any_epoch_datetime()),
format_rfc_3339_datetime_string(any_epoch_datetime()),
)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down Expand Up @@ -151,13 +169,7 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTes

for property_name in ["created", "updated"]:
with subtests.test(msg=f"collection {property_name}"):
assert collection.stac[property_name] == current_datetime

with subtests.test(msg=f"item properties.{property_name}"):
assert item.stac["properties"][property_name] == current_datetime

with subtests.test(msg=f"item assets.visual.{property_name}"):
assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z"
assert collection.stac[property_name] == format_rfc_3339_datetime_string(now)


def test_write_collection(fake_collection_metadata: CollectionMetadata) -> None:
Expand Down
77 changes: 71 additions & 6 deletions scripts/stac/imagery/tests/create_stac_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import json
from datetime import timedelta
from os import utime
from pathlib import Path
from typing import cast

from pytest_subtests import SubTests

from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.create_stac import create_collection, create_item
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.tests.collection_test import any_multihash_as_hex
from scripts.tests.datetimes_test import any_epoch_datetime


Expand Down Expand Up @@ -74,12 +79,23 @@ def test_create_item_with_derived_from_datetimes(tmp_path: Path) -> None:
assert item.stac["properties"]["end_datetime"] == "2024-09-02T12:00:00Z"


def test_create_item_when_resupplying(tmp_path: Path) -> None:
def test_create_item_when_resupplying(subtests: SubTests, tmp_path: Path) -> None:
item_name = "empty"
existing_item = tmp_path / f"{item_name}.json"
tiff_path = f"./scripts/tests/data/{item_name}.tiff"
file_modified_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
existing_item_content = {
"type": "Feature",
"id": f"{item_name}",
"id": item_name,
"assets": {
"visual": {
"href": tiff_path,
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": "12205f300ac3bd1d289da1517144d4851050e544c43c58c23ccfcc1f6968f764a45a",
"created": file_modified_datetime,
"updated": file_modified_datetime,
}
},
"properties": {"created": "2024-09-02T12:00:00Z", "updated": "2024-09-10T12:00:00Z"},
}

Expand All @@ -90,7 +106,7 @@ def test_create_item_when_resupplying(tmp_path: Path) -> None:

current_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
item = create_item(
f"./scripts/tests/data/{item_name}.tiff",
tiff_path,
"",
"",
"abc123",
Expand All @@ -100,15 +116,21 @@ def test_create_item_when_resupplying(tmp_path: Path) -> None:
published_path=tmp_path.as_posix(),
)

assert item.stac["properties"]["created"] == "2024-09-02T12:00:00Z"
assert item.stac["properties"]["updated"] == current_datetime
with subtests.test(msg="properties.created"):
assert item.stac["properties"]["created"] == "2024-09-02T12:00:00Z"
with subtests.test(msg="properties.updated"):
assert item.stac["properties"]["updated"] == current_datetime

for attribute in ["created", "updated"]:
with subtests.test(msg=f"assets.visual.{attribute}"):
assert item.stac["assets"]["visual"][attribute] == file_modified_datetime


def test_create_item_when_resupplying_from_incomplete_metadata(tmp_path: Path) -> None:
# TODO: Remove this test once TDE-1103 is DONE
item_name = "empty"
existing_item = tmp_path / f"{item_name}.json"
existing_item_content = {"type": "Feature", "id": f"{item_name}"}
existing_item_content = {"type": "Feature", "id": item_name}
existing_item.write_text(json.dumps(existing_item_content))
fake_gdal_info: GdalInfo = cast(
GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}}
Expand Down Expand Up @@ -151,6 +173,49 @@ def test_create_item_when_resupplying_with_new_file(tmp_path: Path) -> None:
assert item.stac["properties"]["updated"] == current_datetime


def test_create_item_when_resupplying_with_changed_file(subtests: SubTests, tmp_path: Path) -> None:
    """Resupplying a changed asset keeps its `created` date but refreshes `updated`.

    The published Item is written with a random checksum, so it will not match
    the asset on disk and `create_item` should treat the file as changed:
    `assets.visual.created` is taken from the published Item, while
    `assets.visual.updated` is taken from the file's modification time.
    """
    item_name = "empty"
    # Single source of truth for the asset path: the same file has its mtime
    # set below and is then passed to `create_item`.
    asset_file = f"./scripts/tests/data/{item_name}.tiff"
    created_datetime = any_epoch_datetime()
    created_datetime_string = format_rfc_3339_datetime_string(created_datetime)
    original_item_content = {
        "type": "Feature",
        "id": item_name,
        "assets": {
            "visual": {
                "href": asset_file,
                "type": "image/tiff; application=geotiff; profile=cloud-optimized",
                # Random checksum, so it differs from the checksum of the real file.
                "file:checksum": any_multihash_as_hex(),
                "created": created_datetime_string,
                "updated": created_datetime_string,
            }
        },
        "properties": {"created": "2024-09-02T12:00:00Z", "updated": "2024-09-10T12:00:00Z"},
    }
    original_item = tmp_path / f"{item_name}.json"
    original_item.write_text(json.dumps(original_item_content))

    # Set the asset's modification time to one day after the published
    # `created` value, so the expected `updated` value is distinguishable.
    # NOTE: this changes the mtime of a file under ./scripts/tests/data.
    modified_datetime = created_datetime + timedelta(days=1)
    utime(asset_file, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))

    fake_gdal_info = cast(GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}})

    item = create_item(
        asset_file,
        "",
        "",
        "abc123",
        "any GDAL version",
        format_rfc_3339_datetime_string(any_epoch_datetime()),
        fake_gdal_info,
        published_path=tmp_path.as_posix(),
    )

    with subtests.test(msg="assets.visual.created"):
        assert item.stac["assets"]["visual"]["created"] == created_datetime_string

    with subtests.test(msg="assets.visual.updated"):
        assert item.stac["assets"]["visual"]["updated"] == format_rfc_3339_datetime_string(modified_datetime)


def test_create_collection(fake_collection_metadata: CollectionMetadata) -> None:
collection_id = "test_collection"

Expand Down
55 changes: 38 additions & 17 deletions scripts/stac/imagery/tests/item_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.tests.collection_test import any_multihash_as_hex
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime

Expand All @@ -29,13 +30,23 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None:
start_datetime = "2021-01-27T00:00:00Z"
end_datetime = "2021-01-27T00:00:00Z"

current_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())

created_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
updated_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
git_hash = "any Git hash"
git_version = "any Git version string"
gdal_version_string = "any GDAL version string"
multihash = any_multihash_as_hex()
with patch.dict(environ, {"GIT_HASH": git_hash, "GIT_VERSION": git_version}):
item = ImageryItem(id_, path, gdal_version_string, current_datetime, current_datetime)
item = ImageryItem(
id_,
path,
gdal_version_string,
created_datetime,
updated_datetime,
multihash,
format_rfc_3339_datetime_string(any_epoch_datetime()),
format_rfc_3339_datetime_string(any_epoch_datetime()),
)
item.update_spatial(geometry, bbox)
item.update_datetime(start_datetime, end_datetime)
# checks
Expand All @@ -52,12 +63,9 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None:
assert item.stac["properties"]["datetime"] is None

with subtests.test():
assert (
item.stac["properties"]["created"]
== item.stac["properties"]["updated"]
== item.stac["properties"]["processing:datetime"]
== current_datetime
)
assert item.stac["properties"]["created"] == created_datetime

assert item.stac["properties"]["updated"] == item.stac["properties"]["processing:datetime"] == updated_datetime

with subtests.test():
assert item.stac["properties"]["processing:version"] == git_version
Expand All @@ -81,10 +89,7 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None:
assert item.stac["bbox"] == bbox

with subtests.test():
assert (
item.stac["assets"]["visual"]["file:checksum"]
== "1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
)
assert item.stac["assets"]["visual"]["file:checksum"] == multihash

with subtests.test():
assert {"rel": "self", "href": f"./{id_}.json", "type": "application/geo+json"} in item.stac["links"]
Expand All @@ -109,8 +114,16 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No
path = "./scripts/tests/data/empty.tiff"
id_ = get_file_name_from_path(path)
mocker.patch("scripts.files.fs.read", return_value=b"")
current_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
item = ImageryItem(id_, path, "any GDAL version", current_datetime, current_datetime)
item = ImageryItem(
id_,
path,
"any GDAL version",
format_rfc_3339_datetime_string(any_epoch_datetime()),
format_rfc_3339_datetime_string(any_epoch_datetime()),
any_multihash_as_hex(),
format_rfc_3339_datetime_string(any_epoch_datetime()),
format_rfc_3339_datetime_string(any_epoch_datetime()),
)

item.add_collection(collection.stac["id"])

Expand All @@ -125,8 +138,16 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No


def test_should_set_fallback_version_strings(subtests: SubTests) -> None:
current_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
item = ImageryItem("any ID", "./scripts/tests/data/empty.tiff", "any GDAL version", current_datetime, current_datetime)
item = ImageryItem(
"any ID",
"./scripts/tests/data/empty.tiff",
"any GDAL version",
format_rfc_3339_datetime_string(any_epoch_datetime()),
format_rfc_3339_datetime_string(any_epoch_datetime()),
any_multihash_as_hex(),
format_rfc_3339_datetime_string(any_epoch_datetime()),
format_rfc_3339_datetime_string(any_epoch_datetime()),
)

with subtests.test():
assert item.stac["properties"]["processing:software"]["linz/topo-imagery"] == "GIT_HASH not specified"
Expand Down
Loading

0 comments on commit 2ec88dd

Please sign in to comment.