Skip to content

Commit

Permalink
feat: keep asset created date when resupplying WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
l0b0 authored and paulfouquet committed Oct 24, 2024
1 parent b1490d1 commit f861c7b
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 54 deletions.
28 changes: 22 additions & 6 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from linz_logger import get_log
from shapely.geometry.base import BaseGeometry

from scripts.datetimes import utc_now
from scripts.datetimes import format_rfc_3339_datetime_string, utc_now
from scripts.files.files_helper import get_file_name_from_path
from scripts.files.fs import NoSuchFileError, read
from scripts.files.fs import NoSuchFileError, modified, read
from scripts.files.geotiff import get_extents
from scripts.gdal.gdal_helper import gdal_info
from scripts.gdal.gdalinfo import GdalInfo
Expand All @@ -16,6 +16,7 @@
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.link import Link, Relation
from scripts.stac.util.checksum import multihash_as_hex
from scripts.stac.util.media_type import StacMediaType


Expand Down Expand Up @@ -111,18 +112,33 @@ def create_item(

geometry, bbox = get_extents(gdalinfo_result)
created_datetime = current_datetime
file_content = read(file)
multihash = multihash_as_hex(file_content)
file_created_datetime = file_updated_datetime = format_rfc_3339_datetime_string(modified(file))

if published_path:
# FIXME: make this try/catch nicer
try:
existing_item_content = read(path.join(published_path, f"{id_}.json"))
existing_item = json.loads(existing_item_content.decode("UTF-8"))
created_datetime = existing_item["properties"]["created"]

try:
created_datetime = existing_item["properties"]["created"]
except KeyError:
get_log().info(f"Existing Item {id_} does not have 'properties.created' attribute")

try:
file_created_datetime = existing_item["assets"]["visual"]["created"]
if multihash == existing_item["assets"]["visual"]["file:checksum"]:
file_updated_datetime = existing_item["assets"]["visual"]["updated"]
except KeyError:
get_log().info(f"Existing Item {id_} does not have 'assets.visual' attributes")
except NoSuchFileError:
get_log().info(f"No Item is published for ID: {id_}")
except KeyError:
get_log().info(f"Existing Item {id_} does not have 'properties.created' attribute")

item = ImageryItem(id_, file, gdal_version, created_datetime, current_datetime)
item = ImageryItem(
id_, file, gdal_version, created_datetime, current_datetime, multihash, file_created_datetime, file_updated_datetime
)

if derived_from is not None:
for derived in derived_from:
Expand Down
25 changes: 14 additions & 11 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@
from os import environ
from typing import Any

from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.files import fs
from scripts.files.fs import modified
from scripts.stac.link import Link, Relation
from scripts.stac.util import checksum
from scripts.stac.util.STAC_VERSION import STAC_VERSION
from scripts.stac.util.media_type import StacMediaType
from scripts.stac.util.stac_extensions import StacExtensions
Expand All @@ -15,10 +11,17 @@
class ImageryItem:
stac: dict[str, Any]

def __init__(self, id_: str, file: str, gdal_version: str, created_datetime: str, updated_datetime: str) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))

def __init__(
self,
id_: str,
file: str,
gdal_version: str,
created_datetime: str,
updated_datetime: str,
visual_asset_multihash: str,
visual_asset_created_datetime: str,
visual_asset_updated_datetime: str,
) -> None:
if (topo_imagery_hash := environ.get("GIT_HASH")) is not None:
commit_url = f"https://github.com/linz/topo-imagery/commit/{topo_imagery_hash}"
else:
Expand All @@ -33,9 +36,9 @@ def __init__(self, id_: str, file: str, gdal_version: str, created_datetime: str
"visual": {
"href": os.path.join(".", os.path.basename(file)),
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": checksum.multihash_as_hex(file_content),
"created": file_modified_datetime,
"updated": file_modified_datetime,
"file:checksum": visual_asset_multihash,
"created": visual_asset_created_datetime,
"updated": visual_asset_updated_datetime,
}
},
"stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value],
Expand Down
30 changes: 21 additions & 9 deletions scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.util.checksum import multihash_as_hex
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime

Expand Down Expand Up @@ -110,15 +111,32 @@ def func() -> datetime:
return func


def any_multihash_as_hex() -> str:
    """Return the hex multihash of 64 freshly generated random bytes.

    Test helper for cases that need a checksum-shaped value whose exact
    content is irrelevant.
    """
    random_content = any_bytes(64)
    return multihash_as_hex(random_content)


def any_bytes(byte_count: int) -> bytes:
    """Return `byte_count` random bytes obtained from `os.urandom`."""
    random_bytes = os.urandom(byte_count)
    return random_bytes


def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTests) -> None:
now = any_epoch_datetime()
now_function = fixed_now_function(now)
current_datetime = format_rfc_3339_datetime_string(now)
created_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
collection = ImageryCollection(fake_collection_metadata, now_function)
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path, "any GDAL version", current_datetime, current_datetime)
item = ImageryItem(
"BR34_5000_0304",
item_file_path,
"any GDAL version",
created_datetime,
format_rfc_3339_datetime_string(any_epoch_datetime()),
any_multihash_as_hex(),
format_rfc_3339_datetime_string(any_epoch_datetime()),
format_rfc_3339_datetime_string(any_epoch_datetime()),
)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down Expand Up @@ -151,13 +169,7 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTes

for property_name in ["created", "updated"]:
with subtests.test(msg=f"collection {property_name}"):
assert collection.stac[property_name] == current_datetime

with subtests.test(msg=f"item properties.{property_name}"):
assert item.stac["properties"][property_name] == current_datetime

with subtests.test(msg=f"item assets.visual.{property_name}"):
assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z"
assert collection.stac[property_name] == format_rfc_3339_datetime_string(now)


def test_write_collection(fake_collection_metadata: CollectionMetadata) -> None:
Expand Down
85 changes: 76 additions & 9 deletions scripts/stac/imagery/tests/create_stac_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import json
from datetime import timedelta
from os import utime
from pathlib import Path
from typing import cast

from pytest_subtests import SubTests

from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.create_stac import create_collection, create_item
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.tests.collection_test import any_multihash_as_hex
from scripts.tests.datetimes_test import any_epoch_datetime


Expand Down Expand Up @@ -74,12 +79,23 @@ def test_create_item_with_derived_from_datetimes(tmp_path: Path) -> None:
assert item.stac["properties"]["end_datetime"] == "2024-09-02T12:00:00Z"


def test_create_item_when_resupplying(tmp_path: Path) -> None:
def test_create_item_when_resupplying(subtests: SubTests, tmp_path: Path) -> None:
item_name = "empty"
existing_item = tmp_path / f"{item_name}.json"
tiff_path = f"./scripts/tests/data/{item_name}.tiff"
file_modified_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
existing_item_content = {
"type": "Feature",
"id": f"{item_name}",
"id": item_name,
"assets": {
"visual": {
"href": tiff_path,
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": "12205f300ac3bd1d289da1517144d4851050e544c43c58c23ccfcc1f6968f764a45a",
"created": file_modified_datetime,
"updated": file_modified_datetime,
}
},
"properties": {"created": "2024-09-02T12:00:00Z", "updated": "2024-09-10T12:00:00Z"},
}

Expand All @@ -90,7 +106,7 @@ def test_create_item_when_resupplying(tmp_path: Path) -> None:

current_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
item = create_item(
f"./scripts/tests/data/{item_name}.tiff",
tiff_path,
"",
"",
"abc123",
Expand All @@ -100,15 +116,21 @@ def test_create_item_when_resupplying(tmp_path: Path) -> None:
published_path=tmp_path.as_posix(),
)

assert item.stac["properties"]["created"] == "2024-09-02T12:00:00Z"
assert item.stac["properties"]["updated"] == current_datetime
with subtests.test(msg="properties.created"):
assert item.stac["properties"]["created"] == "2024-09-02T12:00:00Z"
with subtests.test(msg="properties.updated"):
assert item.stac["properties"]["updated"] == current_datetime

for attribute in ["created", "updated"]:
with subtests.test(msg=f"assets.visual.{attribute}"):
assert item.stac["assets"]["visual"][attribute] == file_modified_datetime


def test_create_item_when_resupplying_from_incomplete_metadata(tmp_path: Path) -> None:
# TODO: Remove this test once TDE-1103 is DONE
item_name = "empty"
existing_item = tmp_path / f"{item_name}.json"
existing_item_content = {"type": "Feature", "id": f"{item_name}"}
existing_item_content = {"type": "Feature", "id": item_name}
existing_item.write_text(json.dumps(existing_item_content))
fake_gdal_info: GdalInfo = cast(
GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}}
Expand All @@ -131,9 +153,7 @@ def test_create_item_when_resupplying_from_incomplete_metadata(tmp_path: Path) -


def test_create_item_when_resupplying_with_new_file(tmp_path: Path) -> None:
fake_gdal_info: GdalInfo = cast(
GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}}
)
fake_gdal_info: GdalInfo = any_gdal_info()

current_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
item = create_item(
Expand All @@ -151,6 +171,49 @@ def test_create_item_when_resupplying_with_new_file(tmp_path: Path) -> None:
assert item.stac["properties"]["updated"] == current_datetime


def test_create_item_when_resupplying_with_changed_file(subtests: SubTests, tmp_path: Path) -> None:
    """Resupplying a changed asset keeps its `created` datetime but refreshes `updated`.

    A previously published Item is written to `tmp_path` with a random
    checksum, so it will not match the checksum of the asset file on disk.
    The new Item is then expected to reuse the published asset `created`
    value while taking `updated` from the asset file's modification time.
    """
    item_name = "empty"
    original_item = tmp_path / f"{item_name}.json"
    # Derive the asset path from the item name so the Item ID and asset stay in sync.
    asset_file = f"./scripts/tests/data/{item_name}.tiff"
    created_datetime = any_epoch_datetime()
    created_datetime_string = format_rfc_3339_datetime_string(created_datetime)
    original_item_content = {
        "type": "Feature",
        "id": item_name,
        "assets": {
            "visual": {
                "href": asset_file,
                "type": "image/tiff; application=geotiff; profile=cloud-optimized",
                # Random checksum: simulates the asset content having changed since publication.
                "file:checksum": any_multihash_as_hex(),
                "created": created_datetime_string,
                "updated": created_datetime_string,
            }
        },
        "properties": {"created": "2024-09-02T12:00:00Z", "updated": "2024-09-10T12:00:00Z"},
    }
    modified_datetime = created_datetime + timedelta(days=1)
    # Only the modification time is asserted on; the access time is arbitrary.
    utime(asset_file, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))

    original_item.write_text(json.dumps(original_item_content))

    item = create_item(
        asset_file,
        "",
        "",
        "abc123",
        "any GDAL version",
        format_rfc_3339_datetime_string(any_epoch_datetime()),
        any_gdal_info(),
        published_path=tmp_path.as_posix(),
    )

    with subtests.test(msg="assets.visual.created"):
        assert item.stac["assets"]["visual"]["created"] == created_datetime_string

    with subtests.test(msg="assets.visual.updated"):
        assert item.stac["assets"]["visual"]["updated"] == format_rfc_3339_datetime_string(modified_datetime)


def test_create_collection(fake_collection_metadata: CollectionMetadata) -> None:
collection_id = "test_collection"

Expand All @@ -166,3 +229,7 @@ def test_create_collection(fake_collection_metadata: CollectionMetadata) -> None
)

assert collection.stac["id"] == collection_id


def any_gdal_info() -> GdalInfo:
    """Return a minimal `GdalInfo` holding only a four-corner `wgs84Extent` polygon."""
    ring = [[0, 1], [1, 1], [1, 0], [0, 0]]
    extent = {"type": "Polygon", "coordinates": [ring]}
    return GdalInfo(wgs84Extent=extent)
Loading

0 comments on commit f861c7b

Please sign in to comment.