Skip to content

Commit

Permalink
feat!: keep asset created date when resupplying TDE-1298
Browse files Browse the repository at this point in the history
  • Loading branch information
l0b0 committed Oct 25, 2024
1 parent b1490d1 commit 8abd002
Show file tree
Hide file tree
Showing 10 changed files with 316 additions and 144 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/format-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,28 @@ jobs:
- name: End to end test - Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff
- name: End to end test - Elevation
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/dem.json --preset dem_lerc --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 30 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/BK39_10000_0102.tiff" ./scripts/tests/data/output/BK39_10000_0102.tiff
cmp --silent "${{ runner.temp }}/BK39_10000_0101.tiff" ./scripts/tests/data/output/BK39_10000_0101.tiff
- name: End to end test - Historical Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/hi.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 60 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/BQ31_5000_0608.tiff" ./scripts/tests/data/output/BQ31_5000_0608.tiff
- name: End to end test - Cutline (Aerial Imagery)
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/cutline/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --cutline ./tests/data/cutline_aerial.fgb --gsd 10 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/cutline/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829_cut.tiff
- name: End to end test - Footprint
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/aerial.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10m --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
jq 'select(.xy_coordinate_resolution == 1E-8) // error("Wrong or missing X/Y coordinate resolution")' "${{ runner.temp }}/BG35_1000_4829_footprint.geojson"
cmp --silent <(jq "del(.features[0].properties.location, .xy_coordinate_resolution)" "${{ runner.temp }}/BG35_1000_4829_footprint.geojson") <(jq "del(.features[0].properties.location, .xy_coordinate_resolution)" ./scripts/tests/data/output/BG35_1000_4829_footprint.geojson)
Expand All @@ -64,7 +64,7 @@ jobs:
- name: End to end test - Restandardise Aerial Imagery
run: |
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true
docker run -v "${{ runner.temp }}:/tmp/" topo-imagery python3 standardise_validate.py --from-file ./tests/data/restandardise.json --preset webp --target-epsg 2193 --source-epsg 2193 --target /tmp/restandardise/ --collection-id 123 --start-datetime 2023-01-01 --end-datetime 2023-01-01 --gsd 10 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
cmp --silent "${{ runner.temp }}/restandardise/BG35_1000_4829.tiff" ./scripts/tests/data/output/BG35_1000_4829.tiff
- name: End to end test - Translate Ascii Files (Elevation)
Expand All @@ -74,7 +74,7 @@ jobs:
- name: End to end test - Remove empty files
run: |
docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60 --create-footprints=true
docker run -v "${{ runner.temp }}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 --gsd 60 --create-footprints=true --current-datetime=2020-01-02T03:04:05Z
empty_target_directory="$(find "${{ runner.temp }}/tmp-empty" -maxdepth 0 -type d -empty)"
[[ -n "$empty_target_directory" ]]
Expand Down
2 changes: 2 additions & 0 deletions scripts/gdal/gdal_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import subprocess
from enum import Enum
from functools import lru_cache
from shutil import rmtree
from tempfile import mkdtemp
from typing import cast
Expand Down Expand Up @@ -53,6 +54,7 @@ def command_to_string(command: list[str]) -> str:
return " ".join(command)


@lru_cache(maxsize=1)
def get_gdal_version() -> str:
"""Return the GDAL version assuming all GDAL commands are in the same version of gdalinfo.
Expand Down
83 changes: 61 additions & 22 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
import json
from os import path
from typing import Any
from typing import Any, cast

from linz_logger import get_log
from shapely.geometry.base import BaseGeometry

from scripts.datetimes import utc_now
from scripts.datetimes import format_rfc_3339_datetime_string, utc_now
from scripts.files.files_helper import get_file_name_from_path
from scripts.files.fs import NoSuchFileError, read
from scripts.files.fs import NoSuchFileError, modified, read
from scripts.files.geotiff import get_extents
from scripts.gdal.gdal_helper import gdal_info
from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.item import ImageryItem, STACAsset
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.link import Link, Relation
from scripts.stac.util.checksum import multihash_as_hex
from scripts.stac.util.media_type import StacMediaType


Expand Down Expand Up @@ -77,12 +78,15 @@ def create_collection(
return collection


def get_item_created_datetime(existing_item: dict[str, Any], current_datetime: str) -> str:
    """Return the 'properties.created' value of an existing STAC Item.

    Falls back to `current_datetime` when the existing Item (or its
    'properties.created' entry) is absent, so a resupplied asset keeps
    its original creation date when one is available.

    Args:
        existing_item: STAC object of the previously published Item (may be empty)
        current_datetime: RFC 3339 datetime string used as the fallback

    Returns:
        the creation datetime string to record on the Item
    """
    properties = existing_item.get("properties", {})
    # cast() is for the type checker only: STAC JSON is typed as Any
    return cast(str, properties.get("created", current_datetime))


def create_item(
file: str,
asset_path: str,
start_datetime: str,
end_datetime: str,
collection_id: str,
gdal_version: str,
current_datetime: str,
gdalinfo_result: GdalInfo | None = None,
derived_from: list[str] | None = None,
Expand All @@ -91,11 +95,10 @@ def create_item(
"""Create an ImageryItem (STAC) to be linked to a Collection.
Args:
file: asset tiff file
asset_path: asset tiff file
start_datetime: start date of the survey
end_datetime: end date of the survey
collection_id: collection id to link to the Item
gdal_version: GDAL version
current_datetime: datetime string that represents the current time when the item is created.
gdalinfo_result: result of the gdalinfo command. Defaults to None.
derived_from: list of STAC Items from where this Item is derived. Defaults to None.
Expand All @@ -104,25 +107,24 @@ def create_item(
Returns:
a STAC Item wrapped in ImageryItem
"""
id_ = get_file_name_from_path(file)
item_id = get_file_name_from_path(asset_path)

if not gdalinfo_result:
gdalinfo_result = gdal_info(file)
gdalinfo_result = gdal_info(asset_path)

geometry, bbox = get_extents(gdalinfo_result)
created_datetime = current_datetime
existing_item = {}
if published_path:
# FIXME: make this try/catch nicer
try:
existing_item_content = read(path.join(published_path, f"{id_}.json"))
existing_item = json.loads(existing_item_content.decode("UTF-8"))
created_datetime = existing_item["properties"]["created"]
existing_item = json.loads(read(path.join(published_path, f"{item_id}.json")).decode("UTF-8"))
except NoSuchFileError:
get_log().info(f"No Item is published for ID: {id_}")
except KeyError:
get_log().info(f"Existing Item {id_} does not have 'properties.created' attribute")
get_log().info(f"No Item is published for ID: {item_id}")

item = ImageryItem(id_, file, gdal_version, created_datetime, current_datetime)
item = ImageryItem(
item_id,
get_stac_asset(item_id, asset_path, existing_item),
get_item_created_datetime(existing_item, current_datetime),
current_datetime,
)

if derived_from is not None:
for derived in derived_from:
Expand All @@ -142,8 +144,45 @@ def create_item(
)

item.update_datetime(start_datetime, end_datetime)
item.update_spatial(geometry, bbox)
item.update_spatial(*get_extents(gdalinfo_result))
item.add_collection(collection_id)

get_log().info("ImageryItem created", path=file)
get_log().info("ImageryItem created", path=asset_path)
return item


def get_stac_asset(item_id: str, asset_path: str, existing_item: dict[str, Any]) -> STACAsset:
    """Make a STAC Asset object for the visual asset file.

    The asset's 'created' date is carried over from the previously published
    Item when present; 'updated' is carried over only when the file checksum
    is unchanged. Otherwise both fall back to the file's modification time.

    Args:
        item_id: ID of the STAC Item
        asset_path: Path of the visual asset file
        existing_item: STAC object of the existing Item

    Returns:
        a STAC Asset object
    """
    checksum = multihash_as_hex(read(asset_path))

    # Default both timestamps to the file's mtime, formatted as RFC 3339
    fallback_datetime = format_rfc_3339_datetime_string(modified(asset_path))
    created = fallback_datetime
    updated = fallback_datetime

    try:
        created = existing_item["assets"]["visual"]["created"]
    except KeyError:
        get_log().info(f"Existing Item for {item_id} does not have 'assets.visual.created' attribute")

    try:
        # Keep the previous 'updated' timestamp only when the file content is unchanged
        if checksum == existing_item["assets"]["visual"]["file:checksum"]:
            updated = existing_item["assets"]["visual"]["updated"]
    except KeyError:
        get_log().info(f"Existing Item for {item_id} does not have 'assets.visual' attributes")

    return STACAsset(
        **{
            "href": asset_path,
            "file:checksum": checksum,
            "created": created,
            "updated": updated,
        }
    )
30 changes: 14 additions & 16 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
import os
from os import environ
from typing import Any
from typing import Any, TypedDict

from scripts.datetimes import format_rfc_3339_datetime_string
from scripts.files import fs
from scripts.files.fs import modified
from scripts.gdal.gdal_helper import get_gdal_version
from scripts.stac.link import Link, Relation
from scripts.stac.util import checksum
from scripts.stac.util.STAC_VERSION import STAC_VERSION
from scripts.stac.util.media_type import StacMediaType
from scripts.stac.util.stac_extensions import StacExtensions

STACAsset = TypedDict("STACAsset", {"href": str, "file:checksum": str, "created": str, "updated": str})


class ImageryItem:
stac: dict[str, Any]

def __init__(self, id_: str, file: str, gdal_version: str, created_datetime: str, updated_datetime: str) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))

def __init__(
self,
id_: str,
stac_asset: STACAsset,
created_datetime: str,
updated_datetime: str,
) -> None:
if (topo_imagery_hash := environ.get("GIT_HASH")) is not None:
commit_url = f"https://github.com/linz/topo-imagery/commit/{topo_imagery_hash}"
else:
Expand All @@ -30,20 +31,17 @@ def __init__(self, id_: str, file: str, gdal_version: str, created_datetime: str
"id": id_,
"links": [Link(path=f"./{id_}.json", rel=Relation.SELF, media_type=StacMediaType.GEOJSON).stac],
"assets": {
"visual": {
"href": os.path.join(".", os.path.basename(file)),
"visual": stac_asset
| {
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": checksum.multihash_as_hex(file_content),
"created": file_modified_datetime,
"updated": file_modified_datetime,
}
},
"stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value],
"properties": {
"created": created_datetime,
"updated": updated_datetime,
"processing:datetime": updated_datetime,
"processing:software": {"gdal": gdal_version, "linz/topo-imagery": commit_url},
"processing:software": {"gdal": get_gdal_version(), "linz/topo-imagery": commit_url},
"processing:version": environ.get("GIT_VERSION", "GIT_VERSION not specified"),
},
}
Expand Down
41 changes: 31 additions & 10 deletions scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
import tempfile
from collections.abc import Callable
from datetime import datetime, timezone
from random import choice
from shutil import rmtree
from string import printable
from tempfile import mkdtemp
from unittest.mock import patch

import shapely.geometry
from boto3 import resource
Expand All @@ -17,9 +20,10 @@
from scripts.files.fs import read
from scripts.files.fs_s3 import write
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.item import ImageryItem, STACAsset
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.util.checksum import multihash_as_hex
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime

Expand Down Expand Up @@ -110,15 +114,38 @@ def func() -> datetime:
return func


def any_multihash_as_hex() -> str:
    """Return a multihash hex digest of random bytes, for use as an arbitrary test checksum."""
    random_payload = os.urandom(64)
    return multihash_as_hex(random_payload)


def any_gdal_version() -> str:
    """Return an arbitrary 10-character printable string standing in for a GDAL version."""
    characters = [choice(printable) for _ in range(10)]
    return "".join(characters)


def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTests) -> None:
now = any_epoch_datetime()
now_function = fixed_now_function(now)
current_datetime = format_rfc_3339_datetime_string(now)
created_datetime = format_rfc_3339_datetime_string(any_epoch_datetime())
collection = ImageryCollection(fake_collection_metadata, now_function)
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path, "any GDAL version", current_datetime, current_datetime)

with patch("scripts.stac.imagery.item.get_gdal_version", return_value=any_gdal_version()):
item = ImageryItem(
"BR34_5000_0304",
STACAsset(
**{
"href": item_file_path,
"file:checksum": any_multihash_as_hex(),
"created": format_rfc_3339_datetime_string(any_epoch_datetime()),
"updated": format_rfc_3339_datetime_string(any_epoch_datetime()),
}
),
created_datetime,
format_rfc_3339_datetime_string(any_epoch_datetime()),
)

geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down Expand Up @@ -151,13 +178,7 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, subtests: SubTes

for property_name in ["created", "updated"]:
with subtests.test(msg=f"collection {property_name}"):
assert collection.stac[property_name] == current_datetime

with subtests.test(msg=f"item properties.{property_name}"):
assert item.stac["properties"][property_name] == current_datetime

with subtests.test(msg=f"item assets.visual.{property_name}"):
assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z"
assert collection.stac[property_name] == format_rfc_3339_datetime_string(now)


def test_write_collection(fake_collection_metadata: CollectionMetadata) -> None:
Expand Down
Loading

0 comments on commit 8abd002

Please sign in to comment.