From 5000a65f7516a9a934fdd0937a71d1f5aeebf2b4 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Thu, 15 Dec 2022 11:59:55 +1300 Subject: [PATCH] feat: optimise cutline handling by only using the cutline if required (#258) * feat: optimise cutline handling by only using the cutline if required * refactor: black/lint * refactor: more lint * refactor: ignore pylint for osgeo imports * refactor: sort imports --- scripts/gdal/gdal_cutline.py | 101 +++++++++++++++++++++++++++++++++++ scripts/standardising.py | 16 ++++-- 2 files changed, 112 insertions(+), 5 deletions(-) create mode 100644 scripts/gdal/gdal_cutline.py diff --git a/scripts/gdal/gdal_cutline.py b/scripts/gdal/gdal_cutline.py new file mode 100644 index 00000000..364b76d1 --- /dev/null +++ b/scripts/gdal/gdal_cutline.py @@ -0,0 +1,101 @@ +from typing import Optional + +# osgeo is embbed in the Docker image +from osgeo import gdal # pylint: disable=import-error +from osgeo import Geometry, Layer, ogr # pylint: disable=import-error + + +def get_tiff_geom(tiff_path: str) -> Geometry: + """Load a tiff with gdal and create a geometry for the bounding box""" + gdal_tiff = gdal.Open(tiff_path, gdal.GA_ReadOnly) + + # Convert tiff into polygon + transform = gdal_tiff.GetGeoTransform() + pixel_width = transform[1] + pixel_height = transform[5] + cols = gdal_tiff.RasterXSize + rows = gdal_tiff.RasterYSize + + xLeft = transform[0] + yTop = transform[3] + xRight = xLeft + cols * pixel_width + yBottom = yTop + rows * pixel_height + + ring = ogr.Geometry(ogr.wkbLinearRing) + ring.AddPoint(xLeft, yTop) + ring.AddPoint(xLeft, yBottom) + ring.AddPoint(xRight, yBottom) + ring.AddPoint(xRight, yTop) + ring.AddPoint(xLeft, yTop) + tiff_geom = ogr.Geometry(ogr.wkbPolygon) + tiff_geom.AddGeometry(ring) + return tiff_geom + + +def clamp_cutline_to_tiff(output_location: str, tiff_geom: Geometry, cutline_geom: Geometry, input_layer: Layer) -> str: + """Optimize a cutline to a specific tiff_geomemtry + + Args: + output_location: output location + tiff_geom: Geometry of the tiff + cutline_geom: Geometry of the cutline + input_layer: Cutline input layer + + Returns: + Location of optimised cutline + """ + output_path = output_location + ".optimized.geojson" + + # Optimize the cutline to the intersection area + driver = ogr.GetDriverByName("geojson") + optimized_cutline = driver.CreateDataSource(output_path) + output_layer = optimized_cutline.CreateLayer("optimized-cutline", input_layer.GetSpatialRef(), input_layer.GetGeomType()) + + # Buffer the TIFF by 1M + tiff_geom_buffer = tiff_geom.Buffer(1.0) + cutline_buffer_intersection = tiff_geom_buffer.Intersection(cutline_geom) + + # Copying the features + output_cutline = ogr.Feature(input_layer.GetLayerDefn()) + output_cutline.SetGeometry(cutline_buffer_intersection) + output_layer.CreateFeature(output_cutline) + + return output_path + + +def optimize_cutline(tiff_path: str, cutline_path: str, optimize: bool = False) -> Optional[str]: + """Determine if a cutline needs to be applied to a tiff + + Args: + tiff_path: the tiff to check the cutline against + cutline_path: cutline to use + optimize: Reduce the scope of the cutline to bounds of the imagery + + Returns: + Location to a new optimised cutline or None if no cutline is required + """ + + tiff_geom = get_tiff_geom(tiff_path) + gdal_cutline = ogr.Open(cutline_path, gdal.GA_ReadOnly) + + # Compare cutline against raster + input_layer = gdal_cutline.GetLayer() + input_feature = input_layer.GetFeature(0) + cutline_geom = input_feature.GetGeometryRef() + + # Check the amount of intersection between the cutline and the tiff + cutline_intersection = tiff_geom.Intersection(cutline_geom) + intersection_area = cutline_intersection.GetArea() + raster_area = tiff_geom.GetArea() + + intersection_area = intersection_area / raster_area + + # 99.99% of the tiff is covered by the cutline, so we likely dont need the cutline + if intersection_area >= 0.9999: + return None + + # Not asked to optimize the cutline + if not optimize: + return cutline_path + + return clamp_cutline_to_tiff(cutline_path, cutline_geom, tiff_geom, input_layer) diff --git a/scripts/standardising.py b/scripts/standardising.py index 7d737d6c..f8236b1a 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -14,6 +14,7 @@ from scripts.files.files_helper import get_file_name_from_path, is_tiff, is_vrt from scripts.files.fs import read, write from scripts.gdal.gdal_bands import get_gdal_band_offset +from scripts.gdal.gdal_cutline import optimize_cutline from scripts.gdal.gdal_helper import get_gdal_version, run_gdal from scripts.gdal.gdal_preset import get_cutline_command, get_gdal_command from scripts.logging.time_helper import time_in_ms @@ -55,6 +56,8 @@ def download_tiff_file(input_file: str, tmp_path: str) -> str: """ target_file_path = os.path.join(tmp_path, str(ulid.ULID())) input_file_path = target_file_path + ".tiff" + get_log().info("download_tiff", path=input_file, target_path=input_file_path) + write(input_file_path, read(input_file)) base_file_path = os.path.splitext(input_file)[0] @@ -62,6 +65,8 @@ def download_tiff_file(input_file: str, tmp_path: str) -> str: for ext in [".prj", ".tfw"]: try: write(target_file_path + ext, read(base_file_path + ext)) + get_log().info("download_tiff_sidecar", path=base_file_path + ext, target_path=target_file_path + ext) + except: # pylint: disable-msg=bare-except pass @@ -92,11 +97,12 @@ def standardising(file: str, preset: str, cutline: Optional[str]) -> FileTiff: # Ensure the input cutline is a easy spot for GDAL to read write(input_cutline_path, read(cutline)) - target_vrt = os.path.join(tmp_path, str(ulid.ULID()) + ".vrt") - # TODO check if the cutline actually intersects with the input_file - # as apply a cutline is much slower than conversion - run_gdal(get_cutline_command(input_cutline_path), input_file=input_file, output_file=target_vrt) - input_file = target_vrt + optimized_cutline = optimize_cutline(input_file, input_cutline_path) + get_log().info("optimize_cutline", optimized_cutline=optimized_cutline, path=file) + if optimized_cutline: + target_vrt = os.path.join(tmp_path, str(ulid.ULID()) + ".vrt") + run_gdal(get_cutline_command(optimized_cutline), input_file=input_file, output_file=target_vrt) + input_file = target_vrt command = get_gdal_command(preset) command.extend(get_gdal_band_offset(input_file))