Skip to content

Commit

Permalink
feat: allow Create Polygons script to run with multiple files (#48)
Browse files Browse the repository at this point in the history
* feat: create polygon script return temporary file path

* fix: main return value and formatting

* feat: Allow Create Polygons script to run with multiple files
  • Loading branch information
paulfouquet authored Jul 13, 2022
1 parent 8411779 commit 8372cc7
Showing 1 changed file with 43 additions and 35 deletions.
78 changes: 43 additions & 35 deletions scripts/create_polygons.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import os
import tempfile
from collections import Counter
from typing import List
from urllib.parse import urlparse

from aws_helper import get_bucket
from format_source import format_source
from linz_logger import get_log

# osgeo is embedded in the Docker image
Expand Down Expand Up @@ -37,48 +39,54 @@ def get_pixel_count(file_path: str) -> int:
return data_pixels_count


def main() -> List[str]:  # pylint: disable=too-many-locals
    """Create a GeoJSON polygon file for each source image that contains data.

    The ``--source`` command line argument is passed through ``format_source``
    and the result is iterated as a collection of file locations (the exact
    accepted input format is defined by ``format_source`` -- confirm there).
    Locations starting with ``s3://`` are downloaded to a scratch directory
    first; anything else is used as a local path.

    For every source a mask is built with ``create_mask``. If the mask holds
    no data pixels the source is skipped, otherwise ``gdal_polygonize.py`` is
    run on the mask to write ``<source base name>.geojson``.

    Returns:
        The paths of the GeoJSON files that were written, in source order.
        Each file lives in its own directory created with
        ``tempfile.mkdtemp`` so that it still exists after this function
        returns; the caller is responsible for removing those directories.

    Raises:
        subprocess.CalledProcessError: if ``gdal_polygonize.py`` exits with a
            non-zero status.
    """
    # Imported locally so this function is self-contained.
    import subprocess  # pylint: disable=import-outside-toplevel

    logger = get_log()

    parser = argparse.ArgumentParser()
    parser.add_argument("--source", dest="source", required=True)
    arguments = parser.parse_args()

    source = format_source(arguments.source)
    output_files: List[str] = []

    for source_path in source:
        # Scratch space for the downloaded image and the mask; removed as soon
        # as this source has been processed.
        with tempfile.TemporaryDirectory() as tmp_dir:
            source_file_name = os.path.basename(source_path)

            # Defaults for a local file; overwritten below for S3 sources so
            # the create_mask log never touches undefined or stale values.
            local_path = source_path
            source_key = source_path
            bucket_name = None

            # Download the file
            if str(source_path).startswith("s3://"):
                uri_parse = urlparse(source_path, allow_fragments=False)
                bucket_name = uri_parse.netloc
                source_key = uri_parse.path[1:]  # drop the leading "/"
                bucket = get_bucket(bucket_name)
                local_path = os.path.join(tmp_dir, "temp.tif")
                logger.debug(
                    "download_file",
                    source=source_key,
                    bucket=bucket_name,
                    destination=local_path,
                    sourceFileName=source_file_name,
                )
                bucket.download_file(source_key, local_path)

            # Run create_mask
            logger.debug("create_mask", source=source_key, bucket=bucket_name, destination=local_path)
            mask_file = os.path.join(tmp_dir, "mask.tif")
            create_mask(local_path, mask_file)

            # Run create_polygon
            data_px_count = get_pixel_count(mask_file)
            if data_px_count == 0:
                # exclude extents if tif is all white or black
                logger.debug(f"- data_px_count was zero in create_mask function for the tif {mask_file}")
                continue

            destination_file_name = os.path.splitext(source_file_name)[0] + ".geojson"
            # Write the result outside tmp_dir: tmp_dir is deleted when the
            # ``with`` block exits, which would leave the returned path
            # dangling. One directory per output avoids collisions between
            # sources that share a base name.
            output_dir = tempfile.mkdtemp()
            temp_file_path = os.path.join(output_dir, destination_file_name)

            # Argument list (no shell) so file names containing quotes or
            # shell metacharacters are passed through verbatim.
            subprocess.run(
                ["gdal_polygonize.py", "-q", mask_file, temp_file_path, "-f", "GeoJSON"],
                check=True,
            )

            # Only record a path when a polygon file was actually produced.
            output_files.append(temp_file_path)

    return output_files


if __name__ == "__main__":
Expand Down

0 comments on commit 8372cc7

Please sign in to comment.