Skip to content

Commit

Permalink
fix: handle percent-encoded urls TDE-1054 (#881)
Browse files Browse the repository at this point in the history
* fix: handle percent-encoded urls TDE-1054

* fix: typo
  • Loading branch information
paulfouquet authored Feb 27, 2024
1 parent 6e33fe7 commit eacc453
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 7 deletions.
16 changes: 12 additions & 4 deletions scripts/files/file_tiff.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from enum import Enum
from typing import Any, Dict, List, Optional
from urllib.parse import unquote

from scripts.gdal.gdal_helper import GDALExecutionException, gdal_info, run_gdal
from scripts.gdal.gdalinfo import GdalInfo
Expand All @@ -24,10 +25,17 @@ class FileTiff:

def __init__(
self,
path: List[str],
paths: List[str],
preset: Optional[str] = None,
) -> None:
self._path_original = path
paths_original = []
for p in paths:
# paths can be URLs containing percent-encoded sequences (like `%20` for a space),
# which would make the process fail later (TDE-1054)
# FIXME: we should use URLs in the code base
paths_original.append(unquote(p))

self._paths_original = paths_original
self._path_standardised = ""
self._errors: List[Dict[str, Any]] = []
self._gdalinfo: Optional[GdalInfo] = None
Expand Down Expand Up @@ -138,14 +146,14 @@ def get_errors(self) -> List[Dict[str, Any]]:
"""
return self._errors

def get_path_original(self) -> List[str]:
def get_paths_original(self) -> List[str]:
"""Get the path(s) of the original (non standardised) file(s).
It can be a list of several paths if the standardised file is a retiled image.
Returns:
a list of file paths
"""
return self._path_original
return self._paths_original

def get_path_standardised(self) -> str:
"""Get the path of the standardised file.
Expand Down
2 changes: 1 addition & 1 deletion scripts/standardise_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def main() -> None:
# If the file is not valid (Non Visual QA errors)
# Logs the `vsis3` path to use `gdal` on the file directly from `s3`
# This is to help data analysts to verify the file.
original_path: List[str] = file.get_path_original()
original_path: List[str] = file.get_paths_original()
standardised_path = file.get_path_standardised()
env_argo_template = os.environ.get("ARGO_TEMPLATE")
if env_argo_template:
Expand Down
4 changes: 2 additions & 2 deletions scripts/standardising.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,9 @@ def standardising(
footprint_tmp_path = os.path.join(tmp_path, footprint_file_name)
sidecars: List[str] = []
for extension in [".prj", ".tfw"]:
for file_input in files.inputs:
for file_input in tiff.get_paths_original():
sidecars.append(f"{os.path.splitext(file_input)[0]}{extension}")
source_files = write_all(files.inputs, f"{tmp_path}/source/")
source_files = write_all(tiff.get_paths_original(), f"{tmp_path}/source/")
write_sidecars(sidecars, f"{tmp_path}/source/")

source_tiffs = [file for file in source_files if is_tiff(file)]
Expand Down

0 comments on commit eacc453

Please sign in to comment.