Skip to content

Commit

Permalink
feat: non visual qa log original tiff path TDE-563 (#241)
Browse files Browse the repository at this point in the history
* feat: non visual qa log original tiff path TDE-563

* fix: undefined variables
  • Loading branch information
paulfouquet authored Dec 12, 2022
1 parent b8bb95a commit 6320cc9
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 97 deletions.
2 changes: 1 addition & 1 deletion scripts/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ def create_item(
item.update_spatial(geometry, bbox)
item.add_collection(collection_id)

get_log().info("imagery stac item created", file=file)
get_log().info("imagery stac item created", path=file)
return item
82 changes: 52 additions & 30 deletions scripts/files/file_check.py → scripts/files/file_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from scripts.tile.tile_index import Point, TileIndexException, get_tile_name


class FileCheckErrorType(str, Enum):
class FileTiffErrorType(str, Enum):
GDAL_INFO = "gdalinfo"
NO_DATA = "nodata"
BANDS = "bands"
Expand All @@ -20,47 +20,66 @@ class FileCheckErrorType(str, Enum):
COLOR = "color"


class FileCheck:
class FileTiff:
"""Wrapper for the TIFF files"""

def __init__(
self,
path: str,
scale: int,
srs: bytes,
) -> None:
self.path = path
self.scale = scale
self.errors: List[Dict[str, Any]] = []
self._path_original = path
self._path_standardised = ""
self._errors: List[Dict[str, Any]] = []
self._scale = 0
self._valid = True
self._gdalinfo: Optional[GdalInfo] = None
self._srs: Optional[bytes] = None

def set_srs(self, srs: bytes) -> None:
self._srs = srs

def set_scale(self, scale: int) -> None:
self._scale = scale

def set_path_standardised(self, path: str) -> None:
self._path_standardised = path

def get_gdalinfo(self) -> Optional[GdalInfo]:
if self.is_error_type(FileCheckErrorType.GDAL_INFO):
if self.is_error_type(FileTiffErrorType.GDAL_INFO):
return None
if not self._gdalinfo:
try:
self._gdalinfo = gdal_info(self.path)
self._gdalinfo = gdal_info(self._path_standardised)
except json.JSONDecodeError as jde:
self.add_error(error_type=FileCheckErrorType.GDAL_INFO, error_message=f"parsing result issue: {str(jde)}")
self.add_error(error_type=FileTiffErrorType.GDAL_INFO, error_message=f"parsing result issue: {str(jde)}")
except GDALExecutionException as gee:
self.add_error(error_type=FileCheckErrorType.GDAL_INFO, error_message=f"failed: {str(gee)}")
self.add_error(error_type=FileTiffErrorType.GDAL_INFO, error_message=f"failed: {str(gee)}")
except Exception as e: # pylint: disable=broad-except
self.add_error(error_type=FileCheckErrorType.GDAL_INFO, error_message=f"error(s): {str(e)}")
self.add_error(error_type=FileTiffErrorType.GDAL_INFO, error_message=f"error(s): {str(e)}")
return self._gdalinfo

def get_errors(self) -> List[Dict[str, Any]]:
return self._errors

def get_path_original(self) -> str:
return self._path_original

def get_path_standardised(self) -> str:
return self._path_standardised

def add_error(
self, error_type: FileCheckErrorType, error_message: str, custom_fields: Optional[Dict[str, str]] = None
self, error_type: FileTiffErrorType, error_message: str, custom_fields: Optional[Dict[str, str]] = None
) -> None:
if not custom_fields:
custom_fields = {}
self.errors.append({"type": error_type, "message": error_message, **custom_fields})
self._errors.append({"type": error_type, "message": error_message, **custom_fields})
self._valid = False

def is_valid(self) -> bool:
return self._valid

def is_error_type(self, error_type: str) -> bool:
for error in self.errors:
for error in self._errors:
if error["type"] == error_type:
return True
return False
Expand All @@ -72,20 +91,20 @@ def check_no_data(self, gdalinfo: GdalInfo) -> None:
current_nodata_val = bands[0]["noDataValue"]
if current_nodata_val != 255:
self.add_error(
error_type=FileCheckErrorType.NO_DATA,
error_type=FileTiffErrorType.NO_DATA,
error_message="noDataValue is not 255",
custom_fields={"current": f"{current_nodata_val}"},
)
else:
self.add_error(error_type=FileCheckErrorType.NO_DATA, error_message="noDataValue not set")
self.add_error(error_type=FileTiffErrorType.NO_DATA, error_message="noDataValue not set")

def check_band_count(self, gdalinfo: GdalInfo) -> None:
"""Add an error if there are not exactly 3 bands found."""
bands = gdalinfo["bands"]
bands_num = len(bands)
if bands_num != 3:
self.add_error(
error_type=FileCheckErrorType.BANDS,
error_type=FileTiffErrorType.BANDS,
error_message="bands count is not 3",
custom_fields={"count": f"{int(bands_num)}"},
)
Expand All @@ -96,8 +115,11 @@ def check_srs(self, gdalsrsinfo_tif: bytes) -> None:
Args:
gdalsrsinfo_tif (bytes): Value returned by gdalsrsinfo for the tif, as raw bytes.
"""
if gdalsrsinfo_tif != self._srs:
self.add_error(error_type=FileCheckErrorType.SRS, error_message="different srs")
if self._srs:
if gdalsrsinfo_tif != self._srs:
self.add_error(error_type=FileTiffErrorType.SRS, error_message="different srs")
else:
self.add_error(error_type=FileTiffErrorType.SRS, error_message="srs not defined")

def check_color_interpretation(self, gdalinfo: GdalInfo) -> None:
"""Add an error if the colors don't match RGB.
Expand All @@ -120,23 +142,23 @@ def check_color_interpretation(self, gdalinfo: GdalInfo) -> None:
if missing_bands:
missing_bands.sort()
self.add_error(
error_type=FileCheckErrorType.COLOR,
error_type=FileTiffErrorType.COLOR,
error_message="unexpected color interpretation bands",
custom_fields={"missing": f"{', '.join(missing_bands)}"},
)

def check_tile_and_rename(self, gdalinfo: GdalInfo) -> None:
origin = Point(gdalinfo["cornerCoordinates"]["upperLeft"][0], gdalinfo["cornerCoordinates"]["upperLeft"][1])
try:
tile_name = get_tile_name(origin, self.scale)
if not tile_name == get_file_name_from_path(self.path):
new_path = os.path.join(os.path.dirname(self.path), tile_name + ".tiff")
os.rename(self.path, new_path)
get_log().info("renaming_file", path=new_path, old=self.path)
self.path = new_path
tile_name = get_tile_name(origin, self._scale)
if not tile_name == get_file_name_from_path(self._path_standardised):
new_path = os.path.join(os.path.dirname(self._path_standardised), tile_name + ".tiff")
os.rename(self._path_standardised, new_path)
get_log().info("renaming_file", path=new_path, old=self._path_standardised)
self._path_standardised = new_path

except TileIndexException as tie:
self.add_error(FileCheckErrorType.TILE_ALIGNMENT, error_message=f"{tie}")
self.add_error(FileTiffErrorType.TILE_ALIGNMENT, error_message=f"{tie}")

def validate(self) -> bool:
gdalinfo = self.get_gdalinfo()
Expand All @@ -148,8 +170,8 @@ def validate(self) -> bool:

gdalsrsinfo_tif_command = ["gdalsrsinfo", "-o", "wkt"]
try:
gdalsrsinfo_tif_result = run_gdal(gdalsrsinfo_tif_command, self.path)
gdalsrsinfo_tif_result = run_gdal(gdalsrsinfo_tif_command, self._path_standardised)
self.check_srs(gdalsrsinfo_tif_result.stdout)
except GDALExecutionException as gee:
self.add_error(error_type=FileCheckErrorType.SRS, error_message=f"not checked: {str(gee)}")
self.add_error(error_type=FileTiffErrorType.SRS, error_message=f"not checked: {str(gee)}")
return self.is_valid()
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from scripts.files.file_check import FileCheck
from scripts.files.file_tiff import FileTiff
from scripts.gdal.tests.gdalinfo import add_band, fake_gdal_info


Expand All @@ -12,10 +12,10 @@ def test_check_band_count_valid() -> None:
add_band(gdalinfo)
add_band(gdalinfo)

file_check = FileCheck("test", 500, b"test")
file_check.check_band_count(gdalinfo)
file_tiff = FileTiff("test")
file_tiff.check_band_count(gdalinfo)

assert not file_check.errors
assert not file_tiff.get_errors()


def test_check_band_count_invalid() -> None:
Expand All @@ -27,10 +27,10 @@ def test_check_band_count_invalid() -> None:
add_band(gdalinfo)
add_band(gdalinfo)

file_check = FileCheck("test", 500, b"test")
file_check.check_band_count(gdalinfo)
file_tiff = FileTiff("test")
file_tiff.check_band_count(gdalinfo)

assert file_check.errors
assert file_tiff.get_errors()


def test_check_color_interpretation_valid() -> None:
Expand All @@ -42,10 +42,10 @@ def test_check_color_interpretation_valid() -> None:
add_band(gdalinfo, color_interpretation="Green")
add_band(gdalinfo, color_interpretation="Blue")

file_check = FileCheck("test", 500, b"test")
file_check.check_color_interpretation(gdalinfo)
file_tiff = FileTiff("test")
file_tiff.check_color_interpretation(gdalinfo)

assert not file_check.errors
assert not file_tiff.get_errors()


def test_check_color_interpretation_invalid() -> None:
Expand All @@ -58,10 +58,10 @@ def test_check_color_interpretation_invalid() -> None:
add_band(gdalinfo, color_interpretation="Blue")
add_band(gdalinfo, color_interpretation="undefined")

file_check = FileCheck("test", 500, b"test")
file_check.check_color_interpretation(gdalinfo)
file_tiff = FileTiff("test")
file_tiff.check_color_interpretation(gdalinfo)

assert file_check.errors
assert file_tiff.get_errors()


def test_check_no_data_valid() -> None:
Expand All @@ -71,10 +71,10 @@ def test_check_no_data_valid() -> None:
gdalinfo = fake_gdal_info()
add_band(gdalinfo, no_data_value=255)

file_check = FileCheck("test", 500, b"test")
file_check.check_no_data(gdalinfo)
file_tiff = FileTiff("test")
file_tiff.check_no_data(gdalinfo)

assert not file_check.errors
assert not file_tiff.get_errors()


def test_check_no_data_no_value() -> None:
Expand All @@ -84,10 +84,10 @@ def test_check_no_data_no_value() -> None:
gdalinfo = fake_gdal_info()
add_band(gdalinfo)

file_check = FileCheck("test", 500, b"test")
file_check.check_no_data(gdalinfo)
file_tiff = FileTiff("test")
file_tiff.check_no_data(gdalinfo)

assert file_check.errors
assert file_tiff.get_errors()


def test_check_no_data_invalid_value() -> None:
Expand All @@ -97,10 +97,10 @@ def test_check_no_data_invalid_value() -> None:
gdalinfo = fake_gdal_info()
add_band(gdalinfo, no_data_value=0)

file_check = FileCheck("test", 500, b"test")
file_check.check_no_data(gdalinfo)
file_tiff = FileTiff("test")
file_tiff.check_no_data(gdalinfo)

assert file_check.errors
assert file_tiff.get_errors()


def test_check_srs_valid() -> None:
Expand All @@ -110,10 +110,11 @@ def test_check_srs_valid() -> None:
srs_to_test_against = b"SRS Test"
srs_tif = b"SRS Test"

file_check = FileCheck("test", 500, srs_to_test_against)
file_check.check_srs(srs_tif)
file_tiff = FileTiff("test")
file_tiff.set_srs(srs_to_test_against)
file_tiff.check_srs(srs_tif)

assert not file_check.errors
assert not file_tiff.get_errors()


def test_check_srs_invalid() -> None:
Expand All @@ -123,7 +124,8 @@ def test_check_srs_invalid() -> None:
srs_to_test_against = b"SRS Test"
srs_tif = b"SRS Different"

file_check = FileCheck("test", 500, srs_to_test_against)
file_check.check_srs(srs_tif)
file_tiff = FileTiff("test")
file_tiff.set_srs(srs_to_test_against)
file_tiff.check_srs(srs_tif)

assert file_check.errors
assert file_tiff.get_errors()
54 changes: 35 additions & 19 deletions scripts/standardise_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@

from scripts.cli.cli_helper import format_date, format_source, is_argo, valid_date
from scripts.create_stac import create_item
from scripts.files.file_check import FileCheck
from scripts.files.files_helper import is_tiff
from scripts.files.fs import write
from scripts.gdal.gdal_helper import get_srs
from scripts.standardising import start_standardising
from scripts.standardising import run_standardising


def main() -> None:
Expand All @@ -28,43 +26,61 @@ def main() -> None:
"--end-datetime", dest="end_datetime", help="end datetime in format YYYY-MM-DD", type=valid_date, required=True
)
arguments = parser.parse_args()

source = format_source(arguments.source)
start_datetime = format_date(arguments.start_datetime)
end_datetime = format_date(arguments.end_datetime)
concurrency: int = 1
if is_argo():
concurrency = 4

standardised_files = start_standardising(source, arguments.preset, arguments.cutline, concurrency)
if not standardised_files:
get_log().info("Process skipped because no file has been standardised", action="standardise_validate", reason="skip")
# Standardise the tiffs
tiff_files = run_standardising(source, arguments.preset, arguments.cutline, concurrency)
if len(tiff_files) == 0:
get_log().info("no_tiff_file", action="standardise_validate", reason="skipped")
return

# SRS needed for FileTiff (non visual QA)
srs = get_srs()
for file in standardised_files:
if not is_tiff(file):
get_log().trace("file_not_tiff_skipped", file=file)
continue

for file in tiff_files:
file.set_srs(srs)
file.set_scale(int(arguments.scale))

# Validate the file
file_check = FileCheck(file, int(arguments.scale), srs)
if not file_check.validate():
vfs_path = ""
if not file.validate():
get_log().info(
"non_visual_qa_errors",
originalPath=file.get_path_original(),
errors=file.get_errors(),
)
original_path = file.get_path_original()
standardised_path = file.get_path_standardised()
env_argo_template = os.environ.get("ARGO_TEMPLATE")
if env_argo_template:
argo_template = json.loads(env_argo_template)
s3_information = argo_template["archiveLocation"]["s3"]
vfs_path = os.path.join("/vsis3", s3_information["bucket"], s3_information["key"], file_check.path)
get_log().info("non_visual_qa_errors", file=file_check.path, vfspath=vfs_path, errors=file_check.errors)
standardised_path = os.path.join(
"/vsis3", s3_information["bucket"], s3_information["key"], file.get_path_standardised()
)
original_path = os.path.join(
"/vsis3", s3_information["bucket"], s3_information["key"], file.get_path_original()
)
get_log().info(
"non_visual_qa_errors",
originalPath=original_path,
standardisedPath=standardised_path,
errors=file.get_errors(),
)
else:
get_log().info("non_visual_qa_passed", file=file_check.path)
get_log().info("non_visual_qa_passed", path=file.get_path_original())

# Create STAC
item = create_item(file_check.path, start_datetime, end_datetime, arguments.collection_id, file_check.get_gdalinfo())
item = create_item(
file.get_path_standardised(), start_datetime, end_datetime, arguments.collection_id, file.get_gdalinfo()
)
tmp_file_path = os.path.join("/tmp/", f"{item.stac['id']}.json")
write(tmp_file_path, json.dumps(item.stac).encode("utf-8"))
get_log().info("stac item written to tmp", location=tmp_file_path)
get_log().info("stac_saved", path=tmp_file_path)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 6320cc9

Please sign in to comment.