Skip to content

Commit

Permalink
fix the validation of the metadata (#477)
Browse files: browse the repository at this point in the history
* fix the validation of the metadata

* minor changes in the class Validator, update unit tests

* update 'VERSION'

* fix issues raised by 'flake8'

---------

Co-authored-by: Hanna Imshenetska <[email protected]@EVZZAMZSA0021.epam.com>
  • Loading branch information
Anna050689 and Hanna Imshenetska authored Oct 28, 2024
1 parent a4620b0 commit f549636
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 21 deletions.
2 changes: 1 addition & 1 deletion src/syngen/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.9.47
0.9.48
9 changes: 6 additions & 3 deletions src/syngen/ml/config/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,12 +257,12 @@ def _check_existence_of_referenced_columns(self, table_name: str):
]
if non_existed_columns:
message = (
f"The 'referenced.columns' of the {config_of_key['type']} '{key}' - "
f"The 'references.columns' of the {config_of_key['type']} '{key}' - "
f"{', '.join(non_existed_columns)} "
f"don't exist in the referenced table - '{referenced_table}'"
)
self.errors[
"check existence of the key columns in 'referenced.columns'"
"check existence of the key columns in 'references.columns'"
][key] = message

def _fetch_existed_columns(self, table_name: str) -> List[str]:
Expand Down Expand Up @@ -293,9 +293,12 @@ def _run(self):
self.merged_metadata.pop("global", None)
self.metadata.pop("global", None)

if self.type_of_process == "train" and self.validation_source:
for table_name in self.merged_metadata.keys():
self._gather_existed_columns(table_name)

for table_name in self.merged_metadata.keys():
if self.type_of_process == "train" and self.validation_source:
self._gather_existed_columns(table_name)
self._check_existence_of_source(table_name)
self._check_existence_of_key_columns(table_name)
self._check_existence_of_referenced_columns(table_name)
Expand Down
12 changes: 9 additions & 3 deletions src/syngen/ml/metrics/metrics_classes/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ def get_common_min_max(original, synthetic):

@staticmethod
def __format_float_tick_labels(labels: List, nan_label: str = 'NaN') -> List:
labels = [nan_label if pd.isna(l) else l for l in labels]
labels = [nan_label if pd.isna(label) else label for label in labels]
if all([isinstance(i, float) for i in labels]) and (
max(labels) > 1e5 or min(labels) < 1e-03
):
Expand All @@ -494,8 +494,14 @@ def _plot_heatmap(
ax = self._axes.flat[plt_index]
ax.tick_params(labelsize=14)
heatmap, x_tick_labels, y_tick_labels = heatmap_data
x_tick_labels = self.__format_float_tick_labels(x_tick_labels, self.columns_nan_labels.get(xfeature, 'NaN'))
y_tick_labels = self.__format_float_tick_labels(y_tick_labels, self.columns_nan_labels.get(yfeature, 'NaN'))
x_tick_labels = self.__format_float_tick_labels(
x_tick_labels,
self.columns_nan_labels.get(xfeature, 'NaN')
)
y_tick_labels = self.__format_float_tick_labels(
y_tick_labels,
self.columns_nan_labels.get(yfeature, 'NaN')
)
ax = sns.heatmap(
heatmap,
xticklabels=x_tick_labels,
Expand Down
28 changes: 14 additions & 14 deletions src/tests/unit/validation_metadata/test_validation_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,17 +237,6 @@ def test_validate_metadata_of_related_tables_with_fk_key_in_train_process(
"contained only the primary key and the foreign key used in the training process"
)
test_metadata = {
"table_a": {
"train_settings": {
"source": "path/to/table_a.csv"
},
"keys": {
"pk_id": {
"type": "PK",
"columns": ["id"]
}
}
},
"table_b": {
"train_settings": {
"source": "path/to/table_b.csv"
Expand All @@ -266,7 +255,18 @@ def test_validate_metadata_of_related_tables_with_fk_key_in_train_process(
}
}
}
}
},
"table_a": {
"train_settings": {
"source": "path/to/table_a.csv"
},
"keys": {
"pk_id": {
"type": "PK",
"columns": ["id"]
}
}
},
}
validator = Validator(
metadata=test_metadata,
Expand Down Expand Up @@ -2021,8 +2021,8 @@ def test_check_not_existent_referenced_columns_in_fk(rp_logger):
assert validator.merged_metadata == test_metadata
assert str(error.value) == (
"The validation of the metadata has been failed. The error(s) found in - \n"
"\"check existence of the key columns in 'referenced.columns'\": {\n \"fk_id\": "
"\"The 'referenced.columns' of the FK 'fk_id' - 'non-existent column' don't exist "
"\"check existence of the key columns in 'references.columns'\": {\n \"fk_id\": "
"\"The 'references.columns' of the FK 'fk_id' - 'non-existent column' don't exist "
"in the referenced table - 'table_b'\"}"
)
rp_logger.info(SUCCESSFUL_MESSAGE)

0 comments on commit f549636

Please sign in to comment.