From 25791ff43c9061fd3137fd5c28d19d6f63b3f32c Mon Sep 17 00:00:00 2001
From: jana-starkova <45129167+jana-starkova@users.noreply.github.com>
Date: Wed, 2 Oct 2024 17:06:05 +0200
Subject: [PATCH] fix ignore_metadata flag propagation for arrays of structs (#139)

* fix ignore_metadata flag propagation for arrays of structs

* remove unused code
---
 chispa/schema_comparer.py          |  2 +-
 tests/test_structfield_comparer.py | 25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/chispa/schema_comparer.py b/chispa/schema_comparer.py
index 3773989..5671cd5 100644
--- a/chispa/schema_comparer.py
+++ b/chispa/schema_comparer.py
@@ -115,7 +115,7 @@ def are_datatypes_equal_ignore_nullable(dt1, dt2, ignore_metadata: bool = False)
     if dt1.typeName() == dt2.typeName():
         # Account for array types by inspecting elementType.
         if dt1.typeName() == "array":
-            return are_datatypes_equal_ignore_nullable(dt1.elementType, dt2.elementType)
+            return are_datatypes_equal_ignore_nullable(dt1.elementType, dt2.elementType, ignore_metadata)
         elif dt1.typeName() == "struct":
             return are_schemas_equal_ignore_nullable(dt1, dt2, ignore_metadata)
         else:
diff --git a/tests/test_structfield_comparer.py b/tests/test_structfield_comparer.py
index 287f451..c5c285b 100644
--- a/tests/test_structfield_comparer.py
+++ b/tests/test_structfield_comparer.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from pyspark.sql.types import DoubleType, IntegerType, StructField, StructType
+from pyspark.sql.types import ArrayType, DoubleType, IntegerType, StructField, StructType
 
 from chispa.structfield_comparer import are_structfields_equal
 
@@ -60,3 +60,26 @@ def it_returns_true_when_inner_metadata_is_different_but_ignored():
         sf1 = StructField("hi", StructType([StructField("world", IntegerType(), False)]), False)
         sf2 = StructField("hi", StructType([StructField("world", IntegerType(), False, {"a": "b"})]), False)
         assert are_structfields_equal(sf1, sf2, ignore_metadata=True) is True
+
+    def it_returns_true_when_inner_array_metadata_is_different_but_ignored():
+        sf1 = StructField(
+            "hi",
+            ArrayType(
+                StructType([
+                    StructField("world", IntegerType(), True, {"comment": "Comment"}),
+                ]),
+                True,
+            ),
+            True,
+        )
+        sf2 = StructField(
+            "hi",
+            ArrayType(
+                StructType([
+                    StructField("world", IntegerType(), True, {"comment": "Some other comment"}),
+                ]),
+                True,
+            ),
+            True,
+        )
+        assert are_structfields_equal(sf1, sf2, ignore_metadata=True) is True