From 83ae0777463a6050693a976840eb8a244a1967fa Mon Sep 17 00:00:00 2001 From: geruh Date: Mon, 15 Sep 2025 00:16:42 -0700 Subject: [PATCH 1/9] Fix variant type filtering in ParquetMetricsRowGroupFilter --- .../data/TestMetricsRowGroupFilter.java | 96 +++++++++++++++++++ .../parquet/ParquetMetricsRowGroupFilter.java | 6 +- 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java index 384dcacd10cf..a475fecf004b 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -62,6 +62,7 @@ import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.data.orc.GenericOrcReader; import org.apache.iceberg.data.orc.GenericOrcWriter; +import org.apache.iceberg.data.parquet.GenericParquetWriter; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.io.CloseableIterable; @@ -78,6 +79,10 @@ import org.apache.iceberg.types.Types.FloatType; import org.apache.iceberg.types.Types.IntegerType; import org.apache.iceberg.types.Types.StringType; +import org.apache.iceberg.variants.ShreddedObject; +import org.apache.iceberg.variants.Variant; +import org.apache.iceberg.variants.VariantMetadata; +import org.apache.iceberg.variants.Variants; import org.apache.orc.OrcFile; import org.apache.orc.Reader; import org.apache.parquet.hadoop.ParquetFileReader; @@ -988,6 +993,97 @@ public void testTransformFilter() { .isTrue(); } + @TestTemplate + public void testVariantFilterNotNull() throws IOException { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + Schema variantSchema = + new Schema( + required(1, "id", IntegerType.get()), + optional(2, "variant_field", Types.VariantType.get())); + + File parquetFile = new File(tempDir, "test-variant" + System.nanoTime()); + + OutputFile outFile = Files.localOutput(parquetFile); + try (FileAppender appender = + Parquet.write(outFile) + .schema(variantSchema) + .createWriterFunc(GenericParquetWriter::create) + .build()) { + + for (int i = 0; i < 10; i++) { + GenericRecord record = GenericRecord.create(variantSchema); + record.setField("id", i); + + if (i % 2 == 0) { + VariantMetadata metadata = Variants.metadata("field"); + ShreddedObject obj = Variants.object(metadata); + obj.put("field", Variants.of("value" + i)); + Variant variant = Variant.of(metadata, obj); + record.setField("variant_field", variant); + } + + appender.add(record); + } + } + + InputFile inFile = Files.localInput(parquetFile); + try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { + assertThat(reader.getRowGroups()).as("Should create only one row group").hasSize(1); + BlockMetaData blockMetaData = reader.getRowGroups().get(0); + MessageType fileSchema = reader.getFileMetaData().getSchema(); + + ParquetMetricsRowGroupFilter rowGroupFilter = + new ParquetMetricsRowGroupFilter(variantSchema, notNull("variant_field"), true); + boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan") + .isTrue(); + } + parquetFile.deleteOnExit(); + } + + @TestTemplate + public void testAllNullsVariantNotNull() throws IOException { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + Schema variantSchema = + new Schema( + 
required(1, "id", IntegerType.get()), + optional(2, "variant_field", Types.VariantType.get())); + + File parquetFile = new File(tempDir, "test-variant-nulls" + System.nanoTime()); + + OutputFile outFile = Files.localOutput(parquetFile); + try (FileAppender appender = + Parquet.write(outFile) + .schema(variantSchema) + .createWriterFunc(GenericParquetWriter::create) + .build()) { + + for (int i = 0; i < 10; i++) { + GenericRecord record = GenericRecord.create(variantSchema); + record.setField("id", i); + record.setField("variant_field", null); + appender.add(record); + } + } + + InputFile inFile = Files.localInput(parquetFile); + try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { + BlockMetaData blockMetaData = reader.getRowGroups().get(0); + MessageType fileSchema = reader.getFileMetaData().getSchema(); + + ParquetMetricsRowGroupFilter rowGroupFilter = + new ParquetMetricsRowGroupFilter(variantSchema, notNull("variant_field"), true); + boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan even for all nulls") + .isTrue(); + } + parquetFile.deleteOnExit(); + } + private boolean shouldRead(Expression expression) { return shouldRead(expression, true); } diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java index 1ad346d39ab7..40ff9048d173 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java @@ -157,8 +157,10 @@ public Boolean notNull(BoundReference ref) { // When filtering nested types notNull() is implicit filter passed even though complex // filters aren't pushed down in Parquet. Leave all nested column type filters to be - // evaluated post scan. - if (schema.findType(id) instanceof Type.NestedType) { + // evaluated post scan. Variant types also need to be evaluated post scan to access + // shredded statistics. 
+ Type type = schema.findType(id); + if (type instanceof Type.NestedType || type.isVariantType()) { return ROWS_MIGHT_MATCH; } From 1d41313c0f99899f2e4adbe8c9b01b97f5537d1a Mon Sep 17 00:00:00 2001 From: geruh Date: Tue, 16 Sep 2025 00:25:39 -0700 Subject: [PATCH 2/9] Address comments and add more tests --- .../data/TestMetricsRowGroupFilter.java | 252 +++++++++++------- .../parquet/ParquetMetricsRowGroupFilter.java | 7 +- .../iceberg/spark/SparkTestHelperBase.java | 7 + .../iceberg/spark/sql/TestFilterPushDown.java | 74 ++++- 4 files changed, 234 insertions(+), 106 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java index a475fecf004b..9b26168e4904 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -18,7 +18,6 @@ */ package org.apache.iceberg.data; -import static org.apache.iceberg.avro.AvroSchemaUtil.convert; import static org.apache.iceberg.expressions.Expressions.and; import static org.apache.iceberg.expressions.Expressions.equal; import static org.apache.iceberg.expressions.Expressions.greaterThan; @@ -49,8 +48,6 @@ import java.util.Arrays; import java.util.List; import java.util.UUID; -import org.apache.avro.generic.GenericData.Record; -import org.apache.avro.generic.GenericRecordBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.iceberg.FileFormat; @@ -59,7 +56,6 @@ import org.apache.iceberg.ParameterizedTestExtension; import org.apache.iceberg.Parameters; import org.apache.iceberg.Schema; -import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.data.orc.GenericOrcReader; import org.apache.iceberg.data.orc.GenericOrcWriter; import org.apache.iceberg.data.parquet.GenericParquetWriter; @@ -73,6 +69,7 @@ import org.apache.iceberg.orc.ORC; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.parquet.ParquetMetricsRowGroupFilter; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.DoubleType; @@ -159,12 +156,16 @@ public static List parameters() { private File orcFile = null; private MessageType parquetSchema = null; private BlockMetaData rowGroupMetadata = null; + private Schema testSchema; + private Schema fileSchema; @Parameter private FileFormat format; @TempDir private File tempDir; @BeforeEach public void createInputFile() throws IOException { + this.testSchema = schemaForFormat(format); + this.fileSchema = fileSchemaForFormat(format); switch (format) { case ORC: createOrcInputFile(); @@ -178,16 +179,42 @@ public void createInputFile() throws IOException { } } + private static Schema schemaForFormat(FileFormat format) { + if (format == FileFormat.PARQUET) { + return new Schema( + Lists.newArrayList( + Iterables.concat( + SCHEMA.columns(), + Lists.newArrayList( + optional(18, "some_nulls_variant", Types.VariantType.get()), + optional(19, "all_nulls_variant", Types.VariantType.get()))))); + } + return SCHEMA; + } + + private static Schema fileSchemaForFormat(FileFormat format) { + if (format == FileFormat.PARQUET) { + return new Schema( + Lists.newArrayList( + Iterables.concat( + FILE_SCHEMA.columns(), + Lists.newArrayList( + optional(18, "_some_nulls_variant", Types.VariantType.get()), + 
optional(19, "_all_nulls_variant", Types.VariantType.get()))))); + } + return FILE_SCHEMA; + } + public void createOrcInputFile() throws IOException { this.orcFile = new File(tempDir, "junit" + System.nanoTime()); OutputFile outFile = Files.localOutput(orcFile); try (FileAppender appender = ORC.write(outFile) - .schema(FILE_SCHEMA) + .schema(fileSchema) .createWriterFunc(GenericOrcWriter::buildWriter) .build()) { - GenericRecord record = GenericRecord.create(FILE_SCHEMA); + GenericRecord record = GenericRecord.create(fileSchema); // create 50 records for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { record.setField("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 @@ -227,35 +254,46 @@ public void createOrcInputFile() throws IOException { private void createParquetInputFile() throws IOException { File parquetFile = new File(tempDir, "junit" + System.nanoTime()); - // build struct field schema - org.apache.avro.Schema structSchema = AvroSchemaUtil.convert(UNDERSCORE_STRUCT_FIELD_TYPE); - OutputFile outFile = Files.localOutput(parquetFile); - try (FileAppender appender = Parquet.write(outFile).schema(FILE_SCHEMA).build()) { - GenericRecordBuilder builder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table")); + try (FileAppender appender = + Parquet.write(outFile) + .schema(fileSchema) + .createWriterFunc(GenericParquetWriter::create) + .build()) { // create 50 records for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { - builder.set("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 - builder.set( + GenericRecord record = GenericRecord.create(fileSchema); + record.setField("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 + record.setField( "_no_stats_parquet", TOO_LONG_FOR_STATS_PARQUET); // value longer than 4k will produce no stats // in Parquet - builder.set("_required", "req"); // required, always non-null - builder.set("_all_nulls", null); // never non-null - builder.set("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values - builder.set("_no_nulls", ""); // optional, but always non-null - builder.set("_all_nans", Double.NaN); // never non-nan - builder.set("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values - builder.set( + record.setField("_required", "req"); // required, always non-null + record.setField("_all_nulls", null); // never non-null + record.setField("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values + record.setField("_no_nulls", ""); // optional, but always non-null + record.setField("_all_nans", Double.NaN); // never non-nan + record.setField("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values + record.setField( "_some_double_nans", (i % 10 == 0) ? 
Double.NaN : 2D); // includes some nan values - builder.set("_no_nans", 3D); // optional, but always non-nan - builder.set("_str", i + "str" + i); + record.setField("_no_nans", 3D); // optional, but always non-nan + record.setField("_str", i + "str" + i); - Record structNotNull = new Record(structSchema); - structNotNull.put("_int_field", INT_MIN_VALUE + i); - builder.set("_struct_not_null", structNotNull); // struct with int + GenericRecord structNotNull = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE); + structNotNull.setField("_int_field", INT_MIN_VALUE + i); + record.setField("_struct_not_null", structNotNull); // struct with int + + if (i % 2 == 0) { + VariantMetadata metadata = Variants.metadata("data"); + ShreddedObject obj = Variants.object(metadata); + obj.put("data", Variants.of(25 + i)); + record.setField("_some_nulls_variant", Variant.of(metadata, obj)); + } else { + record.setField("_some_nulls_variant", null); + } + record.setField("_all_nulls_variant", null); - appender.add(builder.build()); + appender.add(record); } } @@ -986,7 +1024,8 @@ public void testTransformFilter() { assumeThat(format).isEqualTo(FileFormat.PARQUET); boolean shouldRead = - new ParquetMetricsRowGroupFilter(SCHEMA, equal(truncate("required", 2), "some_value"), true) + new ParquetMetricsRowGroupFilter( + testSchema, equal(truncate("required", 2), "some_value"), true) .shouldRead(parquetSchema, rowGroupMetadata); assertThat(shouldRead) .as("Should read: filter contains non-reference evaluate as True") @@ -994,94 +1033,105 @@ public void testTransformFilter() { } @TestTemplate - public void testVariantFilterNotNull() throws IOException { - assumeThat(format).isEqualTo(FileFormat.PARQUET); + public void testVariantNotNull() { + assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); - Schema variantSchema = - new Schema( - required(1, "id", IntegerType.get()), - optional(2, "variant_field", Types.VariantType.get())); + boolean shouldRead = shouldRead(notNull("some_nulls_variant")); + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan") + .isTrue(); - File parquetFile = new File(tempDir, "test-variant" + System.nanoTime()); + shouldRead = shouldRead(notNull("all_nulls_variant")); + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan even for all nulls") + .isTrue(); + } - OutputFile outFile = Files.localOutput(parquetFile); - try (FileAppender appender = - Parquet.write(outFile) - .schema(variantSchema) - .createWriterFunc(GenericParquetWriter::create) - .build()) { + @TestTemplate + public void testVariantEq() { + assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); - for (int i = 0; i < 10; i++) { - GenericRecord record = GenericRecord.create(variantSchema); - record.setField("id", i); + assertThatThrownBy(() -> shouldRead(equal("some_nulls_variant", "test"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test"); + } - if (i % 2 == 0) { - VariantMetadata metadata = Variants.metadata("field"); - ShreddedObject obj = Variants.object(metadata); - obj.put("field", Variants.of("value" + i)); - Variant variant = Variant.of(metadata, obj); - record.setField("variant_field", variant); - } + @TestTemplate + public void testVariantIn() { + assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); - 
appender.add(record); - } - } + assertThatThrownBy(() -> shouldRead(in("some_nulls_variant", "test1", "test2"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test1"); - InputFile inFile = Files.localInput(parquetFile); - try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { - assertThat(reader.getRowGroups()).as("Should create only one row group").hasSize(1); - BlockMetaData blockMetaData = reader.getRowGroups().get(0); - MessageType fileSchema = reader.getFileMetaData().getSchema(); + assertThatThrownBy(() -> shouldRead(in("all_nulls_variant", "test1", "test2"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test1"); + } - ParquetMetricsRowGroupFilter rowGroupFilter = - new ParquetMetricsRowGroupFilter(variantSchema, notNull("variant_field"), true); - boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); - assertThat(shouldRead) - .as("Should read: variant notNull filters must be evaluated post scan") - .isTrue(); - } - parquetFile.deleteOnExit(); + @TestTemplate + public void testVariantNotIn() { + assumeThat(format).isNotEqualTo(FileFormat.ORC); + + // Variant columns cannot be used in 'notIn' expressions with literals + assertThatThrownBy(() -> shouldRead(notIn("some_nulls_variant", "test1", "test2"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test1"); + + assertThatThrownBy(() -> shouldRead(notIn("all_nulls_variant", "test1", "test2"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test1"); } @TestTemplate - public void testAllNullsVariantNotNull() throws IOException { - assumeThat(format).isEqualTo(FileFormat.PARQUET); + public void testVariantIsNull() { + assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); + + boolean shouldRead = shouldRead(isNull("some_nulls_variant")); + assertThat(shouldRead) + .as("Should read: variant isNull filters must be evaluated post scan") + .isTrue(); - Schema variantSchema = - new Schema( - required(1, "id", IntegerType.get()), - optional(2, "variant_field", Types.VariantType.get())); + shouldRead = shouldRead(isNull("all_nulls_variant")); + assertThat(shouldRead) + .as("Should read: variant isNull filters must be evaluated post scan even for all nulls") + .isTrue(); + } - File parquetFile = new File(tempDir, "test-variant-nulls" + System.nanoTime()); + @TestTemplate + public void testVariantComparisons() { + assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); - OutputFile outFile = Files.localOutput(parquetFile); - try (FileAppender appender = - Parquet.write(outFile) - .schema(variantSchema) - .createWriterFunc(GenericParquetWriter::create) - .build()) { + // Variant columns cannot be used in comparison expressions with literals + assertThatThrownBy(() -> shouldRead(lessThan("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); - for (int i = 0; i < 10; i++) { - GenericRecord record = GenericRecord.create(variantSchema); - record.setField("id", i); - record.setField("variant_field", null); - appender.add(record); - } - 
} + assertThatThrownBy(() -> shouldRead(lessThanOrEqual("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); - InputFile inFile = Files.localInput(parquetFile); - try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { - BlockMetaData blockMetaData = reader.getRowGroups().get(0); - MessageType fileSchema = reader.getFileMetaData().getSchema(); + assertThatThrownBy(() -> shouldRead(greaterThan("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); - ParquetMetricsRowGroupFilter rowGroupFilter = - new ParquetMetricsRowGroupFilter(variantSchema, notNull("variant_field"), true); - boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); - assertThat(shouldRead) - .as("Should read: variant notNull filters must be evaluated post scan even for all nulls") - .isTrue(); - } - parquetFile.deleteOnExit(); + assertThatThrownBy(() -> shouldRead(greaterThanOrEqual("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); + + assertThatThrownBy(() -> shouldRead(notEqual("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); } private boolean shouldRead(Expression expression) { @@ -1103,8 +1153,8 @@ private boolean shouldRead(Expression expression, boolean caseSensitive) { private boolean shouldReadOrc(Expression expression, boolean caseSensitive) { try (CloseableIterable reader = ORC.read(Files.localInput(orcFile)) - .project(SCHEMA) - .createReaderFunc(fileSchema -> GenericOrcReader.buildReader(SCHEMA, fileSchema)) + .project(testSchema) + .createReaderFunc(fileSchema -> GenericOrcReader.buildReader(testSchema, fileSchema)) .filter(expression) .caseSensitive(caseSensitive) .build()) { @@ -1119,7 +1169,7 @@ private boolean shouldReadParquet( boolean caseSensitive, MessageType messageType, BlockMetaData blockMetaData) { - return new ParquetMetricsRowGroupFilter(SCHEMA, expression, caseSensitive) + return new ParquetMetricsRowGroupFilter(testSchema, expression, caseSensitive) .shouldRead(messageType, blockMetaData); } diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java index 40ff9048d173..598e5dd23548 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java @@ -155,10 +155,9 @@ public Boolean notNull(BoundReference ref) { // if the column has no non-null values, the expression cannot match int id = ref.fieldId(); - // When filtering nested types notNull() is implicit filter passed even though complex - // filters aren't pushed down in Parquet. Leave all nested column type filters to be - // evaluated post scan. Variant types also need to be evaluated post scan to access - // shredded statistics. + // When filtering nested types or variant types, notNull() is an implicit filter passed + // even though complex filters aren't pushed down in Parquet. Leave these type filters + // to be evaluated post scan. 
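+    // A nested (struct/map/list) or variant column therefore always falls through to
+    // ROWS_MIGHT_MATCH below; only primitive columns are pruned using null counts.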
Type type = schema.findType(id); if (type instanceof Type.NestedType || type.isVariantType()) { return ROWS_MIGHT_MATCH; diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java index 9fc71125a92e..70286c837cdd 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java @@ -24,6 +24,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.spark.sql.Row; +import org.apache.spark.unsafe.types.VariantVal; public class SparkTestHelperBase { protected static final Object ANY = new Object(); @@ -79,6 +80,12 @@ protected void assertEquals(String context, Object[] expectedRow, Object[] actua } else { assertEquals(newContext, (Object[]) expectedValue, (Object[]) actualValue); } + } else if (expectedValue instanceof VariantVal && actualValue instanceof VariantVal) { + // Spark VariantVal may have fixed-width padding, so we compare their JSON representation + // instead of raw byte arrays. + assertThat((actualValue).toString()) + .as("%s contents should match (VariantVal JSON)", context) + .isEqualTo((expectedValue).toString()); } else if (expectedValue != ANY) { assertThat(actualValue).as("%s contents should match", context).isEqualTo(expectedValue); } diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index 9d2ce2b388a2..ee45958ab47f 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -23,6 +23,8 @@ import static org.assertj.core.api.Assertions.assertThat; import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.sql.Timestamp; import java.time.Instant; import java.util.List; @@ -35,7 +37,12 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.spark.SparkCatalogConfig; import org.apache.iceberg.spark.TestBaseWithCatalog; +import org.apache.iceberg.util.ByteBuffers; +import org.apache.iceberg.variants.ShreddedObject; +import org.apache.iceberg.variants.VariantMetadata; +import org.apache.iceberg.variants.Variants; import org.apache.spark.sql.execution.SparkPlan; +import org.apache.spark.unsafe.types.VariantVal; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.TestTemplate; import org.junit.jupiter.api.extension.ExtendWith; @@ -578,6 +585,53 @@ public void testFilterPushdownWithSpecialFloatingPointPartitionValues() { ImmutableList.of(row(4, Double.NEGATIVE_INFINITY))); } + @TestTemplate + public void testVariantExtractFiltering() { + sql( + "CREATE TABLE %s (id BIGINT, data VARIANT) USING iceberg TBLPROPERTIES" + + "('format-version'='3')", + tableName); + configurePlanningMode(planningMode); + + sql( + "INSERT INTO %s VALUES " + + "(1, parse_json('{\"field\": \"foo\", \"num\": 25}')), " + + "(2, parse_json('{\"field\": \"bar\", \"num\": 30}')), " + + "(3, parse_json('{\"field\": \"baz\", \"num\": 35}')), " + + "(4, null)", + tableName); + + withDefaultTimeZone( + "UTC", + () -> { + checkFilters( + "try_variant_get(data, '$.num', 'int') > 30", + "isnotnull(data) AND (try_variant_get(data, $.num, IntegerType, false, Some(UTC)) > 30)", + "data IS NOT 
NULL", + ImmutableList.of(row(3L, toSparkVariantRow("baz", 35)))); + + checkFilters( + "try_variant_get(data, '$.num', 'int') = 30", + "isnotnull(data) AND (try_variant_get(data, $.num, IntegerType, false, Some(UTC)) = 30)", + "data IS NOT NULL", + ImmutableList.of(row(2L, toSparkVariantRow("bar", 30)))); + + checkFilters( + "try_variant_get(data, '$.num', 'int') IN (25, 35)", + "try_variant_get(data, $.num, IntegerType, false, Some(UTC)) IN (25,35)", + "", + ImmutableList.of( + row(1L, toSparkVariantRow("foo", 25)), row(3L, toSparkVariantRow("baz", 35)))); + + checkFilters( + "try_variant_get(data, '$.num', 'int') != 25", + "isnotnull(data) AND NOT (try_variant_get(data, $.num, IntegerType, false, Some(UTC)) = 25)", + "data IS NOT NULL", + ImmutableList.of( + row(2L, toSparkVariantRow("bar", 30)), row(3L, toSparkVariantRow("baz", 35)))); + }); + } + private void checkOnlyIcebergFilters( String predicate, String icebergFilters, List expectedRows) { @@ -600,7 +654,7 @@ private void checkFilters( if (sparkFilter != null) { assertThat(planAsString) .as("Post scan filter should match") - .contains("Filter (" + sparkFilter + ")"); + .containsAnyOf("Filter (" + sparkFilter + ")", "Filter " + sparkFilter); } else { assertThat(planAsString).as("Should be no post scan filter").doesNotContain("Filter ("); } @@ -613,4 +667,22 @@ private void checkFilters( private Timestamp timestamp(String timestampAsString) { return Timestamp.from(Instant.parse(timestampAsString)); } + + private VariantVal toSparkVariantRow(String field, int num) { + VariantMetadata metadata = Variants.metadata("field", "num"); + + ShreddedObject obj = Variants.object(metadata); + obj.put("field", Variants.of(field)); + obj.put("num", Variants.of(num)); + + ByteBuffer metadataBuffer = + ByteBuffer.allocate(metadata.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + metadata.writeTo(metadataBuffer, 0); + + ByteBuffer valueBuffer = ByteBuffer.allocate(obj.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + obj.writeTo(valueBuffer, 0); + + return new VariantVal( + ByteBuffers.toByteArray(valueBuffer), ByteBuffers.toByteArray(metadataBuffer)); + } } From 99dc168683c408d7a3b4150a44c799488d7ae2fb Mon Sep 17 00:00:00 2001 From: geruh Date: Tue, 16 Sep 2025 13:40:45 -0700 Subject: [PATCH 3/9] Refactor tests to be format specific --- .../data/MetricsRowGroupFilterTestBase.java | 619 +++++++++ .../data/TestMetricsRowGroupFilter.java | 1200 ----------------- .../orc/TestOrcMetricsRowGroupFilter.java | 99 ++ .../TestParquetMetricsRowGroupFilter.java | 444 ++++++ 4 files changed, 1162 insertions(+), 1200 deletions(-) create mode 100644 data/src/test/java/org/apache/iceberg/data/MetricsRowGroupFilterTestBase.java delete mode 100644 data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java create mode 100644 data/src/test/java/org/apache/iceberg/data/orc/TestOrcMetricsRowGroupFilter.java create mode 100644 data/src/test/java/org/apache/iceberg/data/parquet/TestParquetMetricsRowGroupFilter.java diff --git a/data/src/test/java/org/apache/iceberg/data/MetricsRowGroupFilterTestBase.java b/data/src/test/java/org/apache/iceberg/data/MetricsRowGroupFilterTestBase.java new file mode 100644 index 000000000000..3e8e7f9d66e3 --- /dev/null +++ b/data/src/test/java/org/apache/iceberg/data/MetricsRowGroupFilterTestBase.java @@ -0,0 +1,619 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.data; + +import static org.apache.iceberg.expressions.Expressions.and; +import static org.apache.iceberg.expressions.Expressions.equal; +import static org.apache.iceberg.expressions.Expressions.greaterThan; +import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.in; +import static org.apache.iceberg.expressions.Expressions.isNaN; +import static org.apache.iceberg.expressions.Expressions.isNull; +import static org.apache.iceberg.expressions.Expressions.lessThan; +import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.not; +import static org.apache.iceberg.expressions.Expressions.notEqual; +import static org.apache.iceberg.expressions.Expressions.notIn; +import static org.apache.iceberg.expressions.Expressions.notNaN; +import static org.apache.iceberg.expressions.Expressions.notNull; +import static org.apache.iceberg.expressions.Expressions.or; +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.io.File; +import java.io.IOException; +import org.apache.iceberg.Schema; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public abstract class MetricsRowGroupFilterTestBase { + + protected static final Types.StructType STRUCT_FIELD_TYPE = + Types.StructType.of(Types.NestedField.required(7, "int_field", Types.IntegerType.get())); + + protected static final Types.StructType UNDERSCORE_STRUCT_FIELD_TYPE = + Types.StructType.of(Types.NestedField.required(7, "_int_field", Types.IntegerType.get())); + + protected static final Schema BASE_SCHEMA = + new Schema( + required(1, "id", Types.IntegerType.get()), + required(2, "required", Types.StringType.get()), + optional(3, "all_nulls", Types.DoubleType.get()), + optional(4, "some_nulls", Types.StringType.get()), + optional(5, "no_nulls", Types.StringType.get()), + optional(6, "struct_not_null", STRUCT_FIELD_TYPE), + optional(8, "not_in_file", Types.FloatType.get()), + optional(9, "str", Types.StringType.get()), + optional( + 10, + "map_not_null", + Types.MapType.ofRequired(11, 12, Types.StringType.get(), Types.IntegerType.get())), + optional(13, "all_nans", Types.DoubleType.get()), + optional(14, "some_nans", Types.FloatType.get()), + optional(15, "no_nans", Types.DoubleType.get()), + optional(16, "some_double_nans", Types.DoubleType.get())); 
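+
+  // BASE_FILE_SCHEMA mirrors BASE_SCHEMA with the same field ids but underscore-prefixed
+  // names: data files are written with the underscored names while filters are bound
+  // against BASE_SCHEMA, so column matching must resolve by field id rather than by name.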
+ + protected static final Schema BASE_FILE_SCHEMA = + new Schema( + Types.NestedField.required(1, "_id", Types.IntegerType.get()), + Types.NestedField.required(2, "_required", Types.StringType.get()), + Types.NestedField.optional(3, "_all_nulls", Types.DoubleType.get()), + Types.NestedField.optional(4, "_some_nulls", Types.StringType.get()), + Types.NestedField.optional(5, "_no_nulls", Types.StringType.get()), + Types.NestedField.optional(6, "_struct_not_null", UNDERSCORE_STRUCT_FIELD_TYPE), + Types.NestedField.optional(9, "_str", Types.StringType.get()), + Types.NestedField.optional(13, "_all_nans", Types.DoubleType.get()), + Types.NestedField.optional(14, "_some_nans", Types.FloatType.get()), + Types.NestedField.optional(15, "_no_nans", Types.DoubleType.get()), + Types.NestedField.optional(16, "_some_double_nans", Types.DoubleType.get())); + + protected static final int INT_MIN_VALUE = 30; + protected static final int INT_MAX_VALUE = 79; + + @TempDir protected File tempDir; + + @BeforeEach + public final void before() throws IOException { + createInputFile(); + } + + protected abstract void createInputFile() throws IOException; + + protected void populateBaseFields(GenericRecord record, int rowIndex) { + record.setField("_id", INT_MIN_VALUE + rowIndex); + record.setField("_required", "req"); + record.setField("_all_nulls", null); + record.setField("_some_nulls", (rowIndex % 10 == 0) ? null : "some"); + record.setField("_no_nulls", ""); + record.setField("_all_nans", Double.NaN); + record.setField("_some_nans", (rowIndex % 10 == 0) ? Float.NaN : 2F); + record.setField("_some_double_nans", (rowIndex % 10 == 0) ? Double.NaN : 2D); + record.setField("_no_nans", 3D); + record.setField("_str", rowIndex + "str" + rowIndex); + GenericRecord struct = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE); + struct.setField("_int_field", INT_MIN_VALUE + rowIndex); + record.setField("_struct_not_null", struct); + } + + protected boolean shouldRead(Expression expression) { + return shouldRead(expression, true); + } + + protected abstract boolean shouldRead(Expression expression, boolean caseSensitive); + + @Test + public void testAllNulls() { + boolean shouldRead; + + shouldRead = shouldRead(notNull("all_nulls")); + assertThat(shouldRead).as("Should skip: no non-null value in all null column").isFalse(); + + shouldRead = shouldRead(notNull("some_nulls")); + assertThat(shouldRead) + .as("Should read: column with some nulls contains a non-null value") + .isTrue(); + + shouldRead = shouldRead(notNull("no_nulls")); + assertThat(shouldRead).as("Should read: non-null column contains a non-null value").isTrue(); + + shouldRead = shouldRead(notNull("map_not_null")); + assertThat(shouldRead).as("Should read: map type is not skipped").isTrue(); + + shouldRead = shouldRead(notNull("struct_not_null")); + assertThat(shouldRead).as("Should read: struct type is not skipped").isTrue(); + } + + @Test + public void testNoNulls() { + boolean shouldRead = shouldRead(isNull("all_nulls")); + assertThat(shouldRead).as("Should read: at least one null value in all null column").isTrue(); + + shouldRead = shouldRead(isNull("some_nulls")); + assertThat(shouldRead).as("Should read: column with some nulls contains a null value").isTrue(); + + shouldRead = shouldRead(isNull("no_nulls")); + assertThat(shouldRead).as("Should skip: non-null column contains no null values").isFalse(); + + shouldRead = shouldRead(isNull("map_not_null")); + assertThat(shouldRead).as("Should read: map type is not skipped").isTrue(); + + shouldRead = 
shouldRead(isNull("struct_not_null"));
+    assertThat(shouldRead).as("Should read: struct type is not skipped").isTrue();
+  }
+
+  @Test
+  public void testFloatWithNan() {
+    boolean shouldRead = shouldRead(greaterThan("some_nans", 1.0));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(greaterThanOrEqual("some_nans", 1.0));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(lessThan("some_nans", 3.0));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(lessThanOrEqual("some_nans", 1.0));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(equal("some_nans", 2.0));
+    assertThat(shouldRead).isTrue();
+  }
+
+  @Test
+  public void testDoubleWithNan() {
+    boolean shouldRead = shouldRead(greaterThan("some_double_nans", 1.0));
+    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
+
+    shouldRead = shouldRead(greaterThanOrEqual("some_double_nans", 1.0));
+    assertThat(shouldRead)
+        .as("Should read: column with some nans contains the target value")
+        .isTrue();
+
+    shouldRead = shouldRead(lessThan("some_double_nans", 3.0));
+    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
+
+    shouldRead = shouldRead(lessThanOrEqual("some_double_nans", 1.0));
+    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
+
+    shouldRead = shouldRead(equal("some_double_nans", 2.0));
+    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
+  }
+
+  @Test
+  public void testIsNaN() {
+    boolean shouldRead = shouldRead(isNaN("all_nans"));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(isNaN("some_nans"));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(isNaN("all_nulls"));
+    assertThat(shouldRead).as("Should skip: all null column will not contain NaN").isFalse();
+  }
+
+  @Test
+  public void testNotNaN() {
+    boolean shouldRead = shouldRead(notNaN("all_nans"));
+    assertThat(shouldRead).as("Should read: NaN counts are not tracked in metrics").isTrue();
+
+    shouldRead = shouldRead(notNaN("some_nans"));
+    assertThat(shouldRead).as("Should read: NaN counts are not tracked in metrics").isTrue();
+
+    shouldRead = shouldRead(notNaN("no_nans"));
+    assertThat(shouldRead).as("Should read: NaN counts are not tracked in metrics").isTrue();
+
+    shouldRead = shouldRead(notNaN("all_nulls"));
+    assertThat(shouldRead).as("Should read: NaN counts are not tracked in metrics").isTrue();
+  }
+
+  @Test
+  public void testRequiredColumn() {
+    boolean shouldRead = shouldRead(notNull("required"));
+    assertThat(shouldRead).as("Should read: required columns are always non-null").isTrue();
+
+    shouldRead = shouldRead(isNull("required"));
+    assertThat(shouldRead).as("Should skip: required columns are always non-null").isFalse();
+  }
+
+  @Test
+  public void testMissingColumn() {
+    assertThatThrownBy(() -> shouldRead(lessThan("missing", 5)))
+        .as("Should complain about missing column in expression")
+        .isInstanceOf(ValidationException.class)
+        .hasMessageStartingWith("Cannot find field 'missing'");
+  }
+
+  @Test
+  public void testNot() {
+    // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
+    boolean shouldRead = shouldRead(not(lessThan("id", INT_MIN_VALUE - 25)));
+    assertThat(shouldRead).as("Should read: not(false)").isTrue();
+
+    shouldRead = shouldRead(not(greaterThan("id", INT_MIN_VALUE - 25)));
+    assertThat(shouldRead).as("Should skip: not(true)").isFalse();
+  }
+
+  @Test
+  public void testAnd() {
+    // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
+    boolean shouldRead =
+        shouldRead(
+            and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MIN_VALUE - 30)));
+    assertThat(shouldRead).as("Should skip: and(false, true)").isFalse();
+
+    shouldRead =
+        shouldRead(
+            and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1)));
+    assertThat(shouldRead).as("Should skip: and(false, false)").isFalse();
+
+    shouldRead =
+        shouldRead(
+            and(greaterThan("id", INT_MIN_VALUE - 25), lessThanOrEqual("id", INT_MIN_VALUE)));
+    assertThat(shouldRead).as("Should read: and(true, true)").isTrue();
+  }
+
+  @Test
+  public void testOr() {
+    // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
+    boolean shouldRead =
+        shouldRead(
+            or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1)));
+    assertThat(shouldRead).as("Should skip: or(false, false)").isFalse();
+
+    shouldRead =
+        shouldRead(
+            or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE - 19)));
+    assertThat(shouldRead).as("Should read: or(false, true)").isTrue();
+  }
+
+  @Test
+  public void testIntegerLt() {
+    boolean shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE - 25));
+    assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse();
+
+    shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE));
+    assertThat(shouldRead)
+        .as("Should not read: id range below lower bound (30 is not < 30)")
+        .isFalse();
+
+    shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE + 1));
+    assertThat(shouldRead).as("Should read: one possible id").isTrue();
+
+    shouldRead = shouldRead(lessThan("id", INT_MAX_VALUE));
+    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
+  }
+
+  @Test
+  public void testIntegerLtEq() {
+    boolean shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE - 25));
+    assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse();
+
+    shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE - 1));
+    assertThat(shouldRead).as("Should not read: id range below lower bound (29 < 30)").isFalse();
+
+    shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE));
+    assertThat(shouldRead).as("Should read: one possible id").isTrue();
+
+    shouldRead = shouldRead(lessThanOrEqual("id", INT_MAX_VALUE));
+    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
+  }
+
+  @Test
+  public void testIntegerGt() {
+    boolean shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE + 6));
+    assertThat(shouldRead).as("Should not read: id range above upper bound (85 > 79)").isFalse();
+
+    shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE));
+    assertThat(shouldRead)
+        .as("Should not read: id range above upper bound (79 is not > 79)")
+        .isFalse();
+
+    shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE - 1));
+    assertThat(shouldRead).as("Should read: one possible id").isTrue();
+
+    shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE - 4));
+    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
+  }
+
+  @Test
+  public void testIntegerGtEq() {
+    boolean shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE + 6));
+    assertThat(shouldRead).as("Should not read: id range above upper bound (85 > 79)").isFalse();
+
+    shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE + 1));
+    assertThat(shouldRead).as("Should not read: id range above upper bound (80 > 79)").isFalse();
+
+    shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE));
+    assertThat(shouldRead).as("Should read: one possible id").isTrue();
+
+    shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE - 4));
+    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
+  }
+
+  @Test
+  public void testIntegerEq() {
+    boolean shouldRead = shouldRead(equal("id", INT_MIN_VALUE - 25));
+    assertThat(shouldRead).as("Should not read: id below lower bound").isFalse();
+
+    shouldRead = shouldRead(equal("id", INT_MIN_VALUE - 1));
+    assertThat(shouldRead).as("Should not read: id below lower bound").isFalse();
+
+    shouldRead = shouldRead(equal("id", INT_MIN_VALUE));
+    assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue();
+
+    shouldRead = shouldRead(equal("id", INT_MAX_VALUE - 4));
+    assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue();
+
+    shouldRead = shouldRead(equal("id", INT_MAX_VALUE));
+    assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue();
+
+    shouldRead = shouldRead(equal("id", INT_MAX_VALUE + 1));
+    assertThat(shouldRead).as("Should not read: id above upper bound").isFalse();
+
+    shouldRead = shouldRead(equal("id", INT_MAX_VALUE + 6));
+    assertThat(shouldRead).as("Should not read: id above upper bound").isFalse();
+  }
+
+  @Test
+  public void testIntegerNotEq() {
+    boolean shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE - 25));
+    assertThat(shouldRead).as("Should read: id below lower bound").isTrue();
+
+    shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE - 1));
+    assertThat(shouldRead).as("Should read: id below lower bound").isTrue();
+
+    shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE));
+    assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue();
+
+    shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE - 4));
+    assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue();
+
+    shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE));
+    assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue();
+
+    shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 1));
+    assertThat(shouldRead).as("Should read: id above upper bound").isTrue();
+
+    shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 6));
+    assertThat(shouldRead).as("Should read: id above upper bound").isTrue();
+  }
+
+  @Test
+  public void testIntegerNotEqRewritten() {
+    boolean shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE - 25)));
+    assertThat(shouldRead).as("Should read: id below lower bound").isTrue();
+
+    shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE - 1)));
+    assertThat(shouldRead).as("Should read: id below lower bound").isTrue();
+
+    shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE)));
+    assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue();
+
+    shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE - 4)));
+    assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue();
+
+    shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE)));
+    assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue();
+
+    shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE + 1)));
+    assertThat(shouldRead).as("Should read: id above upper bound").isTrue();
+
+    shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE + 6)));
+    assertThat(shouldRead).as("Should read: id above upper bound").isTrue();
+  }
+
+  @Test
+  public void testStructFieldLt() {
+    boolean shouldRead =
+        shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE - 25));
+    assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse();
+
+    shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE));
+    assertThat(shouldRead)
+        .as("Should not read: id range below lower bound (30 is not < 30)")
+        .isFalse();
+
+    shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE + 1));
+    assertThat(shouldRead).as("Should read: one possible id").isTrue();
+
+    shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MAX_VALUE));
+    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
+  }
+
+  @Test
+  public void testStructFieldLtEq() {
+    boolean shouldRead =
+        shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 25));
+    assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse();
+
+    shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 1));
+    assertThat(shouldRead).as("Should not read: id range below lower bound (29 < 30)").isFalse();
+
+    shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE));
+    assertThat(shouldRead).as("Should read: one possible id").isTrue();
+
+    shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE));
+    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
+  }
+
+  @Test
+  public void testStructFieldGt() {
+    boolean shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE + 6));
+    assertThat(shouldRead).as("Should not read: id range above upper bound (85 > 79)").isFalse();
+
+    shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE));
+    assertThat(shouldRead)
+        .as("Should not read: id range above upper bound (79 is not > 79)")
+        .isFalse();
+
+    shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 1));
+    assertThat(shouldRead).as("Should read: one possible id").isTrue();
+
+    shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 4));
+    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
+  }
+
+  @Test
+  public void testStructFieldGtEq() {
+    boolean shouldRead =
+        shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 6));
+    assertThat(shouldRead).as("Should not read: id range above upper bound (85 > 79)").isFalse();
+
+    shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 1));
+    assertThat(shouldRead).as("Should not read: id range above upper bound (80 > 79)").isFalse();
+
+    shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE));
+    assertThat(shouldRead).as("Should read: one possible id").isTrue();
+
+    shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE - 4));
+    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
+  }
+
+  @Test
+  public void testStructFieldEq() {
+    boolean shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE - 25));
+    assertThat(shouldRead).as("Should not read: id below lower bound").isFalse();
+
+    shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE - 1));
+    assertThat(shouldRead).as("Should not read: id below lower bound").isFalse();
+
+    shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE));
+    assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue();
+
+    shouldRead =
shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); + + shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); + + shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); + + shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); + } + + @Test + public void testStructFieldNotEq() { + boolean shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE - 25)); + assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); + + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); + + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); + + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); + + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); + + shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); + + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); + } + + @Test + public void testCaseInsensitive() { + boolean shouldRead = shouldRead(equal("ID", INT_MIN_VALUE - 25), false); + assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); + } + + @Test + public void testIntegerIn() { + boolean shouldRead = shouldRead(in("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)); + assertThat(shouldRead).as("Should not read: id below lower bound (5 < 30, 6 < 30)").isFalse(); + + shouldRead = shouldRead(in("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should not read: id below lower bound (28 < 30, 29 < 30)").isFalse(); + + shouldRead = shouldRead(in("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: id equal to lower bound (30 == 30)").isTrue(); + + shouldRead = shouldRead(in("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)); + assertThat(shouldRead) + .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)") + .isTrue(); + + shouldRead = shouldRead(in("id", INT_MAX_VALUE, INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should read: id equal to upper bound (79 == 79)").isTrue(); + + shouldRead = shouldRead(in("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)); + assertThat(shouldRead).as("Should not read: id above upper bound (80 > 79, 81 > 79)").isFalse(); + + shouldRead = shouldRead(in("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)); + assertThat(shouldRead).as("Should not read: id above upper bound (85 > 79, 86 > 79)").isFalse(); + + shouldRead = shouldRead(in("all_nulls", 1, 2)); + assertThat(shouldRead).as("Should skip: in on all nulls column").isFalse(); + + shouldRead = shouldRead(in("some_nulls", "aaa", "some")); + assertThat(shouldRead).as("Should read: in on some 
nulls column").isTrue(); + + shouldRead = shouldRead(in("no_nulls", "aaa", "")); + assertThat(shouldRead).as("Should read: in on no nulls column").isTrue(); + } + + @Test + public void testIntegerNotIn() { + boolean shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)); + assertThat(shouldRead).as("Should read: id below lower bound (5 < 30, 6 < 30)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should read: id below lower bound (28 < 30, 29 < 30)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: id equal to lower bound (30 == 30)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)); + assertThat(shouldRead) + .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)") + .isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MAX_VALUE, INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should read: id equal to upper bound (79 == 79)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)); + assertThat(shouldRead).as("Should read: id above upper bound (80 > 79, 81 > 79)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)); + assertThat(shouldRead).as("Should read: id above upper bound (85 > 79, 86 > 79)").isTrue(); + + shouldRead = shouldRead(notIn("all_nulls", 1, 2)); + assertThat(shouldRead).as("Should read: notIn on all nulls column").isTrue(); + + shouldRead = shouldRead(notIn("some_nulls", "aaa", "some")); + assertThat(shouldRead).as("Should read: notIn on some nulls column").isTrue(); + } + + @Test + public void testSomeNullsNotEq() { + boolean shouldRead = shouldRead(notEqual("some_nulls", "some")); + assertThat(shouldRead).as("Should read: notEqual on some nulls column").isTrue(); + } +} diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java deleted file mode 100644 index 9b26168e4904..000000000000 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ /dev/null @@ -1,1200 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.data; - -import static org.apache.iceberg.expressions.Expressions.and; -import static org.apache.iceberg.expressions.Expressions.equal; -import static org.apache.iceberg.expressions.Expressions.greaterThan; -import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual; -import static org.apache.iceberg.expressions.Expressions.in; -import static org.apache.iceberg.expressions.Expressions.isNaN; -import static org.apache.iceberg.expressions.Expressions.isNull; -import static org.apache.iceberg.expressions.Expressions.lessThan; -import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual; -import static org.apache.iceberg.expressions.Expressions.not; -import static org.apache.iceberg.expressions.Expressions.notEqual; -import static org.apache.iceberg.expressions.Expressions.notIn; -import static org.apache.iceberg.expressions.Expressions.notNaN; -import static org.apache.iceberg.expressions.Expressions.notNull; -import static org.apache.iceberg.expressions.Expressions.notStartsWith; -import static org.apache.iceberg.expressions.Expressions.or; -import static org.apache.iceberg.expressions.Expressions.startsWith; -import static org.apache.iceberg.expressions.Expressions.truncate; -import static org.apache.iceberg.types.Types.NestedField.optional; -import static org.apache.iceberg.types.Types.NestedField.required; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.assertj.core.api.Assumptions.assumeThat; - -import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.Arrays; -import java.util.List; -import java.util.UUID; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.iceberg.FileFormat; -import org.apache.iceberg.Files; -import org.apache.iceberg.Parameter; -import org.apache.iceberg.ParameterizedTestExtension; -import org.apache.iceberg.Parameters; -import org.apache.iceberg.Schema; -import org.apache.iceberg.data.orc.GenericOrcReader; -import org.apache.iceberg.data.orc.GenericOrcWriter; -import org.apache.iceberg.data.parquet.GenericParquetWriter; -import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.expressions.Expression; -import org.apache.iceberg.io.CloseableIterable; -import org.apache.iceberg.io.FileAppender; -import org.apache.iceberg.io.InputFile; -import org.apache.iceberg.io.OutputFile; -import org.apache.iceberg.io.SeekableInputStream; -import org.apache.iceberg.orc.ORC; -import org.apache.iceberg.parquet.Parquet; -import org.apache.iceberg.parquet.ParquetMetricsRowGroupFilter; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.types.Types; -import org.apache.iceberg.types.Types.DoubleType; -import org.apache.iceberg.types.Types.FloatType; -import org.apache.iceberg.types.Types.IntegerType; -import org.apache.iceberg.types.Types.StringType; -import org.apache.iceberg.variants.ShreddedObject; -import org.apache.iceberg.variants.Variant; -import org.apache.iceberg.variants.VariantMetadata; -import org.apache.iceberg.variants.Variants; -import org.apache.orc.OrcFile; -import org.apache.orc.Reader; -import org.apache.parquet.hadoop.ParquetFileReader; -import org.apache.parquet.hadoop.metadata.BlockMetaData; -import org.apache.parquet.io.DelegatingSeekableInputStream; -import 
org.apache.parquet.schema.MessageType; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.TestTemplate; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.io.TempDir; - -@ExtendWith(ParameterizedTestExtension.class) -public class TestMetricsRowGroupFilter { - - @Parameters(name = "fileFormat = {0}") - public static List parameters() { - return Arrays.asList(FileFormat.PARQUET, FileFormat.ORC); - } - - private static final Types.StructType STRUCT_FIELD_TYPE = - Types.StructType.of(Types.NestedField.required(8, "int_field", IntegerType.get())); - - private static final Schema SCHEMA = - new Schema( - required(1, "id", IntegerType.get()), - optional(2, "no_stats_parquet", StringType.get()), - required(3, "required", StringType.get()), - optional(4, "all_nulls", DoubleType.get()), - optional(5, "some_nulls", StringType.get()), - optional(6, "no_nulls", StringType.get()), - optional(7, "struct_not_null", STRUCT_FIELD_TYPE), - optional(9, "not_in_file", FloatType.get()), - optional(10, "str", StringType.get()), - optional( - 11, - "map_not_null", - Types.MapType.ofRequired(12, 13, StringType.get(), IntegerType.get())), - optional(14, "all_nans", DoubleType.get()), - optional(15, "some_nans", FloatType.get()), - optional(16, "no_nans", DoubleType.get()), - optional(17, "some_double_nans", DoubleType.get())); - - private static final Types.StructType UNDERSCORE_STRUCT_FIELD_TYPE = - Types.StructType.of(Types.NestedField.required(8, "_int_field", IntegerType.get())); - - private static final Schema FILE_SCHEMA = - new Schema( - required(1, "_id", IntegerType.get()), - optional(2, "_no_stats_parquet", StringType.get()), - required(3, "_required", StringType.get()), - optional(4, "_all_nulls", DoubleType.get()), - optional(5, "_some_nulls", StringType.get()), - optional(6, "_no_nulls", StringType.get()), - optional(7, "_struct_not_null", UNDERSCORE_STRUCT_FIELD_TYPE), - optional(10, "_str", StringType.get()), - optional(14, "_all_nans", Types.DoubleType.get()), - optional(15, "_some_nans", FloatType.get()), - optional(16, "_no_nans", Types.DoubleType.get()), - optional(17, "_some_double_nans", Types.DoubleType.get())); - - private static final String TOO_LONG_FOR_STATS_PARQUET; - - static { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < 200; i += 1) { - sb.append(UUID.randomUUID()); - } - TOO_LONG_FOR_STATS_PARQUET = sb.toString(); - } - - private static final int INT_MIN_VALUE = 30; - private static final int INT_MAX_VALUE = 79; - - private File orcFile = null; - private MessageType parquetSchema = null; - private BlockMetaData rowGroupMetadata = null; - private Schema testSchema; - private Schema fileSchema; - - @Parameter private FileFormat format; - @TempDir private File tempDir; - - @BeforeEach - public void createInputFile() throws IOException { - this.testSchema = schemaForFormat(format); - this.fileSchema = fileSchemaForFormat(format); - switch (format) { - case ORC: - createOrcInputFile(); - break; - case PARQUET: - createParquetInputFile(); - break; - default: - throw new UnsupportedOperationException( - "Row group filter tests not supported for " + format); - } - } - - private static Schema schemaForFormat(FileFormat format) { - if (format == FileFormat.PARQUET) { - return new Schema( - Lists.newArrayList( - Iterables.concat( - SCHEMA.columns(), - Lists.newArrayList( - optional(18, "some_nulls_variant", Types.VariantType.get()), - optional(19, "all_nulls_variant", Types.VariantType.get()))))); - } - return SCHEMA; - } - - 
private static Schema fileSchemaForFormat(FileFormat format) { - if (format == FileFormat.PARQUET) { - return new Schema( - Lists.newArrayList( - Iterables.concat( - FILE_SCHEMA.columns(), - Lists.newArrayList( - optional(18, "_some_nulls_variant", Types.VariantType.get()), - optional(19, "_all_nulls_variant", Types.VariantType.get()))))); - } - return FILE_SCHEMA; - } - - public void createOrcInputFile() throws IOException { - this.orcFile = new File(tempDir, "junit" + System.nanoTime()); - - OutputFile outFile = Files.localOutput(orcFile); - try (FileAppender appender = - ORC.write(outFile) - .schema(fileSchema) - .createWriterFunc(GenericOrcWriter::buildWriter) - .build()) { - GenericRecord record = GenericRecord.create(fileSchema); - // create 50 records - for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { - record.setField("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 - record.setField( - "_no_stats_parquet", - TOO_LONG_FOR_STATS_PARQUET); // value longer than 4k will produce no stats - // in Parquet, but will produce stats for ORC - record.setField("_required", "req"); // required, always non-null - record.setField("_all_nulls", null); // never non-null - record.setField("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values - record.setField("_no_nulls", ""); // optional, but always non-null - record.setField("_str", i + "str" + i); - record.setField("_all_nans", Double.NaN); // never non-nan - record.setField("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values - record.setField( - "_some_double_nans", (i % 10 == 0) ? Double.NaN : 2D); // includes some nan values - record.setField("_no_nans", 3D); // optional, but always non-nan - - GenericRecord structNotNull = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE); - structNotNull.setField("_int_field", INT_MIN_VALUE + i); - record.setField("_struct_not_null", structNotNull); // struct with int - - appender.add(record); - } - } - - InputFile inFile = Files.localInput(orcFile); - try (Reader reader = - OrcFile.createReader( - new Path(inFile.location()), OrcFile.readerOptions(new Configuration()))) { - assertThat(reader.getStripes()).as("Should create only one stripe").hasSize(1); - } - - orcFile.deleteOnExit(); - } - - private void createParquetInputFile() throws IOException { - File parquetFile = new File(tempDir, "junit" + System.nanoTime()); - - OutputFile outFile = Files.localOutput(parquetFile); - try (FileAppender appender = - Parquet.write(outFile) - .schema(fileSchema) - .createWriterFunc(GenericParquetWriter::create) - .build()) { - // create 50 records - for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { - GenericRecord record = GenericRecord.create(fileSchema); - record.setField("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 - record.setField( - "_no_stats_parquet", - TOO_LONG_FOR_STATS_PARQUET); // value longer than 4k will produce no stats - // in Parquet - record.setField("_required", "req"); // required, always non-null - record.setField("_all_nulls", null); // never non-null - record.setField("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values - record.setField("_no_nulls", ""); // optional, but always non-null - record.setField("_all_nans", Double.NaN); // never non-nan - record.setField("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values - record.setField( - "_some_double_nans", (i % 10 == 0) ? 
Double.NaN : 2D); // includes some nan values - record.setField("_no_nans", 3D); // optional, but always non-nan - record.setField("_str", i + "str" + i); - - GenericRecord structNotNull = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE); - structNotNull.setField("_int_field", INT_MIN_VALUE + i); - record.setField("_struct_not_null", structNotNull); // struct with int - - if (i % 2 == 0) { - VariantMetadata metadata = Variants.metadata("data"); - ShreddedObject obj = Variants.object(metadata); - obj.put("data", Variants.of(25 + i)); - record.setField("_some_nulls_variant", Variant.of(metadata, obj)); - } else { - record.setField("_some_nulls_variant", null); - } - record.setField("_all_nulls_variant", null); - - appender.add(record); - } - } - - InputFile inFile = Files.localInput(parquetFile); - try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { - assertThat(reader.getRowGroups()).as("Should create only one row group").hasSize(1); - rowGroupMetadata = reader.getRowGroups().get(0); - parquetSchema = reader.getFileMetaData().getSchema(); - } - - parquetFile.deleteOnExit(); - } - - @TestTemplate - public void testAllNulls() { - boolean shouldRead; - - shouldRead = shouldRead(notNull("all_nulls")); - assertThat(shouldRead).as("Should skip: no non-null value in all null column").isFalse(); - - shouldRead = shouldRead(notNull("some_nulls")); - assertThat(shouldRead) - .as("Should read: column with some nulls contains a non-null value") - .isTrue(); - - shouldRead = shouldRead(notNull("no_nulls")); - assertThat(shouldRead).as("Should read: non-null column contains a non-null value").isTrue(); - - shouldRead = shouldRead(notNull("map_not_null")); - assertThat(shouldRead).as("Should read: map type is not skipped").isTrue(); - - shouldRead = shouldRead(notNull("struct_not_null")); - assertThat(shouldRead).as("Should read: struct type is not skipped").isTrue(); - } - - @TestTemplate - public void testNoNulls() { - boolean shouldRead = shouldRead(isNull("all_nulls")); - assertThat(shouldRead).as("Should read: at least one null value in all null column").isTrue(); - - shouldRead = shouldRead(isNull("some_nulls")); - assertThat(shouldRead).as("Should read: column with some nulls contains a null value").isTrue(); - - shouldRead = shouldRead(isNull("no_nulls")); - assertThat(shouldRead).as("Should skip: non-null column contains no null values").isFalse(); - - shouldRead = shouldRead(isNull("map_not_null")); - assertThat(shouldRead).as("Should read: map type is not skipped").isTrue(); - - shouldRead = shouldRead(isNull("struct_not_null")); - assertThat(shouldRead).as("Should read: struct type is not skipped").isTrue(); - } - - @TestTemplate - public void testFloatWithNan() { - // NaN's should break Parquet's Min/Max stats we should be reading in all cases - boolean shouldRead = shouldRead(greaterThan("some_nans", 1.0)); - assertThat(shouldRead).isTrue(); - - shouldRead = shouldRead(greaterThanOrEqual("some_nans", 1.0)); - assertThat(shouldRead).isTrue(); - - shouldRead = shouldRead(lessThan("some_nans", 3.0)); - assertThat(shouldRead).isTrue(); - - shouldRead = shouldRead(lessThanOrEqual("some_nans", 1.0)); - assertThat(shouldRead).isTrue(); - - shouldRead = shouldRead(equal("some_nans", 2.0)); - assertThat(shouldRead).isTrue(); - } - - @TestTemplate - public void testDoubleWithNan() { - boolean shouldRead = shouldRead(greaterThan("some_double_nans", 1.0)); - assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue(); - - shouldRead = 
shouldRead(greaterThanOrEqual("some_double_nans", 1.0)); - assertThat(shouldRead) - .as("Should read: column with some nans contains the target value") - .isTrue(); - - shouldRead = shouldRead(lessThan("some_double_nans", 3.0)); - assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue(); - - shouldRead = shouldRead(lessThanOrEqual("some_double_nans", 1.0)); - assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue(); - - shouldRead = shouldRead(equal("some_double_nans", 2.0)); - assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue(); - } - - @TestTemplate - public void testIsNaN() { - boolean shouldRead = shouldRead(isNaN("all_nans")); - assertThat(shouldRead) - .as("Should read: NaN counts are not tracked in Parquet metrics") - .isTrue(); - - shouldRead = shouldRead(isNaN("some_nans")); - assertThat(shouldRead) - .as("Should read: NaN counts are not tracked in Parquet metrics") - .isTrue(); - - shouldRead = shouldRead(isNaN("no_nans")); - switch (format) { - case ORC: - assertThat(shouldRead) - .as("Should read 0 rows due to the ORC filter push-down feature") - .isFalse(); - break; - case PARQUET: - assertThat(shouldRead) - .as("Should read: NaN counts are not tracked in Parquet metrics") - .isTrue(); - break; - default: - throw new UnsupportedOperationException( - "Row group filter tests not supported for " + format); - } - - shouldRead = shouldRead(isNaN("all_nulls")); - assertThat(shouldRead).as("Should skip: all null column will not contain nan value").isFalse(); - } - - @TestTemplate - public void testNotNaN() { - boolean shouldRead = shouldRead(notNaN("all_nans")); - assertThat(shouldRead) - .as("Should read: NaN counts are not tracked in Parquet metrics") - .isTrue(); - - shouldRead = shouldRead(notNaN("some_nans")); - assertThat(shouldRead) - .as("Should read: NaN counts are not tracked in Parquet metrics") - .isTrue(); - - shouldRead = shouldRead(notNaN("no_nans")); - assertThat(shouldRead) - .as("Should read: NaN counts are not tracked in Parquet metrics") - .isTrue(); - - shouldRead = shouldRead(notNaN("all_nulls")); - assertThat(shouldRead) - .as("Should read: NaN counts are not tracked in Parquet metrics") - .isTrue(); - } - - @TestTemplate - public void testRequiredColumn() { - boolean shouldRead = shouldRead(notNull("required")); - assertThat(shouldRead).as("Should read: required columns are always non-null").isTrue(); - - shouldRead = shouldRead(isNull("required")); - assertThat(shouldRead).as("Should skip: required columns are always non-null").isFalse(); - } - - @TestTemplate - public void testMissingColumn() { - assertThatThrownBy(() -> shouldRead(lessThan("missing", 5))) - .as("Should complain about missing column in expression") - .isInstanceOf(ValidationException.class) - .hasMessageStartingWith("Cannot find field 'missing'"); - } - - @TestTemplate - public void testColumnNotInFile() { - assumeThat(format) - .as( - "If a column is not in file, ORC does NOT try to apply predicates assuming null values for the column") - .isNotEqualTo(FileFormat.ORC); - - Expression[] cannotMatch = - new Expression[] { - lessThan("not_in_file", 1.0f), lessThanOrEqual("not_in_file", 1.0f), - equal("not_in_file", 1.0f), greaterThan("not_in_file", 1.0f), - greaterThanOrEqual("not_in_file", 1.0f), notNull("not_in_file") - }; - - for (Expression expr : cannotMatch) { - boolean shouldRead = shouldRead(expr); - assertThat(shouldRead) - .as("Should skip when column is not in file 
(all nulls): " + expr) - .isFalse(); - } - - Expression[] canMatch = new Expression[] {isNull("not_in_file"), notEqual("not_in_file", 1.0f)}; - - for (Expression expr : canMatch) { - boolean shouldRead = shouldRead(expr); - assertThat(shouldRead) - .as("Should read when column is not in file (all nulls): " + expr) - .isTrue(); - } - } - - @TestTemplate - public void testMissingStatsParquet() { - assumeThat(format).isEqualTo(FileFormat.PARQUET); - - Expression[] exprs = - new Expression[] { - lessThan("no_stats_parquet", "a"), - lessThanOrEqual("no_stats_parquet", "b"), - equal("no_stats_parquet", "c"), - greaterThan("no_stats_parquet", "d"), - greaterThanOrEqual("no_stats_parquet", "e"), - notEqual("no_stats_parquet", "f"), - isNull("no_stats_parquet"), - notNull("no_stats_parquet"), - startsWith("no_stats_parquet", "a"), - notStartsWith("no_stats_parquet", "a") - }; - - for (Expression expr : exprs) { - boolean shouldRead = shouldRead(expr); - assertThat(shouldRead).as("Should read when missing stats for expr: " + expr).isTrue(); - } - } - - @TestTemplate - public void testZeroRecordFileParquet() { - assumeThat(format).isEqualTo(FileFormat.PARQUET); - - BlockMetaData emptyBlock = new BlockMetaData(); - emptyBlock.setRowCount(0); - - Expression[] exprs = - new Expression[] { - lessThan("id", 5), - lessThanOrEqual("id", 30), - equal("id", 70), - greaterThan("id", 78), - greaterThanOrEqual("id", 90), - notEqual("id", 101), - isNull("some_nulls"), - notNull("some_nulls") - }; - - for (Expression expr : exprs) { - boolean shouldRead = shouldReadParquet(expr, true, parquetSchema, emptyBlock); - assertThat(shouldRead).as("Should never read 0-record file: " + expr).isFalse(); - } - } - - @TestTemplate - public void testNot() { - // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out - boolean shouldRead = shouldRead(not(lessThan("id", INT_MIN_VALUE - 25))); - assertThat(shouldRead).as("Should read: not(false)").isTrue(); - - shouldRead = shouldRead(not(greaterThan("id", INT_MIN_VALUE - 25))); - assertThat(shouldRead).as("Should skip: not(true)").isFalse(); - } - - @TestTemplate - public void testAnd() { - // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out - boolean shouldRead = - shouldRead( - and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MIN_VALUE - 30))); - assertThat(shouldRead).as("Should skip: and(false, true)").isFalse(); - - shouldRead = - shouldRead( - and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1))); - assertThat(shouldRead).as("Should skip: and(false, false)").isFalse(); - - shouldRead = - shouldRead( - and(greaterThan("id", INT_MIN_VALUE - 25), lessThanOrEqual("id", INT_MIN_VALUE))); - assertThat(shouldRead).as("Should read: and(true, true)").isTrue(); - } - - @TestTemplate - public void testOr() { - // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out - boolean shouldRead = - shouldRead( - or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1))); - assertThat(shouldRead).as("Should skip: or(false, false)").isFalse(); - - shouldRead = - shouldRead( - or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE - 19))); - assertThat(shouldRead).as("Should read: or(false, true)").isTrue(); - } - - @TestTemplate - public void testIntegerLt() { - boolean shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE - 25)); - assertThat(shouldRead).as("Should not read: id 
range below lower bound (5 < 30)").isFalse(); - - shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE)); - assertThat(shouldRead) - .as("Should not read: id range below lower bound (30 is not < 30)") - .isFalse(); - - shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE + 1)); - assertThat(shouldRead).as("Should read: one possible id").isTrue(); - - shouldRead = shouldRead(lessThan("id", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: may possible ids").isTrue(); - } - - @TestTemplate - public void testIntegerLtEq() { - boolean shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE - 25)); - assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse(); - - shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should not read: id range below lower bound (29 < 30)").isFalse(); - - shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: one possible id").isTrue(); - - shouldRead = shouldRead(lessThanOrEqual("id", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: many possible ids").isTrue(); - } - - @TestTemplate - public void testIntegerGt() { - boolean shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE + 6)); - assertThat(shouldRead).as("Should not read: id range above upper bound (85 < 79)").isFalse(); - - shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE)); - assertThat(shouldRead) - .as("Should not read: id range above upper bound (79 is not > 79)") - .isFalse(); - - shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE - 1)); - assertThat(shouldRead).as("Should read: one possible id").isTrue(); - - shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE - 4)); - assertThat(shouldRead).as("Should read: may possible ids").isTrue(); - } - - @TestTemplate - public void testIntegerGtEq() { - boolean shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE + 6)); - assertThat(shouldRead).as("Should not read: id range above upper bound (85 < 79)").isFalse(); - - shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should not read: id range above upper bound (80 > 79)").isFalse(); - - shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: one possible id").isTrue(); - - shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE - 4)); - assertThat(shouldRead).as("Should read: may possible ids").isTrue(); - } - - @TestTemplate - public void testIntegerEq() { - boolean shouldRead = shouldRead(equal("id", INT_MIN_VALUE - 25)); - assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); - - shouldRead = shouldRead(equal("id", INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); - - shouldRead = shouldRead(equal("id", INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); - - shouldRead = shouldRead(equal("id", INT_MAX_VALUE - 4)); - assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); - - shouldRead = shouldRead(equal("id", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); - - shouldRead = shouldRead(equal("id", INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); - - shouldRead = shouldRead(equal("id", INT_MAX_VALUE + 6)); - assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); - 
} - - @TestTemplate - public void testIntegerNotEq() { - boolean shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE - 25)); - assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); - - shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); - - shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); - - shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE - 4)); - assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); - - shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); - - shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); - - shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 6)); - assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); - } - - @TestTemplate - public void testIntegerNotEqRewritten() { - boolean shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE - 25))); - assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); - - shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE - 1))); - assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); - - shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE))); - assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); - - shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE - 4))); - assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); - - shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE))); - assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); - - shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE + 1))); - assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); - - shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE + 6))); - assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); - } - - @TestTemplate - public void testStructFieldLt() { - boolean shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE - 25)); - assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse(); - - shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE)); - assertThat(shouldRead) - .as("Should not read: id range below lower bound (30 is not < 30)") - .isFalse(); - - shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE + 1)); - assertThat(shouldRead).as("Should read: one possible id").isTrue(); - - shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: may possible ids").isTrue(); - } - - @TestTemplate - public void testStructFieldLtEq() { - boolean shouldRead = - shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 25)); - assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse(); - - shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should not read: id range below lower bound (29 < 30)").isFalse(); - - shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: one possible id").isTrue(); - - shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", 
INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: many possible ids").isTrue(); - } - - @TestTemplate - public void testStructFieldGt() { - boolean shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE + 6)); - assertThat(shouldRead).as("Should not read: id range above upper bound (85 < 79)").isFalse(); - - shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE)); - assertThat(shouldRead) - .as("Should not read: id range above upper bound (79 is not > 79)") - .isFalse(); - - shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 1)); - assertThat(shouldRead).as("Should read: one possible id").isTrue(); - - shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 4)); - assertThat(shouldRead).as("Should read: may possible ids").isTrue(); - } - - @TestTemplate - public void testStructFieldGtEq() { - boolean shouldRead = - shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 6)); - assertThat(shouldRead).as("Should not read: id range above upper bound (85 < 79)").isFalse(); - - shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should not read: id range above upper bound (80 > 79)").isFalse(); - - shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: one possible id").isTrue(); - - shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE - 4)); - assertThat(shouldRead).as("Should read: may possible ids").isTrue(); - } - - @TestTemplate - public void testStructFieldEq() { - boolean shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE - 25)); - assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); - - shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); - - shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); - - shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE - 4)); - assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); - - shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); - - shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); - - shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE + 6)); - assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); - } - - @TestTemplate - public void testStructFieldNotEq() { - boolean shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE - 25)); - assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE - 4)); - assertThat(shouldRead).as("Should read: id between lower and 
upper bounds").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); - - shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE + 6)); - assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); - } - - @TestTemplate - public void testCaseInsensitive() { - boolean shouldRead = shouldRead(equal("ID", INT_MIN_VALUE - 25), false); - assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); - } - - @TestTemplate - public void testStringStartsWith() { - assumeThat(format) - .as("ORC row group filter does not support StringStartsWith") - .isNotEqualTo(FileFormat.ORC); - - boolean shouldRead = shouldRead(startsWith("str", "1")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "0st")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "1str1")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "1str1_xgd")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "2str")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "9xstr")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - - shouldRead = shouldRead(startsWith("str", "0S")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - - shouldRead = shouldRead(startsWith("str", "x")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - - shouldRead = shouldRead(startsWith("str", "9str9aaa")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - } - - @TestTemplate - public void testStringNotStartsWith() { - assumeThat(format) - .as("ORC row group filter does not support StringStartsWith") - .isNotEqualTo(FileFormat.ORC); - - boolean shouldRead = shouldRead(notStartsWith("str", "1")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "0st")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "1str1")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "1str1_xgd")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "2str")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "9xstr")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("required", "r")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - - shouldRead = shouldRead(notStartsWith("required", "requ")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("some_nulls", "ssome")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("some_nulls", "som")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - } - - 
@TestTemplate - public void testIntegerIn() { - boolean shouldRead = shouldRead(in("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)); - assertThat(shouldRead).as("Should not read: id below lower bound (5 < 30, 6 < 30)").isFalse(); - - shouldRead = shouldRead(in("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should not read: id below lower bound (28 < 30, 29 < 30)").isFalse(); - - shouldRead = shouldRead(in("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: id equal to lower bound (30 == 30)").isTrue(); - - shouldRead = shouldRead(in("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)); - assertThat(shouldRead) - .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)") - .isTrue(); - - shouldRead = shouldRead(in("id", INT_MAX_VALUE, INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should read: id equal to upper bound (79 == 79)").isTrue(); - - shouldRead = shouldRead(in("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)); - assertThat(shouldRead).as("Should not read: id above upper bound (80 > 79, 81 > 79)").isFalse(); - - shouldRead = shouldRead(in("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)); - assertThat(shouldRead).as("Should not read: id above upper bound (85 > 79, 86 > 79)").isFalse(); - - shouldRead = shouldRead(in("all_nulls", 1, 2)); - assertThat(shouldRead).as("Should skip: in on all nulls column").isFalse(); - - shouldRead = shouldRead(in("some_nulls", "aaa", "some")); - assertThat(shouldRead).as("Should read: in on some nulls column").isTrue(); - - shouldRead = shouldRead(in("no_nulls", "aaa", "")); - assertThat(shouldRead).as("Should read: in on no nulls column").isTrue(); - } - - @TestTemplate - public void testIntegerNotIn() { - boolean shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)); - assertThat(shouldRead).as("Should read: id below lower bound (5 < 30, 6 < 30)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should read: id below lower bound (28 < 30, 29 < 30)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: id equal to lower bound (30 == 30)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)); - assertThat(shouldRead) - .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)") - .isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MAX_VALUE, INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should read: id equal to upper bound (79 == 79)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)); - assertThat(shouldRead).as("Should read: id above upper bound (80 > 79, 81 > 79)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)); - assertThat(shouldRead).as("Should read: id above upper bound (85 > 79, 86 > 79)").isTrue(); - - shouldRead = shouldRead(notIn("all_nulls", 1, 2)); - assertThat(shouldRead).as("Should read: notIn on all nulls column").isTrue(); - - shouldRead = shouldRead(notIn("some_nulls", "aaa", "some")); - assertThat(shouldRead).as("Should read: notIn on some nulls column").isTrue(); - - shouldRead = shouldRead(notIn("no_nulls", "aaa", "")); - if (format == FileFormat.PARQUET) { - // no_nulls column has all values == "", so notIn("no_nulls", "") should always be false and - // so should be skipped - // However, the metrics evaluator in Parquets always reads row group for a 
notIn filter - assertThat(shouldRead).as("Should read: notIn on no nulls column").isTrue(); - } else { - assertThat(shouldRead).as("Should skip: notIn on no nulls column").isFalse(); - } - } - - @TestTemplate - public void testSomeNullsNotEq() { - boolean shouldRead = shouldRead(notEqual("some_nulls", "some")); - assertThat(shouldRead).as("Should read: notEqual on some nulls column").isTrue(); - } - - @TestTemplate - public void testInLimitParquet() { - assumeThat(format).isEqualTo(FileFormat.PARQUET); - - boolean shouldRead = shouldRead(in("id", 1, 2)); - assertThat(shouldRead).as("Should not read if IN is evaluated").isFalse(); - - List ids = Lists.newArrayListWithExpectedSize(400); - for (int id = -400; id <= 0; id++) { - ids.add(id); - } - - shouldRead = shouldRead(in("id", ids)); - assertThat(shouldRead).as("Should read if IN is not evaluated").isTrue(); - } - - @TestTemplate - public void testParquetTypePromotion() { - assumeThat(format).as("Only valid for Parquet").isEqualTo(FileFormat.PARQUET); - - Schema promotedSchema = new Schema(required(1, "id", Types.LongType.get())); - boolean shouldRead = - new ParquetMetricsRowGroupFilter(promotedSchema, equal("id", INT_MIN_VALUE + 1), true) - .shouldRead(parquetSchema, rowGroupMetadata); - assertThat(shouldRead).as("Should succeed with promoted schema").isTrue(); - } - - @TestTemplate - public void testTransformFilter() { - assumeThat(format).isEqualTo(FileFormat.PARQUET); - - boolean shouldRead = - new ParquetMetricsRowGroupFilter( - testSchema, equal(truncate("required", 2), "some_value"), true) - .shouldRead(parquetSchema, rowGroupMetadata); - assertThat(shouldRead) - .as("Should read: filter contains non-reference evaluate as True") - .isTrue(); - } - - @TestTemplate - public void testVariantNotNull() { - assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); - - boolean shouldRead = shouldRead(notNull("some_nulls_variant")); - assertThat(shouldRead) - .as("Should read: variant notNull filters must be evaluated post scan") - .isTrue(); - - shouldRead = shouldRead(notNull("all_nulls_variant")); - assertThat(shouldRead) - .as("Should read: variant notNull filters must be evaluated post scan even for all nulls") - .isTrue(); - } - - @TestTemplate - public void testVariantEq() { - assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); - - assertThatThrownBy(() -> shouldRead(equal("some_nulls_variant", "test"))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test"); - } - - @TestTemplate - public void testVariantIn() { - assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); - - assertThatThrownBy(() -> shouldRead(in("some_nulls_variant", "test1", "test2"))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test1"); - - assertThatThrownBy(() -> shouldRead(in("all_nulls_variant", "test1", "test2"))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test1"); - } - - @TestTemplate - public void testVariantNotIn() { - assumeThat(format).isNotEqualTo(FileFormat.ORC); - - // Variant columns cannot be used in 'notIn' expressions with literals - assertThatThrownBy(() -> shouldRead(notIn("some_nulls_variant", "test1", "test2"))) - 
.isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test1"); - - assertThatThrownBy(() -> shouldRead(notIn("all_nulls_variant", "test1", "test2"))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test1"); - } - - @TestTemplate - public void testVariantIsNull() { - assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); - - boolean shouldRead = shouldRead(isNull("some_nulls_variant")); - assertThat(shouldRead) - .as("Should read: variant isNull filters must be evaluated post scan") - .isTrue(); - - shouldRead = shouldRead(isNull("all_nulls_variant")); - assertThat(shouldRead) - .as("Should read: variant isNull filters must be evaluated post scan even for all nulls") - .isTrue(); - } - - @TestTemplate - public void testVariantComparisons() { - assumeThat(format).isNotEqualTo(FileFormat.ORC).as("ORC does not fully support variant type"); - - // Variant columns cannot be used in comparison expressions with literals - assertThatThrownBy(() -> shouldRead(lessThan("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - - assertThatThrownBy(() -> shouldRead(lessThanOrEqual("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - - assertThatThrownBy(() -> shouldRead(greaterThan("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - - assertThatThrownBy(() -> shouldRead(greaterThanOrEqual("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - - assertThatThrownBy(() -> shouldRead(notEqual("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - } - - private boolean shouldRead(Expression expression) { - return shouldRead(expression, true); - } - - private boolean shouldRead(Expression expression, boolean caseSensitive) { - switch (format) { - case ORC: - return shouldReadOrc(expression, caseSensitive); - case PARQUET: - return shouldReadParquet(expression, caseSensitive, parquetSchema, rowGroupMetadata); - default: - throw new UnsupportedOperationException( - "Row group filter tests not supported for " + format); - } - } - - private boolean shouldReadOrc(Expression expression, boolean caseSensitive) { - try (CloseableIterable reader = - ORC.read(Files.localInput(orcFile)) - .project(testSchema) - .createReaderFunc(fileSchema -> GenericOrcReader.buildReader(testSchema, fileSchema)) - .filter(expression) - .caseSensitive(caseSensitive) - .build()) { - return !Lists.newArrayList(reader).isEmpty(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - private boolean shouldReadParquet( - Expression expression, - boolean caseSensitive, - MessageType messageType, - BlockMetaData blockMetaData) { - return new ParquetMetricsRowGroupFilter(testSchema, expression, caseSensitive) - .shouldRead(messageType, blockMetaData); - } - - private org.apache.parquet.io.InputFile 
parquetInputFile(InputFile inFile) { - return new org.apache.parquet.io.InputFile() { - @Override - public long getLength() throws IOException { - return inFile.getLength(); - } - - @Override - public org.apache.parquet.io.SeekableInputStream newStream() throws IOException { - SeekableInputStream stream = inFile.newStream(); - return new DelegatingSeekableInputStream(stream) { - @Override - public long getPos() throws IOException { - return stream.getPos(); - } - - @Override - public void seek(long newPos) throws IOException { - stream.seek(newPos); - } - }; - } - }; - } -} diff --git a/data/src/test/java/org/apache/iceberg/data/orc/TestOrcMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/orc/TestOrcMetricsRowGroupFilter.java new file mode 100644 index 000000000000..5a2ff2fb6727 --- /dev/null +++ b/data/src/test/java/org/apache/iceberg/data/orc/TestOrcMetricsRowGroupFilter.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.data.orc; + +import static org.apache.iceberg.expressions.Expressions.isNaN; +import static org.apache.iceberg.expressions.Expressions.notIn; +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.File; +import java.io.IOException; +import java.io.UncheckedIOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.iceberg.Files; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.data.MetricsRowGroupFilterTestBase; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.orc.ORC; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.orc.OrcFile; +import org.apache.orc.Reader; +import org.junit.jupiter.api.Test; + +public class TestOrcMetricsRowGroupFilter extends MetricsRowGroupFilterTestBase { + + private File orcFile = null; + + @Override + protected void createInputFile() throws IOException { + this.orcFile = new File(tempDir, "junit" + System.nanoTime()); + + OutputFile outFile = Files.localOutput(orcFile); + try (FileAppender appender = + ORC.write(outFile) + .schema(BASE_FILE_SCHEMA) + .createWriterFunc(GenericOrcWriter::buildWriter) + .build()) { + GenericRecord record = GenericRecord.create(BASE_FILE_SCHEMA); + for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { + populateBaseFields(record, i); + appender.add(record); + } + } + InputFile inFile = Files.localInput(orcFile); + try (Reader reader = + OrcFile.createReader( + new Path(inFile.location()), OrcFile.readerOptions(new Configuration()))) { + 
assertThat(reader.getStripes()).as("Should create only one stripe").hasSize(1); + } + orcFile.deleteOnExit(); + } + + @Override + protected boolean shouldRead(Expression expression, boolean caseSensitive) { + try (CloseableIterable reader = + ORC.read(Files.localInput(orcFile)) + .project(BASE_SCHEMA) + .createReaderFunc(fileSchema -> GenericOrcReader.buildReader(BASE_SCHEMA, fileSchema)) + .filter(expression) + .caseSensitive(caseSensitive) + .build()) { + return !Lists.newArrayList(reader).isEmpty(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Test + public void testIsNaNNoNans() { + boolean shouldRead = shouldRead(isNaN("no_nans")); + assertThat(shouldRead).as("ORC filter push-down can skip groups with no NaN").isFalse(); + } + + @Test + public void testNotInNoNulls() { + boolean shouldRead = shouldRead(notIn("no_nulls", "aaa", "")); + assertThat(shouldRead).as("ORC can skip when all values match excluded set").isFalse(); + } +} diff --git a/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetMetricsRowGroupFilter.java new file mode 100644 index 000000000000..94d098f10a25 --- /dev/null +++ b/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetMetricsRowGroupFilter.java @@ -0,0 +1,444 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.data.parquet; + +import static org.apache.iceberg.expressions.Expressions.equal; +import static org.apache.iceberg.expressions.Expressions.greaterThan; +import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.in; +import static org.apache.iceberg.expressions.Expressions.isNaN; +import static org.apache.iceberg.expressions.Expressions.isNull; +import static org.apache.iceberg.expressions.Expressions.lessThan; +import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.notEqual; +import static org.apache.iceberg.expressions.Expressions.notIn; +import static org.apache.iceberg.expressions.Expressions.notNull; +import static org.apache.iceberg.expressions.Expressions.notStartsWith; +import static org.apache.iceberg.expressions.Expressions.startsWith; +import static org.apache.iceberg.expressions.Expressions.truncate; +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.UUID; +import org.apache.iceberg.Files; +import org.apache.iceberg.Schema; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.data.MetricsRowGroupFilterTestBase; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.io.SeekableInputStream; +import org.apache.iceberg.parquet.Parquet; +import org.apache.iceberg.parquet.ParquetMetricsRowGroupFilter; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.StringType; +import org.apache.iceberg.variants.ShreddedObject; +import org.apache.iceberg.variants.Variant; +import org.apache.iceberg.variants.VariantMetadata; +import org.apache.iceberg.variants.Variants; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.metadata.BlockMetaData; +import org.apache.parquet.io.DelegatingSeekableInputStream; +import org.apache.parquet.schema.MessageType; +import org.junit.jupiter.api.Test; + +public class TestParquetMetricsRowGroupFilter extends MetricsRowGroupFilterTestBase { + + private static final Schema PARQUET_SCHEMA = + new Schema( + Lists.newArrayList( + Iterables.concat( + BASE_SCHEMA.columns(), + Lists.newArrayList( + optional(17, "no_stats_parquet", StringType.get()), + optional(18, "some_nulls_variant", Types.VariantType.get()), + optional(19, "all_nulls_variant", Types.VariantType.get()))))); + + private static final Schema PARQUET_FILE_SCHEMA = + new Schema( + Lists.newArrayList( + Iterables.concat( + BASE_FILE_SCHEMA.columns(), + Lists.newArrayList( + optional(17, "_no_stats_parquet", StringType.get()), + optional(18, "_some_nulls_variant", Types.VariantType.get()), + optional(19, "_all_nulls_variant", Types.VariantType.get()))))); + + private MessageType parquetSchema = null; + private BlockMetaData rowGroupMetadata = null; + + @Override + protected void createInputFile() throws IOException { + File parquetFile = new File(tempDir, "junit" + 
System.nanoTime()); + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < 200; i += 1) { + sb.append(UUID.randomUUID()); + } + String tooLongForParquetValue = sb.toString(); + + OutputFile outFile = Files.localOutput(parquetFile); + try (FileAppender appender = + Parquet.write(outFile) + .schema(PARQUET_FILE_SCHEMA) + .createWriterFunc(GenericParquetWriter::create) + .build()) { + for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { + GenericRecord record = GenericRecord.create(PARQUET_FILE_SCHEMA); + populateBaseFields(record, i); + record.setField( + "_no_stats_parquet", + tooLongForParquetValue); // value longer than 4k will produce no stats + if (i % 2 == 0) { + VariantMetadata metadata = Variants.metadata("data"); + ShreddedObject obj = Variants.object(metadata); + obj.put("data", Variants.of(25 + i)); + record.setField("_some_nulls_variant", Variant.of(metadata, obj)); + } else { + record.setField("_some_nulls_variant", null); + } + record.setField("_all_nulls_variant", null); + appender.add(record); + } + } + + InputFile inFile = Files.localInput(parquetFile); + try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { + assertThat(reader.getRowGroups()).as("Should create only one row group").hasSize(1); + rowGroupMetadata = reader.getRowGroups().get(0); + parquetSchema = reader.getFileMetaData().getSchema(); + } + parquetFile.deleteOnExit(); + } + + @Override + protected boolean shouldRead(Expression expression, boolean caseSensitive) { + return shouldRead(expression, caseSensitive, rowGroupMetadata); + } + + @Test + public void testIsNaNNoNans() { + boolean shouldRead = shouldRead(isNaN("no_nans")); + assertThat(shouldRead).as("Parquet metrics don't track NaN counts").isTrue(); + } + + @Test + public void testNotInNoNulls() { + boolean shouldRead = shouldRead(notIn("no_nulls", "aaa", "")); + assertThat(shouldRead).as("Parquet metrics evaluator reads for notIn").isTrue(); + } + + @Test + public void testColumnNotInFile() { + Expression[] cannotMatch = + new Expression[] { + lessThan("not_in_file", 1.0f), lessThanOrEqual("not_in_file", 1.0f), + equal("not_in_file", 1.0f), greaterThan("not_in_file", 1.0f), + greaterThanOrEqual("not_in_file", 1.0f), notNull("not_in_file") + }; + + for (Expression expr : cannotMatch) { + boolean shouldRead = shouldRead(expr); + assertThat(shouldRead) + .as("Should skip when column is not in file (all nulls): " + expr) + .isFalse(); + } + + Expression[] canMatch = new Expression[] {isNull("not_in_file"), notEqual("not_in_file", 1.0f)}; + + for (Expression expr : canMatch) { + boolean shouldRead = shouldRead(expr); + assertThat(shouldRead) + .as("Should read when column is not in file (all nulls): " + expr) + .isTrue(); + } + } + + @Test + public void testMissingStatsParquet() { + Expression[] exprs = + new Expression[] { + lessThan("no_stats_parquet", "a"), + lessThanOrEqual("no_stats_parquet", "b"), + equal("no_stats_parquet", "c"), + greaterThan("no_stats_parquet", "d"), + greaterThanOrEqual("no_stats_parquet", "e"), + notEqual("no_stats_parquet", "f"), + isNull("no_stats_parquet"), + notNull("no_stats_parquet"), + startsWith("no_stats_parquet", "a"), + notStartsWith("no_stats_parquet", "a") + }; + + for (Expression expr : exprs) { + boolean shouldRead = shouldRead(expr); + assertThat(shouldRead).as("Should read when missing stats for expr: " + expr).isTrue(); + } + } + + @Test + public void testZeroRecordFileParquet() { + BlockMetaData emptyBlock = new BlockMetaData(); + emptyBlock.setRowCount(0); + + 
+    Expression[] exprs =
+        new Expression[] {
+          lessThan("id", 5),
+          lessThanOrEqual("id", 30),
+          equal("id", 70),
+          greaterThan("id", 78),
+          greaterThanOrEqual("id", 90),
+          notEqual("id", 101),
+          isNull("some_nulls"),
+          notNull("some_nulls")
+        };
+
+    for (Expression expr : exprs) {
+      boolean shouldRead = shouldRead(expr, true, emptyBlock);
+      assertThat(shouldRead).as("Should never read 0-record file: " + expr).isFalse();
+    }
+  }
+
+  @Test
+  public void testStringStartsWith() {
+    boolean shouldRead = shouldRead(startsWith("str", "1"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(startsWith("str", "0st"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(startsWith("str", "1str1"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(startsWith("str", "1str1_xgd"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(startsWith("str", "2str"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(startsWith("str", "9xstr"));
+    assertThat(shouldRead).as("Should not read: range doesn't match").isFalse();
+
+    shouldRead = shouldRead(startsWith("str", "0S"));
+    assertThat(shouldRead).as("Should not read: range doesn't match").isFalse();
+
+    shouldRead = shouldRead(startsWith("str", "x"));
+    assertThat(shouldRead).as("Should not read: range doesn't match").isFalse();
+
+    shouldRead = shouldRead(startsWith("str", "9str9aaa"));
+    assertThat(shouldRead).as("Should not read: range doesn't match").isFalse();
+  }
+
+  @Test
+  public void testStringNotStartsWith() {
+    boolean shouldRead = shouldRead(notStartsWith("str", "1"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(notStartsWith("str", "0st"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(notStartsWith("str", "1str1"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(notStartsWith("str", "1str1_xgd"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(notStartsWith("str", "2str"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(notStartsWith("str", "9xstr"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(notStartsWith("required", "r"));
+    assertThat(shouldRead).as("Should not read: range doesn't match").isFalse();
+
+    shouldRead = shouldRead(notStartsWith("required", "requ"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(notStartsWith("some_nulls", "ssome"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+
+    shouldRead = shouldRead(notStartsWith("some_nulls", "som"));
+    assertThat(shouldRead).as("Should read: range matches").isTrue();
+  }
+
+  @Test
+  public void testInLimitParquet() {
+    boolean shouldRead = shouldRead(in("id", 1, 2));
+    assertThat(shouldRead).as("Should not read if IN is evaluated").isFalse();
+
+    List<Integer> ids = Lists.newArrayListWithExpectedSize(400);
+    for (int id = -400; id <= 0; id++) {
+      ids.add(id);
+    }
+
+    shouldRead = shouldRead(in("id", ids));
+    assertThat(shouldRead).as("Should read if IN is not evaluated").isTrue();
+  }
+
+  @Test
+  public void testParquetTypePromotion() {
+    Schema promotedSchema = new Schema(Types.NestedField.required(1,
"id", Types.LongType.get())); + boolean shouldRead = + new ParquetMetricsRowGroupFilter(promotedSchema, equal("id", INT_MIN_VALUE + 1), true) + .shouldRead(parquetSchema, rowGroupMetadata); + assertThat(shouldRead).as("Should succeed with promoted schema").isTrue(); + } + + @Test + public void testTransformFilter() { + boolean shouldRead = + new ParquetMetricsRowGroupFilter( + PARQUET_SCHEMA, equal(truncate("required", 2), "some_value"), true) + .shouldRead(parquetSchema, rowGroupMetadata); + assertThat(shouldRead) + .as("Should read: filter contains non-reference evaluate as True") + .isTrue(); + } + + @Test + public void testVariantNotNull() { + boolean shouldRead = shouldRead(notNull("some_nulls_variant")); + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan") + .isTrue(); + + shouldRead = shouldRead(notNull("all_nulls_variant")); + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan even for all nulls") + .isTrue(); + } + + @Test + public void testVariantEq() { + assertThatThrownBy(() -> shouldRead(equal("some_nulls_variant", "test"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test"); + } + + @Test + public void testVariantIn() { + assertThatThrownBy(() -> shouldRead(in("some_nulls_variant", "test1", "test2"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test1"); + + assertThatThrownBy(() -> shouldRead(in("all_nulls_variant", "test1", "test2"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test1"); + } + + @Test + public void testVariantNotIn() { + assertThatThrownBy(() -> shouldRead(notIn("some_nulls_variant", "test1", "test2"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test1"); + + assertThatThrownBy(() -> shouldRead(notIn("all_nulls_variant", "test1", "test2"))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("test1"); + } + + @Test + public void testVariantIsNull() { + boolean shouldRead = shouldRead(isNull("some_nulls_variant")); + assertThat(shouldRead) + .as("Should read: variant isNull filters must be evaluated post scan") + .isTrue(); + + shouldRead = shouldRead(isNull("all_nulls_variant")); + assertThat(shouldRead) + .as("Should read: variant isNull filters must be evaluated post scan even for all nulls") + .isTrue(); + } + + @Test + public void testVariantComparisons() { + assertThatThrownBy(() -> shouldRead(lessThan("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); + + assertThatThrownBy(() -> shouldRead(lessThanOrEqual("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); + + assertThatThrownBy(() -> shouldRead(greaterThan("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); + + assertThatThrownBy(() -> 
shouldRead(greaterThanOrEqual("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); + + assertThatThrownBy(() -> shouldRead(notEqual("some_nulls_variant", 42))) + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Invalid value for conversion to type variant:") + .hasMessageContaining("42"); + } + + private boolean shouldRead(Expression expression, boolean caseSensitive, BlockMetaData block) { + if (parquetSchema == null || block == null) { + throw new IllegalStateException( + "Parquet file not created yet. Call createInputFile() first."); + } + return new ParquetMetricsRowGroupFilter(PARQUET_SCHEMA, expression, caseSensitive) + .shouldRead(parquetSchema, block); + } + + private org.apache.parquet.io.InputFile parquetInputFile(InputFile inFile) { + return new org.apache.parquet.io.InputFile() { + @Override + public long getLength() throws IOException { + return inFile.getLength(); + } + + @Override + public org.apache.parquet.io.SeekableInputStream newStream() throws IOException { + SeekableInputStream stream = inFile.newStream(); + return new DelegatingSeekableInputStream(stream) { + @Override + public long getPos() throws IOException { + return stream.getPos(); + } + + @Override + public void seek(long newPos) throws IOException { + stream.seek(newPos); + } + }; + } + }; + } +} From ab202af7eccb3d495dd362ad8da51b3269f4a2cc Mon Sep 17 00:00:00 2001 From: geruh Date: Wed, 17 Sep 2025 13:19:56 -0700 Subject: [PATCH 4/9] Revert refactoring --- .../data/MetricsRowGroupFilterTestBase.java | 619 --------- .../data/TestMetricsRowGroupFilter.java | 1150 +++++++++++++++++ .../orc/TestOrcMetricsRowGroupFilter.java | 99 -- .../TestParquetMetricsRowGroupFilter.java | 444 ------- 4 files changed, 1150 insertions(+), 1162 deletions(-) delete mode 100644 data/src/test/java/org/apache/iceberg/data/MetricsRowGroupFilterTestBase.java create mode 100644 data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java delete mode 100644 data/src/test/java/org/apache/iceberg/data/orc/TestOrcMetricsRowGroupFilter.java delete mode 100644 data/src/test/java/org/apache/iceberg/data/parquet/TestParquetMetricsRowGroupFilter.java diff --git a/data/src/test/java/org/apache/iceberg/data/MetricsRowGroupFilterTestBase.java b/data/src/test/java/org/apache/iceberg/data/MetricsRowGroupFilterTestBase.java deleted file mode 100644 index 3e8e7f9d66e3..000000000000 --- a/data/src/test/java/org/apache/iceberg/data/MetricsRowGroupFilterTestBase.java +++ /dev/null @@ -1,619 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.data; - -import static org.apache.iceberg.expressions.Expressions.and; -import static org.apache.iceberg.expressions.Expressions.equal; -import static org.apache.iceberg.expressions.Expressions.greaterThan; -import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual; -import static org.apache.iceberg.expressions.Expressions.in; -import static org.apache.iceberg.expressions.Expressions.isNaN; -import static org.apache.iceberg.expressions.Expressions.isNull; -import static org.apache.iceberg.expressions.Expressions.lessThan; -import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual; -import static org.apache.iceberg.expressions.Expressions.not; -import static org.apache.iceberg.expressions.Expressions.notEqual; -import static org.apache.iceberg.expressions.Expressions.notIn; -import static org.apache.iceberg.expressions.Expressions.notNaN; -import static org.apache.iceberg.expressions.Expressions.notNull; -import static org.apache.iceberg.expressions.Expressions.or; -import static org.apache.iceberg.types.Types.NestedField.optional; -import static org.apache.iceberg.types.Types.NestedField.required; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import java.io.File; -import java.io.IOException; -import org.apache.iceberg.Schema; -import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.expressions.Expression; -import org.apache.iceberg.types.Types; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public abstract class MetricsRowGroupFilterTestBase { - - protected static final Types.StructType STRUCT_FIELD_TYPE = - Types.StructType.of(Types.NestedField.required(7, "int_field", Types.IntegerType.get())); - - protected static final Types.StructType UNDERSCORE_STRUCT_FIELD_TYPE = - Types.StructType.of(Types.NestedField.required(7, "_int_field", Types.IntegerType.get())); - - protected static final Schema BASE_SCHEMA = - new Schema( - required(1, "id", Types.IntegerType.get()), - required(2, "required", Types.StringType.get()), - optional(3, "all_nulls", Types.DoubleType.get()), - optional(4, "some_nulls", Types.StringType.get()), - optional(5, "no_nulls", Types.StringType.get()), - optional(6, "struct_not_null", STRUCT_FIELD_TYPE), - optional(8, "not_in_file", Types.FloatType.get()), - optional(9, "str", Types.StringType.get()), - optional( - 10, - "map_not_null", - Types.MapType.ofRequired(11, 12, Types.StringType.get(), Types.IntegerType.get())), - optional(13, "all_nans", Types.DoubleType.get()), - optional(14, "some_nans", Types.FloatType.get()), - optional(15, "no_nans", Types.DoubleType.get()), - optional(16, "some_double_nans", Types.DoubleType.get())); - - protected static final Schema BASE_FILE_SCHEMA = - new Schema( - Types.NestedField.required(1, "_id", Types.IntegerType.get()), - Types.NestedField.required(2, "_required", Types.StringType.get()), - Types.NestedField.optional(3, "_all_nulls", Types.DoubleType.get()), - Types.NestedField.optional(4, "_some_nulls", Types.StringType.get()), - Types.NestedField.optional(5, "_no_nulls", Types.StringType.get()), - Types.NestedField.optional(6, "_struct_not_null", UNDERSCORE_STRUCT_FIELD_TYPE), - Types.NestedField.optional(9, "_str", Types.StringType.get()), - Types.NestedField.optional(13, "_all_nans", Types.DoubleType.get()), - Types.NestedField.optional(14, "_some_nans", 
Types.FloatType.get()), - Types.NestedField.optional(15, "_no_nans", Types.DoubleType.get()), - Types.NestedField.optional(16, "_some_double_nans", Types.DoubleType.get())); - - protected static final int INT_MIN_VALUE = 30; - protected static final int INT_MAX_VALUE = 79; - - @TempDir protected File tempDir; - - @BeforeEach - public final void before() throws IOException { - createInputFile(); - } - - protected abstract void createInputFile() throws IOException; - - protected void populateBaseFields(GenericRecord record, int rowIndex) { - record.setField("_id", INT_MIN_VALUE + rowIndex); - record.setField("_required", "req"); - record.setField("_all_nulls", null); - record.setField("_some_nulls", (rowIndex % 10 == 0) ? null : "some"); - record.setField("_no_nulls", ""); - record.setField("_all_nans", Double.NaN); - record.setField("_some_nans", (rowIndex % 10 == 0) ? Float.NaN : 2F); - record.setField("_some_double_nans", (rowIndex % 10 == 0) ? Double.NaN : 2D); - record.setField("_no_nans", 3D); - record.setField("_str", rowIndex + "str" + rowIndex); - GenericRecord struct = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE); - struct.setField("_int_field", INT_MIN_VALUE + rowIndex); - record.setField("_struct_not_null", struct); - } - - protected boolean shouldRead(Expression expression) { - return shouldRead(expression, true); - } - - protected abstract boolean shouldRead(Expression expression, boolean caseSensitive); - - @Test - public void testAllNulls() { - boolean shouldRead; - - shouldRead = shouldRead(notNull("all_nulls")); - assertThat(shouldRead).as("Should skip: no non-null value in all null column").isFalse(); - - shouldRead = shouldRead(notNull("some_nulls")); - assertThat(shouldRead) - .as("Should read: column with some nulls contains a non-null value") - .isTrue(); - - shouldRead = shouldRead(notNull("no_nulls")); - assertThat(shouldRead).as("Should read: non-null column contains a non-null value").isTrue(); - - shouldRead = shouldRead(notNull("map_not_null")); - assertThat(shouldRead).as("Should read: map type is not skipped").isTrue(); - - shouldRead = shouldRead(notNull("struct_not_null")); - assertThat(shouldRead).as("Should read: struct type is not skipped").isTrue(); - } - - @Test - public void testNoNulls() { - boolean shouldRead = shouldRead(isNull("all_nulls")); - assertThat(shouldRead).as("Should read: at least one null value in all null column").isTrue(); - - shouldRead = shouldRead(isNull("some_nulls")); - assertThat(shouldRead).as("Should read: column with some nulls contains a null value").isTrue(); - - shouldRead = shouldRead(isNull("no_nulls")); - assertThat(shouldRead).as("Should skip: non-null column contains no null values").isFalse(); - - shouldRead = shouldRead(isNull("map_not_null")); - assertThat(shouldRead).as("Should read: map type is not skipped").isTrue(); - - shouldRead = shouldRead(isNull("struct_not_null")); - assertThat(shouldRead).as("Should read: struct type is not skipped").isTrue(); - } - - @Test - public void testFloatWithNan() { - boolean shouldRead = shouldRead(greaterThan("some_nans", 1.0)); - assertThat(shouldRead).isTrue(); - - shouldRead = shouldRead(greaterThanOrEqual("some_nans", 1.0)); - assertThat(shouldRead).isTrue(); - - shouldRead = shouldRead(lessThan("some_nans", 3.0)); - assertThat(shouldRead).isTrue(); - - shouldRead = shouldRead(lessThanOrEqual("some_nans", 1.0)); - assertThat(shouldRead).isTrue(); - - shouldRead = shouldRead(equal("some_nans", 2.0)); - assertThat(shouldRead).isTrue(); - } - - @Test - public void 
testDoubleWithNan() {
-    boolean shouldRead = shouldRead(greaterThan("some_double_nans", 1.0));
-    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
-
-    shouldRead = shouldRead(greaterThanOrEqual("some_double_nans", 1.0));
-    assertThat(shouldRead)
-        .as("Should read: column with some nans contains the target value")
-        .isTrue();
-
-    shouldRead = shouldRead(lessThan("some_double_nans", 3.0));
-    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
-
-    shouldRead = shouldRead(lessThanOrEqual("some_double_nans", 1.0));
-    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
-
-    shouldRead = shouldRead(equal("some_double_nans", 2.0));
-    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
-  }
-
-  @Test
-  public void testIsNaN() {
-    boolean shouldRead = shouldRead(isNaN("all_nans"));
-    assertThat(shouldRead).isTrue();
-
-    shouldRead = shouldRead(isNaN("some_nans"));
-    assertThat(shouldRead).isTrue();
-
-    shouldRead = shouldRead(isNaN("all_nulls"));
-    assertThat(shouldRead).as("Should skip: all null column will not contain NaN").isFalse();
-  }
-
-  @Test
-  public void testNotNaN() {
-    boolean shouldRead = shouldRead(notNaN("all_nans"));
-    assertThat(shouldRead).as("Should read: NaN counts are not tracked in metrics").isTrue();
-
-    shouldRead = shouldRead(notNaN("some_nans"));
-    assertThat(shouldRead).as("Should read: NaN counts are not tracked in metrics").isTrue();
-
-    shouldRead = shouldRead(notNaN("no_nans"));
-    assertThat(shouldRead).as("Should read: NaN counts are not tracked in metrics").isTrue();
-
-    shouldRead = shouldRead(notNaN("all_nulls"));
-    assertThat(shouldRead).as("Should read: NaN counts are not tracked in metrics").isTrue();
-  }
-
-  @Test
-  public void testRequiredColumn() {
-    boolean shouldRead = shouldRead(notNull("required"));
-    assertThat(shouldRead).as("Should read: required columns are always non-null").isTrue();
-
-    shouldRead = shouldRead(isNull("required"));
-    assertThat(shouldRead).as("Should skip: required columns are always non-null").isFalse();
-  }
-
-  @Test
-  public void testMissingColumn() {
-    assertThatThrownBy(() -> shouldRead(lessThan("missing", 5)))
-        .as("Should complain about missing column in expression")
-        .isInstanceOf(ValidationException.class)
-        .hasMessageStartingWith("Cannot find field 'missing'");
-  }
-
-  @Test
-  public void testNot() {
-    // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
-    boolean shouldRead = shouldRead(not(lessThan("id", INT_MIN_VALUE - 25)));
-    assertThat(shouldRead).as("Should read: not(false)").isTrue();
-
-    shouldRead = shouldRead(not(greaterThan("id", INT_MIN_VALUE - 25)));
-    assertThat(shouldRead).as("Should skip: not(true)").isFalse();
-  }
-
-  @Test
-  public void testAnd() {
-    // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
-    boolean shouldRead =
-        shouldRead(
-            and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MIN_VALUE - 30)));
-    assertThat(shouldRead).as("Should skip: and(false, true)").isFalse();
-
-    shouldRead =
-        shouldRead(
-            and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1)));
-    assertThat(shouldRead).as("Should skip: and(false, false)").isFalse();
-
-    shouldRead =
-        shouldRead(
-            and(greaterThan("id", INT_MIN_VALUE - 25), lessThanOrEqual("id", INT_MIN_VALUE)));
-    assertThat(shouldRead).as("Should read: and(true, true)").isTrue();
-  }
-
-  @Test
-  public void testOr() {
-    // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out
-    boolean shouldRead =
-        shouldRead(
-            or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1)));
-    assertThat(shouldRead).as("Should skip: or(false, false)").isFalse();
-
-    shouldRead =
-        shouldRead(
-            or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE - 19)));
-    assertThat(shouldRead).as("Should read: or(false, true)").isTrue();
-  }
-
-  @Test
-  public void testIntegerLt() {
-    boolean shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE - 25));
-    assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse();
-
-    shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE));
-    assertThat(shouldRead)
-        .as("Should not read: id range below lower bound (30 is not < 30)")
-        .isFalse();
-
-    shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE + 1));
-    assertThat(shouldRead).as("Should read: one possible id").isTrue();
-
-    shouldRead = shouldRead(lessThan("id", INT_MAX_VALUE));
-    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
-  }
-
-  @Test
-  public void testIntegerLtEq() {
-    boolean shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE - 25));
-    assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse();
-
-    shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE - 1));
-    assertThat(shouldRead).as("Should not read: id range below lower bound (29 < 30)").isFalse();
-
-    shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE));
-    assertThat(shouldRead).as("Should read: one possible id").isTrue();
-
-    shouldRead = shouldRead(lessThanOrEqual("id", INT_MAX_VALUE));
-    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
-  }
-
-  @Test
-  public void testIntegerGt() {
-    boolean shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE + 6));
-    assertThat(shouldRead).as("Should not read: id range above upper bound (85 > 79)").isFalse();
-
-    shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE));
-    assertThat(shouldRead)
-        .as("Should not read: id range above upper bound (79 is not > 79)")
-        .isFalse();
-
-    shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE - 1));
-    assertThat(shouldRead).as("Should read: one possible id").isTrue();
-
-    shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE - 4));
-    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
-  }
-
-  @Test
-  public void testIntegerGtEq() {
-    boolean shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE + 6));
-    assertThat(shouldRead).as("Should not read: id range above upper bound (85 > 79)").isFalse();
-
-    shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE + 1));
-    assertThat(shouldRead).as("Should not read: id range above upper bound (80 > 79)").isFalse();
-
-    shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE));
-    assertThat(shouldRead).as("Should read: one possible id").isTrue();
-
-    shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE - 4));
-    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
-  }
-
-  @Test
-  public void testIntegerEq() {
-    boolean shouldRead = shouldRead(equal("id", INT_MIN_VALUE - 25));
-    assertThat(shouldRead).as("Should not read: id below lower bound").isFalse();
-
-    shouldRead = shouldRead(equal("id", INT_MIN_VALUE - 1));
-    assertThat(shouldRead).as("Should not read: id below lower bound").isFalse();
-
-    shouldRead = shouldRead(equal("id", INT_MIN_VALUE));
-    assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue();
-
-    shouldRead = shouldRead(equal("id", INT_MAX_VALUE - 4));
-    assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue();
-
-    shouldRead = shouldRead(equal("id", INT_MAX_VALUE));
-    assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue();
-
-    shouldRead = shouldRead(equal("id", INT_MAX_VALUE + 1));
-    assertThat(shouldRead).as("Should not read: id above upper bound").isFalse();
-
-    shouldRead = shouldRead(equal("id", INT_MAX_VALUE + 6));
-    assertThat(shouldRead).as("Should not read: id above upper bound").isFalse();
-  }
-
-  @Test
-  public void testIntegerNotEq() {
-    boolean shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE - 25));
-    assertThat(shouldRead).as("Should read: id below lower bound").isTrue();
-
-    shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE - 1));
-    assertThat(shouldRead).as("Should read: id below lower bound").isTrue();
-
-    shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE));
-    assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue();
-
-    shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE - 4));
-    assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue();
-
-    shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE));
-    assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue();
-
-    shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 1));
-    assertThat(shouldRead).as("Should read: id above upper bound").isTrue();
-
-    shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 6));
-    assertThat(shouldRead).as("Should read: id above upper bound").isTrue();
-  }
-
-  @Test
-  public void testIntegerNotEqRewritten() {
-    boolean shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE - 25)));
-    assertThat(shouldRead).as("Should read: id below lower bound").isTrue();
-
-    shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE - 1)));
-    assertThat(shouldRead).as("Should read: id below lower bound").isTrue();
-
-    shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE)));
-    assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue();
-
-    shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE - 4)));
-    assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue();
-
-    shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE)));
-    assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue();
-
-    shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE + 1)));
-    assertThat(shouldRead).as("Should read: id above upper bound").isTrue();
-
-    shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE + 6)));
-    assertThat(shouldRead).as("Should read: id above upper bound").isTrue();
-  }
-
-  @Test
-  public void testStructFieldLt() {
-    boolean shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE - 25));
-    assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse();
-
-    shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE));
-    assertThat(shouldRead)
-        .as("Should not read: id range below lower bound (30 is not < 30)")
-        .isFalse();
-
-    shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE + 1));
-    assertThat(shouldRead).as("Should read: one possible id").isTrue();
-
-    shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MAX_VALUE));
-    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
-  }
-
-  @Test
-  public void testStructFieldLtEq() {
-    boolean shouldRead =
-        shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 25));
-    assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse();
-
-    shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 1));
-    assertThat(shouldRead).as("Should not read: id range below lower bound (29 < 30)").isFalse();
-
-    shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE));
-    assertThat(shouldRead).as("Should read: one possible id").isTrue();
-
-    shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE));
-    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
-  }
-
-  @Test
-  public void testStructFieldGt() {
-    boolean shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE + 6));
-    assertThat(shouldRead).as("Should not read: id range above upper bound (85 > 79)").isFalse();
-
-    shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE));
-    assertThat(shouldRead)
-        .as("Should not read: id range above upper bound (79 is not > 79)")
-        .isFalse();
-
-    shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 1));
-    assertThat(shouldRead).as("Should read: one possible id").isTrue();
-
-    shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 4));
-    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
-  }
-
-  @Test
-  public void testStructFieldGtEq() {
-    boolean shouldRead =
-        shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 6));
-    assertThat(shouldRead).as("Should not read: id range above upper bound (85 > 79)").isFalse();
-
-    shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 1));
-    assertThat(shouldRead).as("Should not read: id range above upper bound (80 > 79)").isFalse();
-
-    shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE));
-    assertThat(shouldRead).as("Should read: one possible id").isTrue();
-
-    shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE - 4));
-    assertThat(shouldRead).as("Should read: many possible ids").isTrue();
-  }
-
-  @Test
-  public void testStructFieldEq() {
-    boolean shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE - 25));
-    assertThat(shouldRead).as("Should not read: id below lower bound").isFalse();
-
-    shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE - 1));
-    assertThat(shouldRead).as("Should not read: id below lower bound").isFalse();
-
-    shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE));
-    assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue();
-
-    shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE - 4));
-    assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue();
-
-    shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE));
-    assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue();
-
-    shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE + 1));
-    assertThat(shouldRead).as("Should not read: id above upper bound").isFalse();
-
-    shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE + 6));
-    assertThat(shouldRead).as("Should not read: id above upper bound").isFalse();
-  }
-
-  @Test
-  public void testStructFieldNotEq() {
-    boolean shouldRead =
shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE - 25)); - assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE - 4)); - assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE)); - assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); - - shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); - - shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE + 6)); - assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); - } - - @Test - public void testCaseInsensitive() { - boolean shouldRead = shouldRead(equal("ID", INT_MIN_VALUE - 25), false); - assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); - } - - @Test - public void testIntegerIn() { - boolean shouldRead = shouldRead(in("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)); - assertThat(shouldRead).as("Should not read: id below lower bound (5 < 30, 6 < 30)").isFalse(); - - shouldRead = shouldRead(in("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should not read: id below lower bound (28 < 30, 29 < 30)").isFalse(); - - shouldRead = shouldRead(in("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: id equal to lower bound (30 == 30)").isTrue(); - - shouldRead = shouldRead(in("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)); - assertThat(shouldRead) - .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)") - .isTrue(); - - shouldRead = shouldRead(in("id", INT_MAX_VALUE, INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should read: id equal to upper bound (79 == 79)").isTrue(); - - shouldRead = shouldRead(in("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)); - assertThat(shouldRead).as("Should not read: id above upper bound (80 > 79, 81 > 79)").isFalse(); - - shouldRead = shouldRead(in("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)); - assertThat(shouldRead).as("Should not read: id above upper bound (85 > 79, 86 > 79)").isFalse(); - - shouldRead = shouldRead(in("all_nulls", 1, 2)); - assertThat(shouldRead).as("Should skip: in on all nulls column").isFalse(); - - shouldRead = shouldRead(in("some_nulls", "aaa", "some")); - assertThat(shouldRead).as("Should read: in on some nulls column").isTrue(); - - shouldRead = shouldRead(in("no_nulls", "aaa", "")); - assertThat(shouldRead).as("Should read: in on no nulls column").isTrue(); - } - - @Test - public void testIntegerNotIn() { - boolean shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)); - assertThat(shouldRead).as("Should read: id below lower bound (5 < 30, 6 < 30)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)); - assertThat(shouldRead).as("Should read: id below lower bound (28 < 30, 29 < 30)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)); - assertThat(shouldRead).as("Should read: id equal to lower bound (30 == 
30)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)); - assertThat(shouldRead) - .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)") - .isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MAX_VALUE, INT_MAX_VALUE + 1)); - assertThat(shouldRead).as("Should read: id equal to upper bound (79 == 79)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)); - assertThat(shouldRead).as("Should read: id above upper bound (80 > 79, 81 > 79)").isTrue(); - - shouldRead = shouldRead(notIn("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)); - assertThat(shouldRead).as("Should read: id above upper bound (85 > 79, 86 > 79)").isTrue(); - - shouldRead = shouldRead(notIn("all_nulls", 1, 2)); - assertThat(shouldRead).as("Should read: notIn on all nulls column").isTrue(); - - shouldRead = shouldRead(notIn("some_nulls", "aaa", "some")); - assertThat(shouldRead).as("Should read: notIn on some nulls column").isTrue(); - } - - @Test - public void testSomeNullsNotEq() { - boolean shouldRead = shouldRead(notEqual("some_nulls", "some")); - assertThat(shouldRead).as("Should read: notEqual on some nulls column").isTrue(); - } -} diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java new file mode 100644 index 000000000000..a475fecf004b --- /dev/null +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -0,0 +1,1150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.data; + +import static org.apache.iceberg.avro.AvroSchemaUtil.convert; +import static org.apache.iceberg.expressions.Expressions.and; +import static org.apache.iceberg.expressions.Expressions.equal; +import static org.apache.iceberg.expressions.Expressions.greaterThan; +import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.in; +import static org.apache.iceberg.expressions.Expressions.isNaN; +import static org.apache.iceberg.expressions.Expressions.isNull; +import static org.apache.iceberg.expressions.Expressions.lessThan; +import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual; +import static org.apache.iceberg.expressions.Expressions.not; +import static org.apache.iceberg.expressions.Expressions.notEqual; +import static org.apache.iceberg.expressions.Expressions.notIn; +import static org.apache.iceberg.expressions.Expressions.notNaN; +import static org.apache.iceberg.expressions.Expressions.notNull; +import static org.apache.iceberg.expressions.Expressions.notStartsWith; +import static org.apache.iceberg.expressions.Expressions.or; +import static org.apache.iceberg.expressions.Expressions.startsWith; +import static org.apache.iceberg.expressions.Expressions.truncate; +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; + +import java.io.File; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import java.util.UUID; +import org.apache.avro.generic.GenericData.Record; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.Files; +import org.apache.iceberg.Parameter; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; +import org.apache.iceberg.Schema; +import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.data.orc.GenericOrcReader; +import org.apache.iceberg.data.orc.GenericOrcWriter; +import org.apache.iceberg.data.parquet.GenericParquetWriter; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.io.CloseableIterable; +import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.io.SeekableInputStream; +import org.apache.iceberg.orc.ORC; +import org.apache.iceberg.parquet.Parquet; +import org.apache.iceberg.parquet.ParquetMetricsRowGroupFilter; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.DoubleType; +import org.apache.iceberg.types.Types.FloatType; +import org.apache.iceberg.types.Types.IntegerType; +import org.apache.iceberg.types.Types.StringType; +import org.apache.iceberg.variants.ShreddedObject; +import org.apache.iceberg.variants.Variant; +import org.apache.iceberg.variants.VariantMetadata; +import org.apache.iceberg.variants.Variants; +import org.apache.orc.OrcFile; +import org.apache.orc.Reader; +import 
org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.io.DelegatingSeekableInputStream;
+import org.apache.parquet.schema.MessageType;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+
+@ExtendWith(ParameterizedTestExtension.class)
+public class TestMetricsRowGroupFilter {
+
+  @Parameters(name = "fileFormat = {0}")
+  public static List<Object> parameters() {
+    return Arrays.asList(FileFormat.PARQUET, FileFormat.ORC);
+  }
+
+  private static final Types.StructType STRUCT_FIELD_TYPE =
+      Types.StructType.of(Types.NestedField.required(8, "int_field", IntegerType.get()));
+
+  private static final Schema SCHEMA =
+      new Schema(
+          required(1, "id", IntegerType.get()),
+          optional(2, "no_stats_parquet", StringType.get()),
+          required(3, "required", StringType.get()),
+          optional(4, "all_nulls", DoubleType.get()),
+          optional(5, "some_nulls", StringType.get()),
+          optional(6, "no_nulls", StringType.get()),
+          optional(7, "struct_not_null", STRUCT_FIELD_TYPE),
+          optional(9, "not_in_file", FloatType.get()),
+          optional(10, "str", StringType.get()),
+          optional(
+              11,
+              "map_not_null",
+              Types.MapType.ofRequired(12, 13, StringType.get(), IntegerType.get())),
+          optional(14, "all_nans", DoubleType.get()),
+          optional(15, "some_nans", FloatType.get()),
+          optional(16, "no_nans", DoubleType.get()),
+          optional(17, "some_double_nans", DoubleType.get()));
+
+  private static final Types.StructType UNDERSCORE_STRUCT_FIELD_TYPE =
+      Types.StructType.of(Types.NestedField.required(8, "_int_field", IntegerType.get()));
+
+  private static final Schema FILE_SCHEMA =
+      new Schema(
+          required(1, "_id", IntegerType.get()),
+          optional(2, "_no_stats_parquet", StringType.get()),
+          required(3, "_required", StringType.get()),
+          optional(4, "_all_nulls", DoubleType.get()),
+          optional(5, "_some_nulls", StringType.get()),
+          optional(6, "_no_nulls", StringType.get()),
+          optional(7, "_struct_not_null", UNDERSCORE_STRUCT_FIELD_TYPE),
+          optional(10, "_str", StringType.get()),
+          optional(14, "_all_nans", Types.DoubleType.get()),
+          optional(15, "_some_nans", FloatType.get()),
+          optional(16, "_no_nans", Types.DoubleType.get()),
+          optional(17, "_some_double_nans", Types.DoubleType.get()));
+
+  private static final String TOO_LONG_FOR_STATS_PARQUET;
+
+  static {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < 200; i += 1) {
+      sb.append(UUID.randomUUID());
+    }
+    TOO_LONG_FOR_STATS_PARQUET = sb.toString();
+  }
+
+  private static final int INT_MIN_VALUE = 30;
+  private static final int INT_MAX_VALUE = 79;
+
+  private File orcFile = null;
+  private MessageType parquetSchema = null;
+  private BlockMetaData rowGroupMetadata = null;
+
+  @Parameter private FileFormat format;
+  @TempDir private File tempDir;
+
+  @BeforeEach
+  public void createInputFile() throws IOException {
+    switch (format) {
+      case ORC:
+        createOrcInputFile();
+        break;
+      case PARQUET:
+        createParquetInputFile();
+        break;
+      default:
+        throw new UnsupportedOperationException(
+            "Row group filter tests not supported for " + format);
+    }
+  }
+
+  public void createOrcInputFile() throws IOException {
+    this.orcFile = new File(tempDir, "junit" + System.nanoTime());
+
+    OutputFile outFile = Files.localOutput(orcFile);
+    try (FileAppender<GenericRecord> appender =
+        ORC.write(outFile)
+            .schema(FILE_SCHEMA)
+            .createWriterFunc(GenericOrcWriter::buildWriter)
+            .build()) {
+      GenericRecord record = GenericRecord.create(FILE_SCHEMA);
+      // create 50 records
+      for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) {
+        record.setField("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0
+        record.setField(
+            "_no_stats_parquet",
+            TOO_LONG_FOR_STATS_PARQUET); // value longer than 4k will produce no stats
+        // in Parquet, but will produce stats for ORC
+        record.setField("_required", "req"); // required, always non-null
+        record.setField("_all_nulls", null); // never non-null
+        record.setField("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values
+        record.setField("_no_nulls", ""); // optional, but always non-null
+        record.setField("_str", i + "str" + i);
+        record.setField("_all_nans", Double.NaN); // never non-nan
+        record.setField("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values
+        record.setField(
+            "_some_double_nans", (i % 10 == 0) ? Double.NaN : 2D); // includes some nan values
+        record.setField("_no_nans", 3D); // optional, but always non-nan
+
+        GenericRecord structNotNull = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE);
+        structNotNull.setField("_int_field", INT_MIN_VALUE + i);
+        record.setField("_struct_not_null", structNotNull); // struct with int
+
+        appender.add(record);
+      }
+    }
+
+    InputFile inFile = Files.localInput(orcFile);
+    try (Reader reader =
+        OrcFile.createReader(
+            new Path(inFile.location()), OrcFile.readerOptions(new Configuration()))) {
+      assertThat(reader.getStripes()).as("Should create only one stripe").hasSize(1);
+    }
+
+    orcFile.deleteOnExit();
+  }
+
+  private void createParquetInputFile() throws IOException {
+    File parquetFile = new File(tempDir, "junit" + System.nanoTime());
+
+    // build struct field schema
+    org.apache.avro.Schema structSchema = AvroSchemaUtil.convert(UNDERSCORE_STRUCT_FIELD_TYPE);
+
+    OutputFile outFile = Files.localOutput(parquetFile);
+    try (FileAppender<Record> appender = Parquet.write(outFile).schema(FILE_SCHEMA).build()) {
+      GenericRecordBuilder builder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table"));
+      // create 50 records
+      for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) {
+        builder.set("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0
+        builder.set(
+            "_no_stats_parquet",
+            TOO_LONG_FOR_STATS_PARQUET); // value longer than 4k will produce no stats
+        // in Parquet
+        builder.set("_required", "req"); // required, always non-null
+        builder.set("_all_nulls", null); // never non-null
+        builder.set("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values
+        builder.set("_no_nulls", ""); // optional, but always non-null
+        builder.set("_all_nans", Double.NaN); // never non-nan
+        builder.set("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values
+        builder.set(
+            "_some_double_nans", (i % 10 == 0) ? Double.NaN : 2D); // includes some nan values
+        builder.set("_no_nans", 3D); // optional, but always non-nan
+        builder.set("_str", i + "str" + i);
+
+        Record structNotNull = new Record(structSchema);
+        structNotNull.put("_int_field", INT_MIN_VALUE + i);
+        builder.set("_struct_not_null", structNotNull); // struct with int
+
+        appender.add(builder.build());
+      }
+    }
+
+    InputFile inFile = Files.localInput(parquetFile);
+    try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) {
+      assertThat(reader.getRowGroups()).as("Should create only one row group").hasSize(1);
+      rowGroupMetadata = reader.getRowGroups().get(0);
+      parquetSchema = reader.getFileMetaData().getSchema();
+    }
+
+    parquetFile.deleteOnExit();
+  }
+
+  @TestTemplate
+  public void testAllNulls() {
+    boolean shouldRead;
+
+    shouldRead = shouldRead(notNull("all_nulls"));
+    assertThat(shouldRead).as("Should skip: no non-null value in all null column").isFalse();
+
+    shouldRead = shouldRead(notNull("some_nulls"));
+    assertThat(shouldRead)
+        .as("Should read: column with some nulls contains a non-null value")
+        .isTrue();
+
+    shouldRead = shouldRead(notNull("no_nulls"));
+    assertThat(shouldRead).as("Should read: non-null column contains a non-null value").isTrue();
+
+    shouldRead = shouldRead(notNull("map_not_null"));
+    assertThat(shouldRead).as("Should read: map type is not skipped").isTrue();
+
+    shouldRead = shouldRead(notNull("struct_not_null"));
+    assertThat(shouldRead).as("Should read: struct type is not skipped").isTrue();
+  }
+
+  @TestTemplate
+  public void testNoNulls() {
+    boolean shouldRead = shouldRead(isNull("all_nulls"));
+    assertThat(shouldRead).as("Should read: at least one null value in all null column").isTrue();
+
+    shouldRead = shouldRead(isNull("some_nulls"));
+    assertThat(shouldRead).as("Should read: column with some nulls contains a null value").isTrue();
+
+    shouldRead = shouldRead(isNull("no_nulls"));
+    assertThat(shouldRead).as("Should skip: non-null column contains no null values").isFalse();
+
+    shouldRead = shouldRead(isNull("map_not_null"));
+    assertThat(shouldRead).as("Should read: map type is not skipped").isTrue();
+
+    shouldRead = shouldRead(isNull("struct_not_null"));
+    assertThat(shouldRead).as("Should read: struct type is not skipped").isTrue();
+  }
+
+  @TestTemplate
+  public void testFloatWithNan() {
+    // NaN's should break Parquet's Min/Max stats, so we should be reading in all cases
+    boolean shouldRead = shouldRead(greaterThan("some_nans", 1.0));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(greaterThanOrEqual("some_nans", 1.0));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(lessThan("some_nans", 3.0));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(lessThanOrEqual("some_nans", 1.0));
+    assertThat(shouldRead).isTrue();
+
+    shouldRead = shouldRead(equal("some_nans", 2.0));
+    assertThat(shouldRead).isTrue();
+  }
+
+  @TestTemplate
+  public void testDoubleWithNan() {
+    boolean shouldRead = shouldRead(greaterThan("some_double_nans", 1.0));
+    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
+
+    shouldRead = shouldRead(greaterThanOrEqual("some_double_nans", 1.0));
+    assertThat(shouldRead)
+        .as("Should read: column with some nans contains the target value")
+        .isTrue();
+
+    shouldRead = shouldRead(lessThan("some_double_nans", 3.0));
+    assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue();
+
+    shouldRead =
shouldRead(lessThanOrEqual("some_double_nans", 1.0)); + assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue(); + + shouldRead = shouldRead(equal("some_double_nans", 2.0)); + assertThat(shouldRead).as("Should read: column with some nans contains target value").isTrue(); + } + + @TestTemplate + public void testIsNaN() { + boolean shouldRead = shouldRead(isNaN("all_nans")); + assertThat(shouldRead) + .as("Should read: NaN counts are not tracked in Parquet metrics") + .isTrue(); + + shouldRead = shouldRead(isNaN("some_nans")); + assertThat(shouldRead) + .as("Should read: NaN counts are not tracked in Parquet metrics") + .isTrue(); + + shouldRead = shouldRead(isNaN("no_nans")); + switch (format) { + case ORC: + assertThat(shouldRead) + .as("Should read 0 rows due to the ORC filter push-down feature") + .isFalse(); + break; + case PARQUET: + assertThat(shouldRead) + .as("Should read: NaN counts are not tracked in Parquet metrics") + .isTrue(); + break; + default: + throw new UnsupportedOperationException( + "Row group filter tests not supported for " + format); + } + + shouldRead = shouldRead(isNaN("all_nulls")); + assertThat(shouldRead).as("Should skip: all null column will not contain nan value").isFalse(); + } + + @TestTemplate + public void testNotNaN() { + boolean shouldRead = shouldRead(notNaN("all_nans")); + assertThat(shouldRead) + .as("Should read: NaN counts are not tracked in Parquet metrics") + .isTrue(); + + shouldRead = shouldRead(notNaN("some_nans")); + assertThat(shouldRead) + .as("Should read: NaN counts are not tracked in Parquet metrics") + .isTrue(); + + shouldRead = shouldRead(notNaN("no_nans")); + assertThat(shouldRead) + .as("Should read: NaN counts are not tracked in Parquet metrics") + .isTrue(); + + shouldRead = shouldRead(notNaN("all_nulls")); + assertThat(shouldRead) + .as("Should read: NaN counts are not tracked in Parquet metrics") + .isTrue(); + } + + @TestTemplate + public void testRequiredColumn() { + boolean shouldRead = shouldRead(notNull("required")); + assertThat(shouldRead).as("Should read: required columns are always non-null").isTrue(); + + shouldRead = shouldRead(isNull("required")); + assertThat(shouldRead).as("Should skip: required columns are always non-null").isFalse(); + } + + @TestTemplate + public void testMissingColumn() { + assertThatThrownBy(() -> shouldRead(lessThan("missing", 5))) + .as("Should complain about missing column in expression") + .isInstanceOf(ValidationException.class) + .hasMessageStartingWith("Cannot find field 'missing'"); + } + + @TestTemplate + public void testColumnNotInFile() { + assumeThat(format) + .as( + "If a column is not in file, ORC does NOT try to apply predicates assuming null values for the column") + .isNotEqualTo(FileFormat.ORC); + + Expression[] cannotMatch = + new Expression[] { + lessThan("not_in_file", 1.0f), lessThanOrEqual("not_in_file", 1.0f), + equal("not_in_file", 1.0f), greaterThan("not_in_file", 1.0f), + greaterThanOrEqual("not_in_file", 1.0f), notNull("not_in_file") + }; + + for (Expression expr : cannotMatch) { + boolean shouldRead = shouldRead(expr); + assertThat(shouldRead) + .as("Should skip when column is not in file (all nulls): " + expr) + .isFalse(); + } + + Expression[] canMatch = new Expression[] {isNull("not_in_file"), notEqual("not_in_file", 1.0f)}; + + for (Expression expr : canMatch) { + boolean shouldRead = shouldRead(expr); + assertThat(shouldRead) + .as("Should read when column is not in file (all nulls): " + expr) + .isTrue(); + } + } + + 
@TestTemplate + public void testMissingStatsParquet() { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + Expression[] exprs = + new Expression[] { + lessThan("no_stats_parquet", "a"), + lessThanOrEqual("no_stats_parquet", "b"), + equal("no_stats_parquet", "c"), + greaterThan("no_stats_parquet", "d"), + greaterThanOrEqual("no_stats_parquet", "e"), + notEqual("no_stats_parquet", "f"), + isNull("no_stats_parquet"), + notNull("no_stats_parquet"), + startsWith("no_stats_parquet", "a"), + notStartsWith("no_stats_parquet", "a") + }; + + for (Expression expr : exprs) { + boolean shouldRead = shouldRead(expr); + assertThat(shouldRead).as("Should read when missing stats for expr: " + expr).isTrue(); + } + } + + @TestTemplate + public void testZeroRecordFileParquet() { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + BlockMetaData emptyBlock = new BlockMetaData(); + emptyBlock.setRowCount(0); + + Expression[] exprs = + new Expression[] { + lessThan("id", 5), + lessThanOrEqual("id", 30), + equal("id", 70), + greaterThan("id", 78), + greaterThanOrEqual("id", 90), + notEqual("id", 101), + isNull("some_nulls"), + notNull("some_nulls") + }; + + for (Expression expr : exprs) { + boolean shouldRead = shouldReadParquet(expr, true, parquetSchema, emptyBlock); + assertThat(shouldRead).as("Should never read 0-record file: " + expr).isFalse(); + } + } + + @TestTemplate + public void testNot() { + // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out + boolean shouldRead = shouldRead(not(lessThan("id", INT_MIN_VALUE - 25))); + assertThat(shouldRead).as("Should read: not(false)").isTrue(); + + shouldRead = shouldRead(not(greaterThan("id", INT_MIN_VALUE - 25))); + assertThat(shouldRead).as("Should skip: not(true)").isFalse(); + } + + @TestTemplate + public void testAnd() { + // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out + boolean shouldRead = + shouldRead( + and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MIN_VALUE - 30))); + assertThat(shouldRead).as("Should skip: and(false, true)").isFalse(); + + shouldRead = + shouldRead( + and(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1))); + assertThat(shouldRead).as("Should skip: and(false, false)").isFalse(); + + shouldRead = + shouldRead( + and(greaterThan("id", INT_MIN_VALUE - 25), lessThanOrEqual("id", INT_MIN_VALUE))); + assertThat(shouldRead).as("Should read: and(true, true)").isTrue(); + } + + @TestTemplate + public void testOr() { + // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out + boolean shouldRead = + shouldRead( + or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE + 1))); + assertThat(shouldRead).as("Should skip: or(false, false)").isFalse(); + + shouldRead = + shouldRead( + or(lessThan("id", INT_MIN_VALUE - 25), greaterThanOrEqual("id", INT_MAX_VALUE - 19))); + assertThat(shouldRead).as("Should read: or(false, true)").isTrue(); + } + + @TestTemplate + public void testIntegerLt() { + boolean shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE - 25)); + assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse(); + + shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE)); + assertThat(shouldRead) + .as("Should not read: id range below lower bound (30 is not < 30)") + .isFalse(); + + shouldRead = shouldRead(lessThan("id", INT_MIN_VALUE + 1)); + assertThat(shouldRead).as("Should read: one possible 
id").isTrue(); + + shouldRead = shouldRead(lessThan("id", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: may possible ids").isTrue(); + } + + @TestTemplate + public void testIntegerLtEq() { + boolean shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE - 25)); + assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse(); + + shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should not read: id range below lower bound (29 < 30)").isFalse(); + + shouldRead = shouldRead(lessThanOrEqual("id", INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: one possible id").isTrue(); + + shouldRead = shouldRead(lessThanOrEqual("id", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: many possible ids").isTrue(); + } + + @TestTemplate + public void testIntegerGt() { + boolean shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should not read: id range above upper bound (85 < 79)").isFalse(); + + shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE)); + assertThat(shouldRead) + .as("Should not read: id range above upper bound (79 is not > 79)") + .isFalse(); + + shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE - 1)); + assertThat(shouldRead).as("Should read: one possible id").isTrue(); + + shouldRead = shouldRead(greaterThan("id", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: may possible ids").isTrue(); + } + + @TestTemplate + public void testIntegerGtEq() { + boolean shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should not read: id range above upper bound (85 < 79)").isFalse(); + + shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should not read: id range above upper bound (80 > 79)").isFalse(); + + shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: one possible id").isTrue(); + + shouldRead = shouldRead(greaterThanOrEqual("id", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: may possible ids").isTrue(); + } + + @TestTemplate + public void testIntegerEq() { + boolean shouldRead = shouldRead(equal("id", INT_MIN_VALUE - 25)); + assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); + + shouldRead = shouldRead(equal("id", INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); + + shouldRead = shouldRead(equal("id", INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); + + shouldRead = shouldRead(equal("id", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); + + shouldRead = shouldRead(equal("id", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); + + shouldRead = shouldRead(equal("id", INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); + + shouldRead = shouldRead(equal("id", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); + } + + @TestTemplate + public void testIntegerNotEq() { + boolean shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE - 25)); + assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); + + shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should read: id below lower 
bound").isTrue(); + + shouldRead = shouldRead(notEqual("id", INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); + + shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); + + shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); + + shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); + + shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); + } + + @TestTemplate + public void testIntegerNotEqRewritten() { + boolean shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE - 25))); + assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); + + shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE - 1))); + assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); + + shouldRead = shouldRead(not(equal("id", INT_MIN_VALUE))); + assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); + + shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE - 4))); + assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); + + shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE))); + assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); + + shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE + 1))); + assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); + + shouldRead = shouldRead(not(equal("id", INT_MAX_VALUE + 6))); + assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); + } + + @TestTemplate + public void testStructFieldLt() { + boolean shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE - 25)); + assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse(); + + shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE)); + assertThat(shouldRead) + .as("Should not read: id range below lower bound (30 is not < 30)") + .isFalse(); + + shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MIN_VALUE + 1)); + assertThat(shouldRead).as("Should read: one possible id").isTrue(); + + shouldRead = shouldRead(lessThan("struct_not_null.int_field", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: may possible ids").isTrue(); + } + + @TestTemplate + public void testStructFieldLtEq() { + boolean shouldRead = + shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 25)); + assertThat(shouldRead).as("Should not read: id range below lower bound (5 < 30)").isFalse(); + + shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should not read: id range below lower bound (29 < 30)").isFalse(); + + shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: one possible id").isTrue(); + + shouldRead = shouldRead(lessThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: many possible ids").isTrue(); + } + + @TestTemplate + public void testStructFieldGt() { + boolean shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should not read: id range above upper bound (85 < 
79)").isFalse(); + + shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE)); + assertThat(shouldRead) + .as("Should not read: id range above upper bound (79 is not > 79)") + .isFalse(); + + shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 1)); + assertThat(shouldRead).as("Should read: one possible id").isTrue(); + + shouldRead = shouldRead(greaterThan("struct_not_null.int_field", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: may possible ids").isTrue(); + } + + @TestTemplate + public void testStructFieldGtEq() { + boolean shouldRead = + shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should not read: id range above upper bound (85 < 79)").isFalse(); + + shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should not read: id range above upper bound (80 > 79)").isFalse(); + + shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: one possible id").isTrue(); + + shouldRead = shouldRead(greaterThanOrEqual("struct_not_null.int_field", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: may possible ids").isTrue(); + } + + @TestTemplate + public void testStructFieldEq() { + boolean shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE - 25)); + assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); + + shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); + + shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); + + shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); + + shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); + + shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); + + shouldRead = shouldRead(equal("struct_not_null.int_field", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should not read: id above upper bound").isFalse(); + } + + @TestTemplate + public void testStructFieldNotEq() { + boolean shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE - 25)); + assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); + + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should read: id below lower bound").isTrue(); + + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: id equal to lower bound").isTrue(); + + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE - 4)); + assertThat(shouldRead).as("Should read: id between lower and upper bounds").isTrue(); + + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE)); + assertThat(shouldRead).as("Should read: id equal to upper bound").isTrue(); + + shouldRead = shouldRead(notEqual("id", INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); 
+ + shouldRead = shouldRead(notEqual("struct_not_null.int_field", INT_MAX_VALUE + 6)); + assertThat(shouldRead).as("Should read: id above upper bound").isTrue(); + } + + @TestTemplate + public void testCaseInsensitive() { + boolean shouldRead = shouldRead(equal("ID", INT_MIN_VALUE - 25), false); + assertThat(shouldRead).as("Should not read: id below lower bound").isFalse(); + } + + @TestTemplate + public void testStringStartsWith() { + assumeThat(format) + .as("ORC row group filter does not support StringStartsWith") + .isNotEqualTo(FileFormat.ORC); + + boolean shouldRead = shouldRead(startsWith("str", "1")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(startsWith("str", "0st")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(startsWith("str", "1str1")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(startsWith("str", "1str1_xgd")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(startsWith("str", "2str")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(startsWith("str", "9xstr")); + assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); + + shouldRead = shouldRead(startsWith("str", "0S")); + assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); + + shouldRead = shouldRead(startsWith("str", "x")); + assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); + + shouldRead = shouldRead(startsWith("str", "9str9aaa")); + assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); + } + + @TestTemplate + public void testStringNotStartsWith() { + assumeThat(format) + .as("ORC row group filter does not support StringStartsWith") + .isNotEqualTo(FileFormat.ORC); + + boolean shouldRead = shouldRead(notStartsWith("str", "1")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(notStartsWith("str", "0st")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(notStartsWith("str", "1str1")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(notStartsWith("str", "1str1_xgd")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(notStartsWith("str", "2str")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(notStartsWith("str", "9xstr")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(notStartsWith("required", "r")); + assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); + + shouldRead = shouldRead(notStartsWith("required", "requ")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(notStartsWith("some_nulls", "ssome")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + + shouldRead = shouldRead(notStartsWith("some_nulls", "som")); + assertThat(shouldRead).as("Should read: range matches").isTrue(); + } + + @TestTemplate + public void testIntegerIn() { + boolean shouldRead = shouldRead(in("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)); + assertThat(shouldRead).as("Should not read: id below lower bound (5 < 30, 6 < 30)").isFalse(); + + shouldRead = shouldRead(in("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)); + 
assertThat(shouldRead).as("Should not read: id below lower bound (28 < 30, 29 < 30)").isFalse(); + + shouldRead = shouldRead(in("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: id equal to lower bound (30 == 30)").isTrue(); + + shouldRead = shouldRead(in("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)); + assertThat(shouldRead) + .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)") + .isTrue(); + + shouldRead = shouldRead(in("id", INT_MAX_VALUE, INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should read: id equal to upper bound (79 == 79)").isTrue(); + + shouldRead = shouldRead(in("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)); + assertThat(shouldRead).as("Should not read: id above upper bound (80 > 79, 81 > 79)").isFalse(); + + shouldRead = shouldRead(in("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)); + assertThat(shouldRead).as("Should not read: id above upper bound (85 > 79, 86 > 79)").isFalse(); + + shouldRead = shouldRead(in("all_nulls", 1, 2)); + assertThat(shouldRead).as("Should skip: in on all nulls column").isFalse(); + + shouldRead = shouldRead(in("some_nulls", "aaa", "some")); + assertThat(shouldRead).as("Should read: in on some nulls column").isTrue(); + + shouldRead = shouldRead(in("no_nulls", "aaa", "")); + assertThat(shouldRead).as("Should read: in on no nulls column").isTrue(); + } + + @TestTemplate + public void testIntegerNotIn() { + boolean shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 25, INT_MIN_VALUE - 24)); + assertThat(shouldRead).as("Should read: id below lower bound (5 < 30, 6 < 30)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 2, INT_MIN_VALUE - 1)); + assertThat(shouldRead).as("Should read: id below lower bound (28 < 30, 29 < 30)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MIN_VALUE - 1, INT_MIN_VALUE)); + assertThat(shouldRead).as("Should read: id equal to lower bound (30 == 30)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MAX_VALUE - 4, INT_MAX_VALUE - 3)); + assertThat(shouldRead) + .as("Should read: id between lower and upper bounds (30 < 75 < 79, 30 < 76 < 79)") + .isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MAX_VALUE, INT_MAX_VALUE + 1)); + assertThat(shouldRead).as("Should read: id equal to upper bound (79 == 79)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MAX_VALUE + 1, INT_MAX_VALUE + 2)); + assertThat(shouldRead).as("Should read: id above upper bound (80 > 79, 81 > 79)").isTrue(); + + shouldRead = shouldRead(notIn("id", INT_MAX_VALUE + 6, INT_MAX_VALUE + 7)); + assertThat(shouldRead).as("Should read: id above upper bound (85 > 79, 86 > 79)").isTrue(); + + shouldRead = shouldRead(notIn("all_nulls", 1, 2)); + assertThat(shouldRead).as("Should read: notIn on all nulls column").isTrue(); + + shouldRead = shouldRead(notIn("some_nulls", "aaa", "some")); + assertThat(shouldRead).as("Should read: notIn on some nulls column").isTrue(); + + shouldRead = shouldRead(notIn("no_nulls", "aaa", "")); + if (format == FileFormat.PARQUET) { + // no_nulls column has all values == "", so notIn("no_nulls", "") should always be false and + // so should be skipped + // However, the metrics evaluator in Parquets always reads row group for a notIn filter + assertThat(shouldRead).as("Should read: notIn on no nulls column").isTrue(); + } else { + assertThat(shouldRead).as("Should skip: notIn on no nulls column").isFalse(); + } + } + + @TestTemplate + public void testSomeNullsNotEq() { + boolean shouldRead = shouldRead(notEqual("some_nulls", 
"some")); + assertThat(shouldRead).as("Should read: notEqual on some nulls column").isTrue(); + } + + @TestTemplate + public void testInLimitParquet() { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + boolean shouldRead = shouldRead(in("id", 1, 2)); + assertThat(shouldRead).as("Should not read if IN is evaluated").isFalse(); + + List ids = Lists.newArrayListWithExpectedSize(400); + for (int id = -400; id <= 0; id++) { + ids.add(id); + } + + shouldRead = shouldRead(in("id", ids)); + assertThat(shouldRead).as("Should read if IN is not evaluated").isTrue(); + } + + @TestTemplate + public void testParquetTypePromotion() { + assumeThat(format).as("Only valid for Parquet").isEqualTo(FileFormat.PARQUET); + + Schema promotedSchema = new Schema(required(1, "id", Types.LongType.get())); + boolean shouldRead = + new ParquetMetricsRowGroupFilter(promotedSchema, equal("id", INT_MIN_VALUE + 1), true) + .shouldRead(parquetSchema, rowGroupMetadata); + assertThat(shouldRead).as("Should succeed with promoted schema").isTrue(); + } + + @TestTemplate + public void testTransformFilter() { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + boolean shouldRead = + new ParquetMetricsRowGroupFilter(SCHEMA, equal(truncate("required", 2), "some_value"), true) + .shouldRead(parquetSchema, rowGroupMetadata); + assertThat(shouldRead) + .as("Should read: filter contains non-reference evaluate as True") + .isTrue(); + } + + @TestTemplate + public void testVariantFilterNotNull() throws IOException { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + Schema variantSchema = + new Schema( + required(1, "id", IntegerType.get()), + optional(2, "variant_field", Types.VariantType.get())); + + File parquetFile = new File(tempDir, "test-variant" + System.nanoTime()); + + OutputFile outFile = Files.localOutput(parquetFile); + try (FileAppender appender = + Parquet.write(outFile) + .schema(variantSchema) + .createWriterFunc(GenericParquetWriter::create) + .build()) { + + for (int i = 0; i < 10; i++) { + GenericRecord record = GenericRecord.create(variantSchema); + record.setField("id", i); + + if (i % 2 == 0) { + VariantMetadata metadata = Variants.metadata("field"); + ShreddedObject obj = Variants.object(metadata); + obj.put("field", Variants.of("value" + i)); + Variant variant = Variant.of(metadata, obj); + record.setField("variant_field", variant); + } + + appender.add(record); + } + } + + InputFile inFile = Files.localInput(parquetFile); + try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { + assertThat(reader.getRowGroups()).as("Should create only one row group").hasSize(1); + BlockMetaData blockMetaData = reader.getRowGroups().get(0); + MessageType fileSchema = reader.getFileMetaData().getSchema(); + + ParquetMetricsRowGroupFilter rowGroupFilter = + new ParquetMetricsRowGroupFilter(variantSchema, notNull("variant_field"), true); + boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan") + .isTrue(); + } + parquetFile.deleteOnExit(); + } + + @TestTemplate + public void testAllNullsVariantNotNull() throws IOException { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + Schema variantSchema = + new Schema( + required(1, "id", IntegerType.get()), + optional(2, "variant_field", Types.VariantType.get())); + + File parquetFile = new File(tempDir, "test-variant-nulls" + System.nanoTime()); + + OutputFile outFile = Files.localOutput(parquetFile); + try (FileAppender 
appender = + Parquet.write(outFile) + .schema(variantSchema) + .createWriterFunc(GenericParquetWriter::create) + .build()) { + + for (int i = 0; i < 10; i++) { + GenericRecord record = GenericRecord.create(variantSchema); + record.setField("id", i); + record.setField("variant_field", null); + appender.add(record); + } + } + + InputFile inFile = Files.localInput(parquetFile); + try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { + BlockMetaData blockMetaData = reader.getRowGroups().get(0); + MessageType fileSchema = reader.getFileMetaData().getSchema(); + + ParquetMetricsRowGroupFilter rowGroupFilter = + new ParquetMetricsRowGroupFilter(variantSchema, notNull("variant_field"), true); + boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan even for all nulls") + .isTrue(); + } + parquetFile.deleteOnExit(); + } + + private boolean shouldRead(Expression expression) { + return shouldRead(expression, true); + } + + private boolean shouldRead(Expression expression, boolean caseSensitive) { + switch (format) { + case ORC: + return shouldReadOrc(expression, caseSensitive); + case PARQUET: + return shouldReadParquet(expression, caseSensitive, parquetSchema, rowGroupMetadata); + default: + throw new UnsupportedOperationException( + "Row group filter tests not supported for " + format); + } + } + + private boolean shouldReadOrc(Expression expression, boolean caseSensitive) { + try (CloseableIterable reader = + ORC.read(Files.localInput(orcFile)) + .project(SCHEMA) + .createReaderFunc(fileSchema -> GenericOrcReader.buildReader(SCHEMA, fileSchema)) + .filter(expression) + .caseSensitive(caseSensitive) + .build()) { + return !Lists.newArrayList(reader).isEmpty(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private boolean shouldReadParquet( + Expression expression, + boolean caseSensitive, + MessageType messageType, + BlockMetaData blockMetaData) { + return new ParquetMetricsRowGroupFilter(SCHEMA, expression, caseSensitive) + .shouldRead(messageType, blockMetaData); + } + + private org.apache.parquet.io.InputFile parquetInputFile(InputFile inFile) { + return new org.apache.parquet.io.InputFile() { + @Override + public long getLength() throws IOException { + return inFile.getLength(); + } + + @Override + public org.apache.parquet.io.SeekableInputStream newStream() throws IOException { + SeekableInputStream stream = inFile.newStream(); + return new DelegatingSeekableInputStream(stream) { + @Override + public long getPos() throws IOException { + return stream.getPos(); + } + + @Override + public void seek(long newPos) throws IOException { + stream.seek(newPos); + } + }; + } + }; + } +} diff --git a/data/src/test/java/org/apache/iceberg/data/orc/TestOrcMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/orc/TestOrcMetricsRowGroupFilter.java deleted file mode 100644 index 5a2ff2fb6727..000000000000 --- a/data/src/test/java/org/apache/iceberg/data/orc/TestOrcMetricsRowGroupFilter.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.data.orc; - -import static org.apache.iceberg.expressions.Expressions.isNaN; -import static org.apache.iceberg.expressions.Expressions.notIn; -import static org.assertj.core.api.Assertions.assertThat; - -import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.iceberg.Files; -import org.apache.iceberg.data.GenericRecord; -import org.apache.iceberg.data.MetricsRowGroupFilterTestBase; -import org.apache.iceberg.expressions.Expression; -import org.apache.iceberg.io.CloseableIterable; -import org.apache.iceberg.io.FileAppender; -import org.apache.iceberg.io.InputFile; -import org.apache.iceberg.io.OutputFile; -import org.apache.iceberg.orc.ORC; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.orc.OrcFile; -import org.apache.orc.Reader; -import org.junit.jupiter.api.Test; - -public class TestOrcMetricsRowGroupFilter extends MetricsRowGroupFilterTestBase { - - private File orcFile = null; - - @Override - protected void createInputFile() throws IOException { - this.orcFile = new File(tempDir, "junit" + System.nanoTime()); - - OutputFile outFile = Files.localOutput(orcFile); - try (FileAppender appender = - ORC.write(outFile) - .schema(BASE_FILE_SCHEMA) - .createWriterFunc(GenericOrcWriter::buildWriter) - .build()) { - GenericRecord record = GenericRecord.create(BASE_FILE_SCHEMA); - for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { - populateBaseFields(record, i); - appender.add(record); - } - } - InputFile inFile = Files.localInput(orcFile); - try (Reader reader = - OrcFile.createReader( - new Path(inFile.location()), OrcFile.readerOptions(new Configuration()))) { - assertThat(reader.getStripes()).as("Should create only one stripe").hasSize(1); - } - orcFile.deleteOnExit(); - } - - @Override - protected boolean shouldRead(Expression expression, boolean caseSensitive) { - try (CloseableIterable reader = - ORC.read(Files.localInput(orcFile)) - .project(BASE_SCHEMA) - .createReaderFunc(fileSchema -> GenericOrcReader.buildReader(BASE_SCHEMA, fileSchema)) - .filter(expression) - .caseSensitive(caseSensitive) - .build()) { - return !Lists.newArrayList(reader).isEmpty(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Test - public void testIsNaNNoNans() { - boolean shouldRead = shouldRead(isNaN("no_nans")); - assertThat(shouldRead).as("ORC filter push-down can skip groups with no NaN").isFalse(); - } - - @Test - public void testNotInNoNulls() { - boolean shouldRead = shouldRead(notIn("no_nulls", "aaa", "")); - assertThat(shouldRead).as("ORC can skip when all values match excluded set").isFalse(); - } -} diff --git a/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetMetricsRowGroupFilter.java deleted file mode 100644 index 94d098f10a25..000000000000 --- 
a/data/src/test/java/org/apache/iceberg/data/parquet/TestParquetMetricsRowGroupFilter.java +++ /dev/null @@ -1,444 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.data.parquet; - -import static org.apache.iceberg.expressions.Expressions.equal; -import static org.apache.iceberg.expressions.Expressions.greaterThan; -import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual; -import static org.apache.iceberg.expressions.Expressions.in; -import static org.apache.iceberg.expressions.Expressions.isNaN; -import static org.apache.iceberg.expressions.Expressions.isNull; -import static org.apache.iceberg.expressions.Expressions.lessThan; -import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual; -import static org.apache.iceberg.expressions.Expressions.notEqual; -import static org.apache.iceberg.expressions.Expressions.notIn; -import static org.apache.iceberg.expressions.Expressions.notNull; -import static org.apache.iceberg.expressions.Expressions.notStartsWith; -import static org.apache.iceberg.expressions.Expressions.startsWith; -import static org.apache.iceberg.expressions.Expressions.truncate; -import static org.apache.iceberg.types.Types.NestedField.optional; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import java.io.File; -import java.io.IOException; -import java.util.List; -import java.util.UUID; -import org.apache.iceberg.Files; -import org.apache.iceberg.Schema; -import org.apache.iceberg.data.GenericRecord; -import org.apache.iceberg.data.MetricsRowGroupFilterTestBase; -import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.expressions.Expression; -import org.apache.iceberg.io.FileAppender; -import org.apache.iceberg.io.InputFile; -import org.apache.iceberg.io.OutputFile; -import org.apache.iceberg.io.SeekableInputStream; -import org.apache.iceberg.parquet.Parquet; -import org.apache.iceberg.parquet.ParquetMetricsRowGroupFilter; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.types.Types; -import org.apache.iceberg.types.Types.StringType; -import org.apache.iceberg.variants.ShreddedObject; -import org.apache.iceberg.variants.Variant; -import org.apache.iceberg.variants.VariantMetadata; -import org.apache.iceberg.variants.Variants; -import org.apache.parquet.hadoop.ParquetFileReader; -import org.apache.parquet.hadoop.metadata.BlockMetaData; -import org.apache.parquet.io.DelegatingSeekableInputStream; -import org.apache.parquet.schema.MessageType; -import org.junit.jupiter.api.Test; - -public class TestParquetMetricsRowGroupFilter 
extends MetricsRowGroupFilterTestBase { - - private static final Schema PARQUET_SCHEMA = - new Schema( - Lists.newArrayList( - Iterables.concat( - BASE_SCHEMA.columns(), - Lists.newArrayList( - optional(17, "no_stats_parquet", StringType.get()), - optional(18, "some_nulls_variant", Types.VariantType.get()), - optional(19, "all_nulls_variant", Types.VariantType.get()))))); - - private static final Schema PARQUET_FILE_SCHEMA = - new Schema( - Lists.newArrayList( - Iterables.concat( - BASE_FILE_SCHEMA.columns(), - Lists.newArrayList( - optional(17, "_no_stats_parquet", StringType.get()), - optional(18, "_some_nulls_variant", Types.VariantType.get()), - optional(19, "_all_nulls_variant", Types.VariantType.get()))))); - - private MessageType parquetSchema = null; - private BlockMetaData rowGroupMetadata = null; - - @Override - protected void createInputFile() throws IOException { - File parquetFile = new File(tempDir, "junit" + System.nanoTime()); - - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < 200; i += 1) { - sb.append(UUID.randomUUID()); - } - String tooLongForParquetValue = sb.toString(); - - OutputFile outFile = Files.localOutput(parquetFile); - try (FileAppender appender = - Parquet.write(outFile) - .schema(PARQUET_FILE_SCHEMA) - .createWriterFunc(GenericParquetWriter::create) - .build()) { - for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { - GenericRecord record = GenericRecord.create(PARQUET_FILE_SCHEMA); - populateBaseFields(record, i); - record.setField( - "_no_stats_parquet", - tooLongForParquetValue); // value longer than 4k will produce no stats - if (i % 2 == 0) { - VariantMetadata metadata = Variants.metadata("data"); - ShreddedObject obj = Variants.object(metadata); - obj.put("data", Variants.of(25 + i)); - record.setField("_some_nulls_variant", Variant.of(metadata, obj)); - } else { - record.setField("_some_nulls_variant", null); - } - record.setField("_all_nulls_variant", null); - appender.add(record); - } - } - - InputFile inFile = Files.localInput(parquetFile); - try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { - assertThat(reader.getRowGroups()).as("Should create only one row group").hasSize(1); - rowGroupMetadata = reader.getRowGroups().get(0); - parquetSchema = reader.getFileMetaData().getSchema(); - } - parquetFile.deleteOnExit(); - } - - @Override - protected boolean shouldRead(Expression expression, boolean caseSensitive) { - return shouldRead(expression, caseSensitive, rowGroupMetadata); - } - - @Test - public void testIsNaNNoNans() { - boolean shouldRead = shouldRead(isNaN("no_nans")); - assertThat(shouldRead).as("Parquet metrics don't track NaN counts").isTrue(); - } - - @Test - public void testNotInNoNulls() { - boolean shouldRead = shouldRead(notIn("no_nulls", "aaa", "")); - assertThat(shouldRead).as("Parquet metrics evaluator reads for notIn").isTrue(); - } - - @Test - public void testColumnNotInFile() { - Expression[] cannotMatch = - new Expression[] { - lessThan("not_in_file", 1.0f), lessThanOrEqual("not_in_file", 1.0f), - equal("not_in_file", 1.0f), greaterThan("not_in_file", 1.0f), - greaterThanOrEqual("not_in_file", 1.0f), notNull("not_in_file") - }; - - for (Expression expr : cannotMatch) { - boolean shouldRead = shouldRead(expr); - assertThat(shouldRead) - .as("Should skip when column is not in file (all nulls): " + expr) - .isFalse(); - } - - Expression[] canMatch = new Expression[] {isNull("not_in_file"), notEqual("not_in_file", 1.0f)}; - - for (Expression expr : canMatch) { - boolean 
shouldRead = shouldRead(expr); - assertThat(shouldRead) - .as("Should read when column is not in file (all nulls): " + expr) - .isTrue(); - } - } - - @Test - public void testMissingStatsParquet() { - Expression[] exprs = - new Expression[] { - lessThan("no_stats_parquet", "a"), - lessThanOrEqual("no_stats_parquet", "b"), - equal("no_stats_parquet", "c"), - greaterThan("no_stats_parquet", "d"), - greaterThanOrEqual("no_stats_parquet", "e"), - notEqual("no_stats_parquet", "f"), - isNull("no_stats_parquet"), - notNull("no_stats_parquet"), - startsWith("no_stats_parquet", "a"), - notStartsWith("no_stats_parquet", "a") - }; - - for (Expression expr : exprs) { - boolean shouldRead = shouldRead(expr); - assertThat(shouldRead).as("Should read when missing stats for expr: " + expr).isTrue(); - } - } - - @Test - public void testZeroRecordFileParquet() { - BlockMetaData emptyBlock = new BlockMetaData(); - emptyBlock.setRowCount(0); - - Expression[] exprs = - new Expression[] { - lessThan("id", 5), - lessThanOrEqual("id", 30), - equal("id", 70), - greaterThan("id", 78), - greaterThanOrEqual("id", 90), - notEqual("id", 101), - isNull("some_nulls"), - notNull("some_nulls") - }; - - for (Expression expr : exprs) { - boolean shouldRead = shouldRead(expr, true, emptyBlock); - assertThat(shouldRead).as("Should never read 0-record file: " + expr).isFalse(); - } - } - - @Test - public void testStringStartsWith() { - boolean shouldRead = shouldRead(startsWith("str", "1")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "0st")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "1str1")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "1str1_xgd")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "2str")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(startsWith("str", "9xstr")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - - shouldRead = shouldRead(startsWith("str", "0S")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - - shouldRead = shouldRead(startsWith("str", "x")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - - shouldRead = shouldRead(startsWith("str", "9str9aaa")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - } - - @Test - public void testStringNotStartsWith() { - boolean shouldRead = shouldRead(notStartsWith("str", "1")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "0st")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "1str1")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "1str1_xgd")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "2str")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("str", "9xstr")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("required", "r")); - assertThat(shouldRead).as("Should not read: range doesn't match").isFalse(); - 
- shouldRead = shouldRead(notStartsWith("required", "requ")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("some_nulls", "ssome")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - - shouldRead = shouldRead(notStartsWith("some_nulls", "som")); - assertThat(shouldRead).as("Should read: range matches").isTrue(); - } - - @Test - public void testInLimitParquet() { - boolean shouldRead = shouldRead(in("id", 1, 2)); - assertThat(shouldRead).as("Should not read if IN is evaluated").isFalse(); - - List ids = Lists.newArrayListWithExpectedSize(400); - for (int id = -400; id <= 0; id++) { - ids.add(id); - } - - shouldRead = shouldRead(in("id", ids)); - assertThat(shouldRead).as("Should read if IN is not evaluated").isTrue(); - } - - @Test - public void testParquetTypePromotion() { - Schema promotedSchema = new Schema(Types.NestedField.required(1, "id", Types.LongType.get())); - boolean shouldRead = - new ParquetMetricsRowGroupFilter(promotedSchema, equal("id", INT_MIN_VALUE + 1), true) - .shouldRead(parquetSchema, rowGroupMetadata); - assertThat(shouldRead).as("Should succeed with promoted schema").isTrue(); - } - - @Test - public void testTransformFilter() { - boolean shouldRead = - new ParquetMetricsRowGroupFilter( - PARQUET_SCHEMA, equal(truncate("required", 2), "some_value"), true) - .shouldRead(parquetSchema, rowGroupMetadata); - assertThat(shouldRead) - .as("Should read: filter contains non-reference evaluate as True") - .isTrue(); - } - - @Test - public void testVariantNotNull() { - boolean shouldRead = shouldRead(notNull("some_nulls_variant")); - assertThat(shouldRead) - .as("Should read: variant notNull filters must be evaluated post scan") - .isTrue(); - - shouldRead = shouldRead(notNull("all_nulls_variant")); - assertThat(shouldRead) - .as("Should read: variant notNull filters must be evaluated post scan even for all nulls") - .isTrue(); - } - - @Test - public void testVariantEq() { - assertThatThrownBy(() -> shouldRead(equal("some_nulls_variant", "test"))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test"); - } - - @Test - public void testVariantIn() { - assertThatThrownBy(() -> shouldRead(in("some_nulls_variant", "test1", "test2"))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test1"); - - assertThatThrownBy(() -> shouldRead(in("all_nulls_variant", "test1", "test2"))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test1"); - } - - @Test - public void testVariantNotIn() { - assertThatThrownBy(() -> shouldRead(notIn("some_nulls_variant", "test1", "test2"))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test1"); - - assertThatThrownBy(() -> shouldRead(notIn("all_nulls_variant", "test1", "test2"))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("test1"); - } - - @Test - public void testVariantIsNull() { - boolean shouldRead = shouldRead(isNull("some_nulls_variant")); - assertThat(shouldRead) - .as("Should read: variant isNull filters must be evaluated post scan") - .isTrue(); - - shouldRead = shouldRead(isNull("all_nulls_variant")); - 
assertThat(shouldRead) - .as("Should read: variant isNull filters must be evaluated post scan even for all nulls") - .isTrue(); - } - - @Test - public void testVariantComparisons() { - assertThatThrownBy(() -> shouldRead(lessThan("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - - assertThatThrownBy(() -> shouldRead(lessThanOrEqual("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - - assertThatThrownBy(() -> shouldRead(greaterThan("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - - assertThatThrownBy(() -> shouldRead(greaterThanOrEqual("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - - assertThatThrownBy(() -> shouldRead(notEqual("some_nulls_variant", 42))) - .isInstanceOf(ValidationException.class) - .hasMessageContaining("Invalid value for conversion to type variant:") - .hasMessageContaining("42"); - } - - private boolean shouldRead(Expression expression, boolean caseSensitive, BlockMetaData block) { - if (parquetSchema == null || block == null) { - throw new IllegalStateException( - "Parquet file not created yet. Call createInputFile() first."); - } - return new ParquetMetricsRowGroupFilter(PARQUET_SCHEMA, expression, caseSensitive) - .shouldRead(parquetSchema, block); - } - - private org.apache.parquet.io.InputFile parquetInputFile(InputFile inFile) { - return new org.apache.parquet.io.InputFile() { - @Override - public long getLength() throws IOException { - return inFile.getLength(); - } - - @Override - public org.apache.parquet.io.SeekableInputStream newStream() throws IOException { - SeekableInputStream stream = inFile.newStream(); - return new DelegatingSeekableInputStream(stream) { - @Override - public long getPos() throws IOException { - return stream.getPos(); - } - - @Override - public void seek(long newPos) throws IOException { - stream.seek(newPos); - } - }; - } - }; - } -} From c7c715e2c8139c68538055dfa3a57390f8770b93 Mon Sep 17 00:00:00 2001 From: geruh Date: Wed, 17 Sep 2025 18:14:36 -0700 Subject: [PATCH 5/9] Revert the test refactoring and clean up original --- .../data/TestMetricsRowGroupFilter.java | 144 +++++++++++++----- 1 file changed, 108 insertions(+), 36 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java index a475fecf004b..54ab97dd4f9e 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -18,7 +18,6 @@ */ package org.apache.iceberg.data; -import static org.apache.iceberg.avro.AvroSchemaUtil.convert; import static org.apache.iceberg.expressions.Expressions.and; import static org.apache.iceberg.expressions.Expressions.equal; import static org.apache.iceberg.expressions.Expressions.greaterThan; @@ -49,8 +48,6 @@ import java.util.Arrays; import java.util.List; import java.util.UUID; -import org.apache.avro.generic.GenericData.Record; -import org.apache.avro.generic.GenericRecordBuilder; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.iceberg.FileFormat; @@ -59,7 +56,6 @@ import org.apache.iceberg.ParameterizedTestExtension; import org.apache.iceberg.Parameters; import org.apache.iceberg.Schema; -import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.data.orc.GenericOrcReader; import org.apache.iceberg.data.orc.GenericOrcWriter; import org.apache.iceberg.data.parquet.GenericParquetWriter; @@ -143,6 +139,11 @@ public static List parameters() { optional(16, "_no_nans", Types.DoubleType.get()), optional(17, "_some_double_nans", Types.DoubleType.get())); + private static final Schema VARIANT_SCHEMA = + new Schema( + required(1, "id", IntegerType.get()), + optional(2, "variant_field", Types.VariantType.get())); + private static final String TOO_LONG_FOR_STATS_PARQUET; static { @@ -225,40 +226,32 @@ public void createOrcInputFile() throws IOException { } private void createParquetInputFile() throws IOException { - File parquetFile = new File(tempDir, "junit" + System.nanoTime()); - - // build struct field schema - org.apache.avro.Schema structSchema = AvroSchemaUtil.convert(UNDERSCORE_STRUCT_FIELD_TYPE); - - OutputFile outFile = Files.localOutput(parquetFile); - try (FileAppender appender = Parquet.write(outFile).schema(FILE_SCHEMA).build()) { - GenericRecordBuilder builder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table")); - // create 50 records - for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { - builder.set("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 - builder.set( - "_no_stats_parquet", - TOO_LONG_FOR_STATS_PARQUET); // value longer than 4k will produce no stats - // in Parquet - builder.set("_required", "req"); // required, always non-null - builder.set("_all_nulls", null); // never non-null - builder.set("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values - builder.set("_no_nulls", ""); // optional, but always non-null - builder.set("_all_nans", Double.NaN); // never non-nan - builder.set("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values - builder.set( - "_some_double_nans", (i % 10 == 0) ? Double.NaN : 2D); // includes some nan values - builder.set("_no_nans", 3D); // optional, but always non-nan - builder.set("_str", i + "str" + i); - - Record structNotNull = new Record(structSchema); - structNotNull.put("_int_field", INT_MIN_VALUE + i); - builder.set("_struct_not_null", structNotNull); // struct with int - - appender.add(builder.build()); - } + List records = Lists.newArrayList(); + + for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) { + GenericRecord builder = GenericRecord.create(FILE_SCHEMA); + builder.setField("_id", INT_MIN_VALUE + i); // min=30, max=79, num-nulls=0 + builder.setField( + "_no_stats_parquet", + TOO_LONG_FOR_STATS_PARQUET); // value longer than 4k will produce no stats + // in Parquet + builder.setField("_required", "req"); // required, always non-null + builder.setField("_all_nulls", null); // never non-null + builder.setField("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values + builder.setField("_no_nulls", ""); // optional, but always non-null + builder.setField("_all_nans", Double.NaN); // never non-nan + builder.setField("_some_nans", (i % 10 == 0) ? Float.NaN : 2F); // includes some nan values + builder.setField( + "_some_double_nans", (i % 10 == 0) ? 
Double.NaN : 2D); // includes some nan values + builder.setField("_no_nans", 3D); // optional, but always non-nan + builder.setField("_str", i + "str" + i); + GenericRecord structNotNull = GenericRecord.create(UNDERSCORE_STRUCT_FIELD_TYPE); + structNotNull.setField("_int_field", INT_MIN_VALUE + i); + builder.setField("_struct_not_null", structNotNull); // struct with int + records.add(builder); } + File parquetFile = writeParquetFile("junit", FILE_SCHEMA, records); InputFile inFile = Files.localInput(parquetFile); try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { assertThat(reader.getRowGroups()).as("Should create only one row group").hasSize(1); @@ -269,6 +262,24 @@ private void createParquetInputFile() throws IOException { parquetFile.deleteOnExit(); } + private File writeParquetFile(String fileName, Schema schema, List records) + throws IOException { + File parquetFile = new File(tempDir, fileName + System.nanoTime()); + + OutputFile outFile = Files.localOutput(parquetFile); + try (FileAppender appender = + Parquet.write(outFile) + .schema(schema) + .createWriterFunc(GenericParquetWriter::create) + .build()) { + for (GenericRecord record : records) { + appender.add(record); + } + } + parquetFile.deleteOnExit(); + return parquetFile; + } + @TestTemplate public void testAllNulls() { boolean shouldRead; @@ -1084,6 +1095,67 @@ public void testAllNullsVariantNotNull() throws IOException { parquetFile.deleteOnExit(); } + @TestTemplate + public void testVariantFieldMixedValuesNotNull() throws IOException { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + List records = Lists.newArrayList(); + for (int i = 0; i < 10; i++) { + GenericRecord record = GenericRecord.create(VARIANT_SCHEMA); + record.setField("id", i); + if (i % 2 == 0) { + VariantMetadata metadata = Variants.metadata("field"); + ShreddedObject obj = Variants.object(metadata); + obj.put("field", Variants.of("value" + i)); + Variant variant = Variant.of(metadata, obj); + record.setField("variant_field", variant); + } + records.add(record); + } + + File parquetFile = writeParquetFile("test-variant", VARIANT_SCHEMA, records); + InputFile inFile = Files.localInput(parquetFile); + try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { + BlockMetaData blockMetaData = reader.getRowGroups().get(0); + MessageType fileSchema = reader.getFileMetaData().getSchema(); + ParquetMetricsRowGroupFilter rowGroupFilter = + new ParquetMetricsRowGroupFilter(VARIANT_SCHEMA, notNull("variant_field"), true); + boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan") + .isTrue(); + } + } + + @TestTemplate + public void testVariantFieldAllNullsNotNull() throws IOException { + assumeThat(format).isEqualTo(FileFormat.PARQUET); + + List records = Lists.newArrayListWithExpectedSize(10); + for (int i = 0; i < 10; i++) { + GenericRecord record = GenericRecord.create(VARIANT_SCHEMA); + record.setField("id", i); + record.setField("variant_field", null); + records.add(record); + } + + File parquetFile = writeParquetFile("test-variant-nulls", VARIANT_SCHEMA, records); + InputFile inFile = Files.localInput(parquetFile); + + try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { + BlockMetaData blockMetaData = reader.getRowGroups().get(0); + MessageType fileSchema = reader.getFileMetaData().getSchema(); + + ParquetMetricsRowGroupFilter rowGroupFilter = + new 
ParquetMetricsRowGroupFilter(VARIANT_SCHEMA, notNull("variant_field"), true); + boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); + + assertThat(shouldRead) + .as("Should read: variant notNull filters must be evaluated post scan even for all nulls") + .isTrue(); + } + } + private boolean shouldRead(Expression expression) { return shouldRead(expression, true); } From 3f06a3d56e0e480c76e899555607c2a542468141 Mon Sep 17 00:00:00 2001 From: geruh Date: Thu, 18 Sep 2025 16:51:42 -0700 Subject: [PATCH 6/9] remove old tests --- .../data/TestMetricsRowGroupFilter.java | 91 ------------------- 1 file changed, 91 deletions(-) diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java index 54ab97dd4f9e..3c867a9a72d1 100644 --- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java +++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java @@ -1004,97 +1004,6 @@ public void testTransformFilter() { .isTrue(); } - @TestTemplate - public void testVariantFilterNotNull() throws IOException { - assumeThat(format).isEqualTo(FileFormat.PARQUET); - - Schema variantSchema = - new Schema( - required(1, "id", IntegerType.get()), - optional(2, "variant_field", Types.VariantType.get())); - - File parquetFile = new File(tempDir, "test-variant" + System.nanoTime()); - - OutputFile outFile = Files.localOutput(parquetFile); - try (FileAppender appender = - Parquet.write(outFile) - .schema(variantSchema) - .createWriterFunc(GenericParquetWriter::create) - .build()) { - - for (int i = 0; i < 10; i++) { - GenericRecord record = GenericRecord.create(variantSchema); - record.setField("id", i); - - if (i % 2 == 0) { - VariantMetadata metadata = Variants.metadata("field"); - ShreddedObject obj = Variants.object(metadata); - obj.put("field", Variants.of("value" + i)); - Variant variant = Variant.of(metadata, obj); - record.setField("variant_field", variant); - } - - appender.add(record); - } - } - - InputFile inFile = Files.localInput(parquetFile); - try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { - assertThat(reader.getRowGroups()).as("Should create only one row group").hasSize(1); - BlockMetaData blockMetaData = reader.getRowGroups().get(0); - MessageType fileSchema = reader.getFileMetaData().getSchema(); - - ParquetMetricsRowGroupFilter rowGroupFilter = - new ParquetMetricsRowGroupFilter(variantSchema, notNull("variant_field"), true); - boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); - assertThat(shouldRead) - .as("Should read: variant notNull filters must be evaluated post scan") - .isTrue(); - } - parquetFile.deleteOnExit(); - } - - @TestTemplate - public void testAllNullsVariantNotNull() throws IOException { - assumeThat(format).isEqualTo(FileFormat.PARQUET); - - Schema variantSchema = - new Schema( - required(1, "id", IntegerType.get()), - optional(2, "variant_field", Types.VariantType.get())); - - File parquetFile = new File(tempDir, "test-variant-nulls" + System.nanoTime()); - - OutputFile outFile = Files.localOutput(parquetFile); - try (FileAppender appender = - Parquet.write(outFile) - .schema(variantSchema) - .createWriterFunc(GenericParquetWriter::create) - .build()) { - - for (int i = 0; i < 10; i++) { - GenericRecord record = GenericRecord.create(variantSchema); - record.setField("id", i); - record.setField("variant_field", null); - appender.add(record); - } - } - - InputFile 
inFile = Files.localInput(parquetFile); - try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) { - BlockMetaData blockMetaData = reader.getRowGroups().get(0); - MessageType fileSchema = reader.getFileMetaData().getSchema(); - - ParquetMetricsRowGroupFilter rowGroupFilter = - new ParquetMetricsRowGroupFilter(variantSchema, notNull("variant_field"), true); - boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData); - assertThat(shouldRead) - .as("Should read: variant notNull filters must be evaluated post scan even for all nulls") - .isTrue(); - } - parquetFile.deleteOnExit(); - } - @TestTemplate public void testVariantFieldMixedValuesNotNull() throws IOException { assumeThat(format).isEqualTo(FileFormat.PARQUET); From 3ba38bb590b89ae8c0117298f1b4446aad801159 Mon Sep 17 00:00:00 2001 From: geruh Date: Thu, 18 Sep 2025 18:26:33 -0700 Subject: [PATCH 7/9] clean up and add another test --- .../apache/iceberg/spark/SparkTestHelperBase.java | 4 ++-- .../iceberg/spark/sql/TestFilterPushDown.java | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java index 70286c837cdd..cfb4980df6e4 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java @@ -81,8 +81,8 @@ protected void assertEquals(String context, Object[] expectedRow, Object[] actua assertEquals(newContext, (Object[]) expectedValue, (Object[]) actualValue); } } else if (expectedValue instanceof VariantVal && actualValue instanceof VariantVal) { - // Spark VariantVal may have fixed-width padding, so we compare their JSON representation - // instead of raw byte arrays. + // Spark VariantVal comparison is based on raw byte[] comparison, which can fail + // if Spark uses trailing null bytes, so we compare their JSON representation instead. assertThat((actualValue).toString()) .as("%s contents should match (VariantVal JSON)", context) .isEqualTo((expectedValue).toString()); diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java index ee45958ab47f..0ebfde782e4f 100644 --- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java +++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java @@ -604,6 +604,21 @@ public void testVariantExtractFiltering() { withDefaultTimeZone( "UTC", () -> { + checkFilters( + "try_variant_get(data, '$.num', 'int') IS NOT NULL", + "isnotnull(data) AND isnotnull(try_variant_get(data, $.num, IntegerType, false, Some(UTC)))", + "data IS NOT NULL", + ImmutableList.of( + row(1L, toSparkVariantRow("foo", 25)), + row(2L, toSparkVariantRow("bar", 30)), + row(3L, toSparkVariantRow("baz", 35)))); + + checkFilters( + "try_variant_get(data, '$.num', 'int') IS NULL", + "isnull(try_variant_get(data, $.num, IntegerType, false, Some(UTC)))", + "", + ImmutableList.of(row(4L, null))); + checkFilters( "try_variant_get(data, '$.num', 'int') > 30", "isnotnull(data) AND (try_variant_get(data, $.num, IntegerType, false, Some(UTC)) > 30)", From a618461215ccac685585dd294d0a15e0a16b8cc6 Mon Sep 17 00:00:00 2001 From: geruh Date: Thu, 18 Sep 2025 18:33:57 -0700 Subject: [PATCH 8/9] checkstyle.. 
---
 .../iceberg/spark/sql/TestFilterPushDown.java | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java
index 0ebfde782e4f..a984c4c826d2 100644
--- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java
+++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestFilterPushDown.java
@@ -605,19 +605,19 @@ public void testVariantExtractFiltering() {
         "UTC",
         () -> {
           checkFilters(
-            "try_variant_get(data, '$.num', 'int') IS NOT NULL",
-            "isnotnull(data) AND isnotnull(try_variant_get(data, $.num, IntegerType, false, Some(UTC)))",
-            "data IS NOT NULL",
-            ImmutableList.of(
-                row(1L, toSparkVariantRow("foo", 25)),
-                row(2L, toSparkVariantRow("bar", 30)),
-                row(3L, toSparkVariantRow("baz", 35))));
+              "try_variant_get(data, '$.num', 'int') IS NOT NULL",
+              "isnotnull(data) AND isnotnull(try_variant_get(data, $.num, IntegerType, false, Some(UTC)))",
+              "data IS NOT NULL",
+              ImmutableList.of(
+                  row(1L, toSparkVariantRow("foo", 25)),
+                  row(2L, toSparkVariantRow("bar", 30)),
+                  row(3L, toSparkVariantRow("baz", 35))));
 
           checkFilters(
-            "try_variant_get(data, '$.num', 'int') IS NULL",
-            "isnull(try_variant_get(data, $.num, IntegerType, false, Some(UTC)))",
-            "",
-            ImmutableList.of(row(4L, null)));
+              "try_variant_get(data, '$.num', 'int') IS NULL",
+              "isnull(try_variant_get(data, $.num, IntegerType, false, Some(UTC)))",
+              "",
+              ImmutableList.of(row(4L, null)));
 
           checkFilters(
               "try_variant_get(data, '$.num', 'int') > 30",
               "isnotnull(data) AND (try_variant_get(data, $.num, IntegerType, false, Some(UTC)) > 30)",

From a48ec72848b79bea1b6e299675a33ed5ace34e5e Mon Sep 17 00:00:00 2001
From: geruh
Date: Fri, 19 Sep 2025 18:02:58 -0700
Subject: [PATCH 9/9] Address review comments

---
 .../apache/iceberg/data/TestMetricsRowGroupFilter.java | 8 +++-----
 .../org/apache/iceberg/spark/SparkTestHelperBase.java  | 3 ++-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java
index 3c867a9a72d1..e12015d5eb73 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java
@@ -1029,8 +1029,8 @@ public void testVariantFieldMixedValuesNotNull() throws IOException {
       MessageType fileSchema = reader.getFileMetaData().getSchema();
       ParquetMetricsRowGroupFilter rowGroupFilter =
           new ParquetMetricsRowGroupFilter(VARIANT_SCHEMA, notNull("variant_field"), true);
-      boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData);
-      assertThat(shouldRead)
+
+      assertThat(rowGroupFilter.shouldRead(fileSchema, blockMetaData))
           .as("Should read: variant notNull filters must be evaluated post scan")
           .isTrue();
     }
@@ -1054,12 +1054,10 @@ public void testVariantFieldAllNullsNotNull() throws IOException {
     try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) {
       BlockMetaData blockMetaData = reader.getRowGroups().get(0);
       MessageType fileSchema = reader.getFileMetaData().getSchema();
-
       ParquetMetricsRowGroupFilter rowGroupFilter =
           new ParquetMetricsRowGroupFilter(VARIANT_SCHEMA, notNull("variant_field"), true);
-      boolean shouldRead = rowGroupFilter.shouldRead(fileSchema, blockMetaData);
-      assertThat(shouldRead)
+      assertThat(rowGroupFilter.shouldRead(fileSchema, blockMetaData))
           .as("Should read: variant notNull filters must be evaluated post scan even for all nulls")
           .isTrue();
     }
diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java
index cfb4980df6e4..2754e891a481 100644
--- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java
+++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/SparkTestHelperBase.java
@@ -83,7 +83,8 @@ protected void assertEquals(String context, Object[] expectedRow, Object[] actua
     } else if (expectedValue instanceof VariantVal && actualValue instanceof VariantVal) {
       // Spark VariantVal equality is based on raw byte[] comparison, which can fail
       // if Spark pads values with trailing null bytes, so we compare their JSON representation instead.
-      assertThat((actualValue).toString())
+      assertThat(actualValue)
+          .asString()
           .as("%s contents should match (VariantVal JSON)", context)
           .isEqualTo((expectedValue).toString());
     } else if (expectedValue != ANY) {
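
A note on the asString() idiom adopted in PATCH 9/9: AssertJ's asString() converts the object under test with toString() and then applies string assertions, which is what lets the VariantVal check above pass when two encodings differ only in byte-level padding. The sketch below is a minimal, self-contained illustration of that idiom, not part of the series; StringBuilder is a hypothetical stand-in for VariantVal, chosen only because a direct isEqualTo() between two StringBuilder instances also fails despite identical contents.

import static org.assertj.core.api.Assertions.assertThat;

public class AsStringIdiomSketch {
  public static void main(String[] args) {
    // Two distinct objects with identical rendered output. A plain
    // assertThat(actual).isEqualTo(expected) would fail here, much as a raw
    // byte[] comparison can fail for VariantVal when padding differs.
    StringBuilder actual = new StringBuilder("{\"num\":25}");
    StringBuilder expected = new StringBuilder("{\"num\":25}");

    assertThat(actual)
        .asString() // asserts on actual.toString(), not on object identity
        .isEqualTo(expected.toString()); // compares the rendered strings
  }
}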