Skip to content
5 changes: 5 additions & 0 deletions api/src/main/java/org/apache/iceberg/Accessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,9 @@ public interface Accessor<T> extends Serializable {
Object get(T container);

Type type();

/**
 * Returns true if the current field or any ancestor in the access path is optional.
 *
 * <p>This default implementation always returns {@code false}; accessors that record the
 * optionality of the fields on their access path override it.
 *
 * @return {@code true} if the accessed field or any ancestor in its access path is optional
 */
default boolean hasOptionalFieldInPath() {
return false;
}
}
50 changes: 39 additions & 11 deletions api/src/main/java/org/apache/iceberg/Accessors.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,13 @@ private static class PositionAccessor implements Accessor<StructLike> {
private final int position;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

/**
 * Creates an accessor that reads the field at a single position of a struct.
 *
 * @param pos position of the field within the struct
 * @param type type of the field; its type id also determines the Java class of the values
 * @param isOptional whether the field at {@code pos} is optional; this is the only field on the
 *     access path, so it is stored directly as the optional-in-path flag
 */
PositionAccessor(int pos, Type type, boolean isOptional) {
  this.position = pos;
  this.type = type;
  this.javaClass = type.typeId().javaClass();
  this.hasOptionalFieldInPath = isOptional;
}

@Override
Expand All @@ -84,6 +86,11 @@ public Class<?> javaClass() {
return javaClass;
}

/** Returns the flag stored by the constructor: whether the accessed field is optional. */
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + position + "], type=" + type + ")";
Expand All @@ -95,12 +102,14 @@ private static class Position2Accessor implements Accessor<StructLike> {
private final int p1;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

/**
 * Creates a two-level accessor by placing {@code pos} in front of the position held by a
 * single-level accessor. The type and Java class are taken from the wrapped accessor.
 *
 * @param pos position of the outer field
 * @param wrapped accessor for the inner field
 * @param isOptional whether the outer field at {@code pos} is optional
 */
Position2Accessor(int pos, PositionAccessor wrapped, boolean isOptional) {
  this.p0 = pos;
  this.p1 = wrapped.position();
  this.type = wrapped.type();
  this.javaClass = wrapped.javaClass();
  // optional if this level is optional or the wrapped path already contains an optional field
  this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath();
}

@Override
Expand All @@ -117,6 +126,11 @@ public Class<?> javaClass() {
return javaClass;
}

/**
 * Returns the value computed in the constructor: true if this level was flagged optional or the
 * wrapped accessor already had an optional field in its path.
 */
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + "], type=" + type + ")";
Expand All @@ -129,13 +143,15 @@ private static class Position3Accessor implements Accessor<StructLike> {
private final int p2;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

/**
 * Creates a three-level accessor by placing {@code pos} in front of the two positions held by a
 * two-level accessor. The type and Java class are taken from the wrapped accessor.
 *
 * @param pos position of the outermost field
 * @param wrapped accessor for the two inner levels
 * @param isOptional whether the outermost field at {@code pos} is optional
 */
Position3Accessor(int pos, Position2Accessor wrapped, boolean isOptional) {
  this.p0 = pos;
  this.p1 = wrapped.p0;
  this.p2 = wrapped.p1;
  this.type = wrapped.type();
  this.javaClass = wrapped.javaClass();
  // optional if this level is optional or the wrapped path already contains an optional field
  this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath();
}

@Override
Expand All @@ -148,6 +164,11 @@ public Type type() {
return type;
}

/**
 * Returns the value computed in the constructor: true if this level was flagged optional or the
 * wrapped accessor already had an optional field in its path.
 */
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + ", " + p2 + "], type=" + type + ")";
Expand All @@ -157,10 +178,12 @@ public String toString() {
private static class WrappedPositionAccessor implements Accessor<StructLike> {
private final int position;
private final Accessor<StructLike> accessor;
private final boolean hasOptionalFieldInPath;

/**
 * Creates an accessor that pairs the position {@code pos} with a delegate accessor.
 *
 * @param pos position of the outer field
 * @param accessor accessor for the remainder of the path
 * @param isOptional whether the outer field at {@code pos} is optional
 */
WrappedPositionAccessor(int pos, Accessor<StructLike> accessor, boolean isOptional) {
  this.position = pos;
  this.accessor = accessor;
  // optional if this level is optional or the delegate's path already contains an optional field
  this.hasOptionalFieldInPath = isOptional || accessor.hasOptionalFieldInPath();
}

@Override
Expand All @@ -177,27 +200,32 @@ public Type type() {
return accessor.type();
}

/**
 * Returns the value computed in the constructor: true if this level was flagged optional or the
 * wrapped accessor reported an optional field in its path.
 */
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

/** Renders this accessor's position followed by the string form of the wrapped accessor. */
@Override
public String toString() {
return "WrappedAccessor(position=" + position + ", wrapped=" + accessor + ")";
}
}

private static Accessor<StructLike> newAccessor(int pos, Type type) {
return new PositionAccessor(pos, type);
private static Accessor<StructLike> newAccessor(int pos, boolean isOptional, Type type) {
return new PositionAccessor(pos, type, isOptional);
}

/**
 * Creates an accessor that first selects the field at {@code pos} and then continues with
 * {@code accessor}.
 *
 * <p>When the field at {@code pos} is required and the wrapped accessor is a {@link
 * PositionAccessor} or {@link Position2Accessor}, the positions are combined into a {@link
 * Position2Accessor} or {@link Position3Accessor}. In every other case, including whenever the
 * field at {@code pos} is optional, a {@link WrappedPositionAccessor} is returned.
 *
 * @param pos position of the outer field
 * @param isOptional whether the outer field is optional
 * @param accessor accessor for the remainder of the path
 * @return an accessor for the full path that also records whether any field on it is optional
 */
private static Accessor<StructLike> newAccessor(
    int pos, boolean isOptional, Accessor<StructLike> accessor) {
  if (isOptional) {
    // the wrapped position handles null layers
    return new WrappedPositionAccessor(pos, accessor, isOptional);
  } else if (accessor.getClass() == PositionAccessor.class) {
    return new Position2Accessor(pos, (PositionAccessor) accessor, isOptional);
  } else if (accessor instanceof Position2Accessor) {
    return new Position3Accessor(pos, (Position2Accessor) accessor, isOptional);
  } else {
    return new WrappedPositionAccessor(pos, accessor, isOptional);
  }
}

Expand Down Expand Up @@ -226,7 +254,7 @@ public Map<Integer, Accessor<StructLike>> struct(
}

// Add an accessor for this field as an Object (may or may not be primitive).
accessors.put(field.fieldId(), newAccessor(i, field.type()));
accessors.put(field.fieldId(), newAccessor(i, field.isOptional(), field.type()));
}

return accessors;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ public Type type() {

/**
 * Returns whether evaluating this reference can yield null.
 *
 * @return {@code true} if the referenced field or any ancestor on its access path is optional
 */
@Override
public boolean producesNull() {
  // A leaf field can evaluate to null if it is optional itself or if any ancestor on its access
  // path is optional, so the answer comes from the accessor rather than from the field alone.
  return accessor.hasOptionalFieldInPath();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.expressions;

import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
import org.apache.iceberg.Accessor;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.Types;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class TestBoundReference {
  // Builds a schema made of one chain of nested structs that ends in an int leaf. For an
  // optionalList with n entries the chain is
  //   s1: struct(s2: struct(... s{n-1}: struct(<leafName>: int)))
  // Entry i (0-based, i < n - 1) decides whether struct s{i+1} is optional or required, and the
  // last entry decides whether the leaf is optional or required. Field ids run from 1 for the
  // outermost field to n for the leaf. A single-entry list yields only the leaf, with no struct
  // around it.
  private static Schema buildSchemaFromOptionalList(List<Boolean> optionalList, String leafName) {
    Preconditions.checkArgument(
        optionalList != null && !optionalList.isEmpty(), "optionalList must not be null or empty");

    int depth = optionalList.size();
    Types.NestedField leafField;
    if (optionalList.get(depth - 1)) {
      leafField = optional(depth, leafName, Types.IntegerType.get());
    } else {
      leafField = required(depth, leafName, Types.IntegerType.get());
    }

    // wrap from the innermost struct outwards; a struct's id equals its 1-based nesting level
    Types.StructType struct = Types.StructType.of(leafField);
    for (int fieldId = depth - 1; fieldId >= 1; fieldId--) {
      String structName = "s" + fieldId;
      Types.NestedField wrapper =
          optionalList.get(fieldId - 1)
              ? optional(fieldId, structName, struct)
              : required(fieldId, structName, struct);
      struct = Types.StructType.of(wrapper);
    }

    return new Schema(struct.fields());
  }

  private static Stream<Arguments> producesNullCases() {
    // Each case carries two arguments:
    // - a list of booleans, outermost field first, saying whether each field on the path is
    //   optional (true) or required (false); the last entry describes the leaf. For example,
    //   [false, true, false] builds s1.s2.leaf with s1 required, s2 optional and leaf required.
    // - the value producesNull() is expected to return for a BoundReference to the leaf.
    return Stream.of(
        // basic fields, no struct levels
        Arguments.of(Arrays.asList(false), false),
        Arguments.of(Arrays.asList(true), true),
        // one level
        Arguments.of(Arrays.asList(false, false), false),
        Arguments.of(Arrays.asList(false, true), true),
        Arguments.of(Arrays.asList(true, false), true),
        // two levels
        Arguments.of(Arrays.asList(false, false, false), false),
        Arguments.of(Arrays.asList(false, false, true), true),
        Arguments.of(Arrays.asList(true, false, false), true),
        Arguments.of(Arrays.asList(false, true, false), true),
        // three levels
        Arguments.of(Arrays.asList(false, false, false, false), false),
        Arguments.of(Arrays.asList(false, false, false, true), true),
        Arguments.of(Arrays.asList(true, false, false, false), true),
        Arguments.of(Arrays.asList(false, true, false, false), true),
        // four levels
        Arguments.of(Arrays.asList(false, false, false, false, false), false),
        Arguments.of(Arrays.asList(false, false, false, false, true), true),
        Arguments.of(Arrays.asList(true, false, false, false, false), true),
        Arguments.of(Arrays.asList(false, true, true, true, false), true));
  }

  @ParameterizedTest
  @MethodSource("producesNullCases")
  public void testProducesNull(List<Boolean> optionalList, boolean expectedProducesNull) {
    String leafName = "leaf";
    Schema schema = buildSchemaFromOptionalList(optionalList, leafName);

    // the leaf always gets the highest id, which equals the number of entries in the list
    int leafId = optionalList.size();
    BoundReference<Integer> ref =
        new BoundReference<>(schema.findField(leafId), schema.accessorForField(leafId), leafName);

    assertThat(ref.producesNull()).isEqualTo(expectedProducesNull);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,7 @@ public void testNotNull() {
shouldRead =
new ParquetBloomRowGroupFilter(SCHEMA, notNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
assertThat(shouldRead)
.as("Should read: this field is required and are always not-null")
.isTrue();
assertThat(shouldRead).as("Should read: bloom filter doesn't help").isTrue();
}

@Test
Expand All @@ -323,8 +321,8 @@ public void testIsNull() {
new ParquetBloomRowGroupFilter(SCHEMA, isNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
assertThat(shouldRead)
.as("Should skip: this field is required and are always not-null")
.isFalse();
.as("Should read: required nested field can still be null if any ancestor is optional")
.isTrue();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -613,4 +613,21 @@ public void testComplexTypeFilter() {
assertEquals("Should return all expected rows", ImmutableList.of(row(1)), result);
sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
}

@TestTemplate
public void testRequiredNestedFieldInOptionalStructFilter() {
  // address is a nullable struct whose only member, street, is declared NOT NULL
  String table = tableName("nested_struct_table");
  String createTable =
      "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>) "
          + "USING iceberg";
  sql(createTable, table);

  // id 0 has a null struct, so address.street is null for that row despite street being required
  sql("INSERT INTO %s VALUES (0, NULL)", table);
  sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", table);

  List<Object[]> nullStreetIds = sql("SELECT id FROM %s WHERE address.street IS NULL", table);

  assertEquals("Should return all expected rows", ImmutableList.of(row(0)), nullStreetIds);
  sql("DROP TABLE IF EXISTS %s", table);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -613,4 +613,21 @@ public void testComplexTypeFilter() {
assertEquals("Should return all expected rows", ImmutableList.of(row(1)), result);
sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
}

@TestTemplate
public void testRequiredNestedFieldInOptionalStructFilter() {
  // address is a nullable struct whose only member, street, is declared NOT NULL
  String table = tableName("nested_struct_table");
  String createTable =
      "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>) "
          + "USING iceberg";
  sql(createTable, table);

  // id 0 has a null struct, so address.street is null for that row despite street being required
  sql("INSERT INTO %s VALUES (0, NULL)", table);
  sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", table);

  List<Object[]> nullStreetIds = sql("SELECT id FROM %s WHERE address.street IS NULL", table);

  assertEquals("Should return all expected rows", ImmutableList.of(row(0)), nullStreetIds);
  sql("DROP TABLE IF EXISTS %s", table);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -613,4 +613,21 @@ public void testComplexTypeFilter() {
assertEquals("Should return all expected rows", ImmutableList.of(row(1)), result);
sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
}

@TestTemplate
public void testRequiredNestedFieldInOptionalStructFilter() {
  // address is a nullable struct whose only member, street, is declared NOT NULL
  String table = tableName("nested_struct_table");
  String createTable =
      "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>) "
          + "USING iceberg";
  sql(createTable, table);

  // id 0 has a null struct, so address.street is null for that row despite street being required
  sql("INSERT INTO %s VALUES (0, NULL)", table);
  sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", table);

  List<Object[]> nullStreetIds = sql("SELECT id FROM %s WHERE address.street IS NULL", table);

  assertEquals("Should return all expected rows", ImmutableList.of(row(0)), nullStreetIds);
  sql("DROP TABLE IF EXISTS %s", table);
}
}