Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/src/main/java/org/apache/iceberg/Accessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,9 @@ public interface Accessor<T> extends Serializable {
Object get(T container);

Type type();

/**
 * Returns true if the current field or any ancestor in the access path is optional.
 *
 * <p>This default implementation always returns false; accessors that record the optionality of
 * the fields along their path override it.
 *
 * @return whether the accessed field, or any field on the path leading to it, is optional
 */
default boolean hasOptionalFieldInPath() {
return false;
}
}
50 changes: 39 additions & 11 deletions api/src/main/java/org/apache/iceberg/Accessors.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,13 @@ private static class PositionAccessor implements Accessor<StructLike> {
private final int position;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

PositionAccessor(int pos, Type type) {
PositionAccessor(int pos, Type type, boolean isOptional) {
this.position = pos;
this.type = type;
this.javaClass = type.typeId().javaClass();
this.hasOptionalFieldInPath = isOptional;
}

@Override
Expand All @@ -84,6 +86,11 @@ public Class<?> javaClass() {
return javaClass;
}

/** Returns the optionality flag that was passed to this accessor's constructor. */
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + position + "], type=" + type + ")";
Expand All @@ -95,12 +102,14 @@ private static class Position2Accessor implements Accessor<StructLike> {
private final int p1;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

Position2Accessor(int pos, PositionAccessor wrapped) {
Position2Accessor(int pos, PositionAccessor wrapped, boolean isOptional) {
this.p0 = pos;
this.p1 = wrapped.position();
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath();
}

@Override
Expand All @@ -117,6 +126,11 @@ public Class<?> javaClass() {
return javaClass;
}

/**
 * Returns the flag computed at construction: true if this accessor was created as optional or
 * the wrapped accessor reported an optional field in its path.
 */
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + "], type=" + type + ")";
Expand All @@ -129,13 +143,15 @@ private static class Position3Accessor implements Accessor<StructLike> {
private final int p2;
private final Type type;
private final Class<?> javaClass;
private final boolean hasOptionalFieldInPath;

Position3Accessor(int pos, Position2Accessor wrapped) {
Position3Accessor(int pos, Position2Accessor wrapped, boolean isOptional) {
this.p0 = pos;
this.p1 = wrapped.p0;
this.p2 = wrapped.p1;
this.type = wrapped.type();
this.javaClass = wrapped.javaClass();
this.hasOptionalFieldInPath = isOptional || wrapped.hasOptionalFieldInPath();
}

@Override
Expand All @@ -148,6 +164,11 @@ public Type type() {
return type;
}

/**
 * Returns the flag computed at construction: true if this accessor was created as optional or
 * the wrapped two-position accessor reported an optional field in its path.
 */
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "Accessor(positions=[" + p0 + ", " + p1 + ", " + p2 + "], type=" + type + ")";
Expand All @@ -157,10 +178,12 @@ public String toString() {
private static class WrappedPositionAccessor implements Accessor<StructLike> {
private final int position;
private final Accessor<StructLike> accessor;
private final boolean hasOptionalFieldInPath;

WrappedPositionAccessor(int pos, Accessor<StructLike> accessor) {
WrappedPositionAccessor(int pos, Accessor<StructLike> accessor, boolean isOptional) {
this.position = pos;
this.accessor = accessor;
this.hasOptionalFieldInPath = isOptional || accessor.hasOptionalFieldInPath();
}

@Override
Expand All @@ -177,27 +200,32 @@ public Type type() {
return accessor.type();
}

/**
 * Returns the flag computed at construction: true if this accessor was created as optional or
 * the wrapped accessor reported an optional field in its path.
 */
@Override
public boolean hasOptionalFieldInPath() {
return hasOptionalFieldInPath;
}

@Override
public String toString() {
return "WrappedAccessor(position=" + position + ", wrapped=" + accessor + ")";
}
}

private static Accessor<StructLike> newAccessor(int pos, Type type) {
return new PositionAccessor(pos, type);
private static Accessor<StructLike> newAccessor(int pos, boolean isOptional, Type type) {
return new PositionAccessor(pos, type, isOptional);
}

private static Accessor<StructLike> newAccessor(
int pos, boolean isOptional, Accessor<StructLike> accessor) {
if (isOptional) {
// the wrapped position handles null layers
return new WrappedPositionAccessor(pos, accessor);
return new WrappedPositionAccessor(pos, accessor, isOptional);
} else if (accessor.getClass() == PositionAccessor.class) {
return new Position2Accessor(pos, (PositionAccessor) accessor);
return new Position2Accessor(pos, (PositionAccessor) accessor, isOptional);
} else if (accessor instanceof Position2Accessor) {
return new Position3Accessor(pos, (Position2Accessor) accessor);
return new Position3Accessor(pos, (Position2Accessor) accessor, isOptional);
} else {
return new WrappedPositionAccessor(pos, accessor);
return new WrappedPositionAccessor(pos, accessor, isOptional);
}
}

Expand Down Expand Up @@ -226,7 +254,7 @@ public Map<Integer, Accessor<StructLike>> struct(
}

// Add an accessor for this field as an Object (may or may not be primitive).
accessors.put(field.fieldId(), newAccessor(i, field.type()));
accessors.put(field.fieldId(), newAccessor(i, field.isOptional(), field.type()));
}

return accessors;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ public Type type() {

@Override
public boolean producesNull() {
return field.isOptional();
// A leaf field can evaluate to null if it is optional itself or if any ancestor on the
// path is optional, even when the leaf is declared required.
return accessor.hasOptionalFieldInPath();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.expressions;

import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
import org.apache.iceberg.Accessor;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.Types;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class TestBoundReference {
  /**
   * Builds a schema whose single top-level field leads, through a chain of nested structs, to one
   * integer leaf: {@code s1: struct(s2: struct(... struct(leaf: int)))}.
   *
   * <p>The last entry of {@code optionalList} decides whether the leaf is optional; every earlier
   * entry at index {@code i} decides whether struct {@code s(i + 1)} is optional. A single-entry
   * list therefore produces a schema that contains only the leaf. Field ids run from 1 for the
   * outermost field up to {@code optionalList.size()} for the leaf.
   */
  private static Schema buildSchemaFromOptionalList(List<Boolean> optionalList, String leafName) {
    Preconditions.checkArgument(
        optionalList != null && !optionalList.isEmpty(), "optionalList must not be null or empty");

    int leafId = optionalList.size();
    Types.NestedField innermost;
    if (optionalList.get(leafId - 1)) {
      innermost = optional(leafId, leafName, Types.IntegerType.get());
    } else {
      innermost = required(leafId, leafName, Types.IntegerType.get());
    }

    Types.StructType nested = Types.StructType.of(innermost);

    // wrap from the innermost struct outwards; struct s{id} is driven by optionalList[id - 1]
    for (int structId = leafId - 1; structId >= 1; structId--) {
      String structName = "s" + structId;
      Types.NestedField wrapper;
      if (optionalList.get(structId - 1)) {
        wrapper = optional(structId, structName, nested);
      } else {
        wrapper = required(structId, structName, nested);
      }

      nested = Types.StructType.of(wrapper);
    }

    return new Schema(nested.fields());
  }

  /** Collects the per-field optionality flags of one access path, outermost field first. */
  private static List<Boolean> flags(Boolean... optionalFlags) {
    return Arrays.asList(optionalFlags);
  }

  /**
   * Supplies the arguments for {@link #testProducesNull(List, boolean)}.
   *
   * <p>Each case pairs the optionality flags of an access path (outermost field first, leaf last)
   * with the value that {@code producesNull()} is expected to return for the leaf. For example,
   * {@code [false, true, false]} describes {@code s1.s2.leaf} with a required {@code s1}, an
   * optional {@code s2} and a required leaf.
   */
  private static Stream<Arguments> producesNullCases() {
    return Stream.of(
        // leaf only, no enclosing struct
        Arguments.of(flags(false), false),
        Arguments.of(flags(true), true),
        // one enclosing struct
        Arguments.of(flags(false, false), false),
        Arguments.of(flags(false, true), true),
        Arguments.of(flags(true, false), true),
        // two enclosing structs
        Arguments.of(flags(false, false, false), false),
        Arguments.of(flags(false, false, true), true),
        Arguments.of(flags(true, false, false), true),
        Arguments.of(flags(false, true, false), true),
        // three enclosing structs
        Arguments.of(flags(false, false, false, false), false),
        Arguments.of(flags(false, false, false, true), true),
        Arguments.of(flags(true, false, false, false), true),
        Arguments.of(flags(false, true, false, false), true),
        // four enclosing structs
        Arguments.of(flags(false, false, false, false, false), false),
        Arguments.of(flags(false, false, false, false, true), true),
        Arguments.of(flags(true, false, false, false, false), true),
        Arguments.of(flags(false, true, true, true, false), true));
  }

  /**
   * Checks that a reference bound to the leaf reports {@code producesNull()} as expected for the
   * given combination of optional and required fields along its path.
   */
  @ParameterizedTest
  @MethodSource("producesNullCases")
  public void testProducesNull(List<Boolean> optionalList, boolean expectedProducesNull) {
    final String leafName = "leaf";
    // the schema builder assigns the leaf the highest id, which equals the number of flags
    final int leafId = optionalList.size();

    Schema schema = buildSchemaFromOptionalList(optionalList, leafName);
    BoundReference<Integer> ref =
        new BoundReference<>(schema.findField(leafId), schema.accessorForField(leafId), leafName);

    assertThat(ref.producesNull()).isEqualTo(expectedProducesNull);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,7 @@ public void testNotNull() {
shouldRead =
new ParquetBloomRowGroupFilter(SCHEMA, notNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
assertThat(shouldRead)
.as("Should read: this field is required and are always not-null")
.isTrue();
assertThat(shouldRead).as("Should read: bloom filter doesn't help").isTrue();
}

@Test
Expand All @@ -323,8 +321,8 @@ public void testIsNull() {
new ParquetBloomRowGroupFilter(SCHEMA, isNull("struct_not_null.int_field"))
.shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
assertThat(shouldRead)
.as("Should skip: this field is required and are always not-null")
.isFalse();
.as("Should read: required nested field can still be null if any ancestor is optional")
.isTrue();
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -613,4 +613,21 @@ public void testComplexTypeFilter() {
assertEquals("Should return all expected rows", ImmutableList.of(row(1)), result);
sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
}

/**
 * Verifies that an {@code IS NULL} filter on a NOT NULL field nested inside a struct column
 * returns the row whose struct value is itself null.
 */
@TestTemplate
public void testRequiredNestedFieldInOptionalStructFilter() {
String nestedStructTable = tableName("nested_struct_table");
// address is declared without NOT NULL, while its street member is declared NOT NULL
sql(
"CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>) "
+ "USING iceberg",
nestedStructTable);
// id 0 carries a null struct; id 1 carries a populated struct
sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable);
sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable);

List<Object[]> result =
sql("SELECT id FROM %s WHERE address.street IS NULL", nestedStructTable);

// only the row with the null struct is expected to match the filter
assertEquals("Should return all expected rows", ImmutableList.of(row(0)), result);
sql("DROP TABLE IF EXISTS %s", nestedStructTable);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -613,4 +613,21 @@ public void testComplexTypeFilter() {
assertEquals("Should return all expected rows", ImmutableList.of(row(1)), result);
sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
}

/**
 * Verifies that an {@code IS NULL} filter on a NOT NULL field nested inside a struct column
 * returns the row whose struct value is itself null.
 */
@TestTemplate
public void testRequiredNestedFieldInOptionalStructFilter() {
String nestedStructTable = tableName("nested_struct_table");
// address is declared without NOT NULL, while its street member is declared NOT NULL
sql(
"CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>) "
+ "USING iceberg",
nestedStructTable);
// id 0 carries a null struct; id 1 carries a populated struct
sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable);
sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable);

List<Object[]> result =
sql("SELECT id FROM %s WHERE address.street IS NULL", nestedStructTable);

// only the row with the null struct is expected to match the filter
assertEquals("Should return all expected rows", ImmutableList.of(row(0)), result);
sql("DROP TABLE IF EXISTS %s", nestedStructTable);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -613,4 +613,21 @@ public void testComplexTypeFilter() {
assertEquals("Should return all expected rows", ImmutableList.of(row(1)), result);
sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
}

/**
 * Verifies that an {@code IS NULL} filter on a NOT NULL field nested inside a struct column
 * returns the row whose struct value is itself null.
 */
@TestTemplate
public void testRequiredNestedFieldInOptionalStructFilter() {
String nestedStructTable = tableName("nested_struct_table");
// address is declared without NOT NULL, while its street member is declared NOT NULL
sql(
"CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT NULL>) "
+ "USING iceberg",
nestedStructTable);
// id 0 carries a null struct; id 1 carries a populated struct
sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable);
sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable);

List<Object[]> result =
sql("SELECT id FROM %s WHERE address.street IS NULL", nestedStructTable);

// only the row with the null struct is expected to match the filter
assertEquals("Should return all expected rows", ImmutableList.of(row(0)), result);
sql("DROP TABLE IF EXISTS %s", nestedStructTable);
}
}