diff --git a/api/src/main/java/com/netflix/iceberg/expressions/Binder.java b/api/src/main/java/com/netflix/iceberg/expressions/Binder.java index e024a7c7731f..b896e0ce6fcc 100644 --- a/api/src/main/java/com/netflix/iceberg/expressions/Binder.java +++ b/api/src/main/java/com/netflix/iceberg/expressions/Binder.java @@ -51,31 +51,63 @@ private Binder() { * * @param struct The {@link StructType struct type} to resolve references by name. * @param expr An {@link Expression expression} to rewrite with bound references. + * @param caseSensitive A boolean flag to control whether the bind should enforce case sensitivity. * @return the expression rewritten with bound references * @throws ValidationException if literals do not match bound references * @throws IllegalStateException if any references are already bound */ public static Expression bind(StructType struct, - Expression expr) { - return ExpressionVisitors.visit(expr, new BindVisitor(struct)); + Expression expr, + boolean caseSensitive) { + return ExpressionVisitors.visit(expr, new BindVisitor(struct, caseSensitive)); } - public static Set boundReferences(StructType struct, List exprs) { + /** + * Replaces all unbound/named references with bound references to fields in the given struct, + * defaulting to case sensitive mode. + * + * Access modifier is package-private, to only allow use from existing tests. + * + *
<p>
+ * When a reference is resolved, any literal used in a predicate for that field is converted to + * the field's type using {@link Literal#to(Type)}. If automatic conversion to that type isn't + * allowed, a {@link ValidationException validation exception} is thrown. + *
<p>
+ * The result expression may be simplified when constructed. For example, {@code isNull("a")} is + * replaced with {@code alwaysFalse()} when {@code "a"} is resolved to a required field. + *
<p>
+ * The expression cannot contain references that are already bound, or an + * {@link IllegalStateException} will be thrown. + * + * @param struct The {@link StructType struct type} to resolve references by name. + * @param expr An {@link Expression expression} to rewrite with bound references. + * @return the expression rewritten with bound references + * + * @throws IllegalStateException if any references are already bound + */ + static Expression bind(StructType struct, + Expression expr) { + return Binder.bind(struct, expr, true); + } + + public static Set boundReferences(StructType struct, List exprs, boolean caseSensitive) { if (exprs == null) { return ImmutableSet.of(); } ReferenceVisitor visitor = new ReferenceVisitor(); for (Expression expr : exprs) { - ExpressionVisitors.visit(bind(struct, expr), visitor); + ExpressionVisitors.visit(bind(struct, expr, caseSensitive), visitor); } return visitor.references; } private static class BindVisitor extends ExpressionVisitor { private final StructType struct; + private final boolean caseSensitive; - private BindVisitor(StructType struct) { + private BindVisitor(StructType struct, boolean caseSensitive) { this.struct = struct; + this.caseSensitive = caseSensitive; } @Override @@ -110,7 +142,7 @@ public Expression predicate(BoundPredicate pred) { @Override public Expression predicate(UnboundPredicate pred) { - return pred.bind(struct); + return pred.bind(struct, caseSensitive); } } diff --git a/api/src/main/java/com/netflix/iceberg/expressions/Evaluator.java b/api/src/main/java/com/netflix/iceberg/expressions/Evaluator.java index 38541663c587..fc6eacdc1a24 100644 --- a/api/src/main/java/com/netflix/iceberg/expressions/Evaluator.java +++ b/api/src/main/java/com/netflix/iceberg/expressions/Evaluator.java @@ -44,7 +44,7 @@ private EvalVisitor visitor() { } public Evaluator(Types.StructType struct, Expression unbound) { - this.expr = Binder.bind(struct, unbound); + this.expr = Binder.bind(struct, unbound, true); } public 
boolean eval(StructLike data) { diff --git a/api/src/main/java/com/netflix/iceberg/expressions/InclusiveManifestEvaluator.java b/api/src/main/java/com/netflix/iceberg/expressions/InclusiveManifestEvaluator.java index cac617d777a1..6493273a543b 100644 --- a/api/src/main/java/com/netflix/iceberg/expressions/InclusiveManifestEvaluator.java +++ b/api/src/main/java/com/netflix/iceberg/expressions/InclusiveManifestEvaluator.java @@ -54,7 +54,7 @@ private ManifestEvalVisitor visitor() { public InclusiveManifestEvaluator(PartitionSpec spec, Expression rowFilter) { this.struct = spec.partitionType(); - this.expr = Binder.bind(struct, rewriteNot(Projections.inclusive(spec).project(rowFilter))); + this.expr = Binder.bind(struct, rewriteNot(Projections.inclusive(spec).project(rowFilter)), true); } /** diff --git a/api/src/main/java/com/netflix/iceberg/expressions/InclusiveMetricsEvaluator.java b/api/src/main/java/com/netflix/iceberg/expressions/InclusiveMetricsEvaluator.java index 26c17e4f6078..54cc0be7f649 100644 --- a/api/src/main/java/com/netflix/iceberg/expressions/InclusiveMetricsEvaluator.java +++ b/api/src/main/java/com/netflix/iceberg/expressions/InclusiveMetricsEvaluator.java @@ -56,7 +56,7 @@ private MetricsEvalVisitor visitor() { public InclusiveMetricsEvaluator(Schema schema, Expression unbound) { this.schema = schema; this.struct = schema.asStruct(); - this.expr = Binder.bind(struct, rewriteNot(unbound)); + this.expr = Binder.bind(struct, rewriteNot(unbound), true); } /** diff --git a/api/src/main/java/com/netflix/iceberg/expressions/Projections.java b/api/src/main/java/com/netflix/iceberg/expressions/Projections.java index d9da0532c82f..b811e27d9d99 100644 --- a/api/src/main/java/com/netflix/iceberg/expressions/Projections.java +++ b/api/src/main/java/com/netflix/iceberg/expressions/Projections.java @@ -135,7 +135,7 @@ public Expression or(Expression leftResult, Expression rightResult) { @Override public Expression predicate(UnboundPredicate pred) { - Expression 
bound = pred.bind(spec.schema().asStruct()); + Expression bound = pred.bind(spec.schema().asStruct(), true); if (bound instanceof BoundPredicate) { return predicate((BoundPredicate) bound); diff --git a/api/src/main/java/com/netflix/iceberg/expressions/ResidualEvaluator.java b/api/src/main/java/com/netflix/iceberg/expressions/ResidualEvaluator.java index 290ae4afe034..610bdc5a13f9 100644 --- a/api/src/main/java/com/netflix/iceberg/expressions/ResidualEvaluator.java +++ b/api/src/main/java/com/netflix/iceberg/expressions/ResidualEvaluator.java @@ -170,7 +170,7 @@ public Expression predicate(BoundPredicate pred) { .projectStrict(part.name(), pred); if (strictProjection != null) { - Expression bound = strictProjection.bind(spec.partitionType()); + Expression bound = strictProjection.bind(spec.partitionType(), true); if (bound instanceof BoundPredicate) { // the predicate methods will evaluate and return alwaysTrue or alwaysFalse return super.predicate((BoundPredicate) bound); @@ -184,7 +184,7 @@ public Expression predicate(BoundPredicate pred) { @Override public Expression predicate(UnboundPredicate pred) { - Expression bound = pred.bind(spec.schema().asStruct()); + Expression bound = pred.bind(spec.schema().asStruct(), true); if (bound instanceof BoundPredicate) { Expression boundResidual = predicate((BoundPredicate) bound); diff --git a/api/src/main/java/com/netflix/iceberg/expressions/StrictMetricsEvaluator.java b/api/src/main/java/com/netflix/iceberg/expressions/StrictMetricsEvaluator.java index d3fa1df5321a..702c2557c908 100644 --- a/api/src/main/java/com/netflix/iceberg/expressions/StrictMetricsEvaluator.java +++ b/api/src/main/java/com/netflix/iceberg/expressions/StrictMetricsEvaluator.java @@ -57,7 +57,7 @@ private MetricsEvalVisitor visitor() { public StrictMetricsEvaluator(Schema schema, Expression unbound) { this.schema = schema; this.struct = schema.asStruct(); - this.expr = Binder.bind(struct, rewriteNot(unbound)); + this.expr = Binder.bind(struct, 
rewriteNot(unbound), true); } /** diff --git a/api/src/main/java/com/netflix/iceberg/expressions/UnboundPredicate.java b/api/src/main/java/com/netflix/iceberg/expressions/UnboundPredicate.java index 3523d1d4f9c0..da6f981f6679 100644 --- a/api/src/main/java/com/netflix/iceberg/expressions/UnboundPredicate.java +++ b/api/src/main/java/com/netflix/iceberg/expressions/UnboundPredicate.java @@ -44,8 +44,35 @@ public Expression negate() { return new UnboundPredicate<>(op().negate(), ref(), literal()); } - public Expression bind(Types.StructType struct) { - Types.NestedField field = struct.field(ref().name()); + /** + * Bind this UnboundPredicate, defaulting to case sensitive mode. + * + * Access modifier is package-private, to only allow use from existing tests. + * + * @param struct The {@link Types.StructType struct type} to resolve references by name. + * @return an {@link Expression} + * @throws ValidationException if literals do not match bound references, or if comparison on expression is invalid + */ + Expression bind(Types.StructType struct) { + return bind(struct, true); + } + + /** + * Bind this UnboundPredicate. + * + * @param struct The {@link Types.StructType struct type} to resolve references by name. + * @param caseSensitive A boolean flag to control whether the bind should enforce case sensitivity. 
+ * @return an {@link Expression} + * @throws ValidationException if literals do not match bound references, or if comparison on expression is invalid + */ + public Expression bind(Types.StructType struct, boolean caseSensitive) { + Types.NestedField field; + if (caseSensitive) { + field = struct.field(ref().name()); + } else { + field = struct.caseInsensitiveField(ref().name()); + } + ValidationException.check(field != null, "Cannot find field '%s' in struct: %s", ref().name(), struct); diff --git a/api/src/main/java/com/netflix/iceberg/types/Types.java b/api/src/main/java/com/netflix/iceberg/types/Types.java index 22111cfddb84..a4ef5ac2bd59 100644 --- a/api/src/main/java/com/netflix/iceberg/types/Types.java +++ b/api/src/main/java/com/netflix/iceberg/types/Types.java @@ -55,7 +55,7 @@ public class Types { private static final Pattern DECIMAL = Pattern.compile("decimal\\((\\d+),\\s+(\\d+)\\)"); public static PrimitiveType fromPrimitiveString(String typeString) { - String lowerTypeString = typeString.toLowerCase(Locale.ENGLISH); + String lowerTypeString = typeString.toLowerCase(Locale.ROOT); if (TYPES.containsKey(lowerTypeString)) { return TYPES.get(lowerTypeString); } @@ -516,6 +516,7 @@ public static StructType of(List fields) { // lazy values private transient List fieldList = null; private transient Map fieldsByName = null; + private transient Map fieldsByLowerCaseName = null; private transient Map fieldsById = null; private StructType(List fields) { @@ -535,6 +536,10 @@ public NestedField field(String name) { return lazyFieldsByName().get(name); } + public NestedField caseInsensitiveField(String name) { + return lazyFieldsByLowerCaseName().get(name.toLowerCase(Locale.ROOT)); + } + @Override public Type fieldType(String name) { NestedField field = field(name); @@ -600,6 +605,13 @@ private Map lazyFieldsByName() { return fieldsByName; } + private Map lazyFieldsByLowerCaseName() { + if (fieldsByLowerCaseName == null) { + indexFields(); + } + return 
fieldsByLowerCaseName; + } + private Map lazyFieldsById() { if (fieldsById == null) { indexFields(); @@ -609,12 +621,15 @@ private Map lazyFieldsById() { private void indexFields() { ImmutableMap.Builder byNameBuilder = ImmutableMap.builder(); + ImmutableMap.Builder byLowerCaseNameBuilder = ImmutableMap.builder(); ImmutableMap.Builder byIdBuilder = ImmutableMap.builder(); for (NestedField field : fields) { byNameBuilder.put(field.name(), field); + byLowerCaseNameBuilder.put(field.name().toLowerCase(Locale.ROOT), field); byIdBuilder.put(field.fieldId(), field); } this.fieldsByName = byNameBuilder.build(); + this.fieldsByLowerCaseName = byLowerCaseNameBuilder.build(); this.fieldsById = byIdBuilder.build(); } } diff --git a/api/src/test/java/com/netflix/iceberg/expressions/TestExpressionBinding.java b/api/src/test/java/com/netflix/iceberg/expressions/TestExpressionBinding.java index 14b95b0760d5..266e863774db 100644 --- a/api/src/test/java/com/netflix/iceberg/expressions/TestExpressionBinding.java +++ b/api/src/test/java/com/netflix/iceberg/expressions/TestExpressionBinding.java @@ -64,7 +64,19 @@ public void testBoundExpressionFails() { @Test public void testSingleReference() { Expression expr = not(equal("x", 7)); - TestHelpers.assertAllReferencesBound("Single reference", Binder.bind(STRUCT, expr)); + TestHelpers.assertAllReferencesBound("Single reference", Binder.bind(STRUCT, expr, true)); + } + + @Test + public void testCaseInsensitiveReference() { + Expression expr = not(equal("X", 7)); + TestHelpers.assertAllReferencesBound("Single reference", Binder.bind(STRUCT, expr, false)); + } + + @Test(expected = ValidationException.class) + public void testCaseSensitiveReference() { + Expression expr = not(equal("X", 7)); + Binder.bind(STRUCT, expr, true); } @Test diff --git a/api/src/test/java/com/netflix/iceberg/expressions/TestPredicateBinding.java b/api/src/test/java/com/netflix/iceberg/expressions/TestPredicateBinding.java index 433d20e1aa8a..c30986f5dd24 100644 --- 
a/api/src/test/java/com/netflix/iceberg/expressions/TestPredicateBinding.java +++ b/api/src/test/java/com/netflix/iceberg/expressions/TestPredicateBinding.java @@ -179,7 +179,7 @@ public void testLongToIntegerConversion() { Assert.assertEquals("Less than or equal below min should be alwaysFalse", Expressions.alwaysFalse(), lteqMin.bind(struct)); - Expression ltExpr = new UnboundPredicate<>(LT, ref("i"), (long) Integer.MAX_VALUE).bind(struct); + Expression ltExpr = new UnboundPredicate<>(LT, ref("i"), (long) Integer.MAX_VALUE).bind(struct, true); BoundPredicate ltMax = assertAndUnwrap(ltExpr); Assert.assertEquals("Should translate bound to Integer", (Integer) Integer.MAX_VALUE, ltMax.literal().value()); diff --git a/api/src/test/java/com/netflix/iceberg/transforms/TestProjection.java b/api/src/test/java/com/netflix/iceberg/transforms/TestProjection.java index 8d8a34d46a5c..f08fe3fef53b 100644 --- a/api/src/test/java/com/netflix/iceberg/transforms/TestProjection.java +++ b/api/src/test/java/com/netflix/iceberg/transforms/TestProjection.java @@ -71,7 +71,7 @@ public void testIdentityProjection() { UnboundPredicate projected = assertAndUnwrapUnbound(expr); // check inclusive the bound predicate to ensure the types are correct - BoundPredicate bound = assertAndUnwrap(predicate.bind(spec.schema().asStruct())); + BoundPredicate bound = assertAndUnwrap(predicate.bind(spec.schema().asStruct(), true)); Assert.assertEquals("Field name should match partition struct field", "id", projected.ref().name()); @@ -109,7 +109,7 @@ public void testStrictIdentityProjection() { UnboundPredicate projected = assertAndUnwrapUnbound(expr); // check inclusive the bound predicate to ensure the types are correct - BoundPredicate bound = assertAndUnwrap(predicate.bind(spec.schema().asStruct())); + BoundPredicate bound = assertAndUnwrap(predicate.bind(spec.schema().asStruct(), true)); Assert.assertEquals("Field name should match partition struct field", "id", projected.ref().name()); diff --git 
a/core/src/main/java/com/netflix/iceberg/BaseTableScan.java b/core/src/main/java/com/netflix/iceberg/BaseTableScan.java index 8915461b0e0c..154b1715dd05 100644 --- a/core/src/main/java/com/netflix/iceberg/BaseTableScan.java +++ b/core/src/main/java/com/netflix/iceberg/BaseTableScan.java @@ -130,7 +130,7 @@ public TableScan select(Collection columns) { // all of the filter columns are required requiredFieldIds.addAll( - Binder.boundReferences(table.schema().asStruct(), Collections.singletonList(rowFilter))); + Binder.boundReferences(table.schema().asStruct(), Collections.singletonList(rowFilter), true)); // all of the projection columns are required requiredFieldIds.addAll(TypeUtil.getProjectedIds(table.schema().select(columns))); diff --git a/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetDictionaryRowGroupFilter.java b/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetDictionaryRowGroupFilter.java index f7db1fbf7783..51d8c127094d 100644 --- a/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetDictionaryRowGroupFilter.java +++ b/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetDictionaryRowGroupFilter.java @@ -68,7 +68,7 @@ private EvalVisitor visitor() { public ParquetDictionaryRowGroupFilter(Schema schema, Expression unbound) { this.schema = schema; this.struct = schema.asStruct(); - this.expr = Binder.bind(struct, rewriteNot(unbound)); + this.expr = Binder.bind(struct, rewriteNot(unbound), true); } /** diff --git a/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetFilters.java b/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetFilters.java index 11613aa7d54f..b93a8d8cde10 100644 --- a/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetFilters.java +++ b/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetFilters.java @@ -161,7 +161,7 @@ public FilterPredicate predicate(BoundPredicate pred) { } protected Expression bind(UnboundPredicate pred) { - return pred.bind(schema.asStruct()); + return 
pred.bind(schema.asStruct(), true); } @Override @@ -189,7 +189,7 @@ private ConvertColumnFilterToParquet(Schema schema, String column) { protected Expression bind(UnboundPredicate pred) { // instead of binding the predicate using the top-level schema, bind it to the partition data - return pred.bind(partitionStruct); + return pred.bind(partitionStruct, true); } } diff --git a/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetMetricsRowGroupFilter.java b/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetMetricsRowGroupFilter.java index 869439085cb3..490eb167391d 100644 --- a/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetMetricsRowGroupFilter.java +++ b/parquet/src/main/java/com/netflix/iceberg/parquet/ParquetMetricsRowGroupFilter.java @@ -57,7 +57,7 @@ private MetricsEvalVisitor visitor() { public ParquetMetricsRowGroupFilter(Schema schema, Expression unbound) { this.schema = schema; this.struct = schema.asStruct(); - this.expr = Binder.bind(struct, rewriteNot(unbound)); + this.expr = Binder.bind(struct, rewriteNot(unbound), true); } /** diff --git a/spark/src/main/java/com/netflix/iceberg/spark/SparkExpressions.java b/spark/src/main/java/com/netflix/iceberg/spark/SparkExpressions.java index 99f0a81958b9..25f38ffaaf4a 100644 --- a/spark/src/main/java/com/netflix/iceberg/spark/SparkExpressions.java +++ b/spark/src/main/java/com/netflix/iceberg/spark/SparkExpressions.java @@ -336,7 +336,7 @@ private static com.netflix.iceberg.expressions.Expression filter( public static Expression convert(com.netflix.iceberg.expressions.Expression filter, Schema schema) { - return visit(Binder.bind(schema.asStruct(), filter), new ExpressionToSpark(schema)); + return visit(Binder.bind(schema.asStruct(), filter, true), new ExpressionToSpark(schema)); } private static class ExpressionToSpark extends ExpressionVisitors. 
diff --git a/spark/src/main/java/com/netflix/iceberg/spark/SparkSchemaUtil.java b/spark/src/main/java/com/netflix/iceberg/spark/SparkSchemaUtil.java index b63329fab1a7..bfe9390b7aee 100644 --- a/spark/src/main/java/com/netflix/iceberg/spark/SparkSchemaUtil.java +++ b/spark/src/main/java/com/netflix/iceberg/spark/SparkSchemaUtil.java @@ -202,7 +202,7 @@ public static Schema prune(Schema schema, StructType requestedType) { * @throws IllegalArgumentException if the Spark type does not match the Schema */ public static Schema prune(Schema schema, StructType requestedType, List filters) { - Set filterRefs = Binder.boundReferences(schema.asStruct(), filters); + Set filterRefs = Binder.boundReferences(schema.asStruct(), filters, true); return new Schema(visit(schema, new PruneColumnsWithoutReordering(requestedType, filterRefs)) .asNestedType() .asStructType() @@ -225,7 +225,7 @@ public static Schema prune(Schema schema, StructType requestedType, List filterRefs = Binder.boundReferences(schema.asStruct(), Collections.singletonList(filter)); + Set filterRefs = Binder.boundReferences(schema.asStruct(), Collections.singletonList(filter), true); return new Schema(visit(schema, new PruneColumnsWithoutReordering(requestedType, filterRefs)) .asNestedType() .asStructType()