From 671b1be536128566005549c48e16d33b1c63a69a Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Thu, 13 Feb 2025 13:50:16 -0800 Subject: [PATCH 1/7] API: Move Variant interfaces to API. --- .../java/org/apache/iceberg/types/Type.java | 3 +- .../apache/iceberg/variants/LogicalType.java | 0 .../apache/iceberg/variants/PhysicalType.java | 0 .../apache/iceberg/variants/Primitives.java | 0 .../org/apache/iceberg/variants/Variant.java | 19 +++++ .../apache/iceberg/variants/VariantArray.java | 0 .../apache/iceberg/variants/VariantData.java | 70 +++++++++++++++++++ .../iceberg/variants/VariantDataUtil.java | 49 +++++++++++++ .../iceberg/variants/VariantMetadata.java | 0 .../iceberg/variants/VariantObject.java | 0 .../iceberg/variants/VariantPrimitive.java | 0 .../apache/iceberg/variants/VariantValue.java | 0 12 files changed, 140 insertions(+), 1 deletion(-) rename {core => api}/src/main/java/org/apache/iceberg/variants/LogicalType.java (100%) rename {core => api}/src/main/java/org/apache/iceberg/variants/PhysicalType.java (100%) rename {core => api}/src/main/java/org/apache/iceberg/variants/Primitives.java (100%) rename {core => api}/src/main/java/org/apache/iceberg/variants/Variant.java (66%) rename {core => api}/src/main/java/org/apache/iceberg/variants/VariantArray.java (100%) create mode 100644 api/src/main/java/org/apache/iceberg/variants/VariantData.java create mode 100644 api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java rename {core => api}/src/main/java/org/apache/iceberg/variants/VariantMetadata.java (100%) rename {core => api}/src/main/java/org/apache/iceberg/variants/VariantObject.java (100%) rename {core => api}/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java (100%) rename {core => api}/src/main/java/org/apache/iceberg/variants/VariantValue.java (100%) diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 67e40df9e939..184a17416eae 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -26,6 +26,7 @@ import java.util.Map; import java.util.Objects; import org.apache.iceberg.StructLike; +import org.apache.iceberg.variants.Variant; public interface Type extends Serializable { enum TypeID { @@ -46,7 +47,7 @@ enum TypeID { STRUCT(StructLike.class), LIST(List.class), MAP(Map.class), - VARIANT(Object.class), + VARIANT(Variant.class), UNKNOWN(Object.class); private final Class javaClass; diff --git a/core/src/main/java/org/apache/iceberg/variants/LogicalType.java b/api/src/main/java/org/apache/iceberg/variants/LogicalType.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/LogicalType.java rename to api/src/main/java/org/apache/iceberg/variants/LogicalType.java diff --git a/core/src/main/java/org/apache/iceberg/variants/PhysicalType.java b/api/src/main/java/org/apache/iceberg/variants/PhysicalType.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/PhysicalType.java rename to api/src/main/java/org/apache/iceberg/variants/PhysicalType.java diff --git a/core/src/main/java/org/apache/iceberg/variants/Primitives.java b/api/src/main/java/org/apache/iceberg/variants/Primitives.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/Primitives.java rename to api/src/main/java/org/apache/iceberg/variants/Primitives.java diff --git a/core/src/main/java/org/apache/iceberg/variants/Variant.java b/api/src/main/java/org/apache/iceberg/variants/Variant.java similarity index 66% rename from core/src/main/java/org/apache/iceberg/variants/Variant.java rename to api/src/main/java/org/apache/iceberg/variants/Variant.java index b5606fa094b6..0d6ce227b484 100644 --- a/core/src/main/java/org/apache/iceberg/variants/Variant.java +++ b/api/src/main/java/org/apache/iceberg/variants/Variant.java @@ -25,4 +25,23 @@ public interface Variant { /** Returns the variant value. */ VariantValue value(); + + /** + * Query the value with a given path. + * + *

The path format is a subset of JSON path that supports: + *

+ * + *

If the query result is a list, the value is a variant array of results. + * + * @param path a JSON-path formatted string. + */ + VariantValue query(String path); + + static Variant of(VariantMetadata metadata, VariantValue value) { + return new VariantData(metadata, value); + } } diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantArray.java b/api/src/main/java/org/apache/iceberg/variants/VariantArray.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/VariantArray.java rename to api/src/main/java/org/apache/iceberg/variants/VariantArray.java diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantData.java b/api/src/main/java/org/apache/iceberg/variants/VariantData.java new file mode 100644 index 000000000000..4f696f1da977 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/variants/VariantData.java @@ -0,0 +1,70 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.base.Joiner; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; + +class VariantData implements Variant { + private static final Joiner DOT_JOINER = Joiner.on("."); + + private final VariantMetadata metadata; + private final VariantValue value; + + VariantData(VariantMetadata metadata, VariantValue value) { + Preconditions.checkArgument(metadata != null, "Invalid variant metadata: null"); + Preconditions.checkArgument(value != null, "Invalid variant value: null"); + this.metadata = metadata; + this.value = value; + } + + @Override + public VariantMetadata metadata() { + return metadata; + } + + @Override + public VariantValue value() { + return value; + } + + @Override + public VariantValue query(String path) { + VariantValue current = value; + List resolved = Lists.newArrayList(); + for (String field : VariantDataUtil.parsePath(path)) { + Preconditions.checkArgument( + current != null && current.type() == PhysicalType.OBJECT, + "Cannot find %s: %s=%s is not an object", + path, + DOT_JOINER.join(resolved), + current); + + current = current.asObject().get(field); + resolved.add(field); + } + + return current; + } +} diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java b/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java new file mode 100644 index 000000000000..f9c8589e9cf2 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java @@ -0,0 +1,49 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.base.Splitter; + +public class VariantDataUtil { + private VariantDataUtil() {} + + private static final Splitter DOT = Splitter.on("."); + private static final String ROOT = "$"; + + public static List parsePath(String path) { + Preconditions.checkArgument(path != null, "Invalid path: null"); + Preconditions.checkArgument( + !path.contains("[") && !path.contains("]"), "Unsupported path, contains bracket: %s", path); + Preconditions.checkArgument( + !path.contains("*"), "Unsupported path, contains wildcard: %s", path); + Preconditions.checkArgument( + !path.contains(".."), "Unsupported path, contains recursive descent: %s", path); + + List parts = DOT.splitToList(path); + Preconditions.checkArgument( + ROOT.equals(parts.get(0)), "Invalid path, does not start with %s: %s", ROOT, path); + + return parts.subList(1, parts.size()); + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java b/api/src/main/java/org/apache/iceberg/variants/VariantMetadata.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java rename to api/src/main/java/org/apache/iceberg/variants/VariantMetadata.java diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantObject.java b/api/src/main/java/org/apache/iceberg/variants/VariantObject.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/VariantObject.java rename to api/src/main/java/org/apache/iceberg/variants/VariantObject.java diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java b/api/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java rename to api/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantValue.java b/api/src/main/java/org/apache/iceberg/variants/VariantValue.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/VariantValue.java rename to api/src/main/java/org/apache/iceberg/variants/VariantValue.java From 253298b30474e33da27d43c4e94be67baa9fdfc4 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Mon, 17 Feb 2025 12:24:01 -0800 Subject: [PATCH 2/7] API: Add extract expression for variants. --- .../iceberg/expressions/BoundExtract.java | 80 +++++++ .../iceberg/expressions/BoundReference.java | 5 + .../apache/iceberg/expressions/BoundTerm.java | 5 + .../iceberg/expressions/BoundTransform.java | 7 + .../iceberg/expressions/Expressions.java | 4 + .../iceberg/expressions/UnboundExtract.java | 72 ++++++ .../iceberg/expressions/UnboundPredicate.java | 17 +- .../expressions/TestExpressionBinding.java | 210 +++++++++++++++++- 8 files changed, 397 insertions(+), 3 deletions(-) create mode 100644 api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java create mode 100644 api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java b/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java new file mode 100644 index 000000000000..9c37838dd379 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.expressions; + +import java.util.List; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.relocated.com.google.common.base.Joiner; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.variants.Variant; +import org.apache.iceberg.variants.VariantDataUtil; + +public class BoundExtract implements BoundTerm { + private final BoundReference ref; + private final String path; + private final List fields; + private final String fullFieldName; + private final Type type; + + BoundExtract(BoundReference ref, String path, Type type) { + this.ref = ref; + this.path = path; + this.fields = VariantDataUtil.parsePath(path); + this.fullFieldName = Joiner.on(".").join(fields); + this.type = type; + } + + @Override + public BoundReference ref() { + return ref; + } + + public String path() { + return path; + } + + public String fullFieldName() { + return fullFieldName; + } + + @Override + public Type type() { + return type; + } + + @Override + public boolean isEquivalentTo(BoundTerm other) { + if (other instanceof BoundExtract) { + BoundExtract that = (BoundExtract) other; + return ref.isEquivalentTo(that.ref) && fields.equals(that.fields) && type.equals(that.type); + } + + return false; + } + + @Override + public T eval(StructLike struct) { + throw new UnsupportedOperationException("Cannot evaluate " + this); + } + + @Override + public String toString() { + return "extract(" + ref + ", path=" + path + ", type=" + type + ")"; + } +} diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java index 0ff73632b1d6..0295fe518fc3 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java @@ -55,6 +55,11 @@ public Type type() { return field.type(); } + @Override + public boolean producesNull() { + return field.isOptional(); + } + @Override public String name() { return name; diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundTerm.java b/api/src/main/java/org/apache/iceberg/expressions/BoundTerm.java index 5ded3c903d19..5eb6769c4605 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundTerm.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundTerm.java @@ -31,6 +31,11 @@ public interface BoundTerm extends Bound, Term { /** Returns the type produced by this expression. */ Type type(); + /** Returns whether values produced by this expression may be null. */ + default boolean producesNull() { + return true; + } + /** Returns a {@link Comparator} for values produced by this term. */ default Comparator comparator() { return Comparators.forType(type().asPrimitiveType()); diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundTransform.java b/api/src/main/java/org/apache/iceberg/expressions/BoundTransform.java index 22271aaed9d5..9aa22d3b985f 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundTransform.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundTransform.java @@ -54,6 +54,13 @@ public Transform transform() { return transform; } + @Override + public boolean producesNull() { + // transforms must produce null for null input values + // transforms may produce null for non-null inputs when not order-preserving + return ref.producesNull() || !transform.preservesOrder(); + } + @Override public Type type() { return transform.getResultType(ref.type()); diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java index 7020b259b1b5..22626866725b 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java @@ -102,6 +102,10 @@ public static UnboundTerm truncate(String name, int width) { return new UnboundTransform<>(ref(name), Transforms.truncate(width)); } + public static UnboundTerm extract(String name, String path, String type) { + return new UnboundExtract<>(ref(name), path, type); + } + public static UnboundPredicate isNull(String name) { return new UnboundPredicate<>(Expression.Operation.IS_NULL, ref(name)); } diff --git a/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java b/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java new file mode 100644 index 000000000000..6c81a8ec2c2e --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java @@ -0,0 +1,72 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.expressions; + +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.variants.Variant; +import org.apache.iceberg.variants.VariantDataUtil; + +public class UnboundExtract implements UnboundTerm { + private final NamedReference ref; + private final String path; + private final Type.PrimitiveType type; + + public UnboundExtract(NamedReference ref, String path, String type) { + this.ref = ref; + this.path = path; + this.type = Types.fromPrimitiveString(type); + // verify that the path is well-formed + VariantDataUtil.parsePath(path); + } + + @Override + public BoundTerm bind(Types.StructType struct, boolean caseSensitive) { + BoundReference boundRef = ref.bind(struct, caseSensitive); + ValidationException.check( + Types.VariantType.get().equals(boundRef.type()), + "Cannot bind extract, not a variant: %s", + boundRef.name()); + ValidationException.check( + !type.equals(Types.UnknownType.get()), "Invalid type to extract: unknown"); + return new BoundExtract<>(boundRef, path, type); + } + + @Override + public NamedReference ref() { + return ref; + } + + public String path() { + return path; + } + + public Type type() { + return type; + } + + @Override + public String toString() { + return "extract(" + ref + ", path=" + path + ", type=" + type + ")"; + } +} diff --git a/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java b/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java index 04513086e042..4736ca4a8668 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java +++ b/api/src/main/java/org/apache/iceberg/expressions/UnboundPredicate.java @@ -27,6 +27,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.StructType; import org.apache.iceberg.util.CharSequenceSet; @@ -124,13 +125,17 @@ public Expression bind(StructType struct, boolean caseSensitive) { private Expression bindUnaryOperation(BoundTerm boundTerm) { switch (op()) { case IS_NULL: - if (boundTerm.ref().field().isRequired()) { + if (!boundTerm.producesNull()) { return Expressions.alwaysFalse(); + } else if (boundTerm.type().equals(Types.UnknownType.get())) { + return Expressions.alwaysTrue(); } return new BoundUnaryPredicate<>(Operation.IS_NULL, boundTerm); case NOT_NULL: - if (boundTerm.ref().field().isRequired()) { + if (!boundTerm.producesNull()) { return Expressions.alwaysTrue(); + } else if (boundTerm.type().equals(Types.UnknownType.get())) { + return Expressions.alwaysFalse(); } return new BoundUnaryPredicate<>(Operation.NOT_NULL, boundTerm); case IS_NAN: @@ -155,6 +160,14 @@ private boolean floatingType(Type.TypeID typeID) { } private Expression bindLiteralOperation(BoundTerm boundTerm) { + if (op() == Operation.STARTS_WITH || op() == Operation.NOT_STARTS_WITH) { + ValidationException.check( + boundTerm.type().equals(Types.StringType.get()), + "Term for STARTS_WITH or NOT_STARTS_WITH must produce a string: %s: %s", + boundTerm, + boundTerm.type()); + } + Literal lit = literal().to(boundTerm.type()); if (lit == null) { diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java index 8dccc4e6a5d6..5f2ab2e37cbf 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java @@ -23,12 +23,17 @@ import static org.apache.iceberg.expressions.Expressions.and; import static org.apache.iceberg.expressions.Expressions.bucket; import static org.apache.iceberg.expressions.Expressions.equal; +import static org.apache.iceberg.expressions.Expressions.extract; import static org.apache.iceberg.expressions.Expressions.greaterThan; +import static org.apache.iceberg.expressions.Expressions.isNull; import static org.apache.iceberg.expressions.Expressions.lessThan; import static org.apache.iceberg.expressions.Expressions.not; +import static org.apache.iceberg.expressions.Expressions.notNull; import static org.apache.iceberg.expressions.Expressions.notStartsWith; import static org.apache.iceberg.expressions.Expressions.or; import static org.apache.iceberg.expressions.Expressions.startsWith; +import static org.apache.iceberg.expressions.Expressions.truncate; +import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -38,6 +43,8 @@ import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.StructType; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.FieldSource; public class TestExpressionBinding { private static final StructType STRUCT = @@ -45,7 +52,10 @@ public class TestExpressionBinding { required(0, "x", Types.IntegerType.get()), required(1, "y", Types.IntegerType.get()), required(2, "z", Types.IntegerType.get()), - required(3, "data", Types.StringType.get())); + required(3, "data", Types.StringType.get()), + required(4, "var", Types.VariantType.get()), + optional(5, "nullable", Types.IntegerType.get()), + optional(6, "always_null", Types.UnknownType.get())); @Test public void testMissingReference() { @@ -212,4 +222,202 @@ public void testTransformExpressionBinding() { .as("Should use a bucket[16] transform") .hasToString("bucket[16]"); } + + @Test + public void testIsNullWithUnknown() { + Expression bound = Binder.bind(STRUCT, isNull("always_null")); + TestHelpers.assertAllReferencesBound("IsNull", bound); + assertThat(bound).isEqualTo(Expressions.alwaysTrue()); + } + + @Test + public void testNotNullWithUnknown() { + Expression bound = Binder.bind(STRUCT, notNull("always_null")); + TestHelpers.assertAllReferencesBound("NotNull", bound); + assertThat(bound).isEqualTo(Expressions.alwaysFalse()); + } + + @Test + public void testIsNullWithNullable() { + Expression bound = Binder.bind(STRUCT, isNull("nullable")); + TestHelpers.assertAllReferencesBound("IsNull", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.op()).isEqualTo(Expression.Operation.IS_NULL); + } + + @Test + public void testIsNullWithRequired() { + Expression bound = Binder.bind(STRUCT, isNull("x")); + TestHelpers.assertAllReferencesBound("IsNull", bound); + assertThat(bound).isEqualTo(Expressions.alwaysFalse()); + } + + @Test + public void testNotNullWithNullable() { + Expression bound = Binder.bind(STRUCT, notNull("nullable")); + TestHelpers.assertAllReferencesBound("NotNull", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.op()).isEqualTo(Expression.Operation.NOT_NULL); + } + + @Test + public void testNotNullWithRequired() { + Expression bound = Binder.bind(STRUCT, notNull("x")); + TestHelpers.assertAllReferencesBound("NotNull", bound); + assertThat(bound).isEqualTo(Expressions.alwaysTrue()); + } + + @Test + public void testIsNullWithNullableTransformedOrderPreserving() { + Expression bound = Binder.bind(STRUCT, isNull(truncate("nullable", 10))); + TestHelpers.assertAllReferencesBound("IsNull", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.op()).isEqualTo(Expression.Operation.IS_NULL); + } + + @Test + public void testIsNullWithRequiredTransformedOrderPreserving() { + Expression bound = Binder.bind(STRUCT, isNull(truncate("x", 10))); + TestHelpers.assertAllReferencesBound("IsNull", bound); + assertThat(bound).isEqualTo(Expressions.alwaysFalse()); + } + + @Test + public void testNotNullWithNullableTransformedOrderPreserving() { + Expression bound = Binder.bind(STRUCT, notNull(truncate("nullable", 10))); + TestHelpers.assertAllReferencesBound("NotNull", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.op()).isEqualTo(Expression.Operation.NOT_NULL); + } + + @Test + public void testNotNullWithRequiredTransformedOrderPreserving() { + Expression bound = Binder.bind(STRUCT, notNull(truncate("x", 10))); + TestHelpers.assertAllReferencesBound("NotNull", bound); + assertThat(bound).isEqualTo(Expressions.alwaysTrue()); + } + + @Test + public void testIsNullWithRequiredTransformedNonOrderPreserving() { + Expression bound = Binder.bind(STRUCT, isNull(bucket("x", 10))); + TestHelpers.assertAllReferencesBound("IsNull", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.op()).isEqualTo(Expression.Operation.IS_NULL); + } + + @Test + public void testNotNullWithRequiredTransformedNonOrderPreserving() { + Expression bound = Binder.bind(STRUCT, notNull(bucket("x", 10))); + TestHelpers.assertAllReferencesBound("NotNull", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.op()).isEqualTo(Expression.Operation.NOT_NULL); + } + + @Test + public void testIsNullWithRequiredVariant() { + Expression bound = Binder.bind(STRUCT, isNull(extract("var", "$.event_id", "long"))); + TestHelpers.assertAllReferencesBound("IsNull", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.op()).isEqualTo(Expression.Operation.IS_NULL); + } + + @Test + public void testNotNullWithRequiredVariant() { + Expression bound = Binder.bind(STRUCT, notNull(extract("var", "$.event_id", "long"))); + TestHelpers.assertAllReferencesBound("NotNull", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.op()).isEqualTo(Expression.Operation.NOT_NULL); + } + + @Test + public void testExtractExpressionBinding() { + Expression bound = Binder.bind(STRUCT, lessThan(extract("var", "$.event_id", "long"), 100)); + TestHelpers.assertAllReferencesBound("BoundExtract", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.op()).isEqualTo(Expression.Operation.LT); + assertThat(pred.asLiteralPredicate().literal().value()).isEqualTo(100L); // cast to long + assertThat(pred.term()).as("Should use a BoundExtract").isInstanceOf(BoundExtract.class); + BoundExtract boundExtract = (BoundExtract) pred.term(); + assertThat(boundExtract.ref().fieldId()).isEqualTo(4); + assertThat(boundExtract.fullFieldName()).isEqualTo("event_id"); + assertThat(boundExtract.type()).isEqualTo(Types.LongType.get()); + } + + @Test + public void testExtractExpressionNonVariant() { + assertThatThrownBy(() -> Binder.bind(STRUCT, lessThan(extract("x", "$.event_id", "long"), 100))) + .isInstanceOf(ValidationException.class) + .hasMessage("Cannot bind extract, not a variant: x"); + } + + private static final String[] VALID_PATHS = + new String[] { + "$", // root path + "$.event_id", + "$.event.id" + }; + + @ParameterizedTest + @FieldSource("VALID_PATHS") + public void testExtractExpressionBindingPaths(String path) { + Expression bound = Binder.bind(STRUCT, lessThan(extract("var", path, "long"), 100)); + TestHelpers.assertAllReferencesBound("BoundExtract", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.term()).as("Should use a BoundExtract").isInstanceOf(BoundExtract.class); + } + + private static final String[] INVALID_PATHS = + new String[] { + null, + "", + "event_id", // missing root + "$['event_id']", // uses bracket notation + "$..event_id", // uses recursive descent + "$.events[0].event_id", // uses position accessor + "$.events.*" // uses wildcard + }; + + @ParameterizedTest + @FieldSource("INVALID_PATHS") + public void testExtractBindingWithInvalidPath(String path) { + assertThatThrownBy(() -> Binder.bind(STRUCT, lessThan(extract("var", path, "long"), 100))) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + public void testExtractUnknown() { + assertThatThrownBy(() -> Binder.bind(STRUCT, isNull(extract("var", "$.field", "unknown")))) + .isInstanceOf(ValidationException.class) + .hasMessage("Invalid type to extract: unknown"); + } + + private static final String[] VALID_TYPES = + new String[] { + "boolean", + "int", + "long", + "float", + "double", + "decimal(9,2)", + "date", + "time", + "timestamp", + "timestamptz", + "timestamp_ns", + "timestamptz_ns", + "string", + "uuid", + "fixed[4]", + "binary", + }; + + @ParameterizedTest + @FieldSource("VALID_TYPES") + public void testExtractBindingWithTypes(String typeName) { + Expression bound = Binder.bind(STRUCT, notNull(extract("var", "$.field", typeName))); + TestHelpers.assertAllReferencesBound("BoundExtract", bound); + BoundPredicate pred = TestHelpers.assertAndUnwrap(bound); + assertThat(pred.term()).as("Should use a BoundExtract").isInstanceOf(BoundExtract.class); + assertThat(pred.term().type()).isEqualTo(Types.fromPrimitiveString(typeName)); + } } From 6a0fdc0607fd9416fc001b907c15305d0a6e6b80 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Mon, 17 Feb 2025 13:38:17 -0800 Subject: [PATCH 3/7] Apply spotless and check for a bound variant. --- .../iceberg/expressions/UnboundExtract.java | 31 +++++++++---------- .../org/apache/iceberg/variants/Variant.java | 5 +-- .../apache/iceberg/variants/VariantData.java | 31 +++++++++---------- .../iceberg/variants/VariantDataUtil.java | 31 +++++++++---------- 4 files changed, 45 insertions(+), 53 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java b/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java index 6c81a8ec2c2e..00805674ea0a 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java +++ b/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.expressions; import org.apache.iceberg.exceptions.ValidationException; diff --git a/api/src/main/java/org/apache/iceberg/variants/Variant.java b/api/src/main/java/org/apache/iceberg/variants/Variant.java index 0d6ce227b484..c34b1909c133 100644 --- a/api/src/main/java/org/apache/iceberg/variants/Variant.java +++ b/api/src/main/java/org/apache/iceberg/variants/Variant.java @@ -30,9 +30,10 @@ public interface Variant { * Query the value with a given path. * *

The path format is a subset of JSON path that supports: + * *

* *

If the query result is a list, the value is a variant array of results. diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantData.java b/api/src/main/java/org/apache/iceberg/variants/VariantData.java index 4f696f1da977..c2126b3260e8 100644 --- a/api/src/main/java/org/apache/iceberg/variants/VariantData.java +++ b/api/src/main/java/org/apache/iceberg/variants/VariantData.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.util.List; diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java b/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java index f9c8589e9cf2..4a81269c241b 100644 --- a/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java +++ b/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.util.List; From ca7473a43e1757ed800ad3213c7b9cb3984f7522 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Thu, 20 Feb 2025 11:34:20 -0800 Subject: [PATCH 4/7] Update JSON path parsing to conform to RFC 9535. --- .../iceberg/variants/VariantDataUtil.java | 20 ++++- .../expressions/TestExpressionBinding.java | 4 +- .../iceberg/variants/TestPathParsing.java | 75 +++++++++++++++++++ 3 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 api/src/test/java/org/apache/iceberg/variants/TestPathParsing.java diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java b/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java index 4a81269c241b..bb605bcd63dc 100644 --- a/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java +++ b/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java @@ -19,12 +19,21 @@ package org.apache.iceberg.variants; import java.util.List; +import java.util.function.Predicate; +import java.util.regex.Pattern; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.base.Splitter; public class VariantDataUtil { private VariantDataUtil() {} + private static final String RFC9535_NAME_FIRST = + "[A-Za-z_\\x{0080}-\\x{D7FF}\\x{E000}-\\x{10FFFF}]"; + private static final String RFC9535_NAME_CHARS = + "[0-9A-Za-z_\\x{0080}-\\x{D7FF}\\x{E000}-\\x{10FFFF}]*"; + private static final Predicate RFC9535_MEMBER_NAME_SHORTHAND = + Pattern.compile(RFC9535_NAME_FIRST + RFC9535_NAME_CHARS).asMatchPredicate(); + private static final Splitter DOT = Splitter.on("."); private static final String ROOT = "$"; @@ -41,6 +50,15 @@ public static List parsePath(String path) { Preconditions.checkArgument( ROOT.equals(parts.get(0)), "Invalid path, does not start with %s: %s", ROOT, path); - return parts.subList(1, parts.size()); + List names = parts.subList(1, parts.size()); + for (String name : names) { + Preconditions.checkArgument( + RFC9535_MEMBER_NAME_SHORTHAND.test(name), + "Invalid path: %s (%s has invalid characters)", + path, + name); + } + + return names; } } diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java index 5f2ab2e37cbf..40919cb4cbb0 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java @@ -366,7 +366,7 @@ public void testExtractExpressionBindingPaths(String path) { assertThat(pred.term()).as("Should use a BoundExtract").isInstanceOf(BoundExtract.class); } - private static final String[] INVALID_PATHS = + private static final String[] UNSUPPORTED_PATHS = new String[] { null, "", @@ -378,7 +378,7 @@ public void testExtractExpressionBindingPaths(String path) { }; @ParameterizedTest - @FieldSource("INVALID_PATHS") + @FieldSource("UNSUPPORTED_PATHS") public void testExtractBindingWithInvalidPath(String path) { assertThatThrownBy(() -> Binder.bind(STRUCT, lessThan(extract("var", path, "long"), 100))) .isInstanceOf(IllegalArgumentException.class); diff --git a/api/src/test/java/org/apache/iceberg/variants/TestPathParsing.java b/api/src/test/java/org/apache/iceberg/variants/TestPathParsing.java new file mode 100644 index 000000000000..a14deed623bb --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/variants/TestPathParsing.java @@ -0,0 +1,75 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.util.List; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.FieldSource; + +public class TestPathParsing { + + @Test + public void testSimplePath() { + assertThat(VariantDataUtil.parsePath("$.event.id")).isEqualTo(List.of("event", "id")); + } + + private static final String[] VALID_PATHS = + new String[] { + "$", // root path + "$.event_id", + "$.event.id", + "$.\u2603", // snowman + "$.\uD834\uDD1E", // surrogate pair, U+1D11E + }; + + @ParameterizedTest + @FieldSource("VALID_PATHS") + public void testExtractExpressionBindingPaths(String path) { + assertThatCode(() -> VariantDataUtil.parsePath(path)).doesNotThrowAnyException(); + } + + private static final String[] INVALID_PATHS = + new String[] { + null, + "", + "event_id", // missing root + "$['event_id']", // uses bracket notation + "$..event_id", // uses recursive descent + "$.events[0].event_id", // uses position accessor + "$.events.*", // uses wildcard + "$.0invalid", // starts with a digit + "$._\uD834", // dangling high surrogate + "$._\uDC34", // low surrogate without high surrogate + }; + + @ParameterizedTest + @FieldSource("INVALID_PATHS") + public void testExtractBindingWithInvalidPath(String path) { + assertThatThrownBy(() -> VariantDataUtil.parsePath(path)) + .isInstanceOf(IllegalArgumentException.class); + } +} From ffb1c00c854051456d6bbdeb2a915f33575dcc12 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Thu, 20 Feb 2025 15:59:47 -0800 Subject: [PATCH 5/7] Make PathUtil privage and remove VariantData (no longer used). --- .../iceberg/expressions/BoundExtract.java | 10 +-- .../PathUtil.java} | 38 ++++++----- .../iceberg/expressions/UnboundExtract.java | 3 +- .../org/apache/iceberg/variants/Variant.java | 20 ------ .../apache/iceberg/variants/VariantData.java | 67 ------------------- .../TestPathParsing.java | 9 ++- 6 files changed, 28 insertions(+), 119 deletions(-) rename api/src/main/java/org/apache/iceberg/{variants/VariantDataUtil.java => expressions/PathUtil.java} (62%) delete mode 100644 api/src/main/java/org/apache/iceberg/variants/VariantData.java rename api/src/test/java/org/apache/iceberg/{variants => expressions}/TestPathParsing.java (87%) diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java b/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java index 9c37838dd379..ae99c08735ab 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java @@ -18,25 +18,21 @@ */ package org.apache.iceberg.expressions; -import java.util.List; import org.apache.iceberg.StructLike; import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.types.Type; import org.apache.iceberg.variants.Variant; -import org.apache.iceberg.variants.VariantDataUtil; public class BoundExtract implements BoundTerm { private final BoundReference ref; private final String path; - private final List fields; private final String fullFieldName; private final Type type; BoundExtract(BoundReference ref, String path, Type type) { this.ref = ref; this.path = path; - this.fields = VariantDataUtil.parsePath(path); - this.fullFieldName = Joiner.on(".").join(fields); + this.fullFieldName = Joiner.on(".").join(PathUtil.parse(path)); this.type = type; } @@ -49,7 +45,7 @@ public String path() { return path; } - public String fullFieldName() { + String fullFieldName() { return fullFieldName; } @@ -62,7 +58,7 @@ public Type type() { public boolean isEquivalentTo(BoundTerm other) { if (other instanceof BoundExtract) { BoundExtract that = (BoundExtract) other; - return ref.isEquivalentTo(that.ref) && fields.equals(that.fields) && type.equals(that.type); + return ref.isEquivalentTo(that.ref) && path.equals(that.path) && type.equals(that.type); } return false; diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java b/api/src/main/java/org/apache/iceberg/expressions/PathUtil.java similarity index 62% rename from api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java rename to api/src/main/java/org/apache/iceberg/expressions/PathUtil.java index bb605bcd63dc..4d8875f9880a 100644 --- a/api/src/main/java/org/apache/iceberg/variants/VariantDataUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/PathUtil.java @@ -1,22 +1,24 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. */ -package org.apache.iceberg.variants; +package org.apache.iceberg.expressions; import java.util.List; import java.util.function.Predicate; @@ -24,8 +26,8 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.base.Splitter; -public class VariantDataUtil { - private VariantDataUtil() {} +class PathUtil { + private PathUtil() {} private static final String RFC9535_NAME_FIRST = "[A-Za-z_\\x{0080}-\\x{D7FF}\\x{E000}-\\x{10FFFF}]"; @@ -37,7 +39,7 @@ private VariantDataUtil() {} private static final Splitter DOT = Splitter.on("."); private static final String ROOT = "$"; - public static List parsePath(String path) { + static List parse(String path) { Preconditions.checkArgument(path != null, "Invalid path: null"); Preconditions.checkArgument( !path.contains("[") && !path.contains("]"), "Unsupported path, contains bracket: %s", path); diff --git a/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java b/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java index 00805674ea0a..0cbd28be043e 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java +++ b/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java @@ -22,7 +22,6 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.variants.Variant; -import org.apache.iceberg.variants.VariantDataUtil; public class UnboundExtract implements UnboundTerm { private final NamedReference ref; @@ -34,7 +33,7 @@ public UnboundExtract(NamedReference ref, String path, String type) { this.path = path; this.type = Types.fromPrimitiveString(type); // verify that the path is well-formed - VariantDataUtil.parsePath(path); + PathUtil.parse(path); } @Override diff --git a/api/src/main/java/org/apache/iceberg/variants/Variant.java b/api/src/main/java/org/apache/iceberg/variants/Variant.java index c34b1909c133..b5606fa094b6 100644 --- a/api/src/main/java/org/apache/iceberg/variants/Variant.java +++ b/api/src/main/java/org/apache/iceberg/variants/Variant.java @@ -25,24 +25,4 @@ public interface Variant { /** Returns the variant value. */ VariantValue value(); - - /** - * Query the value with a given path. - * - *

The path format is a subset of JSON path that supports: - * - *

- * - *

If the query result is a list, the value is a variant array of results. - * - * @param path a JSON-path formatted string. - */ - VariantValue query(String path); - - static Variant of(VariantMetadata metadata, VariantValue value) { - return new VariantData(metadata, value); - } } diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantData.java b/api/src/main/java/org/apache/iceberg/variants/VariantData.java deleted file mode 100644 index c2126b3260e8..000000000000 --- a/api/src/main/java/org/apache/iceberg/variants/VariantData.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.variants; - -import java.util.List; -import org.apache.iceberg.relocated.com.google.common.base.Joiner; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; - -class VariantData implements Variant { - private static final Joiner DOT_JOINER = Joiner.on("."); - - private final VariantMetadata metadata; - private final VariantValue value; - - VariantData(VariantMetadata metadata, VariantValue value) { - Preconditions.checkArgument(metadata != null, "Invalid variant metadata: null"); - Preconditions.checkArgument(value != null, "Invalid variant value: null"); - this.metadata = metadata; - this.value = value; - } - - @Override - public VariantMetadata metadata() { - return metadata; - } - - @Override - public VariantValue value() { - return value; - } - - @Override - public VariantValue query(String path) { - VariantValue current = value; - List resolved = Lists.newArrayList(); - for (String field : VariantDataUtil.parsePath(path)) { - Preconditions.checkArgument( - current != null && current.type() == PhysicalType.OBJECT, - "Cannot find %s: %s=%s is not an object", - path, - DOT_JOINER.join(resolved), - current); - - current = current.asObject().get(field); - resolved.add(field); - } - - return current; - } -} diff --git a/api/src/test/java/org/apache/iceberg/variants/TestPathParsing.java b/api/src/test/java/org/apache/iceberg/expressions/TestPathParsing.java similarity index 87% rename from api/src/test/java/org/apache/iceberg/variants/TestPathParsing.java rename to api/src/test/java/org/apache/iceberg/expressions/TestPathParsing.java index a14deed623bb..5ef590a5c32f 100644 --- a/api/src/test/java/org/apache/iceberg/variants/TestPathParsing.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestPathParsing.java @@ -19,7 +19,7 @@ * */ -package org.apache.iceberg.variants; +package org.apache.iceberg.expressions; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatCode; @@ -34,7 +34,7 @@ public class TestPathParsing { @Test public void testSimplePath() { - assertThat(VariantDataUtil.parsePath("$.event.id")).isEqualTo(List.of("event", "id")); + assertThat(PathUtil.parse("$.event.id")).isEqualTo(List.of("event", "id")); } private static final String[] VALID_PATHS = @@ -49,7 +49,7 @@ public void testSimplePath() { @ParameterizedTest @FieldSource("VALID_PATHS") public void testExtractExpressionBindingPaths(String path) { - assertThatCode(() -> VariantDataUtil.parsePath(path)).doesNotThrowAnyException(); + assertThatCode(() -> PathUtil.parse(path)).doesNotThrowAnyException(); } private static final String[] INVALID_PATHS = @@ -69,7 +69,6 @@ public void testExtractExpressionBindingPaths(String path) { @ParameterizedTest @FieldSource("INVALID_PATHS") public void testExtractBindingWithInvalidPath(String path) { - assertThatThrownBy(() -> VariantDataUtil.parsePath(path)) - .isInstanceOf(IllegalArgumentException.class); + assertThatThrownBy(() -> PathUtil.parse(path)).isInstanceOf(IllegalArgumentException.class); } } From a8a90aa27cc1db8c329a0288f5953858d9ff084b Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Thu, 20 Feb 2025 16:04:35 -0800 Subject: [PATCH 6/7] Revert "API: Move Variant interfaces to API." This reverts commit 671b1be536128566005549c48e16d33b1c63a69a. --- .../java/org/apache/iceberg/expressions/BoundExtract.java | 5 ++--- .../org/apache/iceberg/expressions/UnboundExtract.java | 7 +++---- api/src/main/java/org/apache/iceberg/types/Type.java | 3 +-- .../main/java/org/apache/iceberg/variants/LogicalType.java | 0 .../java/org/apache/iceberg/variants/PhysicalType.java | 0 .../main/java/org/apache/iceberg/variants/Primitives.java | 0 .../src/main/java/org/apache/iceberg/variants/Variant.java | 0 .../java/org/apache/iceberg/variants/VariantArray.java | 0 .../java/org/apache/iceberg/variants/VariantMetadata.java | 0 .../java/org/apache/iceberg/variants/VariantObject.java | 0 .../java/org/apache/iceberg/variants/VariantPrimitive.java | 0 .../java/org/apache/iceberg/variants/VariantValue.java | 0 12 files changed, 6 insertions(+), 9 deletions(-) rename {api => core}/src/main/java/org/apache/iceberg/variants/LogicalType.java (100%) rename {api => core}/src/main/java/org/apache/iceberg/variants/PhysicalType.java (100%) rename {api => core}/src/main/java/org/apache/iceberg/variants/Primitives.java (100%) rename {api => core}/src/main/java/org/apache/iceberg/variants/Variant.java (100%) rename {api => core}/src/main/java/org/apache/iceberg/variants/VariantArray.java (100%) rename {api => core}/src/main/java/org/apache/iceberg/variants/VariantMetadata.java (100%) rename {api => core}/src/main/java/org/apache/iceberg/variants/VariantObject.java (100%) rename {api => core}/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java (100%) rename {api => core}/src/main/java/org/apache/iceberg/variants/VariantValue.java (100%) diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java b/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java index ae99c08735ab..3767e5665079 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundExtract.java @@ -21,15 +21,14 @@ import org.apache.iceberg.StructLike; import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.types.Type; -import org.apache.iceberg.variants.Variant; public class BoundExtract implements BoundTerm { - private final BoundReference ref; + private final BoundReference ref; private final String path; private final String fullFieldName; private final Type type; - BoundExtract(BoundReference ref, String path, Type type) { + BoundExtract(BoundReference ref, String path, Type type) { this.ref = ref; this.path = path; this.fullFieldName = Joiner.on(".").join(PathUtil.parse(path)); diff --git a/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java b/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java index 0cbd28be043e..1f29650c7411 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java +++ b/api/src/main/java/org/apache/iceberg/expressions/UnboundExtract.java @@ -21,14 +21,13 @@ import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; -import org.apache.iceberg.variants.Variant; public class UnboundExtract implements UnboundTerm { - private final NamedReference ref; + private final NamedReference ref; private final String path; private final Type.PrimitiveType type; - public UnboundExtract(NamedReference ref, String path, String type) { + public UnboundExtract(NamedReference ref, String path, String type) { this.ref = ref; this.path = path; this.type = Types.fromPrimitiveString(type); @@ -38,7 +37,7 @@ public UnboundExtract(NamedReference ref, String path, String type) { @Override public BoundTerm bind(Types.StructType struct, boolean caseSensitive) { - BoundReference boundRef = ref.bind(struct, caseSensitive); + BoundReference boundRef = ref.bind(struct, caseSensitive); ValidationException.check( Types.VariantType.get().equals(boundRef.type()), "Cannot bind extract, not a variant: %s", diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 184a17416eae..67e40df9e939 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -26,7 +26,6 @@ import java.util.Map; import java.util.Objects; import org.apache.iceberg.StructLike; -import org.apache.iceberg.variants.Variant; public interface Type extends Serializable { enum TypeID { @@ -47,7 +46,7 @@ enum TypeID { STRUCT(StructLike.class), LIST(List.class), MAP(Map.class), - VARIANT(Variant.class), + VARIANT(Object.class), UNKNOWN(Object.class); private final Class javaClass; diff --git a/api/src/main/java/org/apache/iceberg/variants/LogicalType.java b/core/src/main/java/org/apache/iceberg/variants/LogicalType.java similarity index 100% rename from api/src/main/java/org/apache/iceberg/variants/LogicalType.java rename to core/src/main/java/org/apache/iceberg/variants/LogicalType.java diff --git a/api/src/main/java/org/apache/iceberg/variants/PhysicalType.java b/core/src/main/java/org/apache/iceberg/variants/PhysicalType.java similarity index 100% rename from api/src/main/java/org/apache/iceberg/variants/PhysicalType.java rename to core/src/main/java/org/apache/iceberg/variants/PhysicalType.java diff --git a/api/src/main/java/org/apache/iceberg/variants/Primitives.java b/core/src/main/java/org/apache/iceberg/variants/Primitives.java similarity index 100% rename from api/src/main/java/org/apache/iceberg/variants/Primitives.java rename to core/src/main/java/org/apache/iceberg/variants/Primitives.java diff --git a/api/src/main/java/org/apache/iceberg/variants/Variant.java b/core/src/main/java/org/apache/iceberg/variants/Variant.java similarity index 100% rename from api/src/main/java/org/apache/iceberg/variants/Variant.java rename to core/src/main/java/org/apache/iceberg/variants/Variant.java diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantArray.java b/core/src/main/java/org/apache/iceberg/variants/VariantArray.java similarity index 100% rename from api/src/main/java/org/apache/iceberg/variants/VariantArray.java rename to core/src/main/java/org/apache/iceberg/variants/VariantArray.java diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantMetadata.java b/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java similarity index 100% rename from api/src/main/java/org/apache/iceberg/variants/VariantMetadata.java rename to core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantObject.java b/core/src/main/java/org/apache/iceberg/variants/VariantObject.java similarity index 100% rename from api/src/main/java/org/apache/iceberg/variants/VariantObject.java rename to core/src/main/java/org/apache/iceberg/variants/VariantObject.java diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java b/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java similarity index 100% rename from api/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java rename to core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantValue.java b/core/src/main/java/org/apache/iceberg/variants/VariantValue.java similarity index 100% rename from api/src/main/java/org/apache/iceberg/variants/VariantValue.java rename to core/src/main/java/org/apache/iceberg/variants/VariantValue.java From 6ad4a9ce9a6b3a12c412c7e5bc2aff819577785a Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Thu, 20 Feb 2025 16:08:53 -0800 Subject: [PATCH 7/7] Apply spotless. --- .../apache/iceberg/expressions/PathUtil.java | 30 +++++++++--------- .../iceberg/expressions/TestPathParsing.java | 31 +++++++++---------- 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/PathUtil.java b/api/src/main/java/org/apache/iceberg/expressions/PathUtil.java index 4d8875f9880a..ad7b5cd72684 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/PathUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/PathUtil.java @@ -1,22 +1,20 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.expressions; diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestPathParsing.java b/api/src/test/java/org/apache/iceberg/expressions/TestPathParsing.java index 5ef590a5c32f..13fa2ef7184f 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestPathParsing.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestPathParsing.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.expressions; import static org.assertj.core.api.Assertions.assertThat;