diff --git a/api/src/main/java/org/apache/iceberg/PartitionSpec.java b/api/src/main/java/org/apache/iceberg/PartitionSpec.java index 3742f7d73b1b..9ccce0dadc34 100644 --- a/api/src/main/java/org/apache/iceberg/PartitionSpec.java +++ b/api/src/main/java/org/apache/iceberg/PartitionSpec.java @@ -456,6 +456,17 @@ public Builder truncate(String sourceName, int width) { return truncate(sourceName, width, sourceName + "_trunc"); } + public Builder alwaysNull(String sourceName, String targetName) { + checkAndAddPartitionName(targetName); + Types.NestedField sourceColumn = findSourceColumn(sourceName); + fields.add(new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.alwaysNull())); + return this; + } + + public Builder alwaysNull(String sourceName) { + return alwaysNull(sourceName, sourceName + "_null"); + } + // add a partition field with an auto-increment partition field id starting from PARTITION_DATA_ID_START Builder add(int sourceId, String name, String transform) { return add(sourceId, nextFieldId(), name, transform); diff --git a/api/src/main/java/org/apache/iceberg/transforms/SerializationProxies.java b/api/src/main/java/org/apache/iceberg/transforms/SerializationProxies.java new file mode 100644 index 000000000000..db12159c4a7e --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/transforms/SerializationProxies.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.transforms; + +import java.io.ObjectStreamException; +import java.io.Serializable; + +/** + * Stand-in classes for transform classes in Java Serialization. + *

+ * These are used so that transform classes can be singletons and use identical equality. + */ +class SerializationProxies { + private SerializationProxies() { + } + + static class VoidTransformProxy implements Serializable { + private static final VoidTransformProxy INSTANCE = new VoidTransformProxy(); + + static VoidTransformProxy get() { + return INSTANCE; + } + + /** + * Constructor for Java serialization. + */ + VoidTransformProxy() { + } + + Object readResolve() throws ObjectStreamException { + return VoidTransform.get(); + } + } +} diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index ee92c291f3b3..cd370d481b30 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -67,6 +67,10 @@ private Transforms() { // fall through to return unknown transform } + if (transform.equalsIgnoreCase("void")) { + return VoidTransform.get(); + } + return new UnknownTransform<>(type, transform); } @@ -178,4 +182,14 @@ public static Transform bucket(Type type, int numBuckets) { public static Transform truncate(Type type, int width) { return Truncate.get(type, width); } + + /** + * Returns a {@link Transform} that always produces null. + * + * @param Java type accepted by the transform. + * @return a transform that always produces null (the void transform). + */ + public static Transform alwaysNull() { + return VoidTransform.get(); + } } diff --git a/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java b/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java new file mode 100644 index 000000000000..d2ecbda13d90 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.transforms; + +import java.io.ObjectStreamException; +import org.apache.iceberg.expressions.BoundPredicate; +import org.apache.iceberg.expressions.UnboundPredicate; +import org.apache.iceberg.types.Type; + +class VoidTransform implements Transform { + private static final VoidTransform INSTANCE = new VoidTransform<>(); + + @SuppressWarnings("unchecked") + static VoidTransform get() { + return (VoidTransform) INSTANCE; + } + + private VoidTransform() { + } + + @Override + public Void apply(Object value) { + return null; + } + + @Override + public boolean canTransform(Type type) { + return true; + } + + @Override + public Type getResultType(Type sourceType) { + return sourceType; + } + + @Override + public UnboundPredicate projectStrict(String name, BoundPredicate predicate) { + return null; + } + + @Override + public UnboundPredicate project(String name, BoundPredicate predicate) { + return null; + } + + @Override + public String toHumanString(Void value) { + return "null"; + } + + @Override + public String toString() { + return "void"; + } + + Object writeReplace() throws ObjectStreamException { + return SerializationProxies.VoidTransformProxy.get(); + } +} diff --git a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java 
b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java new file mode 100644 index 000000000000..d8d64831d1d2 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg; + +import org.apache.iceberg.types.Types; + +@SuppressWarnings("checkstyle:HideUtilityClassConstructor") +public class PartitionSpecTestBase { + public static final Schema SCHEMA = new Schema( + Types.NestedField.required(1, "i", Types.IntegerType.get()), + Types.NestedField.required(2, "l", Types.LongType.get()), + Types.NestedField.required(3, "d", Types.DateType.get()), + Types.NestedField.required(4, "t", Types.TimeType.get()), + Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()), + Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)), + Types.NestedField.required(7, "s", Types.StringType.get()), + Types.NestedField.required(8, "u", Types.UUIDType.get()), + Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)), + Types.NestedField.required(10, "b", Types.BinaryType.get()) + ); + + // a spec with all of the allowed transform/type pairs + public static final PartitionSpec[] SPECS = new PartitionSpec[] { + PartitionSpec.builderFor(SCHEMA).identity("i").build(), + PartitionSpec.builderFor(SCHEMA).identity("l").build(), + PartitionSpec.builderFor(SCHEMA).identity("d").build(), + PartitionSpec.builderFor(SCHEMA).identity("t").build(), + PartitionSpec.builderFor(SCHEMA).identity("ts").build(), + PartitionSpec.builderFor(SCHEMA).identity("dec").build(), + PartitionSpec.builderFor(SCHEMA).identity("s").build(), + PartitionSpec.builderFor(SCHEMA).identity("u").build(), + PartitionSpec.builderFor(SCHEMA).identity("f").build(), + PartitionSpec.builderFor(SCHEMA).identity("b").build(), + PartitionSpec.builderFor(SCHEMA).bucket("i", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("l", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("d", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("t", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("ts", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("dec", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("s", 
128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("u", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("f", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("b", 128).build(), + PartitionSpec.builderFor(SCHEMA).year("d").build(), + PartitionSpec.builderFor(SCHEMA).month("d").build(), + PartitionSpec.builderFor(SCHEMA).day("d").build(), + PartitionSpec.builderFor(SCHEMA).year("ts").build(), + PartitionSpec.builderFor(SCHEMA).month("ts").build(), + PartitionSpec.builderFor(SCHEMA).day("ts").build(), + PartitionSpec.builderFor(SCHEMA).hour("ts").build(), + PartitionSpec.builderFor(SCHEMA).truncate("i", 10).build(), + PartitionSpec.builderFor(SCHEMA).truncate("l", 10).build(), + PartitionSpec.builderFor(SCHEMA).truncate("dec", 10).build(), + PartitionSpec.builderFor(SCHEMA).truncate("s", 10).build(), + PartitionSpec.builderFor(SCHEMA).add(6, "dec_unsupported", "unsupported").build(), + PartitionSpec.builderFor(SCHEMA).add(6, 1111, "dec_unsupported", "unsupported").build(), + PartitionSpec.builderFor(SCHEMA).alwaysNull("ts").build(), + }; +} diff --git a/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java b/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java index cb4616c12449..74f4ea83108e 100644 --- a/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java +++ b/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java @@ -19,64 +19,13 @@ package org.apache.iceberg; -import org.apache.iceberg.types.Types; import org.junit.Assert; import org.junit.Test; -public class TestTransformSerialization { +public class TestTransformSerialization extends PartitionSpecTestBase { @Test public void testTransforms() throws Exception { - Schema schema = new Schema( - Types.NestedField.required(1, "i", Types.IntegerType.get()), - Types.NestedField.required(2, "l", Types.LongType.get()), - Types.NestedField.required(3, "d", Types.DateType.get()), - Types.NestedField.required(4, "t", Types.TimeType.get()), - 
Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()), - Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)), - Types.NestedField.required(7, "s", Types.StringType.get()), - Types.NestedField.required(8, "u", Types.UUIDType.get()), - Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)), - Types.NestedField.required(10, "b", Types.BinaryType.get()) - ); - - // a spec with all of the allowed transform/type pairs - PartitionSpec[] specs = new PartitionSpec[] { - PartitionSpec.builderFor(schema).identity("i").build(), - PartitionSpec.builderFor(schema).identity("l").build(), - PartitionSpec.builderFor(schema).identity("d").build(), - PartitionSpec.builderFor(schema).identity("t").build(), - PartitionSpec.builderFor(schema).identity("ts").build(), - PartitionSpec.builderFor(schema).identity("dec").build(), - PartitionSpec.builderFor(schema).identity("s").build(), - PartitionSpec.builderFor(schema).identity("u").build(), - PartitionSpec.builderFor(schema).identity("f").build(), - PartitionSpec.builderFor(schema).identity("b").build(), - PartitionSpec.builderFor(schema).bucket("i", 128).build(), - PartitionSpec.builderFor(schema).bucket("l", 128).build(), - PartitionSpec.builderFor(schema).bucket("d", 128).build(), - PartitionSpec.builderFor(schema).bucket("t", 128).build(), - PartitionSpec.builderFor(schema).bucket("ts", 128).build(), - PartitionSpec.builderFor(schema).bucket("dec", 128).build(), - PartitionSpec.builderFor(schema).bucket("s", 128).build(), - PartitionSpec.builderFor(schema).bucket("u", 128).build(), - PartitionSpec.builderFor(schema).bucket("f", 128).build(), - PartitionSpec.builderFor(schema).bucket("b", 128).build(), - PartitionSpec.builderFor(schema).year("d").build(), - PartitionSpec.builderFor(schema).month("d").build(), - PartitionSpec.builderFor(schema).day("d").build(), - PartitionSpec.builderFor(schema).year("ts").build(), - PartitionSpec.builderFor(schema).month("ts").build(), - 
PartitionSpec.builderFor(schema).day("ts").build(), - PartitionSpec.builderFor(schema).hour("ts").build(), - PartitionSpec.builderFor(schema).truncate("i", 10).build(), - PartitionSpec.builderFor(schema).truncate("l", 10).build(), - PartitionSpec.builderFor(schema).truncate("dec", 10).build(), - PartitionSpec.builderFor(schema).truncate("s", 10).build(), - PartitionSpec.builderFor(schema).add(6, "dec_unsupported", "unsupported").build(), - PartitionSpec.builderFor(schema).add(6, 1111, "dec_unsupported", "unsupported").build(), - }; - - for (PartitionSpec spec : specs) { + for (PartitionSpec spec : SPECS) { Assert.assertEquals("Deserialization should produce equal partition spec", spec, TestHelpers.roundTripSerialize(spec)); } diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java index dea0d378ad43..c31f89406b49 100644 --- a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java +++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java @@ -23,7 +23,6 @@ import org.junit.Test; public class TestPartitionSpecParser extends TableTestBase { - @Test public void testToJsonForV1Table() { String expected = "{\n" + @@ -108,4 +107,16 @@ public void testFromJsonWithoutFieldId() { Assert.assertEquals(1000, spec.fields().get(0).fieldId()); Assert.assertEquals(1001, spec.fields().get(1).fieldId()); } + + @Test + public void testTransforms() { + for (PartitionSpec spec : PartitionSpecTestBase.SPECS) { + Assert.assertEquals("To/from JSON should produce equal partition spec", + spec, roundTripJSON(spec)); + } + } + + private static PartitionSpec roundTripJSON(PartitionSpec spec) { + return PartitionSpecParser.fromJson(PartitionSpecTestBase.SCHEMA, PartitionSpecParser.toJson(spec)); + } }