11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/PartitionSpec.java
@@ -456,6 +456,17 @@ public Builder truncate(String sourceName, int width) {
return truncate(sourceName, width, sourceName + "_trunc");
}

public Builder alwaysNull(String sourceName, String targetName) {
checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
fields.add(new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.alwaysNull()));
return this;
}

public Builder alwaysNull(String sourceName) {
return alwaysNull(sourceName, sourceName + "_null");
}

// add a partition field with an auto-increment partition field id starting from PARTITION_DATA_ID_START
Builder add(int sourceId, String name, String transform) {
return add(sourceId, nextFieldId(), name, transform);
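For illustration, a minimal sketch of how the new builder methods might be used (the example class, schema, and column names are assumptions, not part of this diff):

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

class AlwaysNullExample {  // hypothetical example class
  static PartitionSpec voidSpec() {
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.required(2, "ts", Types.TimestampType.withoutZone()));

    // default target name is "ts_null"; alwaysNull("ts", "ts_void") would set it explicitly
    return PartitionSpec.builderFor(schema)
        .alwaysNull("ts")
        .build();
  }
}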
51 changes: 51 additions & 0 deletions api/src/main/java/org/apache/iceberg/transforms/SerializationProxies.java
@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.transforms;

import java.io.ObjectStreamException;
import java.io.Serializable;

/**
* Stand-in classes for expression classes in Java Serialization.
* <p>
* These are used so that transform classes can be singletons and use identical equality.
*/
class SerializationProxies {
private SerializationProxies() {
}

static class VoidTransformProxy implements Serializable {
private static final VoidTransformProxy INSTANCE = new VoidTransformProxy();

static VoidTransformProxy get() {
return INSTANCE;
}

/**
* Constructor for Java serialization.
*/
VoidTransformProxy() {
}

Object readResolve() throws ObjectStreamException {
return VoidTransform.get();
}
}
}
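The proxy is what keeps the singleton a singleton across Java serialization: writeReplace() on the transform substitutes the proxy into the stream, and readResolve() here hands back VoidTransform.get() on the way out, so reference equality survives a round trip. A minimal sketch of that round trip (the example class is hypothetical, and it assumes Transform extends Serializable, as these hooks imply):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;

class VoidTransformRoundTrip {  // hypothetical example class
  static void demo() throws Exception {
    Transform<Object, Void> original = Transforms.alwaysNull();

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
      out.writeObject(original);  // writeReplace() swaps in VoidTransformProxy
    }

    try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
      Object copy = in.readObject();  // readResolve() returns VoidTransform.get()
      System.out.println(copy == original);  // true: identical equality is preserved
    }
  }
}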
14 changes: 14 additions & 0 deletions api/src/main/java/org/apache/iceberg/transforms/Transforms.java
@@ -67,6 +67,10 @@ private Transforms() {
// fall through to return unknown transform
}

if (transform.equalsIgnoreCase("void")) {
return VoidTransform.get();
}

return new UnknownTransform<>(type, transform);
}

@@ -178,4 +182,14 @@ public static <T> Transform<T, Integer> bucket(Type type, int numBuckets) {
public static <T> Transform<T, T> truncate(Type type, int width) {
return Truncate.get(type, width);
}

/**
* Returns a {@link Transform} that always produces null.
*
* @param <T> Java type accepted by the transform.
* @return a transform that always produces null (the void transform).
*/
public static <T> Transform<T, Void> alwaysNull() {
return VoidTransform.get();
}
}
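Taken together with the "void" parsing branch above, the new factory means callers get the same singleton whether they ask for it programmatically or a spec names the transform as "void". A small sketch of the transform's contract (hypothetical example class, not part of this PR):

import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;

class AlwaysNullContract {  // hypothetical example class
  static void demo() {
    Transform<Long, Void> toNull = Transforms.alwaysNull();

    System.out.println(toNull.canTransform(Types.TimestampType.withoutZone()));  // true: any type is accepted
    System.out.println(toNull.apply(1234567890L));  // null: every value maps to null
  }
}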
76 changes: 76 additions & 0 deletions api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java
@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.transforms;

import java.io.ObjectStreamException;
import org.apache.iceberg.expressions.BoundPredicate;
import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.iceberg.types.Type;

class VoidTransform<S> implements Transform<S, Void> {
private static final VoidTransform<Object> INSTANCE = new VoidTransform<>();

@SuppressWarnings("unchecked")
static <T> VoidTransform<T> get() {
return (VoidTransform<T>) INSTANCE;
}

private VoidTransform() {
}

@Override
public Void apply(Object value) {
return null;
}

@Override
public boolean canTransform(Type type) {
return true;
}

@Override
public Type getResultType(Type sourceType) {
return sourceType;
}

@Override
public UnboundPredicate<Void> projectStrict(String name, BoundPredicate<S> predicate) {
return null;
}

@Override
public UnboundPredicate<Void> project(String name, BoundPredicate<S> predicate) {
return null;
}

@Override
public String toHumanString(Void value) {
return "null";
}

@Override
public String toString() {
return "void";
}
Comment on lines +64 to +71
Contributor:

Minor nit here to think about... we seem to be using both void and null. Would it make more sense to just consistently use void, as it seems to better indicate that there is no expected value?

Contributor Author:

Null is the human-readable string for the value produced by the transform. Void is the name of the transform. I considered naming it something like always_null, but void seemed shorter and less error-prone (was that alwaysNull or always-null?).

Object writeReplace() throws ObjectStreamException {
return SerializationProxies.VoidTransformProxy.get();
}
}
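Tying back to the review discussion above: "void" and "null" serve different roles. toString() is the transform's name, which is what spec metadata carries and what the "void" parsing branch in Transforms recognizes; toHumanString(null) renders the value the transform produces. A hypothetical snippet (not part of this PR) showing the distinction:

import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;

class VoidNaming {  // hypothetical example class
  static void demo() {
    Transform<String, Void> t = Transforms.alwaysNull();

    System.out.println(t.toString());           // "void": the transform's name
    System.out.println(t.toHumanString(null));  // "null": the produced value, rendered for humans
  }
}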
76 changes: 76 additions & 0 deletions api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java
@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg;

import org.apache.iceberg.types.Types;

@SuppressWarnings("checkstyle:HideUtilityClassConstructor")
public class PartitionSpecTestBase {
public static final Schema SCHEMA = new Schema(
Types.NestedField.required(1, "i", Types.IntegerType.get()),
Types.NestedField.required(2, "l", Types.LongType.get()),
Types.NestedField.required(3, "d", Types.DateType.get()),
Types.NestedField.required(4, "t", Types.TimeType.get()),
Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()),
Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)),
Types.NestedField.required(7, "s", Types.StringType.get()),
Types.NestedField.required(8, "u", Types.UUIDType.get()),
Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)),
Types.NestedField.required(10, "b", Types.BinaryType.get())
);

// a spec with all of the allowed transform/type pairs
public static final PartitionSpec[] SPECS = new PartitionSpec[] {
PartitionSpec.builderFor(SCHEMA).identity("i").build(),
PartitionSpec.builderFor(SCHEMA).identity("l").build(),
PartitionSpec.builderFor(SCHEMA).identity("d").build(),
PartitionSpec.builderFor(SCHEMA).identity("t").build(),
PartitionSpec.builderFor(SCHEMA).identity("ts").build(),
PartitionSpec.builderFor(SCHEMA).identity("dec").build(),
PartitionSpec.builderFor(SCHEMA).identity("s").build(),
PartitionSpec.builderFor(SCHEMA).identity("u").build(),
PartitionSpec.builderFor(SCHEMA).identity("f").build(),
PartitionSpec.builderFor(SCHEMA).identity("b").build(),
PartitionSpec.builderFor(SCHEMA).bucket("i", 128).build(),
PartitionSpec.builderFor(SCHEMA).bucket("l", 128).build(),
PartitionSpec.builderFor(SCHEMA).bucket("d", 128).build(),
PartitionSpec.builderFor(SCHEMA).bucket("t", 128).build(),
PartitionSpec.builderFor(SCHEMA).bucket("ts", 128).build(),
PartitionSpec.builderFor(SCHEMA).bucket("dec", 128).build(),
PartitionSpec.builderFor(SCHEMA).bucket("s", 128).build(),
PartitionSpec.builderFor(SCHEMA).bucket("u", 128).build(),
PartitionSpec.builderFor(SCHEMA).bucket("f", 128).build(),
PartitionSpec.builderFor(SCHEMA).bucket("b", 128).build(),
PartitionSpec.builderFor(SCHEMA).year("d").build(),
PartitionSpec.builderFor(SCHEMA).month("d").build(),
PartitionSpec.builderFor(SCHEMA).day("d").build(),
PartitionSpec.builderFor(SCHEMA).year("ts").build(),
PartitionSpec.builderFor(SCHEMA).month("ts").build(),
PartitionSpec.builderFor(SCHEMA).day("ts").build(),
PartitionSpec.builderFor(SCHEMA).hour("ts").build(),
PartitionSpec.builderFor(SCHEMA).truncate("i", 10).build(),
PartitionSpec.builderFor(SCHEMA).truncate("l", 10).build(),
PartitionSpec.builderFor(SCHEMA).truncate("dec", 10).build(),
PartitionSpec.builderFor(SCHEMA).truncate("s", 10).build(),
PartitionSpec.builderFor(SCHEMA).add(6, "dec_unsupported", "unsupported").build(),
PartitionSpec.builderFor(SCHEMA).add(6, 1111, "dec_unsupported", "unsupported").build(),
PartitionSpec.builderFor(SCHEMA).alwaysNull("ts").build(),
};
}
api/src/test/java/org/apache/iceberg/TestTransformSerialization.java
@@ -19,64 +19,13 @@

package org.apache.iceberg;

import org.apache.iceberg.types.Types;
import org.junit.Assert;
import org.junit.Test;

public class TestTransformSerialization {
public class TestTransformSerialization extends PartitionSpecTestBase {
@Test
public void testTransforms() throws Exception {
Schema schema = new Schema(
Types.NestedField.required(1, "i", Types.IntegerType.get()),
Types.NestedField.required(2, "l", Types.LongType.get()),
Types.NestedField.required(3, "d", Types.DateType.get()),
Types.NestedField.required(4, "t", Types.TimeType.get()),
Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()),
Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)),
Types.NestedField.required(7, "s", Types.StringType.get()),
Types.NestedField.required(8, "u", Types.UUIDType.get()),
Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)),
Types.NestedField.required(10, "b", Types.BinaryType.get())
);

// a spec with all of the allowed transform/type pairs
PartitionSpec[] specs = new PartitionSpec[] {
PartitionSpec.builderFor(schema).identity("i").build(),
PartitionSpec.builderFor(schema).identity("l").build(),
PartitionSpec.builderFor(schema).identity("d").build(),
PartitionSpec.builderFor(schema).identity("t").build(),
PartitionSpec.builderFor(schema).identity("ts").build(),
PartitionSpec.builderFor(schema).identity("dec").build(),
PartitionSpec.builderFor(schema).identity("s").build(),
PartitionSpec.builderFor(schema).identity("u").build(),
PartitionSpec.builderFor(schema).identity("f").build(),
PartitionSpec.builderFor(schema).identity("b").build(),
PartitionSpec.builderFor(schema).bucket("i", 128).build(),
PartitionSpec.builderFor(schema).bucket("l", 128).build(),
PartitionSpec.builderFor(schema).bucket("d", 128).build(),
PartitionSpec.builderFor(schema).bucket("t", 128).build(),
PartitionSpec.builderFor(schema).bucket("ts", 128).build(),
PartitionSpec.builderFor(schema).bucket("dec", 128).build(),
PartitionSpec.builderFor(schema).bucket("s", 128).build(),
PartitionSpec.builderFor(schema).bucket("u", 128).build(),
PartitionSpec.builderFor(schema).bucket("f", 128).build(),
PartitionSpec.builderFor(schema).bucket("b", 128).build(),
PartitionSpec.builderFor(schema).year("d").build(),
PartitionSpec.builderFor(schema).month("d").build(),
PartitionSpec.builderFor(schema).day("d").build(),
PartitionSpec.builderFor(schema).year("ts").build(),
PartitionSpec.builderFor(schema).month("ts").build(),
PartitionSpec.builderFor(schema).day("ts").build(),
PartitionSpec.builderFor(schema).hour("ts").build(),
PartitionSpec.builderFor(schema).truncate("i", 10).build(),
PartitionSpec.builderFor(schema).truncate("l", 10).build(),
PartitionSpec.builderFor(schema).truncate("dec", 10).build(),
PartitionSpec.builderFor(schema).truncate("s", 10).build(),
PartitionSpec.builderFor(schema).add(6, "dec_unsupported", "unsupported").build(),
PartitionSpec.builderFor(schema).add(6, 1111, "dec_unsupported", "unsupported").build(),
};

for (PartitionSpec spec : specs) {
for (PartitionSpec spec : SPECS) {
Assert.assertEquals("Deserialization should produce equal partition spec",
spec, TestHelpers.roundTripSerialize(spec));
}
core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java
@@ -23,7 +23,6 @@
import org.junit.Test;

public class TestPartitionSpecParser extends TableTestBase {

@Test
public void testToJsonForV1Table() {
String expected = "{\n" +
Expand Down Expand Up @@ -108,4 +107,16 @@ public void testFromJsonWithoutFieldId() {
Assert.assertEquals(1000, spec.fields().get(0).fieldId());
Assert.assertEquals(1001, spec.fields().get(1).fieldId());
}

@Test
public void testTransforms() {
for (PartitionSpec spec : PartitionSpecTestBase.SPECS) {
Assert.assertEquals("To/from JSON should produce equal partition spec",
spec, roundTripJSON(spec));
}
}

private static PartitionSpec roundTripJSON(PartitionSpec spec) {
return PartitionSpecParser.fromJson(PartitionSpecTestBase.SCHEMA, PartitionSpecParser.toJson(spec));
}
}
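For a concrete sense of what the new testTransforms case round-trips, a hypothetical snippet (not from this PR) that serializes a void-partitioned spec to JSON and back; the field is written out under the transform's name, "void":

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.PartitionSpecTestBase;

class VoidSpecJsonExample {  // hypothetical example class
  static void demo() {
    PartitionSpec spec = PartitionSpec.builderFor(PartitionSpecTestBase.SCHEMA)
        .alwaysNull("ts")
        .build();

    String json = PartitionSpecParser.toJson(spec);  // the ts_null field carries the transform name "void"
    PartitionSpec parsed = PartitionSpecParser.fromJson(PartitionSpecTestBase.SCHEMA, json);
    System.out.println(spec.equals(parsed));  // true, as the new testTransforms case asserts
  }
}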