diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 67e40df9e939..184a17416eae 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -26,6 +26,7 @@ import java.util.Map; import java.util.Objects; import org.apache.iceberg.StructLike; +import org.apache.iceberg.variants.Variant; public interface Type extends Serializable { enum TypeID { @@ -46,7 +47,7 @@ enum TypeID { STRUCT(StructLike.class), LIST(List.class), MAP(Map.class), - VARIANT(Object.class), + VARIANT(Variant.class), UNKNOWN(Object.class); private final Class javaClass; diff --git a/api/src/main/java/org/apache/iceberg/variants/BasicType.java b/api/src/main/java/org/apache/iceberg/variants/BasicType.java new file mode 100644 index 000000000000..1f3ce4006511 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/variants/BasicType.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +enum BasicType { + PRIMITIVE, + SHORT_STRING, + OBJECT, + ARRAY +} diff --git a/core/src/main/java/org/apache/iceberg/variants/LogicalType.java b/api/src/main/java/org/apache/iceberg/variants/LogicalType.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/LogicalType.java rename to api/src/main/java/org/apache/iceberg/variants/LogicalType.java diff --git a/core/src/main/java/org/apache/iceberg/variants/PhysicalType.java b/api/src/main/java/org/apache/iceberg/variants/PhysicalType.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/PhysicalType.java rename to api/src/main/java/org/apache/iceberg/variants/PhysicalType.java diff --git a/core/src/main/java/org/apache/iceberg/variants/Primitives.java b/api/src/main/java/org/apache/iceberg/variants/Primitives.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/Primitives.java rename to api/src/main/java/org/apache/iceberg/variants/Primitives.java diff --git a/core/src/main/java/org/apache/iceberg/variants/Variant.java b/api/src/main/java/org/apache/iceberg/variants/Serialized.java similarity index 79% rename from core/src/main/java/org/apache/iceberg/variants/Variant.java rename to api/src/main/java/org/apache/iceberg/variants/Serialized.java index b5606fa094b6..f66d0c898135 100644 --- a/core/src/main/java/org/apache/iceberg/variants/Variant.java +++ b/api/src/main/java/org/apache/iceberg/variants/Serialized.java @@ -18,11 +18,8 @@ */ package org.apache.iceberg.variants; -/** A variant metadata and value pair. */ -public interface Variant { - /** Returns the metadata for all values in the variant. */ - VariantMetadata metadata(); +import java.nio.ByteBuffer; - /** Returns the variant value. */ - VariantValue value(); +interface Serialized { + ByteBuffer buffer(); } diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java b/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java similarity index 84% rename from core/src/main/java/org/apache/iceberg/variants/SerializedArray.java rename to api/src/main/java/org/apache/iceberg/variants/SerializedArray.java index be6649cb0d20..619d2fe24ef5 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java @@ -23,7 +23,8 @@ import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -class SerializedArray extends Variants.SerializedValue implements VariantArray { +class SerializedArray implements VariantArray, SerializedValue { + private static final int HEADER_SIZE = 1; private static final int OFFSET_SIZE_MASK = 0b1100; private static final int OFFSET_SIZE_SHIFT = 2; private static final int IS_LARGE = 0b10000; @@ -36,9 +37,9 @@ static SerializedArray from(VariantMetadata metadata, byte[] bytes) { static SerializedArray from(VariantMetadata metadata, ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - Variants.BasicType basicType = VariantUtil.basicType(header); + BasicType basicType = VariantUtil.basicType(header); Preconditions.checkArgument( - basicType == Variants.BasicType.ARRAY, "Invalid array, basic type: " + basicType); + basicType == BasicType.ARRAY, "Invalid array, basic type: " + basicType); return new SerializedArray(metadata, value, header); } @@ -54,9 +55,8 @@ private SerializedArray(VariantMetadata metadata, ByteBuffer value, int header) this.value = value; this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; - int numElements = - VariantUtil.readLittleEndianUnsigned(value, Variants.HEADER_SIZE, numElementsSize); - this.offsetListOffset = Variants.HEADER_SIZE + numElementsSize; + int numElements = VariantUtil.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); + this.offsetListOffset = HEADER_SIZE + numElementsSize; this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); this.array = new VariantValue[numElements]; } @@ -76,7 +76,7 @@ public VariantValue get(int index) { VariantUtil.readLittleEndianUnsigned( value, offsetListOffset + (offsetSize * (1 + index)), offsetSize); array[index] = - Variants.value(metadata, VariantUtil.slice(value, dataOffset + offset, next - offset)); + VariantValue.from(metadata, VariantUtil.slice(value, dataOffset + offset, next - offset)); } return array[index]; } diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java b/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java similarity index 87% rename from core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java rename to api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java index 746f645c9dcd..9113a8c1c969 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java @@ -23,7 +23,8 @@ import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -class SerializedMetadata implements VariantMetadata, Variants.Serialized { +class SerializedMetadata implements VariantMetadata, Serialized { + private static final int HEADER_SIZE = 1; private static final int SUPPORTED_VERSION = 1; private static final int VERSION_MASK = 0b1111; private static final int SORTED_STRINGS = 0b10000; @@ -31,7 +32,7 @@ class SerializedMetadata implements VariantMetadata, Variants.Serialized { private static final int OFFSET_SIZE_SHIFT = 6; static final ByteBuffer EMPTY_V1_BUFFER = - ByteBuffer.wrap(new byte[] {0x01, 0x00}).order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer.wrap(new byte[] {0x01, 0x00, 0x00}).order(ByteOrder.LITTLE_ENDIAN); static final SerializedMetadata EMPTY_V1_METADATA = from(EMPTY_V1_BUFFER); static SerializedMetadata from(byte[] bytes) { @@ -55,13 +56,21 @@ static SerializedMetadata from(ByteBuffer metadata) { private final String[] dict; private SerializedMetadata(ByteBuffer metadata, int header) { - this.metadata = metadata; this.isSorted = (header & SORTED_STRINGS) == SORTED_STRINGS; this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); - int dictSize = VariantUtil.readLittleEndianUnsigned(metadata, Variants.HEADER_SIZE, offsetSize); + int dictSize = VariantUtil.readLittleEndianUnsigned(metadata, HEADER_SIZE, offsetSize); this.dict = new String[dictSize]; - this.offsetListOffset = Variants.HEADER_SIZE + offsetSize; + this.offsetListOffset = HEADER_SIZE + offsetSize; this.dataOffset = offsetListOffset + ((1 + dictSize) * offsetSize); + int endOffset = + dataOffset + + VariantUtil.readLittleEndianUnsigned( + metadata, offsetListOffset + (offsetSize * dictSize), offsetSize); + if (endOffset < metadata.limit()) { + this.metadata = VariantUtil.slice(metadata, 0, endOffset); + } else { + this.metadata = metadata; + } } @Override diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java b/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java similarity index 89% rename from core/src/main/java/org/apache/iceberg/variants/SerializedObject.java rename to api/src/main/java/org/apache/iceberg/variants/SerializedObject.java index b43fd10dc1e3..078fd2cb6760 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java @@ -27,9 +27,9 @@ import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.util.Pair; -class SerializedObject extends Variants.SerializedValue implements VariantObject { +class SerializedObject implements VariantObject, SerializedValue { + private static final int HEADER_SIZE = 1; private static final int OFFSET_SIZE_MASK = 0b1100; private static final int OFFSET_SIZE_SHIFT = 2; private static final int FIELD_ID_SIZE_MASK = 0b110000; @@ -43,9 +43,9 @@ static SerializedObject from(VariantMetadata metadata, byte[] bytes) { static SerializedObject from(VariantMetadata metadata, ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - Variants.BasicType basicType = VariantUtil.basicType(header); + BasicType basicType = VariantUtil.basicType(header); Preconditions.checkArgument( - basicType == Variants.BasicType.OBJECT, "Invalid object, basic type: " + basicType); + basicType == BasicType.OBJECT, "Invalid object, basic type: " + basicType); return new SerializedObject(metadata, value, header); } @@ -67,9 +67,8 @@ private SerializedObject(VariantMetadata metadata, ByteBuffer value, int header) this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); this.fieldIdSize = 1 + ((header & FIELD_ID_SIZE_MASK) >> FIELD_ID_SIZE_SHIFT); int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; - int numElements = - VariantUtil.readLittleEndianUnsigned(value, Variants.HEADER_SIZE, numElementsSize); - this.fieldIdListOffset = Variants.HEADER_SIZE + numElementsSize; + int numElements = VariantUtil.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); + this.fieldIdListOffset = HEADER_SIZE + numElementsSize; this.fieldIds = new Integer[numElements]; this.offsetListOffset = fieldIdListOffset + (numElements * fieldIdSize); this.offsets = new int[numElements]; @@ -122,7 +121,7 @@ VariantMetadata metadata() { return metadata; } - Iterable> fields() { + Iterable> fields() { return () -> new Iterator<>() { private int index = 0; @@ -133,8 +132,8 @@ public boolean hasNext() { } @Override - public Pair next() { - Pair next = Pair.of(metadata.get(id(index)), index); + public Map.Entry next() { + Map.Entry next = Map.entry(metadata.get(id(index)), index); index += 1; return next; } @@ -182,7 +181,7 @@ public VariantValue get(String name) { if (null == values[index]) { values[index] = - Variants.value( + VariantValue.from( metadata, VariantUtil.slice(value, dataOffset + offsets[index], lengths[index])); } @@ -213,7 +212,7 @@ ByteBuffer sliceValue(String name) { */ ByteBuffer sliceValue(int index) { if (values[index] != null) { - return ((Variants.Serialized) values[index]).buffer(); + return ((Serialized) values[index]).buffer(); } return VariantUtil.slice(value, dataOffset + offsets[index], lengths[index]); @@ -224,11 +223,6 @@ public ByteBuffer buffer() { return value; } - @Override - public int sizeInBytes() { - return value.remaining(); - } - @Override public String toString() { return VariantObject.asString(this); diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java b/api/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java similarity index 93% rename from core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java rename to api/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java index 54035771477d..7c243df1fa19 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java @@ -24,9 +24,9 @@ import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -class SerializedPrimitive extends Variants.SerializedValue implements VariantPrimitive { +class SerializedPrimitive implements VariantPrimitive, SerializedValue { private static final int PRIMITIVE_TYPE_SHIFT = 2; - private static final int PRIMITIVE_OFFSET = Variants.HEADER_SIZE; + private static final int PRIMITIVE_OFFSET = 1; static SerializedPrimitive from(byte[] bytes) { return from(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); @@ -35,9 +35,9 @@ static SerializedPrimitive from(byte[] bytes) { static SerializedPrimitive from(ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - Variants.BasicType basicType = VariantUtil.basicType(header); + BasicType basicType = VariantUtil.basicType(header); Preconditions.checkArgument( - basicType == Variants.BasicType.PRIMITIVE, + basicType == BasicType.PRIMITIVE, "Invalid primitive, basic type != PRIMITIVE: " + basicType); return new SerializedPrimitive(value, header); } diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java b/api/src/main/java/org/apache/iceberg/variants/SerializedShortString.java similarity index 85% rename from core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java rename to api/src/main/java/org/apache/iceberg/variants/SerializedShortString.java index e07fa99a7e25..ee5a744f88c8 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedShortString.java @@ -22,7 +22,8 @@ import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -class SerializedShortString extends Variants.SerializedValue implements VariantPrimitive { +class SerializedShortString implements VariantPrimitive, SerializedValue { + private static final int HEADER_SIZE = 1; private static final int LENGTH_MASK = 0b11111100; private static final int LENGTH_SHIFT = 2; @@ -33,10 +34,9 @@ static SerializedShortString from(byte[] bytes) { static SerializedShortString from(ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - Variants.BasicType basicType = VariantUtil.basicType(header); + BasicType basicType = VariantUtil.basicType(header); Preconditions.checkArgument( - basicType == Variants.BasicType.SHORT_STRING, - "Invalid short string, basic type: " + basicType); + basicType == BasicType.SHORT_STRING, "Invalid short string, basic type: " + basicType); return new SerializedShortString(value, header); } @@ -57,7 +57,7 @@ public PhysicalType type() { @Override public String get() { if (null == string) { - this.string = VariantUtil.readString(value, Variants.HEADER_SIZE, length); + this.string = VariantUtil.readString(value, HEADER_SIZE, length); } return string; } diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java b/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java new file mode 100644 index 000000000000..19b9e880f5a6 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.nio.ByteBuffer; + +interface SerializedValue extends VariantValue, Serialized { + @Override + default int sizeInBytes() { + return buffer().remaining(); + } + + @Override + default int writeTo(ByteBuffer buffer, int offset) { + ByteBuffer value = buffer(); + VariantUtil.writeBufferAbsolute(buffer, offset, value); + return value.remaining(); + } +} diff --git a/api/src/main/java/org/apache/iceberg/variants/Variant.java b/api/src/main/java/org/apache/iceberg/variants/Variant.java new file mode 100644 index 000000000000..4376938e9e15 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/variants/Variant.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.variants; + +import java.nio.ByteBuffer; + +/** A variant metadata and value pair. */ +public interface Variant { + /** Returns the metadata for all values in the variant. */ + VariantMetadata metadata(); + + /** Returns the variant value. */ + VariantValue value(); + + static Variant of(VariantMetadata metadata, VariantValue value) { + return new Variant() { + @Override + public VariantMetadata metadata() { + return metadata; + } + + @Override + public VariantValue value() { + return value; + } + }; + } + + static Variant from(ByteBuffer buffer) { + VariantMetadata metadata = VariantMetadata.from(buffer); + ByteBuffer valueBuffer = + VariantUtil.slice( + buffer, metadata.sizeInBytes(), buffer.remaining() - metadata.sizeInBytes()); + VariantValue value = VariantValue.from(metadata, valueBuffer); + return of(metadata, value); + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantArray.java b/api/src/main/java/org/apache/iceberg/variants/VariantArray.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/VariantArray.java rename to api/src/main/java/org/apache/iceberg/variants/VariantArray.java diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java b/api/src/main/java/org/apache/iceberg/variants/VariantMetadata.java similarity index 95% rename from core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java rename to api/src/main/java/org/apache/iceberg/variants/VariantMetadata.java index 07a6e0a52c28..5cb50742a58f 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java +++ b/api/src/main/java/org/apache/iceberg/variants/VariantMetadata.java @@ -50,6 +50,10 @@ public interface VariantMetadata { */ int writeTo(ByteBuffer buffer, int offset); + static VariantMetadata from(ByteBuffer buffer) { + return SerializedMetadata.from(buffer); + } + static String asString(VariantMetadata metadata) { StringBuilder builder = new StringBuilder(); diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantObject.java b/api/src/main/java/org/apache/iceberg/variants/VariantObject.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/VariantObject.java rename to api/src/main/java/org/apache/iceberg/variants/VariantObject.java diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java b/api/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java similarity index 100% rename from core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java rename to api/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java b/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java similarity index 96% rename from core/src/main/java/org/apache/iceberg/variants/VariantUtil.java rename to api/src/main/java/org/apache/iceberg/variants/VariantUtil.java index 6a33d2059f81..263aff38d6ca 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java +++ b/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java @@ -177,17 +177,17 @@ static byte arrayHeader(boolean isLarge, int offsetSize) { return (byte) ((isLarge ? 0b10000 : 0) | (offsetSize - 1) << 2 | 0b11); } - static Variants.BasicType basicType(int header) { + static BasicType basicType(int header) { int basicType = header & BASIC_TYPE_MASK; switch (basicType) { case BASIC_TYPE_PRIMITIVE: - return Variants.BasicType.PRIMITIVE; + return BasicType.PRIMITIVE; case BASIC_TYPE_SHORT_STRING: - return Variants.BasicType.SHORT_STRING; + return BasicType.SHORT_STRING; case BASIC_TYPE_OBJECT: - return Variants.BasicType.OBJECT; + return BasicType.OBJECT; case BASIC_TYPE_ARRAY: - return Variants.BasicType.ARRAY; + return BasicType.ARRAY; } throw new UnsupportedOperationException("Unsupported basic type: " + basicType); diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantValue.java b/api/src/main/java/org/apache/iceberg/variants/VariantValue.java similarity index 76% rename from core/src/main/java/org/apache/iceberg/variants/VariantValue.java rename to api/src/main/java/org/apache/iceberg/variants/VariantValue.java index d6bf4092dca6..1cda7b2d3ca2 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantValue.java +++ b/api/src/main/java/org/apache/iceberg/variants/VariantValue.java @@ -59,4 +59,21 @@ default VariantObject asObject() { default VariantArray asArray() { throw new IllegalArgumentException("Not an array: " + this); } + + static VariantValue from(VariantMetadata metadata, ByteBuffer value) { + int header = VariantUtil.readByte(value, 0); + BasicType basicType = VariantUtil.basicType(header); + switch (basicType) { + case PRIMITIVE: + return SerializedPrimitive.from(value, header); + case SHORT_STRING: + return SerializedShortString.from(value, header); + case OBJECT: + return SerializedObject.from(metadata, value, header); + case ARRAY: + return SerializedArray.from(metadata, value, header); + } + + throw new UnsupportedOperationException("Unsupported basic type: " + basicType); + } } diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java b/api/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java similarity index 100% rename from core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java rename to api/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java b/api/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java similarity index 88% rename from core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java rename to api/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java index f0b3fdab9be1..034480954a77 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java +++ b/api/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java @@ -44,7 +44,7 @@ public void testEmptyVariantMetadata() { @Test public void testHeaderSorted() { - SerializedMetadata metadata = SerializedMetadata.from(new byte[] {0b10001, 0x00}); + SerializedMetadata metadata = SerializedMetadata.from(new byte[] {0b10001, 0x00, 0x00}); assertThat(metadata.isSorted()).isTrue(); assertThat(metadata.dictionarySize()).isEqualTo(0); @@ -54,22 +54,45 @@ public void testHeaderSorted() { public void testHeaderOffsetSize() { // offset size is 4-byte LE = 1 assertThat( - SerializedMetadata.from(new byte[] {(byte) 0b11010001, 0x01, 0x00, 0x00, 0x00}) + SerializedMetadata.from( + new byte[] { + (byte) 0b11010001, + 0x01, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00 + }) .dictionarySize()) .isEqualTo(1); // offset size is 3-byte LE = 1 assertThat( - SerializedMetadata.from(new byte[] {(byte) 0b10010001, 0x01, 0x00, 0x00}) + SerializedMetadata.from( + new byte[] { + (byte) 0b10010001, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + }) .dictionarySize()) .isEqualTo(1); // offset size is 2-byte LE = 1 - assertThat(SerializedMetadata.from(new byte[] {(byte) 0b01010001, 0x01, 0x00}).dictionarySize()) + assertThat( + SerializedMetadata.from( + new byte[] {(byte) 0b01010001, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00}) + .dictionarySize()) .isEqualTo(1); // offset size is 1-byte LE = 1 - assertThat(SerializedMetadata.from(new byte[] {(byte) 0b00010001, 0x01}).dictionarySize()) + assertThat( + SerializedMetadata.from(new byte[] {(byte) 0b00010001, 0x01, 0x00, 0x00}) + .dictionarySize()) .isEqualTo(1); } diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java b/api/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java similarity index 94% rename from core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java rename to api/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java index 979f84b5075b..2bc0a50beed6 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java +++ b/api/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java @@ -86,7 +86,7 @@ public void testSimpleObject() { ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = Variants.metadata(meta); + VariantMetadata metadata = VariantMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); @@ -107,7 +107,7 @@ public void testUnsortedValues() { ByteBuffer meta = VariantTestUtil.createMetadata(Sets.newHashSet("a", "b", "c"), true /* sort names */); - VariantMetadata metadata = Variants.metadata(meta); + VariantMetadata metadata = VariantMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, UNSORTED_VALUES); assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); @@ -129,7 +129,7 @@ public void testOutOfOrderKeys() { ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), false /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = Variants.metadata(meta); + VariantMetadata metadata = VariantMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); @@ -150,7 +150,7 @@ public void testMixedValueTypes() { ByteBuffer meta = VariantTestUtil.createMetadata( ImmutableList.of("a", "b", "c", "d", "e", "f"), true /* sort names */); - VariantMetadata metadata = Variants.metadata(meta); + VariantMetadata metadata = VariantMetadata.from(meta); Map inner = ImmutableMap.of("b", I2, "f", I3); ByteBuffer innerBuffer = VariantTestUtil.createObject(meta, inner); @@ -193,7 +193,7 @@ public void testTwoByteOffsets() { ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = Variants.metadata(meta); + VariantMetadata metadata = VariantMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); @@ -221,7 +221,7 @@ public void testThreeByteOffsets() { ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = Variants.metadata(meta); + VariantMetadata metadata = VariantMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); @@ -241,23 +241,23 @@ public void testThreeByteOffsets() { @ValueSource(booleans = {true, false}) @SuppressWarnings({"unchecked", "rawtypes"}) public void testLargeObject(boolean sortFieldNames) { - Map> fields = Maps.newHashMap(); + Map fields = Maps.newHashMap(); for (int i = 0; i < 10_000; i += 1) { fields.put( RandomUtil.generateString(10, random), - Variants.of(RandomUtil.generateString(10, random))); + VariantTestUtil.createString(RandomUtil.generateString(10, random))); } ByteBuffer meta = VariantTestUtil.createMetadata(fields.keySet(), sortFieldNames); ByteBuffer value = VariantTestUtil.createObject(meta, (Map) fields); - VariantMetadata metadata = Variants.metadata(meta); + VariantMetadata metadata = VariantMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); assertThat(object.numFields()).isEqualTo(10_000); - for (Map.Entry> entry : fields.entrySet()) { + for (Map.Entry entry : fields.entrySet()) { VariantValue fieldValue = object.get(entry.getKey()); assertThat(fieldValue.type()).isEqualTo(PhysicalType.STRING); assertThat(fieldValue.asPrimitive().get()).isEqualTo(entry.getValue().get()); @@ -279,7 +279,7 @@ public void testTwoByteFieldIds(boolean sortFieldNames) { ByteBuffer meta = VariantTestUtil.createMetadata(keySet, sortFieldNames); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = Variants.metadata(meta); + VariantMetadata metadata = VariantMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); @@ -308,7 +308,7 @@ public void testThreeByteFieldIds(boolean sortFieldNames) { ByteBuffer meta = VariantTestUtil.createMetadata(keySet, sortFieldNames); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = Variants.metadata(meta); + VariantMetadata metadata = VariantMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java b/api/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java similarity index 100% rename from core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java rename to api/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java diff --git a/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java b/api/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java similarity index 100% rename from core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java rename to api/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java diff --git a/core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java b/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java similarity index 91% rename from core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java rename to api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java index e2973b78b9ea..f06f481e6eee 100644 --- a/core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java +++ b/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java @@ -107,6 +107,20 @@ static SerializedPrimitive createString(String string) { return SerializedPrimitive.from(buffer, buffer.get(0)); } + public static ByteBuffer variantBuffer(Map data) { + ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); + ByteBuffer value = VariantTestUtil.createObject(meta, data); + ByteBuffer buffer = + ByteBuffer.allocate(meta.remaining() + value.remaining()).order(ByteOrder.LITTLE_ENDIAN); + writeBufferAbsolute(buffer, 0, meta); + writeBufferAbsolute(buffer, meta.remaining(), value); + return buffer; + } + + public static Variant variant(Map data) { + return Variant.from(variantBuffer(data)); + } + public static ByteBuffer emptyMetadata() { return createMetadata(ImmutableList.of(), true); } @@ -161,8 +175,10 @@ public static ByteBuffer createMetadata(Collection fieldNames, boolean s public static ByteBuffer createObject(ByteBuffer metadataBuffer, Map data) { // create the metadata to look up field names - VariantMetadata metadata = Variants.metadata(metadataBuffer); + return createObject(SerializedMetadata.from(metadataBuffer), data); + } + public static ByteBuffer createObject(VariantMetadata metadata, Map data) { int numElements = data.size(); boolean isLarge = numElements > 0xFF; @@ -215,12 +231,12 @@ public static ByteBuffer createObject(ByteBuffer metadataBuffer, Map 0xFF; int dataSize = 0; - for (Variants.Serialized value : values) { + for (Serialized value : values) { // TODO: produce size for every variant without serializing dataSize += value.buffer().remaining(); } @@ -244,7 +260,7 @@ static ByteBuffer createArray(Variants.Serialized... values) { // write values and offsets int nextOffset = 0; // the first offset is always 0 int index = 0; - for (Variants.Serialized value : values) { + for (Serialized value : values) { // write the offset and value VariantUtil.writeLittleEndianUnsigned( buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); diff --git a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java index 28bc16a6b9f9..1a22a652b0d9 100644 --- a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java @@ -28,7 +28,6 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; -import org.apache.iceberg.util.Pair; import org.apache.iceberg.util.SortedMerge; /** @@ -167,12 +166,12 @@ private SerializationState( if (unshredded instanceof SerializedObject) { // for serialized objects, use existing buffers instead of materializing values SerializedObject serialized = (SerializedObject) unshredded; - for (Pair field : serialized.fields()) { + for (Map.Entry field : serialized.fields()) { // if the value is replaced by an unshredded field, don't include it - String name = field.first(); + String name = field.getKey(); boolean replaced = shreddedFields.containsKey(name) || removedFields.contains(name); if (!replaced) { - ByteBuffer value = serialized.sliceValue(field.second()); + ByteBuffer value = serialized.sliceValue(field.getValue()); unshreddedBuilder.put(name, value); totalDataSize += value.remaining(); } diff --git a/core/src/main/java/org/apache/iceberg/variants/Variants.java b/core/src/main/java/org/apache/iceberg/variants/Variants.java index 96ef0bbb5ba6..de965b6f9044 100644 --- a/core/src/main/java/org/apache/iceberg/variants/Variants.java +++ b/core/src/main/java/org/apache/iceberg/variants/Variants.java @@ -25,33 +25,6 @@ public class Variants { private Variants() {} - interface Serialized { - ByteBuffer buffer(); - } - - abstract static class SerializedValue implements VariantValue, Serialized { - @Override - public int sizeInBytes() { - return buffer().remaining(); - } - - @Override - public int writeTo(ByteBuffer buffer, int offset) { - ByteBuffer value = buffer(); - VariantUtil.writeBufferAbsolute(buffer, offset, value); - return value.remaining(); - } - } - - static final int HEADER_SIZE = 1; - - enum BasicType { - PRIMITIVE, - SHORT_STRING, - OBJECT, - ARRAY - } - public static VariantMetadata emptyMetadata() { return SerializedMetadata.EMPTY_V1_METADATA; } @@ -61,20 +34,7 @@ public static VariantMetadata metadata(ByteBuffer metadata) { } public static VariantValue value(VariantMetadata metadata, ByteBuffer value) { - int header = VariantUtil.readByte(value, 0); - BasicType basicType = VariantUtil.basicType(header); - switch (basicType) { - case PRIMITIVE: - return SerializedPrimitive.from(value, header); - case SHORT_STRING: - return SerializedShortString.from(value, header); - case OBJECT: - return SerializedObject.from(metadata, value, header); - case ARRAY: - return SerializedArray.from(metadata, value, header); - } - - throw new UnsupportedOperationException("Unsupported basic type: " + basicType); + return VariantValue.from(metadata, value); } public static ShreddedObject object(VariantMetadata metadata, VariantObject object) {