-
Notifications
You must be signed in to change notification settings - Fork 2.9k
API: Move Variant interfaces and serialized implementations to API #12374
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.variants; | ||
|
|
||
| /** Basic type of a variant value, as decoded from a value header by VariantUtil.basicType. */ enum BasicType { | ||
| PRIMITIVE, | ||
| SHORT_STRING, | ||
| OBJECT, | ||
| ARRAY | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,15 +23,16 @@ | |
| import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
|
|
||
| class SerializedMetadata implements VariantMetadata, Variants.Serialized { | ||
| class SerializedMetadata implements VariantMetadata, Serialized { | ||
| private static final int HEADER_SIZE = 1; | ||
| private static final int SUPPORTED_VERSION = 1; | ||
| private static final int VERSION_MASK = 0b1111; | ||
| private static final int SORTED_STRINGS = 0b10000; | ||
| private static final int OFFSET_SIZE_MASK = 0b11000000; | ||
| private static final int OFFSET_SIZE_SHIFT = 6; | ||
|
|
||
| static final ByteBuffer EMPTY_V1_BUFFER = | ||
| ByteBuffer.wrap(new byte[] {0x01, 0x00}).order(ByteOrder.LITTLE_ENDIAN); | ||
| ByteBuffer.wrap(new byte[] {0x01, 0x00, 0x00}).order(ByteOrder.LITTLE_ENDIAN); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This implementation now finds the end of the Variant metadata buffer so that metadata and value buffers can be concatenated. |
||
| static final SerializedMetadata EMPTY_V1_METADATA = from(EMPTY_V1_BUFFER); | ||
|
|
||
| static SerializedMetadata from(byte[] bytes) { | ||
|
|
@@ -55,13 +56,21 @@ static SerializedMetadata from(ByteBuffer metadata) { | |
| private final String[] dict; | ||
|
|
||
| private SerializedMetadata(ByteBuffer metadata, int header) { | ||
| this.metadata = metadata; | ||
| this.isSorted = (header & SORTED_STRINGS) == SORTED_STRINGS; | ||
| this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); | ||
| int dictSize = VariantUtil.readLittleEndianUnsigned(metadata, Variants.HEADER_SIZE, offsetSize); | ||
| int dictSize = VariantUtil.readLittleEndianUnsigned(metadata, HEADER_SIZE, offsetSize); | ||
| this.dict = new String[dictSize]; | ||
| this.offsetListOffset = Variants.HEADER_SIZE + offsetSize; | ||
| this.offsetListOffset = HEADER_SIZE + offsetSize; | ||
| this.dataOffset = offsetListOffset + ((1 + dictSize) * offsetSize); | ||
| int endOffset = | ||
| dataOffset | ||
| + VariantUtil.readLittleEndianUnsigned( | ||
| metadata, offsetListOffset + (offsetSize * dictSize), offsetSize); | ||
| if (endOffset < metadata.limit()) { | ||
| this.metadata = VariantUtil.slice(metadata, 0, endOffset); | ||
| } else { | ||
| this.metadata = metadata; | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,9 +27,9 @@ | |
| import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.Maps; | ||
| import org.apache.iceberg.util.Pair; | ||
|
|
||
| class SerializedObject extends Variants.SerializedValue implements VariantObject { | ||
| class SerializedObject implements VariantObject, SerializedValue { | ||
| private static final int HEADER_SIZE = 1; | ||
| private static final int OFFSET_SIZE_MASK = 0b1100; | ||
| private static final int OFFSET_SIZE_SHIFT = 2; | ||
| private static final int FIELD_ID_SIZE_MASK = 0b110000; | ||
|
|
@@ -43,9 +43,9 @@ static SerializedObject from(VariantMetadata metadata, byte[] bytes) { | |
| static SerializedObject from(VariantMetadata metadata, ByteBuffer value, int header) { | ||
| Preconditions.checkArgument( | ||
| value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); | ||
| Variants.BasicType basicType = VariantUtil.basicType(header); | ||
| BasicType basicType = VariantUtil.basicType(header); | ||
| Preconditions.checkArgument( | ||
| basicType == Variants.BasicType.OBJECT, "Invalid object, basic type: " + basicType); | ||
| basicType == BasicType.OBJECT, "Invalid object, basic type: " + basicType); | ||
| return new SerializedObject(metadata, value, header); | ||
| } | ||
|
|
||
|
|
@@ -67,9 +67,8 @@ private SerializedObject(VariantMetadata metadata, ByteBuffer value, int header) | |
| this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); | ||
| this.fieldIdSize = 1 + ((header & FIELD_ID_SIZE_MASK) >> FIELD_ID_SIZE_SHIFT); | ||
| int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; | ||
| int numElements = | ||
| VariantUtil.readLittleEndianUnsigned(value, Variants.HEADER_SIZE, numElementsSize); | ||
| this.fieldIdListOffset = Variants.HEADER_SIZE + numElementsSize; | ||
| int numElements = VariantUtil.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); | ||
| this.fieldIdListOffset = HEADER_SIZE + numElementsSize; | ||
| this.fieldIds = new Integer[numElements]; | ||
| this.offsetListOffset = fieldIdListOffset + (numElements * fieldIdSize); | ||
| this.offsets = new int[numElements]; | ||
|
|
@@ -122,7 +121,7 @@ VariantMetadata metadata() { | |
| return metadata; | ||
| } | ||
|
|
||
| Iterable<Pair<String, Integer>> fields() { | ||
| Iterable<Map.Entry<String, Integer>> fields() { | ||
| return () -> | ||
| new Iterator<>() { | ||
| private int index = 0; | ||
|
|
@@ -133,8 +132,8 @@ public boolean hasNext() { | |
| } | ||
|
|
||
| @Override | ||
| public Pair<String, Integer> next() { | ||
| Pair<String, Integer> next = Pair.of(metadata.get(id(index)), index); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| public Map.Entry<String, Integer> next() { | ||
| Map.Entry<String, Integer> next = Map.entry(metadata.get(id(index)), index); | ||
| index += 1; | ||
| return next; | ||
| } | ||
|
|
@@ -182,7 +181,7 @@ public VariantValue get(String name) { | |
|
|
||
| if (null == values[index]) { | ||
| values[index] = | ||
| Variants.value( | ||
| VariantValue.from( | ||
| metadata, VariantUtil.slice(value, dataOffset + offsets[index], lengths[index])); | ||
| } | ||
|
|
||
|
|
@@ -213,7 +212,7 @@ ByteBuffer sliceValue(String name) { | |
| */ | ||
| ByteBuffer sliceValue(int index) { | ||
| if (values[index] != null) { | ||
| return ((Variants.Serialized) values[index]).buffer(); | ||
| return ((Serialized) values[index]).buffer(); | ||
| } | ||
|
|
||
| return VariantUtil.slice(value, dataOffset + offsets[index], lengths[index]); | ||
|
|
@@ -224,11 +223,6 @@ public ByteBuffer buffer() { | |
| return value; | ||
| } | ||
|
|
||
| @Override | ||
| public int sizeInBytes() { | ||
| return value.remaining(); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return VariantObject.asString(this); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.variants; | ||
|
|
||
| import java.nio.ByteBuffer; | ||
|
|
||
| /** A VariantValue that is also Serialized; both default methods below are derived from buffer(). */ interface SerializedValue extends VariantValue, Serialized { | ||
| /** Returns the number of bytes remaining in buffer(). */ @Override | ||
| default int sizeInBytes() { | ||
| return buffer().remaining(); | ||
| } | ||
|
|
||
| /** Writes buffer() into the given buffer at offset via VariantUtil.writeBufferAbsolute and returns buffer().remaining(). */ @Override | ||
| default int writeTo(ByteBuffer buffer, int offset) { | ||
| ByteBuffer value = buffer(); | ||
| VariantUtil.writeBufferAbsolute(buffer, offset, value); | ||
| return value.remaining(); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.variants; | ||
|
|
||
| import java.nio.ByteBuffer; | ||
|
|
||
| /** A variant metadata and value pair. */ | ||
| public interface Variant { | ||
| /** Returns the metadata for all values in the variant. */ | ||
| VariantMetadata metadata(); | ||
|
|
||
| /** Returns the variant value. */ | ||
| VariantValue value(); | ||
|
|
||
| static Variant of(VariantMetadata metadata, VariantValue value) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To be replaced with the implementation in the Parquet writer PR (#12323). |
||
| return new Variant() { | ||
| @Override | ||
| public VariantMetadata metadata() { | ||
| return metadata; | ||
| } | ||
|
|
||
| @Override | ||
| public VariantValue value() { | ||
| return value; | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| /** Parses metadata from buffer, then treats the bytes from offset metadata.sizeInBytes() onward as the value. */ static Variant from(ByteBuffer buffer) { | ||
| VariantMetadata metadata = VariantMetadata.from(buffer); | ||
| /* the value slice starts at metadata.sizeInBytes() and spans the rest of buffer.remaining() */ ByteBuffer valueBuffer = | ||
| VariantUtil.slice( | ||
| buffer, metadata.sizeInBytes(), buffer.remaining() - metadata.sizeInBytes()); | ||
| VariantValue value = VariantValue.from(metadata, valueBuffer); | ||
| return of(metadata, value); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was moved from `Variants` and did not replace the `Variant` interface. Looks like a bad diff detection in git.