-
Notifications
You must be signed in to change notification settings - Fork 3k
Core: add key_metadata to ManifestFile spec #2675
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| package org.apache.iceberg; | ||
|
|
||
| import java.io.Serializable; | ||
| import java.nio.ByteBuffer; | ||
| import java.util.Arrays; | ||
| import java.util.List; | ||
| import java.util.function.Function; | ||
|
|
@@ -33,6 +34,7 @@ | |
| import org.apache.iceberg.relocated.com.google.common.base.Objects; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.Lists; | ||
| import org.apache.iceberg.types.Types; | ||
| import org.apache.iceberg.util.ByteBuffers; | ||
|
|
||
| public class GenericManifestFile | ||
| implements ManifestFile, StructLike, IndexedRecord, SchemaConstructable, Serializable { | ||
|
|
@@ -58,6 +60,7 @@ public class GenericManifestFile | |
| private Long existingRowsCount = null; | ||
| private Long deletedRowsCount = null; | ||
| private PartitionFieldSummary[] partitions = null; | ||
| private byte[] keyMetadata = null; | ||
|
|
||
| /** | ||
| * Used by Avro reflection to instantiate this class when reading manifest files. | ||
|
|
@@ -101,13 +104,14 @@ public GenericManifestFile(org.apache.avro.Schema avroSchema) { | |
| this.deletedRowsCount = null; | ||
| this.partitions = null; | ||
| this.fromProjectionPos = null; | ||
| this.keyMetadata = null; | ||
| } | ||
|
|
||
| public GenericManifestFile(String path, long length, int specId, ManifestContent content, | ||
| long sequenceNumber, long minSequenceNumber, Long snapshotId, | ||
| int addedFilesCount, long addedRowsCount, int existingFilesCount, | ||
| long existingRowsCount, int deletedFilesCount, long deletedRowsCount, | ||
| List<PartitionFieldSummary> partitions) { | ||
| List<PartitionFieldSummary> partitions, ByteBuffer keyMetadata) { | ||
| this.avroSchema = AVRO_SCHEMA; | ||
| this.manifestPath = path; | ||
| this.length = length; | ||
|
|
@@ -124,6 +128,7 @@ public GenericManifestFile(String path, long length, int specId, ManifestContent | |
| this.deletedRowsCount = deletedRowsCount; | ||
| this.partitions = partitions == null ? null : partitions.toArray(new PartitionFieldSummary[0]); | ||
| this.fromProjectionPos = null; | ||
| this.keyMetadata = ByteBuffers.toByteArray(keyMetadata); | ||
rdblue marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| /** | ||
|
|
@@ -154,6 +159,7 @@ private GenericManifestFile(GenericManifestFile toCopy) { | |
| this.partitions = null; | ||
| } | ||
| this.fromProjectionPos = toCopy.fromProjectionPos; | ||
| this.keyMetadata = toCopy.keyMetadata == null ? null : Arrays.copyOf(toCopy.keyMetadata, toCopy.keyMetadata.length); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -245,6 +251,11 @@ public List<PartitionFieldSummary> partitions() { | |
| return partitions == null ? null : Arrays.asList(partitions); | ||
| } | ||
|
|
||
| @Override | ||
| public ByteBuffer keyMetadata() { | ||
| return keyMetadata == null ? null : ByteBuffer.wrap(keyMetadata); | ||
| } | ||
|
|
||
| @Override | ||
| public int size() { | ||
| return ManifestFile.schema().columns().size(); | ||
|
|
@@ -291,6 +302,8 @@ public Object get(int i) { | |
| return deletedRowsCount; | ||
| case 13: | ||
| return partitions(); | ||
| case 14: | ||
| return keyMetadata(); | ||
| default: | ||
| throw new UnsupportedOperationException("Unknown field ordinal: " + pos); | ||
| } | ||
|
|
@@ -349,6 +362,9 @@ public <T> void set(int i, T value) { | |
| this.partitions = value == null ? null : | ||
| ((List<PartitionFieldSummary>) value).toArray(new PartitionFieldSummary[0]); | ||
| return; | ||
| case 14: | ||
| this.keyMetadata = ByteBuffers.toByteArray((ByteBuffer) value); | ||
| return; | ||
| default: | ||
| // ignore the object, it must be from a newer version of the format | ||
| } | ||
|
|
@@ -399,6 +415,7 @@ public String toString() { | |
| .add("deleted_data_files_count", deletedFilesCount) | ||
| .add("deleted_rows_count", deletedRowsCount) | ||
| .add("partitions", partitions) | ||
| .add("key_metadata", keyMetadata == null ? "null" : "(redacted)") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would actually be more against using this approach to support redaction, for 2 reasons:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That makes sense to me. 👍
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So maybe we should just drop this from `toString`? I'm fine with that.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm also fine dropping this from `toString`. |
||
| .toString(); | ||
| } | ||
|
|
||
|
|
@@ -418,7 +435,7 @@ private CopyBuilder(ManifestFile toCopy) { | |
| toCopy.sequenceNumber(), toCopy.minSequenceNumber(), toCopy.snapshotId(), | ||
| toCopy.addedFilesCount(), toCopy.addedRowsCount(), toCopy.existingFilesCount(), | ||
| toCopy.existingRowsCount(), toCopy.deletedFilesCount(), toCopy.deletedRowsCount(), | ||
| copyList(toCopy.partitions(), PartitionFieldSummary::copy)); | ||
| copyList(toCopy.partitions(), PartitionFieldSummary::copy), toCopy.keyMetadata()); | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this used in tests? We could just allow this to use the default implementation, unless we added tests that exercise it.