-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Spark: Support spec ID and partition metadata columns #2984
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,6 +38,12 @@ private MetadataColumns() { | |
| Integer.MAX_VALUE - 2, "_pos", Types.LongType.get(), "Ordinal position of a row in the source data file"); | ||
| public static final NestedField IS_DELETED = NestedField.required( | ||
| Integer.MAX_VALUE - 3, "_deleted", Types.BooleanType.get(), "Whether the row has been deleted"); | ||
| public static final NestedField SPEC_ID = NestedField.required( | ||
| Integer.MAX_VALUE - 4, "_spec_id", Types.IntegerType.get(), "Spec ID used to track the file containing a row"); | ||
| // the partition column type is not static and depends on all specs in the table | ||
| public static final int PARTITION_COLUMN_ID = Integer.MAX_VALUE - 5; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that new reserved columns need to be added to the spec. We should also make sure the spec notes the ranges that are reserved. I'm not sure whether we did that or just added specific IDs.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll do that in a follow-up as we are missing |
||
| public static final String PARTITION_COLUMN_NAME = "_partition"; | ||
| public static final String PARTITION_COLUMN_DOC = "Partition to which a row belongs to"; | ||
aokolnychyi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // IDs Integer.MAX_VALUE - (101-200) are used for reserved columns | ||
| public static final NestedField DELETE_FILE_PATH = NestedField.required( | ||
|
|
@@ -51,24 +57,39 @@ private MetadataColumns() { | |
| private static final Map<String, NestedField> META_COLUMNS = ImmutableMap.of( | ||
| FILE_PATH.name(), FILE_PATH, | ||
| ROW_POSITION.name(), ROW_POSITION, | ||
| IS_DELETED.name(), IS_DELETED); | ||
| IS_DELETED.name(), IS_DELETED, | ||
| SPEC_ID.name(), SPEC_ID | ||
| ); | ||
|
|
||
| private static final Set<Integer> META_IDS = META_COLUMNS.values().stream().map(NestedField::fieldId) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Had to change this as the partition type is not static and is handled specially. |
||
| .collect(ImmutableSet.toImmutableSet()); | ||
| private static final Set<Integer> META_IDS = ImmutableSet.of( | ||
| FILE_PATH.fieldId(), | ||
| ROW_POSITION.fieldId(), | ||
| IS_DELETED.fieldId(), | ||
| SPEC_ID.fieldId(), | ||
| PARTITION_COLUMN_ID | ||
| ); | ||
|
|
||
| public static Set<Integer> metadataFieldIds() { | ||
| return META_IDS; | ||
| } | ||
|
|
||
| public static NestedField get(String name) { | ||
| return META_COLUMNS.get(name); | ||
| public static NestedField metadataColumn(Table table, String name) { | ||
| if (name.equals(PARTITION_COLUMN_NAME)) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I kept the logic case-sensitive as before, but we should maybe reconsider it at some point. |
||
| return Types.NestedField.optional( | ||
| PARTITION_COLUMN_ID, | ||
| PARTITION_COLUMN_NAME, | ||
| Partitioning.partitionType(table), | ||
| PARTITION_COLUMN_DOC); | ||
| } else { | ||
| return META_COLUMNS.get(name); | ||
| } | ||
| } | ||
|
|
||
| public static boolean isMetadataColumn(String name) { | ||
| return META_COLUMNS.containsKey(name); | ||
| return name.equals(PARTITION_COLUMN_NAME) || META_COLUMNS.containsKey(name); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am starting to wonder if it is better to just have a dummy column type mapping for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I did that first, but it looks ugly, I agree. |
||
| } | ||
|
|
||
| public static boolean nonMetadataColumn(String name) { | ||
| return !META_COLUMNS.containsKey(name); | ||
| return !isMetadataColumn(name); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,24 +24,32 @@ | |
| import java.math.BigDecimal; | ||
| import java.nio.ByteBuffer; | ||
| import java.util.Iterator; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.stream.Stream; | ||
| import org.apache.avro.generic.GenericData; | ||
| import org.apache.avro.util.Utf8; | ||
| import org.apache.iceberg.CombinedScanTask; | ||
| import org.apache.iceberg.FileScanTask; | ||
| import org.apache.iceberg.MetadataColumns; | ||
| import org.apache.iceberg.Partitioning; | ||
| import org.apache.iceberg.Schema; | ||
| import org.apache.iceberg.StructLike; | ||
| import org.apache.iceberg.Table; | ||
| import org.apache.iceberg.encryption.EncryptedFiles; | ||
| import org.apache.iceberg.encryption.EncryptedInputFile; | ||
| import org.apache.iceberg.encryption.EncryptionManager; | ||
| import org.apache.iceberg.io.CloseableIterator; | ||
| import org.apache.iceberg.io.FileIO; | ||
| import org.apache.iceberg.io.InputFile; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.Maps; | ||
| import org.apache.iceberg.types.Type; | ||
| import org.apache.iceberg.types.Types.NestedField; | ||
| import org.apache.iceberg.types.Types.StructType; | ||
| import org.apache.iceberg.util.ByteBuffers; | ||
| import org.apache.iceberg.util.PartitionUtil; | ||
| import org.apache.spark.rdd.InputFileBlockHolder; | ||
| import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; | ||
| import org.apache.spark.sql.types.Decimal; | ||
| import org.apache.spark.unsafe.types.UTF8String; | ||
| import org.slf4j.Logger; | ||
|
|
@@ -55,24 +63,26 @@ | |
| abstract class BaseDataReader<T> implements Closeable { | ||
| private static final Logger LOG = LoggerFactory.getLogger(BaseDataReader.class); | ||
|
|
||
| private final Table table; | ||
| private final Iterator<FileScanTask> tasks; | ||
| private final Map<String, InputFile> inputFiles; | ||
|
|
||
| private CloseableIterator<T> currentIterator; | ||
| private T current = null; | ||
| private FileScanTask currentTask = null; | ||
|
|
||
| BaseDataReader(CombinedScanTask task, FileIO io, EncryptionManager encryptionManager) { | ||
| BaseDataReader(Table table, CombinedScanTask task) { | ||
| this.table = table; | ||
| this.tasks = task.files().iterator(); | ||
| Map<String, ByteBuffer> keyMetadata = Maps.newHashMap(); | ||
| task.files().stream() | ||
| .flatMap(fileScanTask -> Stream.concat(Stream.of(fileScanTask.file()), fileScanTask.deletes().stream())) | ||
| .forEach(file -> keyMetadata.put(file.path().toString(), file.keyMetadata())); | ||
| Stream<EncryptedInputFile> encrypted = keyMetadata.entrySet().stream() | ||
| .map(entry -> EncryptedFiles.encryptedInput(io.newInputFile(entry.getKey()), entry.getValue())); | ||
| .map(entry -> EncryptedFiles.encryptedInput(table.io().newInputFile(entry.getKey()), entry.getValue())); | ||
|
|
||
| // decrypt with the batch call to avoid multiple RPCs to a key server, if possible | ||
| Iterable<InputFile> decryptedFiles = encryptionManager.decrypt(encrypted::iterator); | ||
| Iterable<InputFile> decryptedFiles = table.encryption().decrypt(encrypted::iterator); | ||
|
|
||
| Map<String, InputFile> files = Maps.newHashMapWithExpectedSize(task.files().size()); | ||
| decryptedFiles.forEach(decrypted -> files.putIfAbsent(decrypted.location(), decrypted)); | ||
|
|
@@ -132,6 +142,15 @@ protected InputFile getInputFile(String location) { | |
| return inputFiles.get(location); | ||
| } | ||
|
|
||
| protected Map<Integer, ?> constantsMap(FileScanTask task, Schema readSchema) { | ||
| if (readSchema.findField(MetadataColumns.PARTITION_COLUMN_ID) != null) { | ||
| StructType partitionType = Partitioning.partitionType(table); | ||
| return PartitionUtil.constantsMap(task, partitionType, BaseDataReader::convertConstant); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd prefer just to call the method rather than trying to optimize by not adding the partition entry.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd prefer calling this all the time too. However, this would mean we can no longer query tables with unknown transforms. We need to know all transforms to build a common partition type. That's why I don't set the partition column if it is not requested. Any thoughts on this, @rdblue?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we leave unknown transforms out of the partition type instead? We can just ignore them if they're unknown.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd probably consider persisting the partition type in the metadata instead. It might be confusing to silently ignore a partition column.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it would be fine for unknown partitions right now. It would unblock this without much risk. |
||
| } else { | ||
| return PartitionUtil.constantsMap(task, BaseDataReader::convertConstant); | ||
| } | ||
| } | ||
|
|
||
| protected static Object convertConstant(Type type, Object value) { | ||
| if (value == null) { | ||
| return null; | ||
|
|
@@ -155,6 +174,24 @@ protected static Object convertConstant(Type type, Object value) { | |
| return ByteBuffers.toByteArray((ByteBuffer) value); | ||
| case BINARY: | ||
| return ByteBuffers.toByteArray((ByteBuffer) value); | ||
| case STRUCT: | ||
| StructType structType = (StructType) type; | ||
rdblue marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if (structType.fields().isEmpty()) { | ||
| return new GenericInternalRow(); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We won't hit this clause with |
||
| } | ||
|
|
||
| List<NestedField> fields = structType.fields(); | ||
| Object[] values = new Object[fields.size()]; | ||
| StructLike struct = (StructLike) value; | ||
|
|
||
| for (int index = 0; index < fields.size(); index++) { | ||
| NestedField field = fields.get(index); | ||
| Type fieldType = field.type(); | ||
| values[index] = convertConstant(fieldType, struct.get(index, fieldType.typeId().javaClass())); | ||
| } | ||
|
|
||
| return new GenericInternalRow(values); | ||
| default: | ||
| } | ||
| return value; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this relies on a `null` for `nestedProjections[pos]`, I think it should set `nestedProjections[pos]` to `null` in the constructor where `positionMap[pos]` is set to -1.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is actually always null as long as the field was not found. I'll add an explicit call, though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added.