Add delta writer classes #1802
Changes from all commits
New file: `org.apache.iceberg.io.DataWriter` (79 added lines)

```java
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.iceberg.io;

import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.encryption.EncryptionKeyMetadata;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;

public class DataWriter<T> implements Closeable {
  private final FileAppender<T> appender;
  private final FileFormat format;
  private final String location;
  private final PartitionSpec spec;
  private final StructLike partition;
  private final ByteBuffer keyMetadata;
  private DataFile dataFile = null;

  public DataWriter(FileAppender<T> appender, FileFormat format, String location,
                    PartitionSpec spec, StructLike partition, EncryptionKeyMetadata keyMetadata) {
    this.appender = appender;
    this.format = format;
    this.location = location;
    this.spec = spec;
    this.partition = partition;
    this.keyMetadata = keyMetadata != null ? keyMetadata.buffer() : null;
  }

  public void addAll(Iterable<T> rows) {
    appender.addAll(rows);
  }

  public void add(T row) {
    appender.add(row);
  }

  public long length() {
    return appender.length();
  }

  @Override
  public void close() throws IOException {
    if (dataFile == null) {
      appender.close();
      this.dataFile = DataFiles.builder(spec)
          .withFormat(format)
          .withPath(location)
          .withPartition(partition)
          .withEncryptionKeyMetadata(keyMetadata)
          .withFileSizeInBytes(appender.length())
          .withMetrics(appender.metrics())
          .withSplitOffsets(appender.splitOffsets())
          .build();
    }
  }

  public DataFile toDataFile() {
    Preconditions.checkState(dataFile != null, "Cannot create data file from unclosed writer");
    return dataFile;
  }
}
```
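For reviewers who want to see the API in one place, below is a minimal usage sketch of the `DataWriter` added above. It only exercises methods from this diff; the `Record` row type, the `writeUnpartitioned` helper, and the assumption that the caller already has a `FileAppender` and a target location are illustrative, not part of this PR.

```java
import java.io.IOException;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.io.DataWriter;
import org.apache.iceberg.io.FileAppender;

// Hypothetical helper, not part of this PR: writes rows through the new
// DataWriter and returns the DataFile that close() builds from the appender.
class DataWriterUsageSketch {
  static DataFile writeUnpartitioned(FileAppender<Record> appender, String location,
                                     Iterable<Record> rows) throws IOException {
    DataWriter<Record> writer = new DataWriter<>(
        appender, FileFormat.PARQUET, location,
        PartitionSpec.unpartitioned(), null, null);  // no partition, no encryption key metadata
    try {
      writer.addAll(rows);        // delegates to FileAppender.addAll
    } finally {
      writer.close();             // closes the appender and builds the DataFile
    }
    return writer.toDataFile();   // only valid after close()
  }
}
```

One note on the design: because `close()` only builds the `DataFile` when `dataFile == null`, closing is effectively idempotent, and `toDataFile()` deliberately fails fast on an unclosed writer via the `Preconditions.checkState`.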
New file: `org.apache.iceberg.io.UnpartitionedDeltaWriter` (47 added lines)

```java
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.iceberg.io;

import java.io.IOException;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;

public class UnpartitionedDeltaWriter<T> extends BaseTaskWriter<T> {

  private final RollingFileWriter currentWriter;
  private final RollingEqDeleteWriter currentEqDeletes;

  public UnpartitionedDeltaWriter(PartitionSpec spec, FileFormat format, FileAppenderFactory<T> appenderFactory,
                                  OutputFileFactory fileFactory, FileIO io, long targetFileSize) {
    super(spec, format, appenderFactory, fileFactory, io, targetFileSize);
    currentWriter = new RollingFileWriter(null);
    currentEqDeletes = new RollingEqDeleteWriter(null);
  }

  @Override
  public void write(T record) throws IOException {
    currentWriter.add(record);
  }

  public void delete(T record) throws IOException {
```
Review comment (Member): Will it be better to add this

Reply (Member): Maybe not

Reply (Contributor, author): Probably. I just wanted to demonstrate that we can add a delete here that works with the rolling writer. What we actually expose will probably be different.
```java
    currentEqDeletes.delete(record);
  }

  @Override
  public void close() throws IOException {
    currentWriter.close();
    currentEqDeletes.close();
  }
}
```
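To make the intended call pattern concrete, here is an illustrative caller for the writer above. It only uses `write`, `delete`, and `close` from this diff; the `Record` row type, the `applyChanges` helper, and the wiring that constructs the writer (spec, format, appender/file factories, `FileIO`, target file size) are assumed and left to the engine.

```java
import java.io.IOException;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.io.UnpartitionedDeltaWriter;

// Hypothetical caller, not part of this PR: routes inserts to write() and
// equality deletes to delete(), then closes both underlying rolling writers.
class DeltaApplySketch {
  static void applyChanges(UnpartitionedDeltaWriter<Record> writer,
                           Iterable<Record> inserts,
                           Iterable<Record> deletes) throws IOException {
    for (Record row : inserts) {
      writer.write(row);          // appended via the rolling data-file writer
    }
    for (Record deleteKey : deletes) {
      writer.delete(deleteKey);   // written via the rolling equality-delete writer
    }
    writer.close();               // closes both rolling writers
  }
}
```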
Review comment: I think we would still need to add an extra `DeltaWriter` between the `TaskWriter` and the `RollingFileWriter`s, because a fanout `TaskWriter` will have rows from different partitions or buckets, and the per-partition (or per-bucket) writer that accepts both data records and equality deletions is what would be named `DeltaWriter`. That way we could move all of the equality-delete logic into a single common `DeltaWriter` class, and the `TaskWriter` would focus on dispatching records to the `DeltaWriter` methods with a customized policy. For example, Flink's RowData has INSERT/DELETE/UPDATE_BEFORE/UPDATE_AFTER; if a row is a DELETE, the fanout policy could direct it to `DeltaWriter`'s `delete` method.
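To illustrate the shape this comment seems to be proposing (nothing below exists in the PR; the interface name and signatures are a hypothetical reading of the suggestion), a per-partition `DeltaWriter` could look roughly like this, with the fanout `TaskWriter` keeping one instance per partition or bucket and routing each incoming row to `write` or `delete`:

```java
import java.io.Closeable;
import java.io.IOException;

// Hypothetical sketch only: one DeltaWriter per partition (or bucket), owning
// both a rolling data-file writer and a rolling equality-delete writer, so the
// TaskWriter's job reduces to dispatching rows by change type.
interface DeltaWriter<T> extends Closeable {
  void write(T row) throws IOException;    // INSERT / UPDATE_AFTER rows
  void delete(T row) throws IOException;   // DELETE / UPDATE_BEFORE rows
}
```

A Flink task writer could then switch on the RowData change kind and call `write` for INSERT/UPDATE_AFTER and `delete` for DELETE/UPDATE_BEFORE, keeping the per-partition equality-delete bookkeeping inside `DeltaWriter`.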