Flink: Add ChangeLog DataStream end-to-end unit tests. #1974
Changes from all commits
```diff
@@ -21,6 +21,7 @@
 import java.io.IOException;
 import java.io.UncheckedIOException;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import org.apache.flink.api.common.functions.MapFunction;
@@ -44,6 +45,7 @@
 import org.apache.iceberg.flink.TableLoader;
 import org.apache.iceberg.io.WriteResult;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.TypeUtil;
 import org.apache.iceberg.util.PropertyUtil;
@@ -113,6 +115,7 @@ public static class Builder {
     private TableSchema tableSchema;
     private boolean overwrite = false;
     private Integer writeParallelism = null;
+    private List<String> equalityFieldColumns = null;

     private Builder() {
     }
@@ -169,6 +172,17 @@ public Builder writeParallelism(int newWriteParallelism) {
       return this;
     }

+    /**
+     * Configuring the equality field columns for iceberg table that accept CDC or UPSERT events.
+     *
+     * @param columns defines the iceberg table's key.
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder equalityFieldColumns(List<String> columns) {
+      this.equalityFieldColumns = columns;
+      return this;
+    }
+
     @SuppressWarnings("unchecked")
     public DataStreamSink<RowData> build() {
       Preconditions.checkArgument(rowDataInput != null,
@@ -184,7 +198,18 @@ public DataStreamSink<RowData> build() {
         }
       }

-      IcebergStreamWriter<RowData> streamWriter = createStreamWriter(table, tableSchema);
+      // Find out the equality field id list based on the user-provided equality field column names.
+      List<Integer> equalityFieldIds = Lists.newArrayList();
+      if (equalityFieldColumns != null && equalityFieldColumns.size() > 0) {
+        for (String column : equalityFieldColumns) {
+          org.apache.iceberg.types.Types.NestedField field = table.schema().findField(column);
+          Preconditions.checkNotNull(field, "Missing required equality field column '%s' in table schema %s",
+              column, table.schema());
+          equalityFieldIds.add(field.fieldId());
+        }
+      }
```
Contributor: Why not do this conversion in …

Author (Member): Because the …
```diff
+      IcebergStreamWriter<RowData> streamWriter = createStreamWriter(table, tableSchema, equalityFieldIds);
       IcebergFilesCommitter filesCommitter = new IcebergFilesCommitter(tableLoader, overwrite);

       this.writeParallelism = writeParallelism == null ? rowDataInput.getParallelism() : writeParallelism;
@@ -202,7 +227,8 @@ public DataStreamSink<RowData> build() {
     }
   }

-  static IcebergStreamWriter<RowData> createStreamWriter(Table table, TableSchema requestedSchema) {
+  static IcebergStreamWriter<RowData> createStreamWriter(Table table, TableSchema requestedSchema,
+                                                         List<Integer> equalityFieldIds) {
     Preconditions.checkArgument(table != null, "Iceberg table should't be null");

     RowType flinkSchema;
@@ -226,7 +252,7 @@ static IcebergStreamWriter<RowData> createStreamWriter(Table table, TableSchema
     TaskWriterFactory<RowData> taskWriterFactory = new RowDataTaskWriterFactory(table.schema(), flinkSchema,
         table.spec(), table.locationProvider(), table.io(), table.encryption(), targetFileSize, fileFormat, props,
-        null);
+        equalityFieldIds);

     return new IcebergStreamWriter<>(table.name(), taskWriterFactory);
   }
```
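For illustration, a minimal usage sketch of the new builder option, not part of this PR: it assumes the file being modified is the Iceberg Flink sink builder (the `FlinkSink.forRowData(...)` entry point in the `iceberg-flink` module), and the table location and key column name are made up.

```java
import java.util.Collections;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.table.data.RowData;
import org.apache.iceberg.flink.TableLoader;
import org.apache.iceberg.flink.sink.FlinkSink;

public class CdcSinkSketch {

  // Wires a change-log stream into an Iceberg table, declaring which column(s)
  // identify a row so CDC DELETE/UPDATE events can be written as equality deletes.
  public static DataStreamSink<RowData> appendCdcSink(DataStream<RowData> changeLog, String tableLocation) {
    TableLoader tableLoader = TableLoader.fromHadoopTable(tableLocation);

    return FlinkSink.forRowData(changeLog)
        .tableLoader(tableLoader)
        .equalityFieldColumns(Collections.singletonList("id"))  // hypothetical key column
        .writeParallelism(2)
        .build();
  }
}
```

At `build()` time the column names are resolved against the table schema into field ids, as shown in the diff above, and passed through to `RowDataTaskWriterFactory`.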
New file (`TestTableLoader`, @@ -0,0 +1,47 @@):

```java
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.flink;

import java.io.File;
import org.apache.iceberg.Table;
import org.apache.iceberg.TestTables;

public class TestTableLoader implements TableLoader {
  private File dir;

  public TestTableLoader(String dir) {
    this.dir = new File(dir);
  }

  @Override
  public void open() {
  }

  @Override
  public Table loadTable() {
    return TestTables.load(dir, "test");
  }

  @Override
  public void close() {
  }
}
```
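A rough sketch of how this test loader might be exercised in a unit test; this is not from the PR, and the temporary directory handling plus the assumption that a `TestTables` table named "test" was already created are mine.

```java
package org.apache.iceberg.flink;

import java.io.File;
import org.apache.iceberg.Table;

class TestTableLoaderUsageSketch {

  // Walks the loader through its life cycle: open, load the TestTables-backed
  // table named "test", then close (a no-op for this in-memory test loader).
  static Table loadTestTable(File warehouseDir) {
    TestTableLoader loader = new TestTableLoader(warehouseDir.getAbsolutePath());
    loader.open();
    Table table = loader.loadTable();
    loader.close();
    return table;
  }
}
```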

---
Do you think that we should consider adding primary key columns to the spec?

---
In the next PR (https://github.com/openinx/incubator-iceberg/commit/a863c66eb3d72dd975ea64c75ed2ac35984c17fe), the Flink table SQL's primary key will act as the equality field columns. The semantics of Iceberg's equality columns are almost the same as a primary key; one difference I can think of is that the uniqueness of the key is not enforced. In this discussion, we don't guarantee uniqueness when writing a key that was already written in a previously committed txn: if the same key is written in two separately committed txns, the table will have two records with the same key.

If people really need Iceberg to maintain the key's uniqueness, they will need to transform every `INSERT` into an `UPSERT`, which means `DELETE` first and then `INSERT` the new values. That introduces another issue: each `INSERT` is treated as an `UPSERT`, so it writes a `DELETE` and an `INSERT`, and the size of the delete files ends up almost the same as the size of the data files. The merge-on-read process then becomes quite inefficient because there are too many useless `DELETE`s to JOIN.

The direct way to reduce the useless `DELETE`s is a bloom filter: say we generate a bloom filter for each committed data file. When bootstrapping the Flink/Spark job we would prefetch all the bloom filter binaries from the Parquet/Avro data files' metadata. Before writing an equality delete, we check the bloom filters; if they indicate that none of the committed data files contain the given key, we can skip appending that equality delete. That would remove a lot of useless `DELETE`s from the delete files. Of course, a bloom filter has false positives, but that probability is less than 1%, meaning we may append a small amount of deletes whose keys don't exist in the current table. In my view, that should be OK.

In summary, I think it's reasonable to regard those equality fields as the primary key of an Iceberg table. People could choose between `UNIQUENESS ENFORCED` and `UNIQUENESS NOT-ENFORCED`, trading off between strong semantics and performance.
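To make the bloom filter idea above concrete, here is a rough sketch that is not part of this PR or of Iceberg's API; it uses plain Guava for brevity (Iceberg bundles a relocated Guava) and the helper names are invented.

```java
import java.nio.charset.StandardCharsets;
import java.util.List;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;

public class EqualityDeleteFilterSketch {

  // One filter per committed data file; a delete for `key` only needs to be written
  // if at least one committed file might contain that key.
  static boolean shouldWriteEqualityDelete(String key, List<BloomFilter<CharSequence>> committedFileFilters) {
    for (BloomFilter<CharSequence> filter : committedFileFilters) {
      if (filter.mightContain(key)) {
        return true;   // possibly present (small false-positive rate), so keep the delete
      }
    }
    return false;      // definitely absent from every committed file, skip the useless delete
  }

  // Building a per-file filter while writing data files, sized for ~1% false positives.
  static BloomFilter<CharSequence> newFileFilter(long expectedKeys) {
    return BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), expectedKeys, 0.01);
  }
}
```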

---

For the bloom filter idea, @wangmiao1981 has been working on a proposal for secondary indexes. I think that could be used for the check you're suggesting here.

Are you saying that if uniqueness is enforced, each insert becomes an upsert, but if uniqueness is not enforced, then the sink assumes that whatever is emitting records will correctly delete before inserting? That sounds reasonable to me.

I think that even if uniqueness is not enforced, tables will quickly require compaction to rewrite the equality deletes. We should spend some time making sure that we have good ways to maintain tables: compacting equality deletes into position deletes, and position deletes into data files.

---
Yes. If someone is exporting a relational database's change-log events to an Apache Iceberg table and can guarantee exactly-once semantics (for example, the flink-cdc-connector can guarantee that), then uniqueness is always correct when we just write the INSERT/DELETE/UPDATE_BEFORE/UPDATE_AFTER events to Iceberg. In some other cases, for example a Flink aggregation job refreshing a metrics count value, we will write the same key several times without deleting first, so we should regard every `INSERT` as an `UPSERT`.

That was planned for the second phase, including: handling the case where the `insertedRowMap` exceeds the task's memory threshold, etc. I will evaluate the read, write, and compaction paths on a large dataset to make this a stable solution for production. It's good to have a document collecting all those things for review.
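A small sketch of the "regard every INSERT as an UPSERT" idea discussed above, not code from this PR: upstream of the sink, each INSERT is rewritten as a DELETE on the same key followed by the INSERT. It is shown with `org.apache.flink.types.Row` for brevity, whereas the sink in this PR consumes `RowData`.

```java
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;

// Turns an append-only stream (every event arrives as INSERT) into an upsert-style
// change log: each key is first retracted with a DELETE and then re-inserted.
public class InsertToUpsert implements FlatMapFunction<Row, Row> {

  @Override
  public void flatMap(Row row, Collector<Row> out) {
    if (row.getKind() == RowKind.INSERT) {
      Row delete = Row.copy(row);
      delete.setKind(RowKind.DELETE);  // retract any previous version of this key
      out.collect(delete);
    }
    out.collect(row);                  // then (re-)insert the new values
  }
}
```

The DELETE emitted for a key that was never written before is exactly the "useless delete" that the bloom filter check above would try to avoid.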

---
I'd vote for not enforcing uniqueness, as it is really hard at scale. If we were to ensure it at write time, we would have to join the incoming data with the target table, which is really expensive. Doing it at read time would require sorting the data not only by the sort key but also by the sequence number.
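As a toy illustration of that read-side cost (not Iceberg code; the record shape is invented): if uniqueness is not enforced at write time, the reader has to group rows by key and keep only the version with the highest sequence number, either through a hash aggregation like the one below or a sort by (key, sequence number).

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReadSideDedupSketch {

  // Hypothetical row: a key plus the sequence number of the commit that produced it.
  static final class KeyedRow {
    final String key;
    final long sequenceNumber;

    KeyedRow(String key, long sequenceNumber) {
      this.key = key;
      this.sequenceNumber = sequenceNumber;
    }
  }

  // Keeps only the latest version of each key; every matching row must be touched.
  static Map<String, KeyedRow> latestVersionPerKey(List<KeyedRow> rows) {
    Map<String, KeyedRow> latest = new HashMap<>();
    for (KeyedRow row : rows) {
      latest.merge(row.key, row,
          (current, incoming) -> incoming.sequenceNumber >= current.sequenceNumber ? incoming : current);
    }
    return latest;
  }
}
```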