-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Core: Enable row lineage for all v3 tables #12593
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a66942b
aff717b
c1954bb
000941a
6e047e8
16f6b28
2ea119c
973a85c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,12 +53,11 @@ public class TableMetadata implements Serializable { | |
| static final long INVALID_SEQUENCE_NUMBER = -1; | ||
| static final int DEFAULT_TABLE_FORMAT_VERSION = 2; | ||
| static final int SUPPORTED_TABLE_FORMAT_VERSION = 3; | ||
| static final int MIN_FORMAT_VERSION_ROW_LINEAGE = 3; | ||
| static final int INITIAL_SPEC_ID = 0; | ||
| static final int INITIAL_SORT_ORDER_ID = 1; | ||
| static final int INITIAL_SCHEMA_ID = 0; | ||
| static final int INITIAL_ROW_ID = 0; | ||
| static final boolean DEFAULT_ROW_LINEAGE = false; | ||
| static final int MIN_FORMAT_VERSION_ROW_LINEAGE = 3; | ||
rdblue marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| private static final long ONE_MINUTE = TimeUnit.MINUTES.toMillis(1); | ||
|
|
||
|
|
@@ -133,11 +132,6 @@ static TableMetadata newTableMetadata( | |
| int freshSortOrderId = sortOrder.isUnsorted() ? sortOrder.orderId() : INITIAL_SORT_ORDER_ID; | ||
| SortOrder freshSortOrder = freshSortOrder(freshSortOrderId, freshSchema, sortOrder); | ||
|
|
||
| // configure row lineage using table properties | ||
| Boolean rowLineage = | ||
| PropertyUtil.propertyAsBoolean( | ||
| properties, TableProperties.ROW_LINEAGE, DEFAULT_ROW_LINEAGE); | ||
|
|
||
| // Validate the metrics configuration. Note: we only do this on new tables to we don't | ||
| // break existing tables. | ||
| MetricsConfig.fromProperties(properties).validateReferencedColumns(schema); | ||
|
|
@@ -151,7 +145,6 @@ static TableMetadata newTableMetadata( | |
| .setDefaultSortOrder(freshSortOrder) | ||
| .setLocation(location) | ||
| .setProperties(properties) | ||
| .setRowLineage(rowLineage) | ||
| .build(); | ||
| } | ||
|
|
||
|
|
@@ -266,13 +259,12 @@ public String toString() { | |
| private final List<StatisticsFile> statisticsFiles; | ||
| private final List<PartitionStatisticsFile> partitionStatisticsFiles; | ||
| private final List<MetadataUpdate> changes; | ||
| private final long nextRowId; | ||
| private SerializableSupplier<List<Snapshot>> snapshotsSupplier; | ||
| private volatile List<Snapshot> snapshots; | ||
| private volatile Map<Long, Snapshot> snapshotsById; | ||
| private volatile Map<String, SnapshotRef> refs; | ||
| private volatile boolean snapshotsLoaded; | ||
| private final Boolean rowLineageEnabled; | ||
| private final long nextRowId; | ||
|
|
||
| @SuppressWarnings("checkstyle:CyclomaticComplexity") | ||
| TableMetadata( | ||
|
|
@@ -299,9 +291,8 @@ public String toString() { | |
| Map<String, SnapshotRef> refs, | ||
| List<StatisticsFile> statisticsFiles, | ||
| List<PartitionStatisticsFile> partitionStatisticsFiles, | ||
| List<MetadataUpdate> changes, | ||
| boolean rowLineageEnabled, | ||
| long nextRowId) { | ||
| long nextRowId, | ||
| List<MetadataUpdate> changes) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that this PR also restores the convention that changes are passed last to the |
||
| Preconditions.checkArgument( | ||
| specs != null && !specs.isEmpty(), "Partition specs cannot be null or empty"); | ||
| Preconditions.checkArgument( | ||
|
|
@@ -320,10 +311,6 @@ public String toString() { | |
| Preconditions.checkArgument( | ||
| metadataFileLocation == null || changes.isEmpty(), | ||
| "Cannot create TableMetadata with a metadata location and changes"); | ||
| Preconditions.checkArgument( | ||
| formatVersion >= MIN_FORMAT_VERSION_ROW_LINEAGE || !rowLineageEnabled, | ||
| "Cannot enable row lineage when Table Version is less than V3. Table Version is %s", | ||
| formatVersion); | ||
|
|
||
| this.metadataFileLocation = metadataFileLocation; | ||
| this.formatVersion = formatVersion; | ||
|
|
@@ -359,7 +346,6 @@ public String toString() { | |
| this.partitionStatisticsFiles = ImmutableList.copyOf(partitionStatisticsFiles); | ||
|
|
||
| // row lineage | ||
| this.rowLineageEnabled = rowLineageEnabled; | ||
| this.nextRowId = nextRowId; | ||
|
|
||
| HistoryEntry last = null; | ||
|
|
@@ -584,8 +570,14 @@ public TableMetadata withUUID() { | |
| return new Builder(this).assignUUID().build(); | ||
| } | ||
|
|
||
| /** | ||
| * Whether row lineage is enabled. | ||
| * | ||
| * @deprecated will be removed in 1.10.0; row lineage is required for all v3+ tables. | ||
| */ | ||
| @Deprecated | ||
| public boolean rowLineageEnabled() { | ||
| return rowLineageEnabled; | ||
| return formatVersion >= MIN_FORMAT_VERSION_ROW_LINEAGE; | ||
| } | ||
|
|
||
| public long nextRowId() { | ||
|
|
@@ -634,15 +626,10 @@ public TableMetadata replaceProperties(Map<String, String> rawProperties) { | |
| int newFormatVersion = | ||
| PropertyUtil.propertyAsInt(rawProperties, TableProperties.FORMAT_VERSION, formatVersion); | ||
|
|
||
| Boolean newRowLineage = | ||
| PropertyUtil.propertyAsBoolean( | ||
| rawProperties, TableProperties.ROW_LINEAGE, rowLineageEnabled); | ||
|
|
||
| return new Builder(this) | ||
| .setProperties(updated) | ||
| .removeProperties(removed) | ||
| .upgradeFormatVersion(newFormatVersion) | ||
| .setRowLineage(newRowLineage) | ||
| .build(); | ||
| } | ||
|
|
||
|
|
@@ -927,7 +914,6 @@ public static class Builder { | |
| private final Map<Long, List<StatisticsFile>> statisticsFiles; | ||
| private final Map<Long, List<PartitionStatisticsFile>> partitionStatisticsFiles; | ||
| private boolean suppressHistoricalSnapshots = false; | ||
| private boolean rowLineage; | ||
| private long nextRowId; | ||
|
|
||
| // change tracking | ||
|
|
@@ -975,7 +961,6 @@ private Builder(int formatVersion) { | |
| this.schemasById = Maps.newHashMap(); | ||
| this.specsById = Maps.newHashMap(); | ||
| this.sortOrdersById = Maps.newHashMap(); | ||
| this.rowLineage = DEFAULT_ROW_LINEAGE; | ||
| this.nextRowId = INITIAL_ROW_ID; | ||
| } | ||
|
|
||
|
|
@@ -1011,10 +996,25 @@ private Builder(TableMetadata base) { | |
| this.specsById = Maps.newHashMap(base.specsById); | ||
| this.sortOrdersById = Maps.newHashMap(base.sortOrdersById); | ||
|
|
||
| this.rowLineage = base.rowLineageEnabled; | ||
| this.nextRowId = base.nextRowId; | ||
| } | ||
|
|
||
| /** | ||
| * Enables row lineage in v3 tables. | ||
| * | ||
| * @deprecated will be removed in 1.10.0; row lineage is required for all v3+ tables. | ||
| */ | ||
| @Deprecated | ||
| public Builder enableRowLineage() { | ||
| if (formatVersion < MIN_FORMAT_VERSION_ROW_LINEAGE) { | ||
| throw new UnsupportedOperationException( | ||
| "Cannot enable row lineage for format-version=" + formatVersion); | ||
| } | ||
|
|
||
| // otherwise this is a no-op | ||
| return null; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have expected that this method would return |
||
| } | ||
|
|
||
| public Builder withMetadataLocation(String newMetadataLocation) { | ||
| this.metadataLocation = newMetadataLocation; | ||
| if (null != base) { | ||
|
|
@@ -1269,18 +1269,14 @@ public Builder addSnapshot(Snapshot snapshot) { | |
| snapshotsById.put(snapshot.snapshotId(), snapshot); | ||
| changes.add(new MetadataUpdate.AddSnapshot(snapshot)); | ||
|
|
||
| if (rowLineage) { | ||
| if (formatVersion >= MIN_FORMAT_VERSION_ROW_LINEAGE) { | ||
| ValidationException.check( | ||
| snapshot.firstRowId() != null, "Cannot add a snapshot: first-row-id is null"); | ||
| ValidationException.check( | ||
| snapshot.firstRowId() >= nextRowId, | ||
| "Cannot add a snapshot whose 'first-row-id' (%s) is less than the metadata 'next-row-id' (%s) because this will end up generating duplicate row_ids.", | ||
| snapshot.firstRowId() != null && snapshot.firstRowId() >= nextRowId, | ||
| "Cannot add a snapshot, first-row-id is behind table next-row-id: %s < %s", | ||
| snapshot.firstRowId(), | ||
| nextRowId); | ||
| ValidationException.check( | ||
| snapshot.addedRows() != null, | ||
| "Cannot add a snapshot with a null 'added-rows' field when row lineage is enabled"); | ||
| Preconditions.checkArgument( | ||
| snapshot.addedRows() >= 0, | ||
| "Cannot decrease 'last-row-id'. 'last-row-id' must increase monotonically. Snapshot reports %s added rows"); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @RussellSpitzer, I moved these validations in to Also, I thought about keeping the checks here rather than moving them to |
||
|
|
||
| this.nextRowId += snapshot.addedRows(); | ||
| } | ||
|
|
@@ -1508,34 +1504,6 @@ public Builder setPreviousFileLocation(String previousFileLocation) { | |
| return this; | ||
| } | ||
|
|
||
| private Builder setRowLineage(Boolean newRowLineage) { | ||
| if (newRowLineage == null) { | ||
| return this; | ||
| } | ||
|
|
||
| boolean disablingRowLineage = rowLineage && !newRowLineage; | ||
|
|
||
| Preconditions.checkArgument( | ||
| !disablingRowLineage, "Cannot disable row lineage once it has been enabled"); | ||
|
|
||
| if (!rowLineage && newRowLineage) { | ||
| return enableRowLineage(); | ||
| } else { | ||
| return this; | ||
| } | ||
| } | ||
|
|
||
| public Builder enableRowLineage() { | ||
| Preconditions.checkArgument( | ||
| formatVersion >= MIN_FORMAT_VERSION_ROW_LINEAGE, | ||
| "Cannot use row lineage with format version %s. Only format version %s or higher support row lineage", | ||
| formatVersion, | ||
| MIN_FORMAT_VERSION_ROW_LINEAGE); | ||
| this.rowLineage = true; | ||
| changes.add(new MetadataUpdate.EnableRowLineage()); | ||
| return this; | ||
| } | ||
|
|
||
| private boolean hasChanges() { | ||
| return changes.size() != startingChangeCount | ||
| || (discardChanges && !changes.isEmpty()) | ||
|
|
@@ -1603,9 +1571,8 @@ public TableMetadata build() { | |
| partitionStatisticsFiles.values().stream() | ||
| .flatMap(List::stream) | ||
| .collect(Collectors.toList()), | ||
| discardChanges ? ImmutableList.of() : ImmutableList.copyOf(changes), | ||
| rowLineage, | ||
| nextRowId); | ||
| nextRowId, | ||
| discardChanges ? ImmutableList.of() : ImmutableList.copyOf(changes)); | ||
| } | ||
|
|
||
| private int addSchemaInternal(Schema schema, int newLastColumnId) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be slightly more permissive, I'm allowing
`addedRows` to be passed, but it is only propagated if `firstRowId` is non-null.