-
Notifications
You must be signed in to change notification settings - Fork 3k
fix: add and remove partition transform on same column failed when use v1 metadata #2691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
563fca3
a9035f2
4877bdc
adf19d7
c9a429b
00a48a3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,7 @@ | |
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
| import java.util.UUID; | ||
| import org.apache.iceberg.exceptions.ValidationException; | ||
| import org.apache.iceberg.expressions.BoundReference; | ||
| import org.apache.iceberg.expressions.BoundTerm; | ||
|
|
@@ -50,7 +51,7 @@ class BaseUpdatePartitionSpec implements UpdatePartitionSpec { | |
| private final Map<String, PartitionField> nameToField; | ||
| private final Map<Pair<Integer, String>, PartitionField> transformToField; | ||
|
|
||
| private final List<PartitionField> adds = Lists.newArrayList(); | ||
| private final List<Pair<PartitionField, String>> adds = Lists.newArrayList(); | ||
| private final Map<Integer, PartitionField> addedTimeFields = Maps.newHashMap(); | ||
| private final Map<Pair<Integer, String>, PartitionField> transformToAddedField = Maps.newHashMap(); | ||
| private final Map<String, PartitionField> nameToAddedField = Maps.newHashMap(); | ||
|
|
@@ -145,11 +146,29 @@ public BaseUpdatePartitionSpec addField(String name, Term term) { | |
| checkForRedundantAddedPartitions(newField); | ||
|
|
||
| transformToAddedField.put(validationKey, newField); | ||
| if (name != null) { | ||
| nameToAddedField.put(name, newField); | ||
|
|
||
| String partitionName; | ||
| if (newField.name() != null) { | ||
| partitionName = newField.name(); | ||
| } else { | ||
| partitionName = PartitionSpecVisitor.visit(schema, newField, PartitionNameGenerator.INSTANCE); | ||
| } | ||
|
|
||
| adds.add(newField); | ||
| PartitionField existingField = nameToField.get(partitionName); | ||
| if (existingField != null) { | ||
| if (isVoidTransform(existingField)) { | ||
| // rename the old deleted field that is being replaced by the new field | ||
| renameField(existingField.name(), existingField.name() + "_" + UUID.randomUUID()); | ||
| } else { | ||
| throw new IllegalArgumentException(String.format("Cannot add duplicate partition field name: %s", name)); | ||
| } | ||
| } | ||
|
|
||
| if (partitionName != null) { | ||
| nameToAddedField.put(partitionName, newField); | ||
| } | ||
|
|
||
| adds.add(Pair.of(newField, partitionName)); | ||
|
||
|
|
||
| return this; | ||
| } | ||
|
|
@@ -192,6 +211,12 @@ public BaseUpdatePartitionSpec removeField(Term term) { | |
|
|
||
| @Override | ||
| public BaseUpdatePartitionSpec renameField(String name, String newName) { | ||
| PartitionField existingField = nameToField.get(newName); | ||
| if (existingField != null && isVoidTransform(existingField)) { | ||
| // rename the old deleted field that is being replaced by the new field | ||
| renameField(existingField.name(), existingField.name() + "_" + UUID.randomUUID()); | ||
|
||
| } | ||
|
|
||
| PartitionField added = nameToAddedField.get(name); | ||
| Preconditions.checkArgument(added == null, | ||
| "Cannot rename newly added partition field: %s", name); | ||
|
|
@@ -227,14 +252,9 @@ public PartitionSpec apply() { | |
| } | ||
| } | ||
|
|
||
| for (PartitionField newField : adds) { | ||
| String partitionName; | ||
| if (newField.name() != null) { | ||
| partitionName = newField.name(); | ||
| } else { | ||
| partitionName = PartitionSpecVisitor.visit(schema, newField, PartitionNameGenerator.INSTANCE); | ||
| } | ||
|
|
||
| for (Pair<PartitionField, String> newFieldAndNamePair : adds) { | ||
| PartitionField newField = newFieldAndNamePair.first(); | ||
| String partitionName = newFieldAndNamePair.second(); | ||
| builder.add(newField.sourceId(), newField.fieldId(), partitionName, newField.transform()); | ||
| } | ||
|
|
||
|
|
@@ -287,13 +307,13 @@ private static Map<String, PartitionField> indexSpecByName(PartitionSpec spec) { | |
| } | ||
|
|
||
| private static Map<Pair<Integer, String>, PartitionField> indexSpecByTransform(PartitionSpec spec) { | ||
| ImmutableMap.Builder<Pair<Integer, String>, PartitionField> builder = ImmutableMap.builder(); | ||
| Map<Pair<Integer, String>, PartitionField> indexSpecs = Maps.newHashMap(); | ||
| List<PartitionField> fields = spec.fields(); | ||
| for (PartitionField field : fields) { | ||
| builder.put(Pair.of(field.sourceId(), field.transform().toString()), field); | ||
| indexSpecs.put(Pair.of(field.sourceId(), field.transform().toString()), field); | ||
| } | ||
|
|
||
| return builder.build(); | ||
| return indexSpecs; | ||
| } | ||
|
|
||
| private boolean isTimeTransform(PartitionField field) { | ||
|
|
@@ -352,6 +372,62 @@ public Boolean unknown(int fieldId, String sourceName, int sourceId, String tran | |
| } | ||
| } | ||
|
|
||
| private boolean isVoidTransform(PartitionField field) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 for this approach to detect void transforms. |
||
| return PartitionSpecVisitor.visit(schema, field, IsVoidTransform.INSTANCE); | ||
| } | ||
|
|
||
| private static class IsVoidTransform implements PartitionSpecVisitor<Boolean> { | ||
| private static final IsVoidTransform INSTANCE = new IsVoidTransform(); | ||
|
|
||
| private IsVoidTransform() { | ||
| } | ||
|
|
||
| @Override | ||
| public Boolean identity(int fieldId, String sourceName, int sourceId) { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public Boolean bucket(int fieldId, String sourceName, int sourceId, int numBuckets) { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public Boolean truncate(int fieldId, String sourceName, int sourceId, int width) { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public Boolean year(int fieldId, String sourceName, int sourceId) { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public Boolean month(int fieldId, String sourceName, int sourceId) { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public Boolean day(int fieldId, String sourceName, int sourceId) { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public Boolean hour(int fieldId, String sourceName, int sourceId) { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public Boolean alwaysNull(int fieldId, String sourceName, int sourceId) { | ||
| return true; | ||
| } | ||
|
|
||
| @Override | ||
| public Boolean unknown(int fieldId, String sourceName, int sourceId, String transform) { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| private static class PartitionNameGenerator implements PartitionSpecVisitor<String> { | ||
| private static final PartitionNameGenerator INSTANCE = new PartitionNameGenerator(); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here as well, I'd prefer not to use a UUID. This should be able to use the existing field's ID instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use the existing field's ID instead.