-
Notifications
You must be signed in to change notification settings - Fork 3k
Core: Use avro compression properties from table properties when writing manifests and manifest lists #6799
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
26c697d
d3486ea
e4af18d
0d5c6f6
be3460b
980e9e0
38e6eae
01d95f0
ad08003
615e94a
be048b5
430b7a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -157,11 +157,30 @@ public static ManifestWriter<DataFile> write(PartitionSpec spec, OutputFile outp | |
| */ | ||
| public static ManifestWriter<DataFile> write( | ||
| int formatVersion, PartitionSpec spec, OutputFile outputFile, Long snapshotId) { | ||
| return write(formatVersion, spec, outputFile, snapshotId, ManifestWriter.options()); | ||
| } | ||
|
|
||
| /** | ||
| * Create a new {@link ManifestWriter} for the given format version. | ||
| * | ||
| * @param formatVersion a target format version | ||
| * @param spec a {@link PartitionSpec} | ||
| * @param outputFile an {@link OutputFile} where the manifest will be written | ||
| * @param snapshotId a snapshot ID for the manifest entries, or null for an inherited ID | ||
| * @param options additional options for the manifest writer | ||
| * @return a manifest writer | ||
| */ | ||
| public static ManifestWriter<DataFile> write( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought I had already commented before going on vacation, but I can't seem to find the old discussion. Sorry if I am posting the same question again. Have we considered using a builder? My worry with the current approach is that we need to offer an overloaded method every time we add a new parameter.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @aokolnychyi thanks for reviewing. I'm interested to hear what @rdblue thinks. In the meantime, let me think about how to address your concern. However, using a builder would mean an API break, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @sumeetgajjar originally used a |
||
| int formatVersion, | ||
| PartitionSpec spec, | ||
| OutputFile outputFile, | ||
| Long snapshotId, | ||
| ManifestWriter.Options options) { | ||
| switch (formatVersion) { | ||
| case 1: | ||
| return new ManifestWriter.V1Writer(spec, outputFile, snapshotId); | ||
| return new ManifestWriter.V1Writer(spec, outputFile, snapshotId, options); | ||
| case 2: | ||
| return new ManifestWriter.V2Writer(spec, outputFile, snapshotId); | ||
| return new ManifestWriter.V2Writer(spec, outputFile, snapshotId, options); | ||
| } | ||
| throw new UnsupportedOperationException( | ||
| "Cannot write manifest for table version: " + formatVersion); | ||
|
|
@@ -198,11 +217,31 @@ public static ManifestReader<DeleteFile> readDeleteManifest( | |
| */ | ||
| public static ManifestWriter<DeleteFile> writeDeleteManifest( | ||
| int formatVersion, PartitionSpec spec, OutputFile outputFile, Long snapshotId) { | ||
| return writeDeleteManifest( | ||
| formatVersion, spec, outputFile, snapshotId, ManifestWriter.options()); | ||
| } | ||
|
|
||
| /** | ||
| * Create a new {@link ManifestWriter} for the given format version. | ||
| * | ||
| * @param formatVersion a target format version | ||
| * @param spec a {@link PartitionSpec} | ||
| * @param outputFile an {@link OutputFile} where the manifest will be written | ||
| * @param snapshotId a snapshot ID for the manifest entries, or null for an inherited ID | ||
| * @param options additional options for the manifest writer | ||
| * @return a manifest writer | ||
| */ | ||
| public static ManifestWriter<DeleteFile> writeDeleteManifest( | ||
| int formatVersion, | ||
| PartitionSpec spec, | ||
| OutputFile outputFile, | ||
| Long snapshotId, | ||
| ManifestWriter.Options options) { | ||
| switch (formatVersion) { | ||
| case 1: | ||
| throw new IllegalArgumentException("Cannot write delete files in a v1 table"); | ||
| case 2: | ||
| return new ManifestWriter.V2DeleteWriter(spec, outputFile, snapshotId); | ||
| return new ManifestWriter.V2DeleteWriter(spec, outputFile, snapshotId, options); | ||
| } | ||
| throw new UnsupportedOperationException( | ||
| "Cannot write manifest for table version: " + formatVersion); | ||
|
|
@@ -256,7 +295,9 @@ static ManifestFile copyAppendManifest( | |
| Map<Integer, PartitionSpec> specsById, | ||
| OutputFile outputFile, | ||
| long snapshotId, | ||
| SnapshotSummary.Builder summaryBuilder) { | ||
| SnapshotSummary.Builder summaryBuilder, | ||
| String compressionCodec, | ||
| Integer compressionLevel) { | ||
| // use metadata that will add the current snapshot's ID for the rewrite | ||
| InheritableMetadata inheritableMetadata = InheritableMetadataFactory.forCopy(snapshotId); | ||
| try (ManifestReader<DataFile> reader = | ||
|
|
@@ -267,7 +308,9 @@ static ManifestFile copyAppendManifest( | |
| outputFile, | ||
| snapshotId, | ||
| summaryBuilder, | ||
| ManifestEntry.Status.ADDED); | ||
| ManifestEntry.Status.ADDED, | ||
| compressionCodec, | ||
| compressionLevel); | ||
| } catch (IOException e) { | ||
| throw new RuntimeIOException(e, "Failed to close manifest: %s", toCopy.location()); | ||
| } | ||
|
|
@@ -280,7 +323,9 @@ static ManifestFile copyRewriteManifest( | |
| Map<Integer, PartitionSpec> specsById, | ||
| OutputFile outputFile, | ||
| long snapshotId, | ||
| SnapshotSummary.Builder summaryBuilder) { | ||
| SnapshotSummary.Builder summaryBuilder, | ||
| String compressionCodec, | ||
| Integer compressionLevel) { | ||
| // for a rewritten manifest all snapshot ids should be set. use empty metadata to throw an | ||
| // exception if it is not | ||
| InheritableMetadata inheritableMetadata = InheritableMetadataFactory.empty(); | ||
|
|
@@ -292,7 +337,9 @@ static ManifestFile copyRewriteManifest( | |
| outputFile, | ||
| snapshotId, | ||
| summaryBuilder, | ||
| ManifestEntry.Status.EXISTING); | ||
| ManifestEntry.Status.EXISTING, | ||
| compressionCodec, | ||
| compressionLevel); | ||
| } catch (IOException e) { | ||
| throw new RuntimeIOException(e, "Failed to close manifest: %s", toCopy.location()); | ||
| } | ||
|
|
@@ -305,8 +352,18 @@ private static ManifestFile copyManifestInternal( | |
| OutputFile outputFile, | ||
| long snapshotId, | ||
| SnapshotSummary.Builder summaryBuilder, | ||
| ManifestEntry.Status allowedEntryStatus) { | ||
| ManifestWriter<DataFile> writer = write(formatVersion, reader.spec(), outputFile, snapshotId); | ||
| ManifestEntry.Status allowedEntryStatus, | ||
| String compressionCodec, | ||
| Integer compressionLevel) { | ||
| ManifestWriter<DataFile> writer = | ||
| write( | ||
| formatVersion, | ||
| reader.spec(), | ||
| outputFile, | ||
| snapshotId, | ||
| ManifestWriter.options() | ||
| .compressionCodec(compressionCodec) | ||
| .compressionLevel(compressionLevel)); | ||
| boolean threw = true; | ||
| try { | ||
| for (ManifestEntry<DataFile> entry : reader.entries()) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.