Changes from all commits
103 commits
3569172
Docs: Fix variable name in Flink doc (#2668)
wangjunyou Jun 12, 2021
2c29032
Nessie: Use AssertJ assertions (#2684)
nastra Jun 14, 2021
8104769
Core: add key_metadata to ManifestFile schema and classes (#2675)
Jun 14, 2021
a6b0f42
Docs: Add cache-enabled to catalog property list (#2648)
Jun 15, 2021
5a61211
ORC: Remove unused constants in generic readers (#2695)
jerqi Jun 16, 2021
7be269d
AWS: Fix typo in S3OutputFile.createOrOverwrite exception message (#2…
kbendick Jun 16, 2021
e79580b
Update spec for v2 changes (#2654)
rdblue Jun 16, 2021
2366154
Parquet: Update to 1.12.0 (#2441)
Fokko Jun 16, 2021
3c41289
[python] Adding type ignores for dependencies without type support (#…
TGooch44 Jun 17, 2021
b2ebf22
Core: Use .as() with AssertJ (#2706)
nastra Jun 17, 2021
629da77
API: Add more null checks to TableIdentifier (#2703)
nastra Jun 17, 2021
1f64154
API: Fix Namespace null handling (#2704)
nastra Jun 17, 2021
9c859df
Docs: Add Adobe Migration article (#2707)
rominparekh Jun 17, 2021
9ebebdd
Core: Do not allow optional, double, or float identifier fields (#2705)
Jun 17, 2021
4c013a8
[Python] support custom target name in partition spec builder (#2689)
jun-he Jun 17, 2021
d9bf148
Docs: updating README to link to github actions instead of travis-ci.…
TGooch44 Jun 18, 2021
965775a
Core: Use equals instead of reference equality (#2714)
nastra Jun 18, 2021
ec69a25
Tests: Add unit tests for InternalRecordWrapper, RowDataWrapper, Inte…
openinx Jun 18, 2021
7798094
Spark: Fix scanAllFiles in MicroBatch.open (#2667)
southernriver Jun 18, 2021
e750316
Docs: Add commit.status-check.* properties (#2661)
coolderli Jun 18, 2021
a9f4363
Core: Add delete marker metadata column (#2538)
chenjunjiedada Jun 18, 2021
765ec12
[python] Adding Unknown and Void transforms (#2697)
TGooch44 Jun 19, 2021
619603c
fix: add and remove partition transform on same column failed when us…
vinson0526 Jun 21, 2021
ed0c702
[SPARK][BUILD] Allow existing spark2 JMH benchmarks to work with eith…
kbendick Jun 21, 2021
f8d173e
Core: Replace LinkedList usage with ArrayDeque (#2713)
nastra Jun 21, 2021
0c784fa
Core: Add JDBC catalog implementation (#1870)
ismailsimsek Jun 21, 2021
83ebd4e
Core: Throw an error for incremental scans of metadata tables (#2617)
kbendick Jun 22, 2021
63392d9
Core: Use CharSequenceSet instead of Set<CharSequence> (#2712)
nastra Jun 22, 2021
1460743
Spark: Add extensions DDL to set identifier fields (#2560)
Jun 22, 2021
ca575e9
[AWS] Fix MissingFail error prone warning by cleaning up GlueCatalogC…
kbendick Jun 22, 2021
8d5c498
Core: Add predicate push down for partitions metadata table (#2358)
szehon-ho Jun 22, 2021
01393a0
Core: Add HadoopConfigurable interface to serialize custom FileIO (#2…
Jun 23, 2021
8d16bad
Docs: Add catalog and metadata files to metadata structure diagram (#…
Jun 23, 2021
f81d8ad
AWS: add DynamoDb catalog (#2688)
Jun 23, 2021
92a264b
Use bulk decryption interface in ArrowReader (#2720)
nastra Jun 24, 2021
111fe81
[Spark] Support building against both Spark 3.0 and Spark 3.1. (#2512)
wypoon Jun 24, 2021
2e46847
Core: Fix float and double metrics for Parquet and ORC (#2464)
yyanyy Jun 24, 2021
98da974
API: Use equals instead of reference equality (#2716)
nastra Jun 24, 2021
c3ac4c6
Spark: Fix check for SQL extensions with extra white space (#2729)
jfz Jun 25, 2021
9cfcf5c
Spark: Support micro-batch streaming read for DSv2 (#2660)
SreeramGarlapati Jun 25, 2021
2dd5b4e
Docs: Remove file spark.md since it is outdated and causes confusion …
flyrain Jun 25, 2021
bf2cbc3
Flink: Rename FlinkTableOptions to more generic FlinkConfigOptions
stevenzwu Jun 28, 2021
aa65c06
Add support for TimeType / UUIDType (#2739)
nastra Jun 28, 2021
a100d2d
Spark: Fix file-open-cost in DSv2 streams (#2743)
SreeramGarlapati Jun 28, 2021
d4d376b
Core: Add schema-id to snapshots (#2275)
yyanyy Jun 29, 2021
8798f4e
Spec: Fix diagram alignment and size (#2750)
Jun 29, 2021
ee5a04f
Spec: Update for v2 schemas in remaining sections (#2748)
rdblue Jun 29, 2021
56ae374
Flink: Add an optional uidPrefix to FlinkSink#Builder to explicitly s…
stevenzwu Jun 29, 2021
af1e1f6
Docs: Update directions for joining #iceberg on Slack (#2758)
cwsteinbach Jun 29, 2021
735c70f
Core: Validate planTasks and splitFiles args in TableScanUtil (#2759)
RussellSpitzer Jun 29, 2021
b5ca06e
Core: Bin pack strategy cosmetic changes (#2770)
RussellSpitzer Jun 30, 2021
c579f0b
Docs: Describe how to configure Code formatter for IntelliJ IDEA (#2766)
nastra Jul 1, 2021
4b09c3d
Spark: Add limited support for vectorized reads for Parquet V2 (#2749)
samarthjain Jul 1, 2021
1ca781b
Docs: Update for mkdocs 1.2 (#2747)
rdblue Jul 1, 2021
9786fd1
Docs: Fix typo in flink.md (#2772)
a49a Jul 2, 2021
c92092f
Spark: RemoveReachableFiles action should fail if GC is disabled (#2763)
karuppayya Jul 2, 2021
cf0b94f
Docs: Describe available Benchmarks and how to run them (#2767)
nastra Jul 2, 2021
8671a93
Nessie: Properly format code in Nessie module (#2733)
nastra Jul 5, 2021
ea240a8
Style: Delete blank line of CachedClientPool.java (#2787)
southernriver Jul 6, 2021
072a86a
Spec: Update v2 change summary (#2762)
rdblue Jul 6, 2021
3e9684e
Build: bump up DiffPlug Spotless version (#2776)
Jul 6, 2021
6bcca16
Core: Fix JdbcCatalog CATALOG_TABLE_NAME to be lowercase (#2778)
haormj Jul 8, 2021
703aab4
Build: Change Spark Versions to Support M1 Processors (#2795)
RussellSpitzer Jul 9, 2021
5cf4248
Docs: Fixes broken links to old spark doc page (#2801)
RussellSpitzer Jul 9, 2021
bed47a4
Build: Upgrade to JUnit 5 (#2797)
nastra Jul 10, 2021
25eaeba
Spark: Reimplement RewriteDatafilesAction with partial progress (#2591)
RussellSpitzer Jul 11, 2021
6ab914d
Don't use deprecated methods
nastra Jun 25, 2021
42466b9
Refactor VectorizedArrowReader
nastra Jun 25, 2021
f2943d3
Reduce code duplication in VectorizedColumnIterator
nastra Jun 25, 2021
2842f0b
Reduce code duplication in VectorizedDictionaryEncodedParquetValuesRe…
nastra Jun 25, 2021
0667f38
Reduce code duplication in VectorizedPageIterator
nastra Jun 28, 2021
8058ec1
Reduce code duplication in VectorizedParquetDefinitionLevelReader
nastra Jun 28, 2021
87aea34
Upgrade to Tez 0.10.1 (#2790)
marton-bod Jul 12, 2021
0bb89d0
Spark: Parallelize task init when fetching locality info (#2800)
Jul 12, 2021
40e626a
Spark: Add table property to skip delete snapshots in streaming (#2752)
daksha121 Jul 13, 2021
712fe66
API: Use delete instead of remove in action names (#2810)
aokolnychyi Jul 13, 2021
118efb6
Spark: Use JavaSparkContext.fromSparkContext instead of constructor (…
aokolnychyi Jul 13, 2021
f6a9103
Spark: Add missing deprecation annotations for old actions (#2811)
aokolnychyi Jul 13, 2021
3947849
Docs: Fix link to intellij-java-palantir-style.xml (#2817)
nastra Jul 13, 2021
1a903f6
Spark : Add Files Perf improvement by push down partition filter to S…
szehon-ho Jul 13, 2021
b3fb81a
Core: Use Avro 1.10.1 (#1648)
Fokko Jul 13, 2021
3268799
spark: Add in support to read timestamp without timezone from parquet
bkahloon Feb 28, 2021
4b9a190
spark: Remove ORC vectorized test for reading timestamp without timezone
bkahloon Feb 28, 2021
f5036fb
Spark: address PR comments
bkahloon Mar 4, 2021
bac19c6
Spark: fix build failure due to import of all iceberg packages
bkahloon Mar 6, 2021
ab0bf3a
Spark: remove unsed imports and try to fix package import ordering
bkahloon Mar 7, 2021
f8a5293
Spark: fix code formatting issue
bkahloon Mar 8, 2021
ee386b9
Spark: fix code formatting issue
bkahloon Mar 8, 2021
fc6ee0e
Spark: Fix formatting error of long line
bkahloon Mar 9, 2021
e537c0b
Spark: Fix formatting error of long line
bkahloon Mar 9, 2021
abb607e
Add support for writing timestamps without timezone.
sshkvar Jun 11, 2021
4bee290
Added missed check for handling timestamp without zone for Writer in …
sshkvar Jun 17, 2021
0259262
Added missed check for handling timestamp without zone for Writer in …
sshkvar Jun 17, 2021
1579abc
Address PR comments.
sshkvar Jun 29, 2021
4f37486
Address PR comments.
sshkvar Jun 30, 2021
bafaffb
Address PR comments.
sshkvar Jul 1, 2021
7acaec0
Address PR comments.
sshkvar Jul 1, 2021
cee72a0
Address few little clean up
sshkvar Jul 12, 2021
459ce89
Address few little clean up
sshkvar Jul 12, 2021
d14b2b2
Address few little clean up
sshkvar Jul 12, 2021
398a2a0
fix for `'lambda arguments' has incorrect indentation level 12, expec…
sshkvar Jul 12, 2021
3f6b0f2
fix for `incorrect indentation level 6, expected level should be 8.`
sshkvar Jul 12, 2021
bc316c4
Added withSQLConf method to AvroDataTest.java as suggested in the PR …
sshkvar Jul 13, 2021
5 changes: 5 additions & 0 deletions .baseline/checkstyle/checkstyle.xml
@@ -46,6 +46,11 @@
<property name="format" value="sparkContext\(\)\.hadoopConfiguration\(\)"/>
<property name="message" value="Are you sure that you want to use sparkContext().hadoopConfiguration()? In most cases, you should use sessionState().newHadoopConf() instead, so that the Hadoop configurations specified in the Spark session configuration will come into effect."/>
</module>
<module name="RegexpSingleline">
<property name="fileExtensions" value="java"/>
<property name="format" value="new JavaSparkContext\(.*\)"/>
<property name="message" value="Prefer using JavaSparkContext.fromSparkContext() instead of calling a constructor directly."/>
</module>
<module name="SuppressionFilter"> <!-- baseline-gradle: README.md -->
<property name="file" value="${config_loc}/checkstyle-suppressions.xml"/>
</module>
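For context on the rule added above: it rejects direct construction of JavaSparkContext in favour of the factory method. A minimal sketch of both forms (illustrative only, not part of this diff):

```java
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

public class JavaSparkContextExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().master("local").appName("example").getOrCreate();
    SparkContext sc = spark.sparkContext();

    // Flagged by the new "new JavaSparkContext\(.*\)" check: wraps the context via the constructor.
    JavaSparkContext viaConstructor = new JavaSparkContext(sc);

    // Preferred: obtain the wrapper through the factory method instead.
    JavaSparkContext viaFactory = JavaSparkContext.fromSparkContext(sc);

    spark.stop();
  }
}
```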
6 changes: 6 additions & 0 deletions .gitignore
@@ -25,6 +25,12 @@ lib/
# web site build
site/site

# benchmark output
spark2/benchmark/*
!spark2/benchmark/.gitkeep
spark3/benchmark/*
!spark3/benchmark/.gitkeep

__pycache__/
*.py[cod]
.eggs/
3 changes: 2 additions & 1 deletion README.md
@@ -19,7 +19,8 @@

![](site/docs/img/Iceberg-logo.png)

[![](https://travis-ci.org/apache/iceberg.svg?branch=master)](https://travis-ci.org/apache/iceberg)
[![](https://github.com/apache/iceberg/actions/workflows/java-ci.yml/badge.svg)](https://github.com/apache/iceberg/actions/workflows/java-ci.yml)
[![](https://github.com/apache/iceberg/actions/workflows/python-ci.yml/badge.svg)](https://github.com/apache/iceberg/actions/workflows/python-ci.yml)
[![Slack](https://img.shields.io/badge/chat-on%20Slack-brightgreen.svg)](https://the-asf.slack.com/archives/CF01LKV9S)

Apache Iceberg is a new table format for storing large, slow-moving tabular data. It is designed to improve on the de-facto standard table layout built into Hive, Trino, and Spark.
13 changes: 11 additions & 2 deletions api/src/main/java/org/apache/iceberg/ManifestFile.java
@@ -62,14 +62,16 @@ public interface ManifestFile {
Types.NestedField PARTITION_SUMMARIES = optional(507, "partitions",
Types.ListType.ofRequired(508, PARTITION_SUMMARY_TYPE),
"Summary for each partition");
// next ID to assign: 519
Types.NestedField KEY_METADATA = optional(519, "key_metadata", Types.BinaryType.get(),
"Encryption key metadata blob");
// next ID to assign: 520

Schema SCHEMA = new Schema(
PATH, LENGTH, SPEC_ID, MANIFEST_CONTENT,
SEQUENCE_NUMBER, MIN_SEQUENCE_NUMBER, SNAPSHOT_ID,
ADDED_FILES_COUNT, EXISTING_FILES_COUNT, DELETED_FILES_COUNT,
ADDED_ROWS_COUNT, EXISTING_ROWS_COUNT, DELETED_ROWS_COUNT,
PARTITION_SUMMARIES);
PARTITION_SUMMARIES, KEY_METADATA);

static Schema schema() {
return SCHEMA;
@@ -179,6 +181,13 @@ default boolean hasDeletedFiles() {
*/
List<PartitionFieldSummary> partitions();

/**
* Returns metadata about how this manifest file is encrypted, or null if the file is stored in plain text.
*/
default ByteBuffer keyMetadata() {
return null;
}

/**
* Copies this {@link ManifestFile manifest file}. Readers can reuse manifest file instances; use
* this method to make defensive copies.
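A hedged sketch of how calling code might consult the new keyMetadata() field; the snapshot iteration and the encryption hand-off are illustrative, not part of this diff:

```java
// Assumes an org.apache.iceberg.Table named `table` is in scope.
for (ManifestFile manifest : table.currentSnapshot().allManifests()) {
  ByteBuffer keyMetadata = manifest.keyMetadata();
  if (keyMetadata == null) {
    // Manifest is stored in plain text and can be read directly.
  } else {
    // Hand keyMetadata to the table's encryption layer to obtain a decryption key
    // before opening the manifest file.
  }
}
```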
4 changes: 2 additions & 2 deletions api/src/main/java/org/apache/iceberg/Schema.java
@@ -193,11 +193,11 @@ public List<NestedField> columns() {
* It consists of a unique set of primitive fields in the schema.
* An identifier field must be at root, or nested in a chain of structs (no maps or lists).
* A row should be unique in a table based on the values of the identifier fields.
* Optional, float and double columns cannot be used as identifier fields.
* However, Iceberg identifier differs from primary key in the following ways:
* <ul>
* <li>Iceberg does not enforce the uniqueness of a row based on this identifier information.
* It is used for operations like upsert to define the default upsert key.</li>
* <li>NULL can be used as value of an identifier field. Iceberg ensures null-safe equality check.</li>
* <li>A nested field in a struct can be used as an identifier. For example, if there is a "last_name" field
* inside a "user" struct in a schema, field "user.last_name" can be set as a part of the identifier field.</li>
* </ul>
@@ -215,7 +215,7 @@ public Set<Integer> identifierFieldIds() {
public Set<String> identifierFieldNames() {
return identifierFieldIds()
.stream()
.map(id -> findField(id).name())
.map(id -> lazyIdToName().get(id))
.collect(Collectors.toSet());
}
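A hedged sketch of declaring identifier fields when building a schema; the two-argument Schema constructor and the field ids used here are assumptions for illustration:

```java
// "id" is required and not a float/double, so it is allowed as an identifier field.
Schema schema = new Schema(
    Arrays.asList(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get())),
    Collections.singleton(1));

schema.identifierFieldNames();  // ["id"]
```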

9 changes: 9 additions & 0 deletions api/src/main/java/org/apache/iceberg/Snapshot.java
@@ -126,4 +126,13 @@ public interface Snapshot extends Serializable {
* @return the location of the manifest list for this Snapshot
*/
String manifestListLocation();

/**
* Return the id of the schema used when this snapshot was created, or null if this information is not available.
*
* @return schema id associated with this snapshot
*/
default Integer schemaId() {
return null;
}
}
2 changes: 1 addition & 1 deletion api/src/main/java/org/apache/iceberg/SortField.java
@@ -80,7 +80,7 @@ public NullOrder nullOrder() {
* @return true if this order satisfies the given order
*/
public boolean satisfies(SortField other) {
if (this == other) {
if (Objects.equals(this, other)) {
return true;
} else if (sourceId != other.sourceId || direction != other.direction || nullOrder != other.nullOrder) {
return false;
7 changes: 7 additions & 0 deletions api/src/main/java/org/apache/iceberg/Table.java
@@ -60,6 +60,13 @@ default String name() {
*/
Schema schema();

/**
* Return a map of {@link Schema schema} for this table.
*
* @return this table's schema map
*/
Map<Integer, Schema> schemas();

/**
* Return the {@link PartitionSpec partition spec} for this table.
*
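Taken together with the new Snapshot#schemaId() above, the schemas() map lets callers resolve the schema a snapshot was written with. A hedged usage sketch; the fallback logic is illustrative:

```java
// Assumes an org.apache.iceberg.Table named `table` is in scope.
Snapshot snapshot = table.currentSnapshot();
Integer schemaId = snapshot.schemaId();  // may be null for snapshots written before this change

Schema snapshotSchema = (schemaId != null && table.schemas().containsKey(schemaId))
    ? table.schemas().get(schemaId)
    : table.schema();  // fall back to the current table schema
```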
@@ -41,10 +41,10 @@ default MigrateTable migrateTable(String tableIdent) {
}

/**
* Instantiates an action to remove orphan files.
* Instantiates an action to delete orphan files.
*/
default RemoveOrphanFiles removeOrphanFiles(Table table) {
throw new UnsupportedOperationException(this.getClass().getName() + " does not implement removeOrphanFiles");
default DeleteOrphanFiles deleteOrphanFiles(Table table) {
throw new UnsupportedOperationException(this.getClass().getName() + " does not implement deleteOrphanFiles");
}

/**
@@ -69,9 +69,9 @@ default ExpireSnapshots expireSnapshots(Table table) {
}

/**
* Instantiates an action to remove all the files reachable from given metadata location.
* Instantiates an action to delete all the files reachable from given metadata location.
*/
default RemoveReachableFiles removeReachableFiles(String metadataLocation) {
throw new UnsupportedOperationException(this.getClass().getName() + " does not implement removeReachableFiles");
default DeleteReachableFiles deleteReachableFiles(String metadataLocation) {
throw new UnsupportedOperationException(this.getClass().getName() + " does not implement deleteReachableFiles");
}
}
@@ -22,13 +22,13 @@
import java.util.function.Consumer;

/**
* An action that removes orphan files in a table.
* An action that deletes orphan files in a table.
* <p>
* A metadata or data file is considered orphan if it is not reachable by any valid snapshot.
* The set of actual files is built by listing the underlying storage which makes this operation
* expensive.
*/
public interface RemoveOrphanFiles extends Action<RemoveOrphanFiles, RemoveOrphanFiles.Result> {
public interface DeleteOrphanFiles extends Action<DeleteOrphanFiles, DeleteOrphanFiles.Result> {
/**
* Passes a location which should be scanned for orphan files.
* <p>
@@ -38,7 +38,7 @@ public interface RemoveOrphanFiles extends Action<RemoveOrphanFiles, RemoveOrpha
* @param location the location where to look for orphan files
* @return this for method chaining
*/
RemoveOrphanFiles location(String location);
DeleteOrphanFiles location(String location);

/**
* Removes orphan files only if they are older than the given timestamp.
@@ -52,7 +52,7 @@ public interface RemoveOrphanFiles extends Action<RemoveOrphanFiles, RemoveOrpha
* @param olderThanTimestamp a long timestamp, as returned by {@link System#currentTimeMillis()}
* @return this for method chaining
*/
RemoveOrphanFiles olderThan(long olderThanTimestamp);
DeleteOrphanFiles olderThan(long olderThanTimestamp);

/**
* Passes an alternative delete implementation that will be used for orphan files.
@@ -65,7 +65,7 @@ public interface RemoveOrphanFiles extends Action<RemoveOrphanFiles, RemoveOrpha
* @param deleteFunc a function that will be called to delete files
* @return this for method chaining
*/
RemoveOrphanFiles deleteWith(Consumer<String> deleteFunc);
DeleteOrphanFiles deleteWith(Consumer<String> deleteFunc);

/**
* The action result that contains a summary of the execution.
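A hedged usage sketch of the renamed action; `actions` stands in for any ActionsProvider implementation (for example a Spark-based one) and is not part of this diff:

```java
// Delete orphan files older than three days.
DeleteOrphanFiles.Result result = actions
    .deleteOrphanFiles(table)
    .olderThan(System.currentTimeMillis() - TimeUnit.DAYS.toMillis(3))
    .execute();
```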
@@ -24,24 +24,24 @@
import org.apache.iceberg.io.FileIO;

/**
* An action that removes all files referenced by a table metadata file.
* An action that deletes all files referenced by a table metadata file.
* <p>
* This action will irreversibly delete all reachable files such as data files, manifests,
* manifest lists and should be used to clean up the underlying storage once a table is dropped
* and no longer needed.
* <p>
* Implementations may use a query engine to distribute parts of work.
*/
public interface RemoveReachableFiles extends Action<RemoveReachableFiles, RemoveReachableFiles.Result> {
public interface DeleteReachableFiles extends Action<DeleteReachableFiles, DeleteReachableFiles.Result> {

/**
* Passes an alternative delete implementation that will be used for files.
*
* @param removeFunc a function that will be called to delete files.
* @param deleteFunc a function that will be called to delete files.
* The function accepts path to file as an argument.
* @return this for method chaining
*/
RemoveReachableFiles deleteWith(Consumer<String> removeFunc);
DeleteReachableFiles deleteWith(Consumer<String> deleteFunc);

/**
* Passes an alternative executor service that will be used for files removal.
@@ -51,39 +51,39 @@ public interface RemoveReachableFiles extends Action<RemoveReachableFiles, Remov
* @param executorService the service to use
* @return this for method chaining
*/
RemoveReachableFiles executeDeleteWith(ExecutorService executorService);
DeleteReachableFiles executeDeleteWith(ExecutorService executorService);

/**
* Set the {@link FileIO} to be used for files removal
*
* @param io FileIO to use for files removal
* @return this for method chaining
*/
RemoveReachableFiles io(FileIO io);
DeleteReachableFiles io(FileIO io);

/**
* The action result that contains a summary of the execution.
*/
interface Result {

/**
* Returns the number of data files removed.
* Returns the number of deleted data files.
*/
long removedDataFilesCount();
long deletedDataFilesCount();

/**
* Returns the number of manifests removed.
* Returns the number of deleted manifests.
*/
long removedManifestsCount();
long deletedManifestsCount();

/**
* Returns the number of manifest lists removed.
* Returns the number of deleted manifest lists.
*/
long removedManifestListsCount();
long deletedManifestListsCount();

/**
* Returns the number of metadata json, version hint files removed.
* Returns the number of deleted metadata json, version hint files.
*/
long otherRemovedFilesCount();
long deletedOtherFilesCount();
}
}
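A hedged sketch of the renamed action in use; the metadata location, `actions`, and `fileIO` handles are placeholders:

```java
// Irreversibly delete everything reachable from a dropped table's last metadata file.
DeleteReachableFiles.Result result = actions
    .deleteReachableFiles("s3://bucket/warehouse/db/tbl/metadata/v12.metadata.json")
    .io(fileIO)
    .execute();

long totalDeleted = result.deletedDataFilesCount() + result.deletedManifestsCount()
    + result.deletedManifestListsCount() + result.deletedOtherFilesCount();
```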
30 changes: 17 additions & 13 deletions api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java
@@ -19,7 +19,7 @@

package org.apache.iceberg.actions;

import java.util.Map;
import java.util.List;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.expressions.Expression;

@@ -49,10 +49,10 @@ public interface RewriteDataFiles extends SnapshotUpdate<RewriteDataFiles, Rewri
/**
* The entire rewrite operation is broken down into pieces based on partitioning and within partitions based
* on size into groups. These sub-units of the rewrite are referred to as file groups. The largest amount of data that
* should be compacted in a single group is controlled by MAX_FILE_GROUP_SIZE_BYTES. This helps with breaking down the
* rewriting of very large partitions which may not be rewritable otherwise due to the resource constraints of the
* cluster. For example a sort based rewrite may not scale to terabyte sized partitions, those partitions need to be
* worked on in small subsections to avoid exhaustion of resources.
* should be compacted in a single group is controlled by {@link #MAX_FILE_GROUP_SIZE_BYTES}. This helps with
* breaking down the rewriting of very large partitions which may not be rewritable otherwise due to the resource
* constraints of the cluster. For example a sort based rewrite may not scale to terabyte sized partitions, those
* partitions need to be worked on in small subsections to avoid exhaustion of resources.
* <p>
* When grouping files, the underlying rewrite strategy will use this value as to limit the files which
* will be included in a single file group. A group will be processed by a single framework "action". For example,
@@ -68,20 +68,14 @@
* independently and asynchronously.
**/
String MAX_CONCURRENT_FILE_GROUP_REWRITES = "max-concurrent-file-group-rewrites";
int MAX_CONCURRENT_FILE_GROUP_ACTIONS_DEFAULT = 1;
int MAX_CONCURRENT_FILE_GROUP_REWRITES_DEFAULT = 1;

/**
* The output file size that this rewrite strategy will attempt to generate when rewriting files. By default this
* will use the "write.target-file-size-bytes value" in the table properties of the table being updated.
*/
String TARGET_FILE_SIZE_BYTES = "target-file-size-bytes";

/**
* The partition spec to use when writing the output data from this operation. By default uses the
* current table partition spec.
*/
String OUTPUT_PARTITION_SPEC_ID = "output-partition-spec-id";

/**
* Choose BINPACK as a strategy for this rewrite operation
* @return this for method chaining
@@ -106,14 +100,24 @@ default RewriteDataFiles binPack() {
* will report a total failure for the job.
*/
interface Result {
Map<FileGroupInfo, FileGroupRewriteResult> resultMap();
List<FileGroupRewriteResult> rewriteResults();

default int addedDataFilesCount() {
return rewriteResults().stream().mapToInt(FileGroupRewriteResult::addedDataFilesCount).sum();
}

default int rewrittenDataFilesCount() {
return rewriteResults().stream().mapToInt(FileGroupRewriteResult::rewrittenDataFilesCount).sum();
}
}

/**
* For a particular file group, the number of files which are newly created and the number of files
* which were formerly part of the table but have been rewritten.
*/
interface FileGroupRewriteResult {
FileGroupInfo info();

int addedDataFilesCount();

int rewrittenDataFilesCount();
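A hedged sketch showing how the rewrite options and the reworked Result could be used together; the `actions` handle and the filter column are placeholders:

```java
// Bin-pack rewrite limited to one partition, using the options defined above.
RewriteDataFiles.Result result = actions
    .rewriteDataFiles(table)
    .binPack()
    .option(RewriteDataFiles.MAX_CONCURRENT_FILE_GROUP_REWRITES, "4")
    .option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, String.valueOf(512L * 1024 * 1024))
    .filter(Expressions.equal("day", "2021-07-01"))
    .execute();

// The new default methods aggregate the per-group results.
int rewritten = result.rewrittenDataFilesCount();
int added = result.addedDataFilesCount();
```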
2 changes: 2 additions & 0 deletions api/src/main/java/org/apache/iceberg/catalog/Namespace.java
@@ -21,6 +21,7 @@

import java.util.Arrays;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;

/**
* A namespace in a {@link Catalog}.
@@ -34,6 +35,7 @@ public static Namespace empty() {
}

public static Namespace of(String... levels) {
Preconditions.checkArgument(null != levels, "Cannot create Namespace from null array");
if (levels.length == 0) {
return empty();
}
@@ -36,6 +36,7 @@ public class TableIdentifier {
private final String name;

public static TableIdentifier of(String... names) {
Preconditions.checkArgument(names != null, "Cannot create table identifier from null array");
Preconditions.checkArgument(names.length > 0, "Cannot create table identifier without a table name");
return new TableIdentifier(Namespace.of(Arrays.copyOf(names, names.length - 1)), names[names.length - 1]);
}
@@ -45,12 +46,14 @@ public static TableIdentifier of(Namespace namespace, String name) {
}

public static TableIdentifier parse(String identifier) {
Preconditions.checkArgument(identifier != null, "Cannot parse table identifier: null");
Iterable<String> parts = DOT.split(identifier);
return TableIdentifier.of(Iterables.toArray(parts, String.class));
}

private TableIdentifier(Namespace namespace, String name) {
Preconditions.checkArgument(name != null && !name.isEmpty(), "Invalid table name %s", name);
Preconditions.checkArgument(name != null && !name.isEmpty(), "Invalid table name: null or empty");
Preconditions.checkArgument(namespace != null, "Invalid Namespace: null");
this.namespace = namespace;
this.name = name;
}
Expand Down
Loading