From 74057c1e53793a919655ffcf7dbf67d35a2e9bf1 Mon Sep 17 00:00:00 2001 From: Robert Stupp Date: Thu, 15 Apr 2021 11:05:29 +0200 Subject: [PATCH 1/5] Bump Nessie to 0.8.2 + replace Gradle plugin with new JUnit extension More changes in this PR in following commits. Replace Gradle plugin with new JUnit extension. See [Add JAX-RS tests and add JUnit/Jupyter extension](https://github.com/projectnessie/nessie/pull/1566) --- build.gradle | 15 ++++++------ .../iceberg/nessie/BaseTestIceberg.java | 24 +++++++++++-------- .../apache/iceberg/nessie/NessieUtilTest.java | 2 +- .../iceberg/nessie/TestBranchVisibility.java | 10 ++++---- .../apache/iceberg/nessie/TestNamespace.java | 2 +- .../iceberg/nessie/TestNessieTable.java | 14 +++++------ .../iceberg/nessie/TestTableReference.java | 2 +- versions.props | 6 ++--- 8 files changed, 38 insertions(+), 37 deletions(-) diff --git a/build.gradle b/build.gradle index 31f776b6f157..4ad62f076a83 100644 --- a/build.gradle +++ b/build.gradle @@ -35,7 +35,6 @@ buildscript { plugins { id 'nebula.dependency-recommender' version '9.0.2' - id 'org.projectnessie' version '0.5.1' } try { @@ -127,6 +126,7 @@ subprojects { compile 'com.github.stephenc.findbugs:findbugs-annotations' testCompile 'org.junit.vintage:junit-vintage-engine' + testCompile 'org.junit.jupiter:junit-jupiter-engine' testCompile 'org.junit.jupiter:junit-jupiter' testCompile 'org.slf4j:slf4j-simple' testCompile 'org.mockito:mockito-core' @@ -1266,19 +1266,18 @@ project(':iceberg-pig') { } project(':iceberg-nessie') { - apply plugin: 'org.projectnessie' + test { + useJUnitPlatform() + } dependencies { compile project(':iceberg-core') compile project(path: ':iceberg-bundled-guava', configuration: 'shadow') compile "org.projectnessie:nessie-client" - // dependency version "recommendation" via nebula.dependency-recommender don't work here "immediately", - // so pull the Quarkus + Nessie versions from the root-project - def quarkusVersion = rootProject.dependencyRecommendations.getRecommendedVersion("io.quarkus", "quarkus-bom") - def nessieVersion = rootProject.dependencyRecommendations.getRecommendedVersion("org.projectnessie", "nessie-quarkus") - nessieQuarkusRuntime(enforcedPlatform("io.quarkus:quarkus-bom:${quarkusVersion}")) - nessieQuarkusServer "org.projectnessie:nessie-quarkus:${nessieVersion}" + testImplementation "org.projectnessie:nessie-jaxrs-testextension" + // Need to "pull in" el-api explicitly :( + testImplementation "jakarta.el:jakarta.el-api" compileOnly "org.apache.hadoop:hadoop-common" diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/BaseTestIceberg.java b/nessie/src/test/java/org/apache/iceberg/nessie/BaseTestIceberg.java index a73a768fe62f..c98532f62a0c 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/BaseTestIceberg.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/BaseTestIceberg.java @@ -20,6 +20,7 @@ package org.apache.iceberg.nessie; import java.io.IOException; +import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -33,15 +34,16 @@ import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.LongType; import org.apache.iceberg.types.Types.StructType; -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.jupiter.api.io.TempDir; import org.projectnessie.api.ContentsApi; import org.projectnessie.api.TreeApi; import org.projectnessie.client.NessieClient; import org.projectnessie.error.NessieConflictException; import org.projectnessie.error.NessieNotFoundException; +import org.projectnessie.jaxrs.NessieJaxRsExtension; import org.projectnessie.model.Branch; import org.projectnessie.model.Reference; import org.slf4j.Logger; @@ -50,11 +52,13 @@ import static org.apache.iceberg.types.Types.NestedField.required; public abstract class BaseTestIceberg { + @RegisterExtension + static NessieJaxRsExtension server = new NessieJaxRsExtension(); private static final Logger LOGGER = LoggerFactory.getLogger(BaseTestIceberg.class); - @Rule - public TemporaryFolder temp = new TemporaryFolder(); + @TempDir + public Path temp; protected NessieCatalog catalog; protected NessieClient client; @@ -79,10 +83,10 @@ private void resetData() throws NessieConflictException, NessieNotFoundException tree.createReference(Branch.of("main", null)); } - @Before + @BeforeEach public void beforeEach() throws IOException { String port = System.getProperty("quarkus.http.test-port", "19120"); - uri = String.format("http://localhost:%s/api/v1", port); + uri = server.getURI().toString(); this.client = NessieClient.builder().withUri(uri).build(); tree = client.getTreeApi(); contents = client.getContentsApi(); @@ -105,7 +109,7 @@ NessieCatalog initCatalog(String ref) { newCatalog.initialize("nessie", ImmutableMap.of("ref", ref, CatalogProperties.URI, uri, "auth_type", "NONE", - CatalogProperties.WAREHOUSE_LOCATION, temp.getRoot().toURI().toString() + CatalogProperties.WAREHOUSE_LOCATION, temp.toUri().toString() )); return newCatalog; } @@ -136,7 +140,7 @@ void createBranch(String name, String hash) throws NessieNotFoundException, Ness tree.createReference(Branch.of(name, hash)); } - @After + @AfterEach public void afterEach() throws Exception { catalog.close(); client.close(); diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/NessieUtilTest.java b/nessie/src/test/java/org/apache/iceberg/nessie/NessieUtilTest.java index 6c021241d2e1..e8072091a86b 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/NessieUtilTest.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/NessieUtilTest.java @@ -22,7 +22,7 @@ import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.assertj.core.api.Assertions; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.projectnessie.model.CommitMeta; public class NessieUtilTest { diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java index 43bbc31e264b..224b2f72dba8 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java @@ -22,9 +22,9 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.types.Types; import org.assertj.core.api.Assertions; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.projectnessie.error.NessieConflictException; import org.projectnessie.error.NessieNotFoundException; @@ -39,7 +39,7 @@ public TestBranchVisibility() { super("main"); } - @Before + @BeforeEach public void before() throws NessieNotFoundException, NessieConflictException { createTable(tableIdentifier1, 1); // table 1 createTable(tableIdentifier2, 1); // table 2 @@ -48,7 +48,7 @@ public void before() throws NessieNotFoundException, NessieConflictException { testCatalog = initCatalog("test"); } - @After + @AfterEach public void after() throws NessieNotFoundException, NessieConflictException { catalog.dropTable(tableIdentifier1); catalog.dropTable(tableIdentifier2); diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestNamespace.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestNamespace.java index 0b0637ed4965..03994bb26540 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/TestNamespace.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestNamespace.java @@ -23,7 +23,7 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.assertj.core.api.Assertions; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestNamespace extends BaseTestIceberg { private static final String BRANCH = "test-namespace"; diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java index 1596361ca5ad..2f30e3d06464 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java @@ -44,9 +44,9 @@ import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.types.Types; import org.assertj.core.api.Assertions; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.projectnessie.error.NessieConflictException; import org.projectnessie.error.NessieNotFoundException; import org.projectnessie.model.Branch; @@ -78,13 +78,13 @@ public TestNessieTable() { super(BRANCH); } - @Before + @BeforeEach public void beforeEach() throws IOException { super.beforeEach(); this.tableLocation = new Path(catalog.createTable(TABLE_IDENTIFIER, schema).location()); } - @After + @AfterEach public void afterEach() throws Exception { // drop the table data if (tableLocation != null) { @@ -112,7 +112,7 @@ public void testCreate() throws NessieNotFoundException, IOException { icebergTable.updateSchema().addColumn("mother", Types.LongType.get()).commit(); IcebergTable table = getTable(KEY); // check parameters are in expected state - String expected = (temp.getRoot().toURI() + DB_NAME + "/" + tableName).replace("//", "/"); + String expected = (temp.toUri() + DB_NAME + "/" + tableName).replace("///", "/"); Assertions.assertThat(getTableLocation(tableName)).isEqualTo(expected); // Only 1 snapshotFile Should exist and no manifests should exist @@ -291,7 +291,7 @@ public void testListTables() { } private String getTableBasePath(String tableName) { - String databasePath = temp.getRoot().toString() + "/" + DB_NAME; + String databasePath = temp.toString() + "/" + DB_NAME; return Paths.get(databasePath, tableName).toAbsolutePath().toString(); } diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestTableReference.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestTableReference.java index 96785fd64437..bd693810b30c 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/TestTableReference.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestTableReference.java @@ -20,7 +20,7 @@ package org.apache.iceberg.nessie; import org.assertj.core.api.Assertions; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestTableReference { diff --git a/versions.props b/versions.props index e889b0f5d5c8..06e666a216ee 100644 --- a/versions.props +++ b/versions.props @@ -18,13 +18,11 @@ org.apache.arrow:arrow-memory-netty = 2.0.0 com.github.stephenc.findbugs:findbugs-annotations = 1.3.9-1 software.amazon.awssdk:* = 2.15.7 org.scala-lang:scala-library = 2.12.10 -org.projectnessie:* = 0.5.1 -javax.ws.rs:javax.ws.rs-api = 2.1.1 -io.quarkus:* = 1.13.1.Final +org.projectnessie:* = 0.8.2 # test deps org.junit.vintage:junit-vintage-engine = 5.7.2 -org.junit.jupiter:junit-jupiter = 5.7.2 +org.junit.jupiter:* = 5.7.2 org.mockito:mockito-core = 3.7.7 org.apache.hive:hive-exec = 2.3.8 org.apache.hive:hive-service = 2.3.8 From a71a7f9dff74086b4acdf423d4917aea252a4212 Mon Sep 17 00:00:00 2001 From: Robert Stupp Date: Thu, 15 Jul 2021 17:54:06 +0200 Subject: [PATCH 2/5] Changes required by Nessie-API changes Apply changes to Iceberg required by API changes in Nessie: * [Re-introduce wrapper classes for query params of CommitLog/Entries](https://github.com/projectnessie/nessie/pull/1595) * [Server-side commit range filtering](https://github.com/projectnessie/nessie/pull/1596) * [Add hashOnRef query param to support time travel on a named ref](https://github.com/projectnessie/nessie/pull/1589) * [Only accept NamedRefs in REST API](https://github.com/projectnessie/nessie/pull/1583) --- .../apache/iceberg/nessie/NessieCatalog.java | 7 ++++--- .../iceberg/nessie/NessieTableOperations.java | 2 +- .../iceberg/nessie/TestBranchVisibility.java | 17 ++++++++++------- .../apache/iceberg/nessie/TestNessieTable.java | 15 ++++++++++----- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java index 88c442185429..afab89a875ba 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java @@ -19,7 +19,6 @@ package org.apache.iceberg.nessie; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; @@ -46,6 +45,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.util.Tasks; import org.projectnessie.api.TreeApi; +import org.projectnessie.api.params.EntriesParams; import org.projectnessie.client.NessieClient; import org.projectnessie.client.NessieConfigConstants; import org.projectnessie.error.BaseNessieClientServerException; @@ -324,7 +324,8 @@ String currentRefName() { private IcebergTable table(TableIdentifier tableIdentifier) { try { - Contents table = client.getContentsApi().getContents(NessieUtil.toKey(tableIdentifier), reference.getHash()); + Contents table = client.getContentsApi() + .getContents(NessieUtil.toKey(tableIdentifier), reference.getName(), reference.getHash()); return table.unwrap(IcebergTable.class).orElse(null); } catch (NessieNotFoundException e) { return null; @@ -353,7 +354,7 @@ private UpdateableReference loadReference(String requestedRef) { private Stream tableStream(Namespace namespace) { try { return client.getTreeApi() - .getEntries(reference.getHash(), null, null, Collections.emptyList()) + .getEntries(reference.getName(), EntriesParams.builder().hashOnRef(reference.getHash()).build()) .getEntries() .stream() .filter(NessieUtil.namespacePredicate(namespace)) diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java index c44862bb720a..3cd4f9206776 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java @@ -80,7 +80,7 @@ protected void doRefresh() { } String metadataLocation = null; try { - Contents contents = client.getContentsApi().getContents(key, reference.getHash()); + Contents contents = client.getContentsApi().getContents(key, reference.getName(), reference.getHash()); this.table = contents.unwrap(IcebergTable.class) .orElseThrow(() -> new IllegalStateException("Cannot refresh iceberg table: " + diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java index 224b2f72dba8..26d7d8807b23 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java @@ -79,31 +79,34 @@ public void testUpdateCatalogs() { } @Test - public void testCatalogOnReference() throws NessieNotFoundException { + public void testCatalogOnReference() { updateSchema(catalog, tableIdentifier1); updateSchema(testCatalog, tableIdentifier2); - String mainHash = tree.getReferenceByName("main").getHash(); // catalog created with ref points to same catalog as above NessieCatalog refCatalog = initCatalog("test"); testCatalogEquality(refCatalog, testCatalog, true, true); // catalog created with hash points to same catalog as above - NessieCatalog refHashCatalog = initCatalog(mainHash); + NessieCatalog refHashCatalog = initCatalog("main"); testCatalogEquality(refHashCatalog, catalog, true, true); } @Test - public void testCatalogWithTableNames() throws NessieNotFoundException { + public void testCatalogWithTableNames() { updateSchema(testCatalog, tableIdentifier2); - String mainHash = tree.getReferenceByName("main").getHash(); + + String mainName = "main"; // asking for table@branch gives expected regardless of catalog Assertions.assertThat(metadataLocation(catalog, TableIdentifier.of("test-ns", "table1@test"))) .isEqualTo(metadataLocation(testCatalog, tableIdentifier1)); - // asking for table@branch#hash gives expected regardless of catalog - Assertions.assertThat(metadataLocation(catalog, TableIdentifier.of("test-ns", "table1@" + mainHash))) + // Asking for table@branch gives expected regardless of catalog. + // Earlier versions used "table1@" + tree.getReferenceByName("main").getHash() before, but since + // Nessie 0.8.2 the branch name became mandatory and specifying a hash within a branch is not + // possible. + Assertions.assertThat(metadataLocation(catalog, TableIdentifier.of("test-ns", "table1@" + mainName))) .isEqualTo(metadataLocation(testCatalog, tableIdentifier1)); } diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java index 2f30e3d06464..451656800c8b 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java @@ -47,12 +47,15 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.projectnessie.api.params.CommitLogParams; import org.projectnessie.error.NessieConflictException; import org.projectnessie.error.NessieNotFoundException; import org.projectnessie.model.Branch; import org.projectnessie.model.CommitMeta; import org.projectnessie.model.ContentsKey; import org.projectnessie.model.IcebergTable; +import org.projectnessie.model.ImmutableOperations; +import org.projectnessie.model.ImmutablePut; import static org.apache.iceberg.TableMetadataParser.getFileExtension; import static org.apache.iceberg.types.Types.NestedField.optional; @@ -98,7 +101,7 @@ public void afterEach() throws Exception { private org.projectnessie.model.IcebergTable getTable(ContentsKey key) throws NessieNotFoundException { return client.getContentsApi() - .getContents(key, BRANCH) + .getContents(key, BRANCH, null) .unwrap(IcebergTable.class).get(); } @@ -149,7 +152,7 @@ public void testRename() throws NessieNotFoundException { private void verifyCommitMetadata() throws NessieNotFoundException { // check that the author is properly set - List log = tree.getCommitLog(BRANCH, null, null).getOperations(); + List log = tree.getCommitLog(BRANCH, CommitLogParams.empty()).getOperations(); Assertions.assertThat(log).isNotNull().isNotEmpty(); log.forEach(x -> { Assertions.assertThat(x.getAuthor()).isNotNull().isNotEmpty(); @@ -265,10 +268,12 @@ public void testFailure() throws NessieNotFoundException, NessieConflictExceptio Table icebergTable = catalog.loadTable(TABLE_IDENTIFIER); Branch branch = (Branch) client.getTreeApi().getReferenceByName(BRANCH); - IcebergTable table = client.getContentsApi().getContents(KEY, BRANCH).unwrap(IcebergTable.class).get(); + IcebergTable table = client.getContentsApi().getContents(KEY, BRANCH, null).unwrap(IcebergTable.class).get(); - client.getContentsApi().setContents(KEY, branch.getName(), branch.getHash(), "", - IcebergTable.of("dummytable.metadata.json")); + client.getTreeApi().commitMultipleOperations(branch.getName(), branch.getHash(), + ImmutableOperations.builder().addOperations( + ImmutablePut.builder().key(KEY).contents(IcebergTable.of("dummytable.metadata.json")) + .build()).commitMeta(CommitMeta.fromMessage("")).build()); Assertions.assertThatThrownBy(() -> icebergTable.updateSchema().addColumn("data", Types.LongType.get()).commit()) .isInstanceOf(CommitFailedException.class) From 79ec2d5fd13963fa04268a1821f85c990d0a53e9 Mon Sep 17 00:00:00 2001 From: Robert Stupp Date: Thu, 15 Jul 2021 17:29:08 +0200 Subject: [PATCH 3/5] Bugfix: must send the Contents.id of the existing table Nessie's `Contents.id` is a random ID generated when the `Contents.Key` is first used (think: CREATE TABLE) and must not be changed. This change addresses a bug in the Iceberg-Nesie code that caused a new id for every change. --- .../org/apache/iceberg/nessie/NessieTableOperations.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java index 3cd4f9206776..3f24f4ed4f03 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java @@ -102,8 +102,13 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { boolean delete = true; try { - IcebergTable newTable = ImmutableIcebergTable.builder().metadataLocation(newMetadataLocation).build(); - Operations op = ImmutableOperations.builder().addOperations(Operation.Put.of(key, newTable)) + ImmutableIcebergTable.Builder newTable = ImmutableIcebergTable.builder(); + if (table != null) { + newTable.from(table); + } + newTable.metadataLocation(newMetadataLocation); + + Operations op = ImmutableOperations.builder().addOperations(Operation.Put.of(key, newTable.build())) .commitMeta(NessieUtil.buildCommitMetadata("iceberg commit", catalogOptions)).build(); client.getTreeApi().commitMultipleOperations(reference.getAsBranch().getName(), reference.getHash(), op); From 9e461410e05c670bb55f7d652388ef537e157a06 Mon Sep 17 00:00:00 2001 From: Robert Stupp Date: Thu, 15 Jul 2021 17:37:24 +0200 Subject: [PATCH 4/5] Throw `CommitStateUnknownException` for `renameTable` as well Follow-up of #2515 --- .../java/org/apache/iceberg/nessie/NessieCatalog.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java index afab89a875ba..4e414e7aad13 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java @@ -36,6 +36,7 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.CommitStateUnknownException; import org.apache.iceberg.exceptions.NamespaceNotEmptyException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; @@ -48,6 +49,7 @@ import org.projectnessie.api.params.EntriesParams; import org.projectnessie.client.NessieClient; import org.projectnessie.client.NessieConfigConstants; +import org.projectnessie.client.http.HttpClientException; import org.projectnessie.error.BaseNessieClientServerException; import org.projectnessie.error.NessieConflictException; import org.projectnessie.error.NessieNotFoundException; @@ -236,6 +238,12 @@ public void renameTable(TableIdentifier from, TableIdentifier toOriginal) { throw new RuntimeException("Failed to drop table as ref is no longer valid.", e); } catch (BaseNessieClientServerException e) { throw new CommitFailedException(e, "Failed to rename table: the current reference is not up to date."); + } catch (HttpClientException ex) { + // Intentionally catch all nessie-client-exceptions here and not just the "timeout" variant + // to catch all kinds of network errors (e.g. connection reset). Network code implementation + // details and all kinds of network devices can induce unexpected behavior. So better be + // safe than sorry. + throw new CommitStateUnknownException(ex); } // Intentionally just "throw through" Nessie's HttpClientException here and do not "special case" // just the "timeout" variant to propagate all kinds of network errors (e.g. connection reset). From 5f0ab991126f59a652048c2c8707a0f679781fbe Mon Sep 17 00:00:00 2001 From: Robert Stupp Date: Thu, 15 Jul 2021 17:40:36 +0200 Subject: [PATCH 5/5] Fix race-condition & save one roundtrip to Nessie during "commit" When commiting a change, the Nessie-API now returns the hash of the commit for the change. This returned hash should then be used as the "expected hash" for the next commit. The previous approach was to commit the change to Nessie and then do another request to retrieve the new hash of HEAD. This old approach is prone to a race condition, namely when another commit happens after "this" commit but before retrieving the "new HEAD", so "this" instance would wrongly ignore the other commit's changes during conflict checks. See [Let VersionStore.create()+commit() return the current hash](https://github.com/projectnessie/nessie/pull/1089) --- .../java/org/apache/iceberg/nessie/NessieCatalog.java | 11 +++++++---- .../apache/iceberg/nessie/NessieTableOperations.java | 5 ++++- .../apache/iceberg/nessie/UpdateableReference.java | 6 ++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java index 4e414e7aad13..a9032c3a8090 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java @@ -53,6 +53,7 @@ import org.projectnessie.error.BaseNessieClientServerException; import org.projectnessie.error.NessieConflictException; import org.projectnessie.error.NessieNotFoundException; +import org.projectnessie.model.Branch; import org.projectnessie.model.Contents; import org.projectnessie.model.IcebergTable; import org.projectnessie.model.ImmutableDelete; @@ -184,8 +185,9 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) { .throwFailureWhenFinished() .onFailure((c, exception) -> refresh()) .run(c -> { - client.getTreeApi().commitMultipleOperations(reference.getAsBranch().getName(), reference.getHash(), c); - refresh(); // note: updated to reference.updateReference() with Nessie 0.6 + Branch branch = client.getTreeApi().commitMultipleOperations(reference.getAsBranch().getName(), + reference.getHash(), c); + reference.updateReference(branch); }, BaseNessieClientServerException.class); threw = false; } catch (NessieConflictException e) { @@ -227,8 +229,9 @@ public void renameTable(TableIdentifier from, TableIdentifier toOriginal) { .throwFailureWhenFinished() .onFailure((c, exception) -> refresh()) .run(c -> { - client.getTreeApi().commitMultipleOperations(reference.getAsBranch().getName(), reference.getHash(), c); - refresh(); + Branch branch = client.getTreeApi().commitMultipleOperations(reference.getAsBranch().getName(), + reference.getHash(), c); + reference.updateReference(branch); }, BaseNessieClientServerException.class); } catch (NessieNotFoundException e) { // important note: the NotFoundException refers to the ref only. If a table was not found it would imply that the diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java index 3f24f4ed4f03..3f3fb6129156 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java @@ -30,6 +30,7 @@ import org.projectnessie.client.http.HttpClientException; import org.projectnessie.error.NessieConflictException; import org.projectnessie.error.NessieNotFoundException; +import org.projectnessie.model.Branch; import org.projectnessie.model.Contents; import org.projectnessie.model.ContentsKey; import org.projectnessie.model.IcebergTable; @@ -110,7 +111,9 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { Operations op = ImmutableOperations.builder().addOperations(Operation.Put.of(key, newTable.build())) .commitMeta(NessieUtil.buildCommitMetadata("iceberg commit", catalogOptions)).build(); - client.getTreeApi().commitMultipleOperations(reference.getAsBranch().getName(), reference.getHash(), op); + Branch branch = client.getTreeApi().commitMultipleOperations(reference.getAsBranch().getName(), + reference.getHash(), op); + reference.updateReference(branch); delete = false; } catch (NessieConflictException ex) { diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/UpdateableReference.java b/nessie/src/main/java/org/apache/iceberg/nessie/UpdateableReference.java index 61f97bfea7d9..eae05a53e1f7 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/UpdateableReference.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/UpdateableReference.java @@ -19,6 +19,7 @@ package org.apache.iceberg.nessie; +import java.util.Objects; import org.projectnessie.api.TreeApi; import org.projectnessie.error.NessieNotFoundException; import org.projectnessie.model.Branch; @@ -44,6 +45,11 @@ public boolean refresh() throws NessieNotFoundException { return !oldReference.equals(reference); } + public void updateReference(Reference ref) { + Objects.requireNonNull(ref); + this.reference = ref; + } + public boolean isBranch() { return reference instanceof Branch; }