Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e8284b0
Initial commit
ajantha-bhat Dec 9, 2022
7c5a16e
clean up
ajantha-bhat Jan 10, 2023
f6e9bdf
IT test framework
ajantha-bhat Jan 17, 2023
ef04c99
take2
ajantha-bhat Feb 3, 2023
2052fbb
Nessie integration tests
ajantha-bhat Feb 6, 2023
9ae2dac
Self review
ajantha-bhat Feb 7, 2023
6c7200e
intTest gradle task
ajantha-bhat Feb 7, 2023
383f4f0
Enhance test validation
ajantha-bhat Feb 8, 2023
702f3af
Address new review comments
ajantha-bhat Feb 10, 2023
ecf02a0
Address comments
ajantha-bhat Feb 21, 2023
27f487b
module refactoring
ajantha-bhat Feb 22, 2023
71b587f
no more mocks and fix few self review findings
ajantha-bhat Feb 23, 2023
eebf26e
Address review comments from 27th feb
ajantha-bhat Feb 27, 2023
bcaff7b
Address review comments from 2nd March
ajantha-bhat Mar 3, 2023
6c507f6
Part 2: Address review comments from 2nd March
ajantha-bhat Mar 7, 2023
d783e86
Part3: Unify build stuffs
ajantha-bhat Mar 8, 2023
f7f4551
Self review
ajantha-bhat Mar 8, 2023
3db80bb
Address review comments from March 9
ajantha-bhat Mar 8, 2023
8e4c879
Address review comments from March 15
ajantha-bhat Mar 15, 2023
aecd282
Address review comments from March 16
ajantha-bhat Mar 16, 2023
d2cf262
Update colorscheme
ajantha-bhat Mar 20, 2023
d61a0e1
Addressing comments from March 20
ajantha-bhat Mar 21, 2023
6b0aa1c
self review
ajantha-bhat Mar 21, 2023
a8fd104
Addressing the comments from March 22
ajantha-bhat Mar 23, 2023
9c90731
Fix dependency issues
ajantha-bhat Mar 29, 2023
05eccfe
Findings from all catalog test
ajantha-bhat Mar 30, 2023
07157cc
Address comments from April 6
ajantha-bhat Apr 6, 2023
678d5ca
Address comments from April 12
ajantha-bhat Apr 12, 2023
e46aad3
Handle leftover comments
ajantha-bhat Apr 15, 2023
b70dab8
Address comments from April 15
ajantha-bhat Apr 15, 2023
3c3e40e
Handle comments from April 17
ajantha-bhat Apr 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,11 @@ jobs:
distribution: 'temurin'
java-version: ${{ matrix.java-version }}

- name: Spotless Check
uses: gradle/gradle-build-action@v2
with:
# Spotless must run in a different invocation, because
# it has some weird Gradle configuration/variant issue
arguments: spotlessCheck --scan

- name: Build with Gradle
uses: gradle/gradle-build-action@v2
with:
arguments: --rerun-tasks assemble ${{ env.ADDITIONAL_GRADLE_OPTS }} check publishToMavenLocal --scan

- uses: codecov/codecov-action@v3
if: ${{ matrix.java-version == '11' }}
with:
files: jacoco/build/reports/jacoco/codeCoverageReport/codeCoverageReport.xml
flags: java

- name: Capture test results
uses: actions/upload-artifact@v3
if: failure()
Expand Down
347 changes: 345 additions & 2 deletions README.md

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions api-test/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (C) 2023 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Plugins applied to this module: the Java library and Maven publishing plugins,
// plus `build-conventions`.
// NOTE(review): `build-conventions` is presumably a project-local convention plugin
// (e.g. from buildSrc or an included build) — its definition is not visible here.
plugins {
`java-library`
`maven-publish`
`build-conventions`
}

// Dependencies of the shared test-support module. Everything is declared on
// `implementation` (not `testImplementation`), including JUnit.
// NOTE(review): presumably because the test helper classes live in this module's
// main source set so other modules can consume them — confirm against the source layout.
dependencies {
implementation(libs.guava)
// hadoop-common is pulled in with several transitive dependencies excluded.
implementation(libs.hadoop.common) {
exclude("org.apache.avro", "avro")
exclude("org.slf4j", "slf4j-log4j12")
exclude("javax.servlet", "servlet-api")
exclude("com.google.code.gson", "gson")
exclude("commons-beanutils")
}
implementation(libs.iceberg.spark.runtime)
implementation(libs.junit.jupiter.api)
// "tests" classifier of iceberg-hive-metastore, pinned to the catalog's Iceberg version.
// NOTE(review): presumably the artifact that provides Iceberg's Hive metastore test
// base classes — verify.
implementation("org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* Copyright (C) 2023 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.tools.catalog.migration.api.test;

import java.nio.file.Path;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.iceberg.Schema;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.types.Types;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.io.TempDir;

/**
 * Base class holding the fixtures shared by catalog-migration tests: well-known namespaces and
 * table identifiers, a source and a target {@link Catalog}, temporary directories, and helpers
 * that build catalog property maps.
 *
 * <p>{@link #sourceCatalog} and {@link #targetCatalog} are never assigned in this class;
 * subclasses are expected to set them before calling any helper that touches a catalog.
 */
public abstract class AbstractTest {

  public static final Namespace FOO = Namespace.of("foo");
  public static final Namespace BAR = Namespace.of("bar");
  public static final Namespace DB1 = Namespace.of("db1");
  public static final TableIdentifier FOO_TBL1 = TableIdentifier.of(FOO, "tbl1");
  public static final TableIdentifier FOO_TBL2 = TableIdentifier.of(FOO, "tbl2");
  public static final TableIdentifier BAR_TBL3 = TableIdentifier.of(BAR, "tbl3");
  public static final TableIdentifier BAR_TBL4 = TableIdentifier.of(BAR, "tbl4");

  // Order matters: createNamespacesForTargetCatalog() relies on DB1 being the last element.
  private static final List<Namespace> defaultNamespaceList = Arrays.asList(FOO, BAR, DB1);

  protected static final Namespace NS_A = Namespace.of("a");
  protected static final Namespace NS_A_B = Namespace.of("a", "b");
  protected static final Namespace NS_A_C = Namespace.of("a", "c");
  protected static final Namespace NS_A_B_C = Namespace.of("a", "b", "c");
  protected static final Namespace NS_A_B_C_D = Namespace.of("a", "b", "c", "d");
  protected static final Namespace NS_A_B_C_D_E = Namespace.of("a", "b", "c", "d", "e");

  // Absolute warehouse paths below tempDir; populated by initLogDir().
  private static String sourceCatalogWarehouse;
  private static String targetCatalogWarehouse;

  protected static Catalog sourceCatalog;
  protected static Catalog targetCatalog;

  /** Single-column schema ({@code id}: required long) used for every table created here. */
  protected static final Schema schema =
      new Schema(
          Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())).fields());

  protected static @TempDir Path logDir;

  protected static @TempDir Path tempDir;

  /**
   * Points the {@code catalog.migration.log.dir} system property at {@link #logDir} and derives
   * the source and target warehouse locations from {@link #tempDir}.
   */
  @BeforeAll
  protected static void initLogDir() {
    System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString());
    sourceCatalogWarehouse = tempDir.resolve("sourceCatalogWarehouse").toAbsolutePath().toString();
    targetCatalogWarehouse = tempDir.resolve("targetCatalogWarehouse").toAbsolutePath().toString();
  }

  /**
   * Closes both catalogs when they implement {@link AutoCloseable}.
   *
   * <p>The target catalog is closed in a {@code finally} block so that a failure while closing
   * the source catalog can no longer leave the target catalog open.
   *
   * @throws Exception if closing either catalog fails
   */
  @AfterAll
  protected static void close() throws Exception {
    try {
      if (sourceCatalog instanceof AutoCloseable) {
        ((AutoCloseable) sourceCatalog).close();
      }
    } finally {
      if (targetCatalog instanceof AutoCloseable) {
        ((AutoCloseable) targetCatalog).close();
      }
    }
  }

  /**
   * Aborts the current test (via a JUnit assumption) when source-table deletion is requested
   * while the source catalog is a {@link HadoopCatalog}.
   *
   * @param deleteSourceTables whether the test wants source tables deleted
   */
  protected void validateAssumptionForHadoopCatalogAsSource(boolean deleteSourceTables) {
    Assumptions.assumeFalse(
        deleteSourceTables && sourceCatalog instanceof HadoopCatalog,
        "deleting source tables is unsupported for HadoopCatalog");
  }

  /** Creates the {@code foo}, {@code bar} and {@code db1} namespaces in the source catalog. */
  protected static void createNamespacesForSourceCatalog() {
    defaultNamespaceList.forEach(
        namespace -> ((SupportsNamespaces) sourceCatalog).createNamespace(namespace));
  }

  /** Creates only the {@code foo} and {@code bar} namespaces in the target catalog. */
  protected static void createNamespacesForTargetCatalog() {
    // don't create "db1" namespace in targetCatalog
    defaultNamespaceList
        .subList(0, 2)
        .forEach(namespace -> ((SupportsNamespaces) targetCatalog).createNamespace(namespace));
  }

  /** Drops every default namespace that currently exists, in both catalogs. */
  protected static void dropNamespaces() {
    Stream.of(sourceCatalog, targetCatalog)
        .map(catalog -> (SupportsNamespaces) catalog)
        .forEach(
            catalog ->
                defaultNamespaceList.stream()
                    .filter(catalog::namespaceExists)
                    .forEach(catalog::dropNamespace));
  }

  /** Creates four tables in the source catalog: two under {@code foo}, two under {@code bar}. */
  protected static void createTables() {
    // two tables in 'foo' namespace
    sourceCatalog.createTable(FOO_TBL1, schema);
    sourceCatalog.createTable(FOO_TBL2, schema);
    // two tables in 'bar' namespace
    sourceCatalog.createTable(BAR_TBL3, schema);
    sourceCatalog.createTable(BAR_TBL4, schema);
  }

  /** Drops every table listed under the existing default namespaces, in both catalogs. */
  protected static void dropTables() {
    Stream.of(sourceCatalog, targetCatalog)
        .forEach(
            catalog ->
                defaultNamespaceList.stream()
                    .filter(namespace -> ((SupportsNamespaces) catalog).namespaceExists(namespace))
                    .forEach(
                        namespace -> catalog.listTables(namespace).forEach(catalog::dropTable)));
  }

  /**
   * Builds the property map for a Nessie catalog.
   *
   * <p>The server port is read from the {@code quarkus.http.test-port} system property and
   * defaults to 19121.
   *
   * @param isSourceCatalog {@code true} to use the source warehouse, {@code false} for the target
   * @return a new mutable map with {@code uri}, {@code warehouse} and {@code ref} entries
   */
  protected static Map<String, String> nessieCatalogProperties(boolean isSourceCatalog) {
    Map<String, String> properties = new HashMap<>();
    Integer nessiePort = Integer.getInteger("quarkus.http.test-port", 19121);
    String nessieUri = String.format("http://localhost:%d/api/v1", nessiePort);
    properties.put("uri", nessieUri);
    properties.put("warehouse", warehouseFor(isSourceCatalog));
    properties.put("ref", "main");
    return properties;
  }

  /**
   * Builds the property map for a Hadoop catalog.
   *
   * @param isSourceCatalog {@code true} to use the source warehouse, {@code false} for the target
   * @return a new mutable map containing only the {@code warehouse} entry
   */
  protected static Map<String, String> hadoopCatalogProperties(boolean isSourceCatalog) {
    Map<String, String> properties = new HashMap<>();
    properties.put("warehouse", warehouseFor(isSourceCatalog));
    return properties;
  }

  /**
   * Builds the property map for a Hive catalog.
   *
   * @param isSourceCatalog {@code true} to use the source warehouse, {@code false} for the target
   * @param dynamicProperties extra entries copied in last, so they override {@code warehouse} on
   *     a key clash; must not be {@code null}
   * @return a new mutable map with the {@code warehouse} entry plus {@code dynamicProperties}
   */
  protected static Map<String, String> hiveCatalogProperties(
      boolean isSourceCatalog, Map<String, String> dynamicProperties) {
    Map<String, String> properties = new HashMap<>();
    properties.put("warehouse", warehouseFor(isSourceCatalog));
    properties.putAll(dynamicProperties);
    return properties;
  }

  /** Selects the source or target warehouse path set up by {@link #initLogDir()}. */
  private static String warehouseFor(boolean isSourceCatalog) {
    return isSourceCatalog ? sourceCatalogWarehouse : targetCatalogWarehouse;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright (C) 2023 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.tools.catalog.migration.api.test;

import org.apache.iceberg.hive.HiveCatalog;
import org.apache.iceberg.hive.HiveMetastoreTest;

/**
 * Subclass of {@link HiveMetastoreTest} whose only purpose is to give test code outside that
 * class hierarchy access to its static catalog.
 */
public class HiveMetaStoreRunner extends HiveMetastoreTest {

  /**
   * Hands out the catalog held in {@code HiveMetastoreTest.catalog}.
   *
   * @return whatever that static field currently references
   */
  public static HiveCatalog hiveCatalog() {
    final HiveCatalog sharedCatalog = HiveMetastoreTest.catalog;
    return sharedCatalog;
  }
}
91 changes: 91 additions & 0 deletions api/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright (C) 2023 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Plugins applied to the API module: Java library, Maven publishing, the Nessie
// runner plugin resolved through the version catalog alias, and `build-conventions`.
// NOTE(review): `build-conventions` is presumably a project-local convention plugin —
// its definition is not visible here.
plugins {
`java-library`
`maven-publish`
alias(libs.plugins.nessie.run)
`build-conventions`
}

// Dependencies of the API module: production libraries first, then annotation
// processing, then unit/integration test libraries, and finally the Nessie server
// artifact on the `nessieQuarkusServer` configuration.
dependencies {
implementation(libs.guava)
implementation(libs.slf4j)
implementation(libs.iceberg.spark.runtime)
implementation(libs.iceberg.dell)
// hadoop-common is pulled in with several transitive dependencies excluded.
implementation(libs.hadoop.common) {
exclude("org.apache.avro", "avro")
exclude("org.slf4j", "slf4j-log4j12")
exclude("javax.servlet", "servlet-api")
exclude("com.google.code.gson", "gson")
exclude("commons-beanutils")
}

// Immutables: annotations are compile-only, the processor runs at annotation-processing time.
compileOnly(libs.immutables.value.annotations)
annotationProcessor(libs.immutables.value.processor)

testRuntimeOnly(libs.logback.classic)
testImplementation(libs.junit.jupiter.params)
testImplementation(libs.junit.jupiter.api)
testImplementation(libs.junit.jupiter.engine)
testImplementation(libs.assertj)
testImplementation(libs.logcaptor)

// Shared test-support classes from the sibling api-test project.
testImplementation(project(":iceberg-catalog-migrator-api-test"))

// for integration tests
testImplementation(
"org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests"
)
// JUnit 4 is required at test runtime by Iceberg's TestHiveMetastore, which comes from
// the iceberg-hive-metastore "tests" artifact declared above.
testRuntimeOnly("junit:junit:4.13.2")

testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") {
// these are taken from iceberg repo configurations
exclude("org.apache.avro", "avro")
exclude("org.slf4j", "slf4j-log4j12")
exclude("org.pentaho") // missing dependency
exclude("org.apache.hbase")
exclude("org.apache.logging.log4j")
exclude("co.cask.tephra")
exclude("com.google.code.findbugs", "jsr305")
exclude("org.eclipse.jetty.aggregate", "jetty-all")
exclude("org.eclipse.jetty.orbit", "javax.servlet")
exclude("org.apache.parquet", "parquet-hadoop-bundle")
exclude("com.tdunning", "json")
exclude("javax.transaction", "transaction-api")
exclude("com.zaxxer", "HikariCP")
}
testImplementation("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") {
// these are taken from iceberg repo configurations
exclude("org.apache.avro", "avro")
exclude("org.slf4j", "slf4j-log4j12")
exclude("org.pentaho") // missing dependency
exclude("org.apache.hive", "hive-llap-tez")
exclude("org.apache.logging.log4j")
exclude("com.google.protobuf", "protobuf-java")
exclude("org.apache.calcite")
exclude("org.apache.calcite.avatica")
exclude("com.google.code.findbugs", "jsr305")
}
testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}")

// Nessie Quarkus "runner" artifact, version taken from the version catalog.
// NOTE(review): presumably the server that the Nessie runner plugin launches — confirm
// against the plugin documentation.
nessieQuarkusServer(
"org.projectnessie.nessie:nessie-quarkus:${libs.versions.nessie.get()}:runner"
)
}

// Registers the `intTest` Test task with the Nessie runner plugin's extension.
// NOTE(review): presumably this makes the plugin start a Nessie server around that task;
// `intTest` itself is not declared in this script, so it is presumably registered by
// `build-conventions` — verify.
nessieQuarkusApp { includeTask(tasks.named<Test>("intTest")) }
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright (C) 2023 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.tools.catalog.migration.api;

import java.util.List;
import org.apache.iceberg.catalog.TableIdentifier;
import org.immutables.value.Value;

/**
 * Value type, generated by the Immutables annotation processor, that groups three lists of
 * {@link TableIdentifier}s.
 *
 * <p>NOTE(review): judging by the accessor names this is the outcome of a catalog migration run;
 * the exact conditions under which a table lands in each list are defined by the producer of
 * this object, which is not visible here.
 */
@Value.Immutable
public interface CatalogMigrationResult {

/** Identifiers of tables reported as registered (presumably in the target catalog — confirm). */
List<TableIdentifier> registeredTableIdentifiers();

/** Identifiers of tables reported as having failed registration. */
List<TableIdentifier> failedToRegisterTableIdentifiers();

/** Identifiers of tables reported as having failed deletion (presumably from the source catalog — confirm). */
List<TableIdentifier> failedToDeleteTableIdentifiers();
}
Loading