From e8284b0ad9151fc277a9a1c34ecf14829ab28f4b Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Fri, 9 Dec 2022 17:32:34 +0530 Subject: [PATCH 01/31] Initial commit --- .github/workflows/main.yml | 2 +- .gitignore | 3 + README.md | 99 ++++- build.gradle.kts | 91 +++- buildSrc/build.gradle.kts | 40 ++ buildSrc/settings.gradle.kts | 19 + buildSrc/src/main/kotlin/Utilities.gradle.kts | 107 +++++ gradle/libs.versions.toml | 52 +++ src/exec/exec-preamble.sh | 20 + .../catalog/migration/CLIVersionProvider.java | 35 ++ .../catalog/migration/CatalogMigrateUtil.java | 229 ++++++++++ .../migration/CatalogMigrationCLI.java | 300 +++++++++++++ src/main/resources/logback.xml | 34 ++ .../catalog/migration/version.properties | 16 + .../catalog/migration/CLIErrorsTest.java | 101 +++++ .../tools/catalog/migration/CLITest.java | 399 ++++++++++++++++++ .../tools/catalog/migration/ITCLITest.java | 37 ++ .../tools/catalog/migration/RunCLI.java | 101 +++++ 18 files changed, 1679 insertions(+), 6 deletions(-) create mode 100644 buildSrc/build.gradle.kts create mode 100644 buildSrc/settings.gradle.kts create mode 100644 buildSrc/src/main/kotlin/Utilities.gradle.kts create mode 100644 gradle/libs.versions.toml create mode 100644 src/exec/exec-preamble.sh create mode 100644 src/main/java/org/projectnessie/tools/catalog/migration/CLIVersionProvider.java create mode 100644 src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java create mode 100644 src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java create mode 100644 src/main/resources/logback.xml create mode 100644 src/main/resources/org/projectnessie/tools/catalog/migration/version.properties create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITCLITest.java create mode 100644 
src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7eb052c..8dedf48 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -34,7 +34,7 @@ jobs: - name: Build with Gradle uses: gradle/gradle-build-action@v2 with: - arguments: --rerun-tasks assemble ${{ env.ADDITIONAL_GRADLE_OPTS }} check publishToMavenLocal --scan + arguments: --rerun-tasks assemble ${{ env.ADDITIONAL_GRADLE_OPTS }} check publishToMavenLocal -x spotlessCheck --scan - uses: codecov/codecov-action@v3 if: ${{ matrix.java-version == '11' }} diff --git a/.gitignore b/.gitignore index a893541..bed663c 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,6 @@ site/site # Ignore Gradle build output directory build + +# test output +failed_identifiers.txt diff --git a/README.md b/README.md index 7fecf62..c7f9db9 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,97 @@ -# iceberg-catalog-migrator -CLI tool to bulk migrate the tables from one catalog another without a data copy +# catalog-migrator +A CLI tool to bulk migrate Iceberg tables from one catalog to another without a data copy. + +Need to have java installed in your machine(JDK8 or later version) to use this CLI tool. + +Below is the CLI syntax: +``` +$ java -jar iceberg-catalog-migrator-0.1.0-SNAPSHOT.jar --help +Usage: register [-hV] [--delete-source-tables] [--identifiers-from-file=] + [--source-custom-catalog-impl=] [-T=] + [--target-custom-catalog-impl=] [-I=[,...]]... + [--source-catalog-hadoop-conf=[,...]]... [--target-catalog-hadoop-conf=[, + ...]]... [[,...]] [[, + ...]] + +Bulk register the iceberg tables from source catalog to target catalog without data copy. + + source catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] + [[,...]] + source catalog properties + target catalog type. 
Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] + [[,...]] + target catalog properties + --source-catalog-hadoop-conf=[,...] + optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg + FileIO. + --target-catalog-hadoop-conf=[,...] + optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg + FileIO. + -I, --identifiers=[,...] + optional selective list of identifiers to register. If not specified, all the tables will be registered.Use this when + there are few identifiers needs to be registered. For large number of identifiers, use `--identifiers-from-file` option. + --identifiers-from-file= + optional text file path that contains list of table identifiers (one per line) to register. Should not be used with + `--identifiers` option. + -T, --thread-pool-size= + optional size of the thread pool used for register tables. Tables are migrated sequentially if not specified. + --source-custom-catalog-impl= + optional fully qualified class name of the custom catalog implementation of the source catalog. Required when the catalog + type is CUSTOM. + --target-custom-catalog-impl= + optional fully qualified class name of the custom catalog implementation of the target catalog. Required when the catalog + type is CUSTOM. + --delete-source-tables + Optional configuration to delete the tables entry from source catalog after successfully registering it to target catalog. + -h, --help Show this help message and exit. + -V, --version Print version information and exit. +``` + +> :warning: By default this tool just registers the table. +Which means the table will be present in both the catalogs after registering. +Operating same table from more than one catalog can lead to missing updates, loss of data and table corruption. 
+So, it is recommended to use the '--delete-source-tables' option in the CLI to automatically delete the table from the source catalog after registering, +or to avoid operating on tables from the source catalog after registering if the '--delete-source-tables' option is not used. + +> :warning: **It is recommended to use this CLI tool when there are no in-progress commits for the tables in the source catalog.** +Commits that are still in progress while the tool runs may not make it into the target catalog. + +### Example command for bulk migrating tables between Hadoop catalog and Arctic catalog + +```shell +export PAT=xxxxxxx +export SECRETKEY=xxxxxxx +export ACCESSKEY=xxxxxxx +``` + +##### Register all the tables from Hadoop catalog to Arctic catalog (main branch) + +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +HADOOP \ +warehouse=/tmp/warehouse,type=hadoop \ +NESSIE \ +uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ +--target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +``` + +##### Migrate selected tables (t1,t2 in namespace foo) from Arctic catalog (main branch) to Hadoop catalog. 
+```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +NESSIE \ +uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ +HADOOP \ +warehouse=/tmp/warehouse,type=hadoop --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY \ +-I foo.t1,foo.t2 \ +--delete-source-tables +``` + +### Example command for bulk migrating tables from Hadoop catalog to Nessie catalog (main branch) +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +HADOOP \ +warehouse=/tmp/warehouse,type=hadoop \ +NESSIE \ +uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--delete-source-tables +``` diff --git a/build.gradle.kts b/build.gradle.kts index 485a8aa..9d61cc1 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -14,19 +14,104 @@ * limitations under the License. */ +import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + plugins { `java-library` `maven-publish` + id("com.diffplug.spotless") + id("com.github.johnrengelman.shadow") + Utilities } -repositories { - mavenCentral() +repositories { mavenCentral() } + +applyShadowJar() + +dependencies { + api("com.google.guava:guava:31.1-jre") + api("org.slf4j:log4j-over-slf4j:1.7.36") + api("ch.qos.logback:logback-classic:1.2.11") + api("ch.qos.logback:logback-core:1.2.11") + api("info.picocli:picocli:4.7.0") + api("org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0") + api("org.apache.iceberg:iceberg-dell:1.1.0") + api("org.apache.hadoop:hadoop-common:3.2.4") + api("org.apache.hadoop:hadoop-aws:3.2.4") + api("com.amazonaws:aws-java-sdk:1.7.4") + + testImplementation(libs.junit.jupiter.params) + testImplementation(libs.junit.jupiter.api) + testImplementation(libs.junit.jupiter.engine) } group = "org.projectnessie" version = file("version.txt").readText().trim() -description = 
"catalog-migration-tool" +description = "iceberg-catalog-migrator" java.sourceCompatibility = JavaVersion.VERSION_1_8 + +val processResources = + tasks.named("processResources") { + inputs.property("projectVersion", project.version) + filter( + org.apache.tools.ant.filters.ReplaceTokens::class, + mapOf("tokens" to mapOf("projectVersion" to project.version)) + ) + } + +tasks.named("test") { systemProperty("expectedCLIVersion", project.version) } + +fun Project.applyShadowJar() { + plugins.apply(ShadowPlugin::class.java) + + plugins.withType().configureEach { + val shadowJar = + tasks.named("shadowJar") { + isZip64 = true // as the package has more than 65535 files + outputs.cacheIf { false } // do not cache uber/shaded jars + archiveClassifier.set("") + mergeServiceFiles() + } + + tasks.named("jar") { + dependsOn(shadowJar) + archiveClassifier.set("raw") + } + } +} + +val mainClassName = "org.projectnessie.tools.catalog.migration.CatalogMigrationCLI" + +extra["versionGoogleJavaFormat"] = libs.versions.googleJavaFormat.get() + +val shadowJar = tasks.named("shadowJar") + +val unixExecutable by + tasks.registering { + group = "build" + description = "Generates the Unix executable" + + dependsOn(shadowJar) + val dir = buildDir.resolve("executable") + val executable = dir.resolve("iceberg-catalog-migrator") + inputs.files(shadowJar.get().archiveFile).withPathSensitivity(PathSensitivity.RELATIVE) + outputs.files(executable) + outputs.cacheIf { false } // very big file + doFirst { + dir.mkdirs() + executable.outputStream().use { out -> + projectDir.resolve("src/exec/exec-preamble.sh").inputStream().use { i -> i.transferTo(out) } + shadowJar.get().archiveFile.get().asFile.inputStream().use { i -> i.transferTo(out) } + } + executable.setExecutable(true) + } + } + +shadowJar { + manifest { attributes["Main-Class"] = mainClassName } + finalizedBy(unixExecutable) +} diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts new file mode 100644 index 0000000..2767dac 
--- /dev/null +++ b/buildSrc/build.gradle.kts @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +plugins { `kotlin-dsl` } + +repositories { + mavenCentral() + gradlePluginPortal() + if (System.getProperty("withMavenLocal").toBoolean()) { + mavenLocal() + } +} + +dependencies { + implementation(gradleKotlinDsl()) + val ver = libs.versions + implementation("com.diffplug.spotless:spotless-plugin-gradle:${ver.spotlessPlugin.get()}") + implementation("gradle.plugin.com.github.johnrengelman:shadow:${ver.shadowPlugin.get()}") + val nessieVer = ver.nessieBuildPlugins.get() + implementation("org.projectnessie.buildsupport:ide-integration:$nessieVer") + implementation("org.projectnessie.buildsupport:jacoco:$nessieVer") + implementation("org.projectnessie.buildsupport:publishing:$nessieVer") + implementation("org.projectnessie.buildsupport:reflection-config:$nessieVer") + implementation("org.projectnessie.buildsupport:spotless:$nessieVer") +} + +kotlinDslPluginOptions { jvmTarget.set(JavaVersion.VERSION_11.toString()) } diff --git a/buildSrc/settings.gradle.kts b/buildSrc/settings.gradle.kts new file mode 100644 index 0000000..7bbbf0d --- /dev/null +++ b/buildSrc/settings.gradle.kts @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +dependencyResolutionManagement { + versionCatalogs { create("libs") { from(files("../gradle/libs.versions.toml")) } } +} diff --git a/buildSrc/src/main/kotlin/Utilities.gradle.kts b/buildSrc/src/main/kotlin/Utilities.gradle.kts new file mode 100644 index 0000000..46834d6 --- /dev/null +++ b/buildSrc/src/main/kotlin/Utilities.gradle.kts @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.gradle.api.JavaVersion +import org.gradle.api.file.DuplicatesStrategy +import org.gradle.api.plugins.JavaPlugin +import org.gradle.api.plugins.JavaPluginExtension +import org.gradle.api.tasks.bundling.Jar +import org.gradle.api.tasks.compile.JavaCompile +import org.gradle.api.tasks.javadoc.Javadoc +import org.gradle.api.tasks.testing.Test +import org.gradle.external.javadoc.CoreJavadocOptions +import org.gradle.kotlin.dsl.configure +import org.gradle.kotlin.dsl.named +import org.gradle.kotlin.dsl.repositories +import org.gradle.kotlin.dsl.withType + +plugins { + id("org.projectnessie.buildsupport.jacoco") + id("org.projectnessie.buildsupport.spotless") +} + +// TODO: enable this when support for publishing jars to nexus repo is added. +// if (project.name != "jacoco") { +// apply() +// } + +repositories { + mavenCentral { content { excludeVersionByRegex("io[.]delta", ".*", ".*-nessie") } } + if (System.getProperty("withMavenLocal").toBoolean()) { + mavenLocal() + } +} + +if (project.projectDir.resolve("src/test/java").exists()) { + tasks.withType().configureEach { + useJUnitPlatform {} + maxParallelForks = Runtime.getRuntime().availableProcessors() + } + + if (project.hasProperty("alsoTestAgainstJava8")) { + val javaToolchains = extensions.findByType(JavaToolchainService::class.java) + if (javaToolchains != null) { + val testWithJava8 = + tasks.register("testWithJava8") { + group = "verification" + description = "Run unit tests against Java 8" + + dependsOn("test") + + dependsOn + + useJUnitPlatform {} + maxParallelForks = Runtime.getRuntime().availableProcessors() + javaLauncher.set( + javaToolchains.launcherFor { languageVersion.set(JavaLanguageVersion.of(8)) } + ) + } + tasks.named("check") { dependsOn(testWithJava8) } + } + } +} + +tasks.withType().configureEach { + manifest { + attributes["Implementation-Title"] = "catalog-migration-tool" + attributes["Implementation-Version"] = project.version + } +} + +tasks.withType().configureEach { + 
options.encoding = "UTF-8" + options.release.set(8) +} + +tasks.withType().configureEach { + val opt = options as CoreJavadocOptions + // don't spam log w/ "warning: no @param/@return" + opt.addStringOption("Xdoclint:-reference", "-quiet") +} + +plugins.withType().configureEach { + configure { + withJavadocJar() + withSourcesJar() + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + modularity.inferModulePath.set(true) + } +} + +if (project != rootProject) { + tasks.withType().configureEach { duplicatesStrategy = DuplicatesStrategy.WARN } +} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml new file mode 100644 index 0000000..f4d3860 --- /dev/null +++ b/gradle/libs.versions.toml @@ -0,0 +1,52 @@ +[versions] +checkstyle = "10.3.4" +errorprone = "2.15.0" +googleJavaFormat = "1.15.0" +guava = "31.1-jre" +jacoco = "0.8.8" +jmh = "1.36" +junit = "5.9.1" +nessieBuildPlugins = "0.2.14" +picocli = "4.7.0" +shadowPlugin = "7.1.2" +slf4j = "1.7.36" +spotlessPlugin = "6.12.0" + +[bundles] +# Bundles serve two purposes: +# 1. Group dependencies together for renovatebot, which cares about bundles. Those bundles, that +# _solely_ exist to for dependency management via renovatebot, start with `managed-`. +# 2. Bundles used in build files to group dependencies that are commonly used together. 
+#managed-grpc = ["grpc-protobuf", "grpc-stub", "grpc-netty-shaded"] +#managed-immutables = ["immutables-value-annotations", "immutables-value-processor"] +#managed-jmh = ["jmh-core", "jmh-generator-annprocess"] + +junit-testing = ["assertj-core", "junit-jupiter-api", "junit-jupiter-params"] + +[libraries] +assertj-core = { module = "org.assertj:assertj-core", version = "3.23.1" } +findbugs-jsr305 = { module = "com.google.code.findbugs:jsr305", version = "3.0.2" } +google-java-format = { module = "com.google.googlejavaformat:google-java-format", version.ref = "googleJavaFormat" } +jacoco-maven-plugin = { module = "org.jacoco:jacoco-maven-plugin", version.ref = "jacoco" } +jmh-core = { module = "org.openjdk.jmh:jmh-core", version.ref = "jmh" } +jmh-generator-annprocess = { module = "org.openjdk.jmh:jmh-generator-annprocess", version.ref = "jmh" } +junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } +junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } +junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } +junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } +picocli = { module = "info.picocli:picocli", version.ref = "picocli" } + +[plugins] +errorprone = { id = "net.ltgt.errorprone", version = "3.0.1" } +idea-ext = { id = "org.jetbrains.gradle.plugin.idea-ext", version = "1.1.7" } +jmh = { id = "me.champeau.jmh", version = "0.6.8" } +nessie-build-checkstyle = { id = "org.projectnessie.buildsupport.checkstyle", version.ref = "nessieBuildPlugins" } +nessie-build-errorprone = { id = "org.projectnessie.buildsupport.errorprone", version.ref = "nessieBuildPlugins" } +nessie-build-ide-integration = { id = "org.projectnessie.buildsupport.ide-integration", version.ref = "nessieBuildPlugins" } +nessie-build-jandex = { id = "org.projectnessie.buildsupport.jandex", version.ref = "nessieBuildPlugins" } +nessie-build-spotless = { id = 
"org.projectnessie.buildsupport.spotless", version.ref = "nessieBuildPlugins" } +nexus-publish = { id = "io.github.gradle-nexus.publish-plugin", version = "1.1.0" } +shadow = { id = "com.github.johnrengelman.shadow", version.ref = "shadowPlugin" } +spotless = { id = "com.diffplug.spotless", version.ref = "spotlessPlugin" } +testrerun = { id = "org.caffinitas.gradle.testrerun", version = "0.1" } +testsummary = { id = "org.caffinitas.gradle.testsummary", version = "0.1.1" } diff --git a/src/exec/exec-preamble.sh b/src/exec/exec-preamble.sh new file mode 100644 index 0000000..8305636 --- /dev/null +++ b/src/exec/exec-preamble.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2023 Dremio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +THIS_FILE="${0}" + +exec java ${JAVA_OPTS} -jar "${THIS_FILE}" "$@" diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CLIVersionProvider.java b/src/main/java/org/projectnessie/tools/catalog/migration/CLIVersionProvider.java new file mode 100644 index 0000000..5e71dff --- /dev/null +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CLIVersionProvider.java @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import java.io.InputStream; +import java.util.Properties; +import picocli.CommandLine.IVersionProvider; + +public class CLIVersionProvider implements IVersionProvider { + @Override + public String[] getVersion() throws Exception { + try (InputStream input = + CLIVersionProvider.class + .getResource("version.properties") + .openConnection() + .getInputStream()) { + Properties props = new Properties(); + props.load(input); + return new String[] {props.getProperty("cli.version")}; + } + } +} diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java new file mode 100644 index 0000000..fe65bc8 --- /dev/null +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java @@ -0,0 +1,229 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ExecutorService; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.iceberg.BaseTable; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.util.Tasks; +import org.apache.iceberg.util.ThreadPools; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CatalogMigrateUtil { + private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrateUtil.class); + + private CatalogMigrateUtil() {} + + /** + * Migrates tables from one catalog(source catalog) to another catalog(target catalog). After + * successful migration, deletes the table entry from source catalog(not applicable for + * HadoopCatalog). + * + *

Supports bulk migration with multi-threaded execution. + * + *

Users must make sure that there are no in-progress commits on the tables of the source + * catalog during migration. + * + * @param tableIdentifiers a list of {@link TableIdentifier} for the tables required to be + * migrated. If not specified, all the tables would be migrated + * @param sourceCatalog Source {@link Catalog} from which the tables are chosen + * @param targetCatalog Target {@link Catalog} to which the tables need to be migrated + * @param maxThreadPoolSize Size of the thread pool used for migrate tables (If set to 0, no + * thread pool is used) + * @param printWriter to print regular updates on the console. + * @return Collection of successfully migrated and collection of failed to migrate table + * identifiers. + */ + public static ImmutablePair, Collection> + migrateTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + int maxThreadPoolSize, + PrintWriter printWriter) { + return migrateTables( + tableIdentifiers, sourceCatalog, targetCatalog, maxThreadPoolSize, true, printWriter); + } + + /** + * Register tables from one catalog(source catalog) to another catalog(target catalog). User has + * to take care of deleting the tables from source catalog after registration. + * + *

Supports bulk registration with multi-threaded execution. + * + *

Users must make sure that there are no in-progress commits on the tables of the source + * catalog during registration. + * + * @param tableIdentifiers a list of {@link TableIdentifier} for the tables required to be + * registered. If not specified, all the tables would be registered + * @param sourceCatalog Source {@link Catalog} from which the tables are chosen + * @param targetCatalog Target {@link Catalog} to which the tables need to be registered + * @param maxThreadPoolSize Size of the thread pool used for registering tables (If set to 0, no + * thread pool is used) + * @param printWriter to print regular updates on the console. + * @return Collection of successfully migrated and collection of failed to migrate table + * identifiers. + */ + public static ImmutablePair, Collection> + registerTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + int maxThreadPoolSize, + PrintWriter printWriter) { + return migrateTables( + tableIdentifiers, sourceCatalog, targetCatalog, maxThreadPoolSize, false, printWriter); + } + + private static ImmutablePair, Collection> + migrateTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + int maxThreadPoolSize, + boolean deleteEntriesFromSourceCatalog, + PrintWriter printWriter) { + validate(sourceCatalog, targetCatalog, maxThreadPoolSize); + + String operation = deleteEntriesFromSourceCatalog ? "migration" : "registration"; + + List identifiers; + if (tableIdentifiers == null || tableIdentifiers.isEmpty()) { + printWriter.println( + "\nUser has not specified the table identifiers." + + " Selecting all the tables from all the namespaces from the source catalog."); + + printWriter.println("Collecting all the namespaces from source catalog..."); + // fetch all the table identifiers from all the namespaces. + List namespaces = + (sourceCatalog instanceof SupportsNamespaces) + ? 
((SupportsNamespaces) sourceCatalog).listNamespaces() + : ImmutableList.of(Namespace.empty()); + printWriter.println("Collecting all the tables from all the namespaces of source catalog..."); + + identifiers = getTableIdentifiers(sourceCatalog, maxThreadPoolSize, namespaces); + } else { + identifiers = tableIdentifiers; + } + + printWriter.printf("\nIdentified %d tables for %s.", identifiers.size(), operation); + + printWriter.printf("\nStarted %s ...", operation); + + ExecutorService executorService = null; + if (maxThreadPoolSize > 0) { + executorService = ThreadPools.newWorkerPool("migrate-tables", maxThreadPoolSize); + } + try { + Collection migratedTableIdentifiers = new ConcurrentLinkedQueue<>(); + Collection failedToMigrateTableIdentifiers = new ConcurrentLinkedQueue<>(); + Tasks.foreach(identifiers.stream().filter(Objects::nonNull)) + .retry(3) + .stopRetryOn( + NoSuchTableException.class, + NoSuchNamespaceException.class, + AlreadyExistsException.class) + .suppressFailureWhenFinished() + .executeWith(executorService) + .onFailure( + (tableIdentifier, exc) -> { + failedToMigrateTableIdentifiers.add(tableIdentifier); + LOG.warn("Unable to migrate table {}", tableIdentifier, exc); + }) + .run( + tableIdentifier -> { + migrate( + tableIdentifier, sourceCatalog, targetCatalog, deleteEntriesFromSourceCatalog); + migratedTableIdentifiers.add(tableIdentifier); + LOG.info("Successfully migrated the table {}", tableIdentifier); + }); + + printWriter.printf("\nFinished %s ...", operation); + return ImmutablePair.of(migratedTableIdentifiers, failedToMigrateTableIdentifiers); + } finally { + if (executorService != null) { + executorService.shutdown(); + } + } + } + + private static List getTableIdentifiers( + Catalog sourceCatalog, int maxThreadPoolSize, List namespaces) { + ExecutorService executorService = null; + if (maxThreadPoolSize > 0) { + executorService = ThreadPools.newWorkerPool("list-tables", maxThreadPoolSize); + } + + try { + Collection allIdentifiers = 
new ConcurrentLinkedQueue<>(); + Tasks.foreach(namespaces.stream().filter(Objects::nonNull)) + .retry(1) + .suppressFailureWhenFinished() + .executeWith(executorService) + .run(namespace -> allIdentifiers.addAll(sourceCatalog.listTables(namespace))); + return new ArrayList<>(allIdentifiers); + } finally { + if (executorService != null) { + executorService.shutdown(); + } + } + } + + private static void validate( + Catalog sourceCatalog, Catalog targetCatalog, int maxThreadPoolSize) { + Preconditions.checkArgument( + maxThreadPoolSize >= 0, + "maxThreadPoolSize should have value >= 0, value: " + maxThreadPoolSize); + Preconditions.checkArgument(sourceCatalog != null, "Invalid source catalog: null"); + Preconditions.checkArgument(targetCatalog != null, "Invalid target catalog: null"); + Preconditions.checkArgument( + !targetCatalog.equals(sourceCatalog), "target catalog is same as source catalog"); + } + + private static void migrate( + TableIdentifier tableIdentifier, + Catalog sourceCatalog, + Catalog targetCatalog, + boolean deleteEntriesFromSourceCatalog) { + // register the table to the target catalog + TableOperations ops = ((BaseTable) sourceCatalog.loadTable(tableIdentifier)).operations(); + targetCatalog.registerTable(tableIdentifier, ops.current().metadataFileLocation()); + + if (deleteEntriesFromSourceCatalog && !(sourceCatalog instanceof HadoopCatalog)) { + // HadoopCatalog dropTable will delete the table files completely even when purge is false. + // So, skip dropTable for HadoopCatalog. 
+ sourceCatalog.dropTable(tableIdentifier, false); + } + } +} diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java new file mode 100644 index 0000000..cf552a2 --- /dev/null +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java @@ -0,0 +1,300 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.Callable; +import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; +import org.apache.iceberg.aws.glue.GlueCatalog; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.dell.ecs.EcsCatalog; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.hive.HiveCatalog; +import org.apache.iceberg.jdbc.JdbcCatalog; +import org.apache.iceberg.nessie.NessieCatalog; +import org.apache.iceberg.rest.RESTCatalog; +import picocli.CommandLine; + +@CommandLine.Command( + name = "register", + mixinStandardHelpOptions = true, + versionProvider = CLIVersionProvider.class, + // As both source and target catalog has similar configurations, + // documentation is easy to read if the target and source property is one after another instead + // of sorted. 
+ sortOptions = false, + description = + "\nBulk register the iceberg tables from source catalog to target catalog without data copy.\n") +public class CatalogMigrationCLI implements Callable { + @CommandLine.Spec CommandLine.Model.CommandSpec commandSpec; + + @CommandLine.Option( + names = "--source-catalog-hadoop-conf", + split = ",", + description = + "optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " + + "using an Iceberg FileIO.") + Map sourceHadoopConf = new HashMap<>(); + + @CommandLine.Option( + names = "--target-catalog-hadoop-conf", + split = ",", + description = + "optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " + + "using an Iceberg FileIO.") + Map targetHadoopConf = new HashMap<>(); + + @CommandLine.Option( + names = {"-I", "--identifiers"}, + split = ",", + description = + "optional selective list of identifiers to register. If not specified, all the tables will be registered." + + "Use this when there are few identifiers needs to be registered. For large number of identifiers, use " + + "`--identifiers-from-file` option.") + List identifiers = new ArrayList<>(); + + @CommandLine.Option( + names = {"--identifiers-from-file"}, + description = + "optional text file path that contains list of table identifiers (one per line) to register. Should not be " + + "used with `--identifiers` option.") + String identifiersFromFile; + + @CommandLine.Option( + names = {"-T", "--thread-pool-size"}, + defaultValue = "0", + description = + "optional size of the thread pool used for register tables. Tables are migrated sequentially if " + + "not specified.") + int maxThreadPoolSize; + + @CommandLine.Option( + names = {"--source-custom-catalog-impl"}, + description = + "optional fully qualified class name of the custom catalog implementation of the source catalog. 
Required " + + "when the catalog type is CUSTOM.") + String sourceCustomCatalogImpl; + + @CommandLine.Option( + names = {"--target-custom-catalog-impl"}, + description = + "optional fully qualified class name of the custom catalog implementation of the target catalog. Required " + + "when the catalog type is CUSTOM.") + String targetCustomCatalogImpl; + + @CommandLine.Option( + names = {"--delete-source-tables"}, + description = + "Optional configuration to delete the tables entry from source catalog after successfully registering it " + + "to target catalog.") + private boolean deleteSourceCatalogTables; + + @CommandLine.Parameters( + index = "0", + description = + "source catalog type. " + + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") + private CatalogType sourceCatalogType; + + @CommandLine.Parameters(index = "1", split = ",", description = "source catalog properties") + private Map sourceCatalogProperties; + + @CommandLine.Parameters( + index = "2", + description = + "target catalog type. " + + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") + private CatalogType targetCatalogType; + + @CommandLine.Parameters(index = "3", split = ",", description = "target catalog properties") + private Map targetCatalogProperties; + + public static void main(String... args) { + CommandLine commandLine = new CommandLine(new CatalogMigrationCLI()); + commandLine.setUsageHelpWidth(150); + int exitCode = commandLine.execute(args); + System.exit(exitCode); + } + + @Override + public Integer call() { + if (identifiersFromFile != null && !identifiers.isEmpty()) { + throw new IllegalArgumentException( + "Both `--identifiers` and `--identifiers-from-file` options are configured. 
Please use only one of them."); + } else if (identifiersFromFile != null) { + if (!Files.exists(Paths.get(identifiersFromFile))) { + throw new IllegalArgumentException( + "File specified in `--identifiers-from-file` option does not exist."); + } + } + + PrintWriter printWriter = commandSpec.commandLine().getOut(); + Configuration sourceCatalogConf = new Configuration(); + if (sourceHadoopConf != null && !sourceHadoopConf.isEmpty()) { + sourceHadoopConf.forEach(sourceCatalogConf::set); + } + Catalog sourceCatalog = + CatalogUtil.loadCatalog( + Objects.requireNonNull(catalogImpl(sourceCatalogType, sourceCustomCatalogImpl)), + "sourceCatalog", + sourceCatalogProperties, + sourceCatalogConf); + printWriter.printf("\nConfigured source catalog: %s\n", sourceCatalogType.name()); + + Configuration targetCatalogConf = new Configuration(); + if (targetHadoopConf != null && !targetHadoopConf.isEmpty()) { + targetHadoopConf.forEach(targetCatalogConf::set); + } + Catalog targetCatalog = + CatalogUtil.loadCatalog( + Objects.requireNonNull(catalogImpl(targetCatalogType, targetCustomCatalogImpl)), + "targetCatalog", + targetCatalogProperties, + targetCatalogConf); + printWriter.printf("\nConfigured target catalog: %s\n", targetCatalogType.name()); + + List tableIdentifiers = null; + if (identifiersFromFile != null) { + try { + printWriter.printf("\nCollecting identifiers from the file %s...\n", identifiersFromFile); + tableIdentifiers = + Files.readAllLines(Paths.get(identifiersFromFile)).stream() + .map(TableIdentifier::parse) + .collect(Collectors.toList()); + } catch (IOException e) { + throw new RuntimeException("Failed to read the file:", e); + } + } else if (!identifiers.isEmpty()) { + tableIdentifiers = + identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toList()); + } + + ImmutablePair, Collection> result; + if (deleteSourceCatalogTables) { + result = + CatalogMigrateUtil.migrateTables( + tableIdentifiers, sourceCatalog, targetCatalog, maxThreadPoolSize, 
printWriter); + if (sourceCatalogType == CatalogType.HADOOP) { + printWriter.println( + "\n[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " + + "catalog. \nAvoid operating the migrated tables from the source catalog after migration. " + + "Use the tables from target catalog."); + } + } else { + result = + CatalogMigrateUtil.registerTables( + tableIdentifiers, sourceCatalog, targetCatalog, maxThreadPoolSize, printWriter); + } + + printWriter.println("\nSummary: "); + if (!result.left.isEmpty()) { + printWriter.printf( + "- Successfully %s %d tables from %s catalog to %s catalog. \n", + deleteSourceCatalogTables ? "migrated" : "registered", + result.left.size(), + sourceCatalogType.name(), + targetCatalogType.name()); + } + if (!result.right.isEmpty()) { + List failedIdentifiers = + result.right.stream().map(TableIdentifier::toString).collect(Collectors.toList()); + try { + Files.write(Paths.get("failed_identifiers.txt"), failedIdentifiers); + } catch (IOException e) { + throw new RuntimeException("Failed to write the file:", e); + } + printWriter.printf( + "- Failed to %s %d tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "\n Failed Identifiers are written to `failed_identifiers.txt`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.\n", + deleteSourceCatalogTables ? "migrate" : "register", + result.right.size(), + sourceCatalogType.name(), + targetCatalogType.name()); + } + printWriter.println("\nDetails: "); + if (!result.left.isEmpty()) { + printWriter.printf( + "- Successfully %s these tables: \n", + deleteSourceCatalogTables ? "migrated" : "registered"); + printWriter.println(result.left); + } + if (!result.right.isEmpty()) { + printWriter.printf( + "- Failed to %s these tables: \n", deleteSourceCatalogTables ? 
"migrate" : "register"); + printWriter.println(result.right); + } + + return 0; + } + + private String catalogImpl(CatalogType type, String customCatalogImpl) { + switch (type) { + case CUSTOM: + if (customCatalogImpl == null || customCatalogImpl.isEmpty()) { + throw new IllegalArgumentException( + "Need to specify the fully qualified class name of the custom catalog " + "impl"); + } + return customCatalogImpl; + case DYNAMODB: + return DynamoDbCatalog.class.getName(); + case ECS: + return EcsCatalog.class.getName(); + case GLUE: + return GlueCatalog.class.getName(); + case HADOOP: + return HadoopCatalog.class.getName(); + case HIVE: + return HiveCatalog.class.getName(); + case JDBC: + return JdbcCatalog.class.getName(); + case NESSIE: + return NessieCatalog.class.getName(); + case REST: + return RESTCatalog.class.getName(); + default: + throw new IllegalArgumentException("Unsupported type: " + type.name()); + } + } + + public enum CatalogType { + CUSTOM, + DYNAMODB, + ECS, + GLUE, + HADOOP, + HIVE, + JDBC, + NESSIE, + REST + } +} diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml new file mode 100644 index 0000000..e3ee191 --- /dev/null +++ b/src/main/resources/logback.xml @@ -0,0 +1,34 @@ + + + + + + catalog_migration.log + true + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + diff --git a/src/main/resources/org/projectnessie/tools/catalog/migration/version.properties b/src/main/resources/org/projectnessie/tools/catalog/migration/version.properties new file mode 100644 index 0000000..64d3a3f --- /dev/null +++ b/src/main/resources/org/projectnessie/tools/catalog/migration/version.properties @@ -0,0 +1,16 @@ +# +# Copyright (C) 2023 Dremio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +cli.version=@projectVersion@ diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java new file mode 100644 index 0000000..0e8d0aa --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import static java.util.Collections.singletonList; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class CLIErrorsTest { + + static Stream optionErrors() { + return Stream.of( + // no arguments + arguments( + Collections.emptyList(), + "Missing required parameters: '', ''"), + // invalid argument + arguments( + singletonList(""), + "Invalid value for positional parameter at index 0 (): " + + "expected one of [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] (case-sensitive)"), + // missing required arguments + arguments(singletonList("GLUE"), "Missing required parameter: ''"), + // invalid argument + arguments( + Arrays.asList("HIVE", "properties1=ab", "properties2=cd"), + "Invalid value for positional parameter at index 2 (): expected one of [CUSTOM, " + + "DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] (case-sensitive) but was 'properties2=cd'")); + } + + @ParameterizedTest + @MethodSource("optionErrors") + @Order(0) + public void testOptionErrors(List args, String expectedMessage) throws Exception { + RunCLI run = RunCLI.run(args); + + Assertions.assertEquals(2, run.getExitCode()); + Assertions.assertTrue(run.getErr().contains(expectedMessage)); + } + + @Test + @Order(1) + public void testInvalidArgs() throws Exception { + RunCLI run = RunCLI.run("HADOOP", "k1=v1,k2=v2", "HIVE", "k3=v3, k4=v4"); + Assertions.assertEquals(1, run.getExitCode()); + Assertions.assertTrue( + run.getErr() + .contains( + "java.lang.IllegalArgumentException: Cannot initialize HadoopCatalog 
" + + "because warehousePath must not be null or empty")); + + run = + RunCLI.run( + "HADOOP", + "k1=v1,k2=v2", + "HIVE", + "k3=v3, k4=v4", + "-I", + "foo.tbl", + "--identifiers-from-file", + "file.txt"); + Assertions.assertEquals(1, run.getExitCode()); + Assertions.assertTrue( + run.getErr() + .contains( + "java.lang.IllegalArgumentException: Both `--identifiers` and `--identifiers-from-file` " + + "options are configured. Please use only one of them.")); + + run = + RunCLI.run( + "HADOOP", "k1=v1,k2=v2", "HIVE", "k3=v3, k4=v4", "--identifiers-from-file", "file.txt"); + Assertions.assertEquals(1, run.getExitCode()); + Assertions.assertTrue( + run.getErr() + .contains( + "java.lang.IllegalArgumentException: File specified in `--identifiers-from-file` option does not exist.")); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java new file mode 100644 index 0000000..c873599 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java @@ -0,0 +1,399 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import static org.apache.iceberg.types.Types.NestedField.required; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.Schema; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class CLITest { + + private static @TempDir File warehouse1; + private static String warehousePath1; + + private static @TempDir File warehouse2; + private static String warehousePath2; + + private static Catalog catalog1; + + private static Catalog catalog2; + + private static final Schema schema = + new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); + + @BeforeAll + protected static void setup() { + warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); + warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + + catalog1 = createCatalog(warehousePath1, "catalog1"); + ((HadoopCatalog) catalog1).createNamespace(Namespace.of("foo"), Collections.emptyMap()); + ((HadoopCatalog) catalog1).createNamespace(Namespace.of("bar"), Collections.emptyMap()); + + catalog2 = createCatalog(warehousePath2, "catalog2"); + ((HadoopCatalog) catalog2).createNamespace(Namespace.of("foo"), Collections.emptyMap()); + ((HadoopCatalog) 
catalog2).createNamespace(Namespace.of("bar"), Collections.emptyMap()); + } + + @BeforeEach + protected void beforeEach() { + // two tables in 'foo' namespace + catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl-1"), schema); + catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl-2"), schema); + // two tables in 'bar' namespace + catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl-3"), schema); + catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl-4"), schema); + } + + @AfterEach + protected void afterEach() { + Arrays.asList(Namespace.of("foo"), Namespace.of("bar")) + .forEach( + namespace -> { + catalog1.listTables(namespace).forEach(catalog1::dropTable); + catalog2.listTables(namespace).forEach(catalog2::dropTable); + }); + } + + private static Catalog createCatalog(String warehousePath, String name) { + Map properties = new HashMap<>(); + properties.put("warehouse", warehousePath); + properties.put("type", "hadoop"); + return CatalogUtil.loadCatalog( + HadoopCatalog.class.getName(), name, properties, new Configuration()); + } + + @Test + @Order(0) + public void testRegister() throws Exception { + RunCLI run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop"); + + Assertions.assertEquals(0, run.getExitCode()); + Assertions.assertTrue( + run.getOut() + .contains( + "User has not specified the table identifiers. 
" + + "Selecting all the tables from all the namespaces from the source catalog.")); + Assertions.assertTrue(run.getOut().contains("Identified 4 tables for registration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n- Successfully registered 4 tables from HADOOP catalog to" + + " HADOOP catalog.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Details: \n- Successfully registered these tables: \n" + + "[foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3]")); + } + + @Test + @Order(1) + public void testMigrate() throws Exception { + RunCLI run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "--delete-source-tables"); + + Assertions.assertEquals(0, run.getExitCode()); + // note that keywords in output is "migrate" instead of "register". + // If the catalog was not hadoop catalog, tables also should get deleted from the source catalog + // after migration. + Assertions.assertTrue( + run.getOut() + .contains( + "User has not specified the table identifiers. 
" + + "Selecting all the tables from all the namespaces from the source catalog.")); + Assertions.assertTrue(run.getOut().contains("Identified 4 tables for migration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n- Successfully migrated 4 tables from HADOOP catalog to" + + " HADOOP catalog.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Details: \n- Successfully migrated these tables: \n" + + "[foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3]")); + } + + @Test + @Order(2) + public void testRegisterSelectedTables() throws Exception { + // using `--identifiers` option + RunCLI run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "--identifiers", + "bar.tbl-3"); + + Assertions.assertEquals(0, run.getExitCode()); + + Assertions.assertFalse( + run.getOut() + .contains( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog.")); + + Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n- Successfully registered 1 tables from HADOOP catalog to" + + " HADOOP catalog.")); + Assertions.assertTrue( + run.getOut() + .contains("Details: \n- Successfully registered these tables: \n" + "[bar.tbl-3]")); + + // using `--identifiers-from-file` option + Path identifierFile = Paths.get("ids.txt"); + Files.write(identifierFile, Collections.singletonList("foo.tbl-2")); + run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "--identifiers-from-file", + "ids.txt"); + Files.delete(identifierFile); + + Assertions.assertEquals(0, run.getExitCode()); + + Assertions.assertTrue(run.getOut().contains("Collecting identifiers from the file ids.txt...")); + + Assertions.assertFalse( + run.getOut() + .contains( + 
"User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog.")); + + Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n- Successfully registered 1 tables from HADOOP catalog to" + + " HADOOP catalog.")); + Assertions.assertTrue( + run.getOut() + .contains("Details: \n- Successfully registered these tables: \n" + "[foo.tbl-2]")); + } + + @Test + @Order(3) + public void testRegisterMultiThread() throws Exception { + RunCLI run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "-T", + "4"); + + Assertions.assertEquals(0, run.getExitCode()); + Assertions.assertTrue( + run.getOut() + .contains( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog.")); + Assertions.assertTrue(run.getOut().contains("Identified 4 tables for registration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n- Successfully registered 4 tables from HADOOP catalog to" + + " HADOOP catalog.")); + } + + @Test + @Order(4) + public void testRegisterError() throws Exception { + // use invalid namespace which leads to NoSuchTableException + RunCLI run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "-I", + "dummy.tbl-3"); + Assertions.assertEquals(0, run.getExitCode()); + Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n- Failed to register 1 tables from HADOOP catalog to HADOOP catalog." 
+ + " Please check the `catalog_migration.log`")); + Assertions.assertTrue( + run.getOut().contains("Details: \n- Failed to register these tables: \n[dummy.tbl-3]")); + + // try to register same table twice which leads to AlreadyExistsException + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "-I", + "foo.tbl-2"); + run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "-I", + "foo.tbl-2"); + Assertions.assertEquals(0, run.getExitCode()); + Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n- Failed to register 1 tables from HADOOP catalog to HADOOP catalog." + + " Please check the `catalog_migration.log`")); + Assertions.assertTrue( + run.getOut().contains("Details: \n- Failed to register these tables: \n[foo.tbl-2]")); + } + + @Test + @Order(5) + public void testRegisterPartialTables() throws Exception { + // register only foo.tbl-2 + RunCLI run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "-I", + "foo.tbl-2"); + Assertions.assertEquals(0, run.getExitCode()); + Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n- Successfully registered 1 tables from HADOOP catalog to HADOOP catalog.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Details: \n" + "- Successfully registered these tables: \n" + "[foo.tbl-2]")); + + // register all the tables from source catalog again + run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop"); + Assertions.assertEquals(0, run.getExitCode()); + 
Assertions.assertTrue(run.getOut().contains("Identified 4 tables for registration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n" + + "- Successfully registered 3 tables from HADOOP catalog to HADOOP catalog. \n" + + "- Failed to register 1 tables from HADOOP catalog to HADOOP catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. \n" + + " Failed Identifiers are written to `failed_identifiers.txt`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Details: \n" + + "- Successfully registered these tables: \n" + + "[foo.tbl-1, bar.tbl-4, bar.tbl-3]\n" + + "- Failed to register these tables: \n" + + "[foo.tbl-2]")); + + // retry the failed tables using --identifiers-from-file + run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "--identifiers-from-file", + "failed_identifiers.txt"); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n" + + "- Failed to register 1 tables from HADOOP catalog to HADOOP catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. \n" + + " Failed Identifiers are written to `failed_identifiers.txt`. " + + "Retry with that file using `--identifiers-from-file` option if the failure is because of network/connection timeouts.\n" + + "\n" + + "Details: \n" + + "- Failed to register these tables: \n" + + "[foo.tbl-2]")); + } + + @Test + @Order(6) + public void testRegisterNoTables() throws Exception { + // source catalog is catalog2 which has no tables. 
+ RunCLI run = + RunCLI.run( + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop", + "HADOOP", + "warehouse=" + warehousePath1 + ",type=hadoop"); + + Assertions.assertEquals(0, run.getExitCode()); + Assertions.assertTrue(run.getOut().contains("Identified 0 tables for registration.")); + } + + @Test + @Order(7) + public void version() throws Exception { + RunCLI run = RunCLI.run("--version"); + Assertions.assertEquals(0, run.getExitCode()); + Assertions.assertTrue(run.getOut().startsWith(System.getProperty("expectedCLIVersion"))); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITCLITest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITCLITest.java new file mode 100644 index 0000000..187248b --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITCLITest.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import org.junit.jupiter.api.Test; + +public class ITCLITest { + + @Test + public void testCustom() { + // sample code assuming HADOOP and NESSIE catalogs is up and running. 
+ + // RunCLI run = + // RunCLI.run( + // "HADOOP", + // "warehouse=/tmp/warehouseHadoop,type=hadoop", + // "NESSIE", + // "uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie"); + // + // System.out.println(run.getOut()); + // + // Assertions.assertTrue(run.getOut().contains("Summary: ")); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java b/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java new file mode 100644 index 0000000..4f1c272 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.Arrays; +import java.util.List; +import picocli.CommandLine; + +/** Helper class for tests. */ +public final class RunCLI { + + private final String[] args; + private final int exitCode; + private final String out; + private final String err; + + public RunCLI(int exitCode, String out, String err, String[] args) { + this.args = args; + this.exitCode = exitCode; + this.out = out; + this.err = err; + } + + public static RunCLI run(List args) throws Exception { + return run(args.toArray(new String[0])); + } + + private static int runMain(PrintWriter out, PrintWriter err, String... 
arguments) { + CommandLine commandLine = + new CommandLine(new CatalogMigrationCLI()) + .setExecutionExceptionHandler( + (ex, cmd, parseResult) -> { + cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); + return cmd.getExitCodeExceptionMapper() != null + ? cmd.getExitCodeExceptionMapper().getExitCode(ex) + : cmd.getCommandSpec().exitCodeOnExecutionException(); + }); + if (null != out) { + commandLine = commandLine.setOut(out); + } + if (null != err) { + commandLine = commandLine.setErr(err); + } + try { + return commandLine.execute(arguments); + } finally { + commandLine.getOut().flush(); + commandLine.getErr().flush(); + } + } + + public static RunCLI run(String... args) throws Exception { + try (StringWriter out = new StringWriter(); + PrintWriter outWriter = new PrintWriter(out); + StringWriter err = new StringWriter(); + PrintWriter errWriter = new PrintWriter(err)) { + int exitCode = runMain(outWriter, errWriter, args); + return new RunCLI(exitCode, out.toString(), err.toString(), args); + } + } + + public int getExitCode() { + return exitCode; + } + + public String getOut() { + return out; + } + + public String getErr() { + return err; + } + + @Override + public String toString() { + return "org.projectnessie.tools.catalog.migration.RunCLI{" + + "args=" + + Arrays.toString(args) + + "\nexitCode=" + + exitCode + + "\n\nstdout:\n" + + out + + "\n\nstderr:\n" + + err; + } +} From 7c5a16e7d5956a77b600930d806db883f8d1b0a9 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Tue, 10 Jan 2023 15:33:43 +0530 Subject: [PATCH 02/31] clean up --- .github/workflows/main.yml | 8 +--- README.md | 2 +- build.gradle.kts | 27 +++++++----- buildSrc/build.gradle.kts | 4 -- buildSrc/src/main/kotlin/Utilities.gradle.kts | 37 ++-------------- gradle/libs.versions.toml | 43 ++++++------------- .../tools/catalog/migration/ITCLITest.java | 37 ---------------- 7 files changed, 34 insertions(+), 124 deletions(-) delete mode 100644 
src/test/java/org/projectnessie/tools/catalog/migration/ITCLITest.java diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8dedf48..00caca5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,19 +29,13 @@ jobs: with: # Spotless must run in a different invocation, because # it has some weird Gradle configuration/variant issue - arguments: spotlessCheck --scan + arguments: spotlessCheck - name: Build with Gradle uses: gradle/gradle-build-action@v2 with: arguments: --rerun-tasks assemble ${{ env.ADDITIONAL_GRADLE_OPTS }} check publishToMavenLocal -x spotlessCheck --scan - - uses: codecov/codecov-action@v3 - if: ${{ matrix.java-version == '11' }} - with: - files: jacoco/build/reports/jacoco/codeCoverageReport/codeCoverageReport.xml - flags: java - - name: Capture test results uses: actions/upload-artifact@v3 if: failure() diff --git a/README.md b/README.md index c7f9db9..ef7d83f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # catalog-migrator A CLI tool to bulk migrate Iceberg tables from one catalog to another without a data copy. -Need to have java installed in your machine(JDK8 or later version) to use this CLI tool. +Need to have java installed in your machine(JDK11 or later version) to use this CLI tool. 
Below is the CLI syntax: ``` diff --git a/build.gradle.kts b/build.gradle.kts index 9d61cc1..8bdd4eb 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -25,21 +25,26 @@ plugins { Utilities } -repositories { mavenCentral() } +repositories { + if (System.getProperty("withMavenLocal").toBoolean()) { + mavenLocal() + } + mavenCentral() +} applyShadowJar() dependencies { - api("com.google.guava:guava:31.1-jre") - api("org.slf4j:log4j-over-slf4j:1.7.36") - api("ch.qos.logback:logback-classic:1.2.11") - api("ch.qos.logback:logback-core:1.2.11") - api("info.picocli:picocli:4.7.0") - api("org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0") - api("org.apache.iceberg:iceberg-dell:1.1.0") - api("org.apache.hadoop:hadoop-common:3.2.4") - api("org.apache.hadoop:hadoop-aws:3.2.4") - api("com.amazonaws:aws-java-sdk:1.7.4") + api(libs.guava) + api(libs.slf4j) + api(libs.picocli) + api(libs.logback.classic) + api(libs.logback.core) + api(libs.iceberg.spark.runtime) + api(libs.iceberg.dell) + api(libs.hadoop.aws) + api(libs.hadoop.common) + api(libs.aws.sdk) testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts index 2767dac..b004931 100644 --- a/buildSrc/build.gradle.kts +++ b/buildSrc/build.gradle.kts @@ -30,10 +30,6 @@ dependencies { implementation("com.diffplug.spotless:spotless-plugin-gradle:${ver.spotlessPlugin.get()}") implementation("gradle.plugin.com.github.johnrengelman:shadow:${ver.shadowPlugin.get()}") val nessieVer = ver.nessieBuildPlugins.get() - implementation("org.projectnessie.buildsupport:ide-integration:$nessieVer") - implementation("org.projectnessie.buildsupport:jacoco:$nessieVer") - implementation("org.projectnessie.buildsupport:publishing:$nessieVer") - implementation("org.projectnessie.buildsupport:reflection-config:$nessieVer") implementation("org.projectnessie.buildsupport:spotless:$nessieVer") } diff --git 
a/buildSrc/src/main/kotlin/Utilities.gradle.kts b/buildSrc/src/main/kotlin/Utilities.gradle.kts index 46834d6..f8d677a 100644 --- a/buildSrc/src/main/kotlin/Utilities.gradle.kts +++ b/buildSrc/src/main/kotlin/Utilities.gradle.kts @@ -24,22 +24,13 @@ import org.gradle.api.tasks.javadoc.Javadoc import org.gradle.api.tasks.testing.Test import org.gradle.external.javadoc.CoreJavadocOptions import org.gradle.kotlin.dsl.configure -import org.gradle.kotlin.dsl.named import org.gradle.kotlin.dsl.repositories import org.gradle.kotlin.dsl.withType -plugins { - id("org.projectnessie.buildsupport.jacoco") - id("org.projectnessie.buildsupport.spotless") -} - -// TODO: enable this when support for publishing jars to nexus repo is added. -// if (project.name != "jacoco") { -// apply() -// } +plugins { id("org.projectnessie.buildsupport.spotless") } repositories { - mavenCentral { content { excludeVersionByRegex("io[.]delta", ".*", ".*-nessie") } } + mavenCentral() if (System.getProperty("withMavenLocal").toBoolean()) { mavenLocal() } @@ -50,33 +41,11 @@ if (project.projectDir.resolve("src/test/java").exists()) { useJUnitPlatform {} maxParallelForks = Runtime.getRuntime().availableProcessors() } - - if (project.hasProperty("alsoTestAgainstJava8")) { - val javaToolchains = extensions.findByType(JavaToolchainService::class.java) - if (javaToolchains != null) { - val testWithJava8 = - tasks.register("testWithJava8") { - group = "verification" - description = "Run unit tests against Java 8" - - dependsOn("test") - - dependsOn - - useJUnitPlatform {} - maxParallelForks = Runtime.getRuntime().availableProcessors() - javaLauncher.set( - javaToolchains.launcherFor { languageVersion.set(JavaLanguageVersion.of(8)) } - ) - } - tasks.named("check") { dependsOn(testWithJava8) } - } - } } tasks.withType().configureEach { manifest { - attributes["Implementation-Title"] = "catalog-migration-tool" + attributes["Implementation-Title"] = "iceberg-catalog-migrator" 
attributes["Implementation-Version"] = project.version } } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index f4d3860..0cdbf12 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,52 +1,35 @@ [versions] -checkstyle = "10.3.4" -errorprone = "2.15.0" googleJavaFormat = "1.15.0" guava = "31.1-jre" -jacoco = "0.8.8" -jmh = "1.36" junit = "5.9.1" nessieBuildPlugins = "0.2.14" picocli = "4.7.0" shadowPlugin = "7.1.2" slf4j = "1.7.36" spotlessPlugin = "6.12.0" - -[bundles] -# Bundles serve two purposes: -# 1. Group dependencies together for renovatebot, which cares about bundles. Those bundles, that -# _solely_ exist to for dependency management via renovatebot, start with `managed-`. -# 2. Bundles used in build files to group dependencies that are commonly used together. -#managed-grpc = ["grpc-protobuf", "grpc-stub", "grpc-netty-shaded"] -#managed-immutables = ["immutables-value-annotations", "immutables-value-processor"] -#managed-jmh = ["jmh-core", "jmh-generator-annprocess"] - -junit-testing = ["assertj-core", "junit-jupiter-api", "junit-jupiter-params"] +logback = "1.2.11" +iceberg = "1.1.0" +hadoop = "3.2.4" +aws = "1.7.4" [libraries] -assertj-core = { module = "org.assertj:assertj-core", version = "3.23.1" } -findbugs-jsr305 = { module = "com.google.code.findbugs:jsr305", version = "3.0.2" } google-java-format = { module = "com.google.googlejavaformat:google-java-format", version.ref = "googleJavaFormat" } -jacoco-maven-plugin = { module = "org.jacoco:jacoco-maven-plugin", version.ref = "jacoco" } -jmh-core = { module = "org.openjdk.jmh:jmh-core", version.ref = "jmh" } -jmh-generator-annprocess = { module = "org.openjdk.jmh:jmh-generator-annprocess", version.ref = "jmh" } junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } 
junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } +guava = { module = "com.google.guava:guava", version.ref = "guava" } +slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } +logback-core = { module = "ch.qos.logback:logback-core", version.ref = "logback" } +logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } +iceberg-spark-runtime = { module = "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12", version.ref = "iceberg" } +iceberg-dell = { module = "org.apache.iceberg:iceberg-dell", version.ref = "iceberg" } +hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" } +hadoop-aws = { module = "org.apache.hadoop:hadoop-aws", version.ref = "hadoop" } +aws-sdk = { module = "com.amazonaws:aws-java-sdk", version.ref = "aws" } [plugins] -errorprone = { id = "net.ltgt.errorprone", version = "3.0.1" } -idea-ext = { id = "org.jetbrains.gradle.plugin.idea-ext", version = "1.1.7" } -jmh = { id = "me.champeau.jmh", version = "0.6.8" } -nessie-build-checkstyle = { id = "org.projectnessie.buildsupport.checkstyle", version.ref = "nessieBuildPlugins" } -nessie-build-errorprone = { id = "org.projectnessie.buildsupport.errorprone", version.ref = "nessieBuildPlugins" } -nessie-build-ide-integration = { id = "org.projectnessie.buildsupport.ide-integration", version.ref = "nessieBuildPlugins" } -nessie-build-jandex = { id = "org.projectnessie.buildsupport.jandex", version.ref = "nessieBuildPlugins" } nessie-build-spotless = { id = "org.projectnessie.buildsupport.spotless", version.ref = "nessieBuildPlugins" } -nexus-publish = { id = "io.github.gradle-nexus.publish-plugin", version = "1.1.0" } shadow = { id = "com.github.johnrengelman.shadow", version.ref = "shadowPlugin" } spotless = { id = "com.diffplug.spotless", version.ref = "spotlessPlugin" } -testrerun = { id = "org.caffinitas.gradle.testrerun", 
version = "0.1" } -testsummary = { id = "org.caffinitas.gradle.testsummary", version = "0.1.1" } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITCLITest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITCLITest.java deleted file mode 100644 index 187248b..0000000 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITCLITest.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.projectnessie.tools.catalog.migration; - -import org.junit.jupiter.api.Test; - -public class ITCLITest { - - @Test - public void testCustom() { - // sample code assuming HADOOP and NESSIE catalogs is up and running. 
- - // RunCLI run = - // RunCLI.run( - // "HADOOP", - // "warehouse=/tmp/warehouseHadoop,type=hadoop", - // "NESSIE", - // "uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie"); - // - // System.out.println(run.getOut()); - // - // Assertions.assertTrue(run.getOut().contains("Summary: ")); - } -} From f6e9bdf30687679c3c42590f4aeff4f1f0f15e48 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Tue, 17 Jan 2023 21:47:14 +0530 Subject: [PATCH 03/31] IT test framefork --- .github/workflows/main.yml | 9 +- build.gradle.kts | 31 ++++ .../migration/CatalogMigrationCLI.java | 10 +- .../catalog/migration/ITHiveAndHadoop.java | 143 ++++++++++++++++++ 4 files changed, 180 insertions(+), 13 deletions(-) create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 00caca5..7f1109a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -24,17 +24,10 @@ jobs: distribution: 'temurin' java-version: ${{ matrix.java-version }} - - name: Spotless Check - uses: gradle/gradle-build-action@v2 - with: - # Spotless must run in a different invocation, because - # it has some weird Gradle configuration/variant issue - arguments: spotlessCheck - - name: Build with Gradle uses: gradle/gradle-build-action@v2 with: - arguments: --rerun-tasks assemble ${{ env.ADDITIONAL_GRADLE_OPTS }} check publishToMavenLocal -x spotlessCheck --scan + arguments: --rerun-tasks assemble ${{ env.ADDITIONAL_GRADLE_OPTS }} check publishToMavenLocal --scan - name: Capture test results uses: actions/upload-artifact@v3 diff --git a/build.gradle.kts b/build.gradle.kts index 8bdd4eb..d94a49f 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -49,6 +49,37 @@ dependencies { testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) + // for integration tests + testImplementation( + 
"org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" + ) + testImplementation("org.apache.hive:hive-metastore:2.3.8") { + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hbase") + exclude("org.apache.logging.log4j") + exclude("co.cask.tephra") + exclude("com.google.code.findbugs", "jsr305") + exclude("org.eclipse.jetty.aggregate", "jetty-all") + exclude("org.eclipse.jetty.orbit", "javax.servlet") + exclude("org.apache.parquet", "parquet-hadoop-bundle") + exclude("com.tdunning", "json") + exclude("javax.transaction", "transaction-api") + exclude("com.zaxxer", "HikariCP") + } + testImplementation("org.apache.hive:hive-exec:2.3.8:core") { + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hive", "hive-llap-tez") + exclude("org.apache.logging.log4j") + exclude("com.google.protobuf", "protobuf-java") + exclude("org.apache.calcite") + exclude("org.apache.calcite.avatica") + exclude("com.google.code.findbugs", "jsr305") + } + testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") } group = "org.projectnessie" diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java index cf552a2..44d969b 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java @@ -75,15 +75,15 @@ public class CatalogMigrationCLI implements Callable { names = {"-I", "--identifiers"}, split = ",", description = - "optional selective list of identifiers to register. If not specified, all the tables will be registered." - + "Use this when there are few identifiers needs to be registered. 
For large number of identifiers, use " - + "`--identifiers-from-file` option.") + "optional selective list of identifiers to register. If not specified, all the tables will be registered. " + + "Use this when there are few identifiers that need to be registered. For a large number of identifiers, " + + "use the `--identifiers-from-file` option.") List identifiers = new ArrayList<>(); @CommandLine.Option( names = {"--identifiers-from-file"}, description = - "optional text file path that contains list of table identifiers (one per line) to register. Should not be " + "optional text file path that contains a list of table identifiers (one per line) to register. Should not be " + "used with `--identifiers` option.") String identifiersFromFile; @@ -112,7 +112,7 @@ public class CatalogMigrationCLI implements Callable { @CommandLine.Option( names = {"--delete-source-tables"}, description = - "Optional configuration to delete the tables entry from source catalog after successfully registering it " + "Optional configuration to delete the table entry from source catalog after successfully registering it " + "to target catalog.") private boolean deleteSourceCatalogTables; diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java new file mode 100644 index 0000000..825d346 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import static org.apache.iceberg.types.Types.NestedField.required; + +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.Schema; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.hive.HiveMetastoreTest; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class ITHiveAndHadoop extends HiveMetastoreTest { + + private static String warehousePath1; + + private static @TempDir File warehouse2; + private static String warehousePath2; + + private static Catalog catalog1; + + private static Catalog catalog2; + + private static final Schema schema = + new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); + + @BeforeAll + protected static void setup() { + try { + startMetastore(); + } catch (Exception e) { + throw new RuntimeException(e); + } + 
warehousePath1 = catalog.getConf().get("hive.metastore.warehouse.dir"); + warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + + // assign to hive catalog from the parent class + catalog1 = catalog; + ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("foo"), Collections.emptyMap()); + ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("bar"), Collections.emptyMap()); + + catalog2 = createHadoopCatalog(warehousePath2, "catalog2"); + ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("foo"), Collections.emptyMap()); + ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("bar"), Collections.emptyMap()); + } + + @AfterAll + protected static void tearDown() { + try { + stopMetastore(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @BeforeEach + protected void beforeEach() { + // two tables in 'foo' namespace + catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl1"), schema); + catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + // two tables in 'bar' namespace + catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl3"), schema); + catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); + } + + @AfterEach + protected void afterEach() { + Arrays.asList(Namespace.of("foo"), Namespace.of("bar")) + .forEach( + namespace -> { + catalog1.listTables(namespace).forEach(catalog1::dropTable); + catalog2.listTables(namespace).forEach(catalog2::dropTable); + }); + } + + private static Catalog createHadoopCatalog(String warehousePath, String name) { + Map properties = new HashMap<>(); + properties.put("warehouse", warehousePath); + properties.put("type", "hadoop"); + return CatalogUtil.loadCatalog( + HadoopCatalog.class.getName(), name, properties, new Configuration()); + } + + @Test + @Order(0) + public void testRegister() throws Exception { + RunCLI run = + RunCLI.run( + "HIVE", + "warehouse=" + warehousePath1 + ",uri=" 
+ catalog.getConf().get("hive.metastore.uris"), + "HADOOP", + "warehouse=" + warehousePath2 + ",type=hadoop"); + + Assertions.assertEquals(0, run.getExitCode()); + Assertions.assertTrue( + run.getOut() + .contains( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog.")); + Assertions.assertTrue(run.getOut().contains("Identified 4 tables for registration.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Summary: \n- Successfully registered 4 tables from HIVE catalog to" + + " HADOOP catalog.")); + Assertions.assertTrue( + run.getOut() + .contains( + "Details: \n- Successfully registered these tables: \n" + + "[bar.tbl3, bar.tbl4, foo.tbl1, foo.tbl2]")); + } +} From ef04c99c86c1f9b20c4544b7558666a2b0290a7e Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Fri, 3 Feb 2023 19:46:23 +0530 Subject: [PATCH 04/31] take2 --- .gitignore | 2 + README.md | 372 ++++++++++++-- build.gradle.kts | 1 + .../catalog/migration/CatalogMigrateUtil.java | 323 +++++++----- .../migration/CatalogMigrationCLI.java | 368 ++++++++++---- .../catalog/migration/CLIErrorsTest.java | 167 +++++-- .../tools/catalog/migration/CLITest.java | 469 +++++++++++------- .../catalog/migration/ITHiveAndHadoop.java | 103 +++- 8 files changed, 1308 insertions(+), 497 deletions(-) diff --git a/.gitignore b/.gitignore index bed663c..aaca80c 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,5 @@ build # test output failed_identifiers.txt +failed_to_delete_at_source.txt +dry_run_identifiers.txt diff --git a/README.md b/README.md index ef7d83f..4956eaf 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,77 @@ -# catalog-migrator +# Objective +Iceberg supports managing the iceberg tables using the following Iceberg Catalogs: +* CUSTOM (By plugging in the jar and providing implementation class name) +* DYNAMODB +* ECS +* GLUE +* HADOOP +* HIVE +* JDBC +* NESSIE (Arctic) +* REST + +Users may want to move away from one catalog and 
use the other catalog with their existing Iceberg tables for the following reasons: +* They were using hadoop catalog and later realized that it is not recommended for production. So, they want to move tables to other production-ready catalogs. +* They just heard about the awesome Arctic catalog (or Nessie) and want to move their existing iceberg tables to Dremio Arctic. +* They had an on-premise Hive catalog, but want to move tables to a cloud-based catalog as part of their cloud migration strategy. + +Before the `1.1.0` Iceberg release, the only way to achieve this was **by copying the data** using `insert into catalog1.db.tableName as select * from catalog2.db.tableName`. +After the Iceberg `1.1.0` release, all Iceberg catalogs support registering a table with the `catalog#registerTable()` API. +However, custom code is needed to migrate all the tables in bulk. +**Here we introduce a CLI tool to migrate Iceberg tables in bulk from one Iceberg Catalog to another without a data copy.** + +# Iceberg-catalog-migrator A CLI tool to bulk migrate Iceberg tables from one catalog to another without a data copy. Need to have java installed in your machine(JDK11 or later version) to use this CLI tool. Below is the CLI syntax: ``` -$ java -jar iceberg-catalog-migrator-0.1.0-SNAPSHOT.jar --help -Usage: register [-hV] [--delete-source-tables] [--identifiers-from-file=] - [--source-custom-catalog-impl=] [-T=] - [--target-custom-catalog-impl=] [-I=[,...]]... - [--source-catalog-hadoop-conf=[,...]]... [--target-catalog-hadoop-conf=[, - ...]]... [[,...]] [[, - ...]] +$ java -jar iceberg-catalog-migrator-0.1.0-SNAPSHOT.jar -h +Usage: register [-hV] [--delete-source-tables] [--dry-run] [--identifiers-from-file=] [--identifiers-regex=] + --source-catalog-type= [--source-custom-catalog-impl=] + --target-catalog-type= [--target-custom-catalog-impl=] [--identifiers=[, + ...]]... [--source-catalog-hadoop-conf=[,...]]... + --source-catalog-properties=[,...]
[--source-catalog-properties=[, + ...]]... [--target-catalog-hadoop-conf=[,...]]... + --target-catalog-properties=[,...] [--target-catalog-properties=[,...]]... Bulk register the iceberg tables from source catalog to target catalog without data copy. - source catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] - [[,...]] - source catalog properties - target catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] - [[,...]] - target catalog properties + --source-catalog-type= + source catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] + --source-catalog-properties=[,...] + source catalog properties (like uri, warehouse, etc) --source-catalog-hadoop-conf=[,...] - optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg - FileIO. - --target-catalog-hadoop-conf=[,...] - optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg - FileIO. - -I, --identifiers=[,...] - optional selective list of identifiers to register. If not specified, all the tables will be registered.Use this when - there are few identifiers needs to be registered. For large number of identifiers, use `--identifiers-from-file` option. - --identifiers-from-file= - optional text file path that contains list of table identifiers (one per line) to register. Should not be used with - `--identifiers` option. - -T, --thread-pool-size= - optional size of the thread pool used for register tables. Tables are migrated sequentially if not specified. + optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. --source-custom-catalog-impl= - optional fully qualified class name of the custom catalog implementation of the source catalog. Required when the catalog - type is CUSTOM. 
+ optional fully qualified class name of the custom catalog implementation of the source catalog. Required when the catalog type is + CUSTOM. + --target-catalog-type= + target catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] + --target-catalog-properties=[,...] + target catalog properties (like uri, warehouse, etc) + --target-catalog-hadoop-conf=[,...] + optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. --target-custom-catalog-impl= - optional fully qualified class name of the custom catalog implementation of the target catalog. Required when the catalog - type is CUSTOM. + optional fully qualified class name of the custom catalog implementation of the target catalog. Required when the catalog type is + CUSTOM. + --identifiers=[,...] + optional selective list of identifiers to register. If not specified, all the tables will be registered. Use this when there are + few identifiers that need to be registered. For a large number of identifiers, use the `--identifiers-from-file` or + `--identifiers-regex` option. + --identifiers-from-file= + optional text file path that contains a list of table identifiers (one per line) to register. Should not be used with + `--identifiers` or `--identifiers-regex` option. + --identifiers-regex= + optional regular expression pattern used to register only the tables whose identifiers match this pattern. Should not be used with + `--identifiers` or '--identifiers-from-file' option. + --dry-run optional configuration to simulate the registration without actually registering. Can learn about a list of the tables that will be + registered by running this. --delete-source-tables - Optional configuration to delete the tables entry from source catalog after successfully registering it to target catalog. - -h, --help Show this help message and exit. - -V, --version Print version information and exit. 
+ optional configuration to delete the table entry from source catalog after successfully registering it to target catalog. + -h, --help Show this help message and exit. + -V, --version Print version information and exit. ``` > :warning: By default this tool just registers the table. @@ -56,7 +83,18 @@ or avoid operating tables from the source catalog after registering if '--delete > :warning: **It is recommended to use this CLI tool when there is no in-progress commits for the tables in the source catalog.** In-progress commits may not make it into the target catalog if used. -### Example command for bulk migrating tables between Hadoop catalog and Arctic catalog +# Sample Inputs +## Bulk migrating all the tables from Hadoop catalog to Nessie catalog (main branch) +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--delete-source-tables +``` + +## Register all the tables from Hadoop catalog to Arctic catalog (main branch) ```shell export PAT=xxxxxxx @@ -64,34 +102,264 @@ export SECRETKEY=xxxxxxx export ACCESSKEY=xxxxxxx ``` -##### Register all the tables from Hadoop catalog to Arctic catalog (main branch) - ```shell java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ -HADOOP \ -warehouse=/tmp/warehouse,type=hadoop \ -NESSIE \ -uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties 
uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ --target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY ``` -##### Migrate selected tables (t1,t2 in namespace foo) from Arctic catalog (main branch) to Hadoop catalog. +## Migrate selected tables (t1,t2 in namespace foo) from Arctic catalog (main branch) to Hadoop catalog. + +```shell +export PAT=xxxxxxx +export SECRETKEY=xxxxxxx +export ACCESSKEY=xxxxxxx +``` + ```shell java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ -NESSIE \ -uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ -HADOOP \ -warehouse=/tmp/warehouse,type=hadoop --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY \ --I foo.t1,foo.t2 \ +--source-catalog-type NESSIE \ +--source-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ +--target-catalog-type HADOOP \ +--target-catalog-properties warehouse=/tmp/warehouse,type=hadoop --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY \ +--identifiers foo.t1,foo.t2 \ --delete-source-tables ``` -### Example command for bulk migrating tables from Hadoop catalog to Nessie catalog (main branch) +# Scenarios +## A. Users need to try out a new catalog +They can use a new catalog with a fresh table to explore the capabilities of the new catalog. +No need for a catalog migration tool. + +## B. Users need to move away from one catalog (example: Hadoop) to another (example: Nessie) with all the tables. + +### B.1) uses the `--dry-run` option to see which tables will be migrated.
+ +Sample input: +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--dry-run +``` + +All the inputs will be validated and a list of identified table identifiers for migration will be printed on the console +along with the total count. Output will be written to _dry_run.txt_ file, +which can be used for actual migration using the `--identifiers-from-file` option without listing tables again from the catalog. + +### B.2) executes the migration of all 1000 tables and all the tables are successfully migrated. + +Sample input: ```shell java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ -HADOOP \ -warehouse=/tmp/warehouse,type=hadoop \ -NESSIE \ -uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ --delete-source-tables ``` + +Once the input validations are done, users will be prompted with this message. +They can abort or continue the operation. + +``` +Configured source catalog: HADOOP + +Configured target catalog: NESSIE + +[WARNING] +a) Executing catalog migration when the source catalog has some in-progress commits +can lead to a data loss as the in-progress commit will not be considered for migration. +So, while using this tool please make sure there are no in-progress commits for the source catalog + +b) After the migration, successfully migrated tables will be deleted from the source catalog +and can only be accessed from the target catalog. +Have you read the above warnings and are you sure you want to continue?
(yes/no): +``` + +Once the user chooses to continue, other information will be printed on the console. + +``` +Continuing... + +User has not specified the table identifiers. Selecting all the tables from all the namespaces from the source catalog. +Collecting all the namespaces from source catalog... +Collecting all the tables from all the namespaces of source catalog... + +Identified 1000 tables for migration. +Started migration ... + +Attempted Migration for 100 tables out of 1000 +Attempted Migration for 200 tables out of 1000 +. +. +. +Attempted Migration for 900 tables out of 1000 +Attempted Migration for 1000 tables out of 1000 + +Finished migration ... + +Summary: +- Successfully migrated 1000 tables from HADOOP catalog to NESSIE catalog. + +Details: +- Successfully migrated these tables: + [foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3, …, …,bar.tbl-1000] +``` + +Note: a log file will also be generated which prints “successfully migrated table X” on every table migration. It also captures table level failures if there are any. + + +### B.3) executes the migration and, out of 1000 tables, 10 tables have failed to migrate because the target catalog already had a table with the same name and namespace (maybe with a different schema). The remaining 990 tables were successfully migrated. + +Sample input: +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--delete-source-tables +``` + +The console output will be the same as in B.2) up to the summary because, even in case of failure, +migration will be attempted for all the identified tables. + +``` +Summary: +- Successfully migrated 990 tables from HADOOP catalog to NESSIE catalog. +- Failed to migrate 10 tables from HADOOP catalog to NESSIE catalog. Please check the `catalog_migration.log` file for the failure reason.
+ Failed Identifiers are written to `failed_identifiers.txt`. Retry with that file using the `--identifiers-from-file` option if the failure is because of network/connection timeouts. + +Details: +- Successfully migrated these tables: + [foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3, …, …,bar.tbl-1000] +- Failed to migrate these tables: + [bar.tbl-201, foo.tbl-202, …, …,bar.tbl-210] +``` + +Note: +A log file will also be generated which prints “successfully migrated table X” on every table migration. It also captures table level failures if there are any. +So, from the details or from the _failed_identifiers.txt_ file, users can get the failed table names and search for them in the log. +The log will contain 10 stack traces with the `TableAlreadyExists` exception, one for each failed table, which gives the user an idea of why the migration failed. +Users can rename the tables in the source catalog and migrate only these 10 tables by using any one of the identifier options. + + +### B.4) executes the migration and, out of 1000 tables, 900 tables have failed to migrate because the target/source catalog connection went down. Only 100 tables were successfully migrated. + +Sample input: +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--delete-source-tables +``` + +The console output will be the same as in B.2) up to the summary because, even in case of failure, +migration will be attempted for all the identified tables. + +``` +Summary: +- Successfully migrated 100 tables from HADOOP catalog to NESSIE catalog. +- Failed to migrate 900 tables from HADOOP catalog to NESSIE catalog. Please check the `catalog_migration.log` file for the failure reason. + Failed Identifiers are written to `failed_identifiers.txt`.
Retry with that file using the `--identifiers-from-file` option if the failure is because of network/connection timeouts. + +Details: +- Successfully migrated these tables: + [foo.tbl-1, foo.tbl-2,…,bar.tbl-100] +- Failed to migrate these tables: + [bar.tbl-201, foo.tbl-202, …, …,bar.tbl-1000] +``` + +Note: +A log file will also be generated which prints “successfully migrated table X” on every table migration. It also captures table level failures if there are any. +So, from the details or from the _failed_identifiers.txt_ file, users can get the failed table names and search for them in the log. +The log will contain 900 stack traces with the `ConnectionTimeOut` exception, one for each failed table, which gives the user an idea of why the migration failed. +As these were timeout exceptions, users can retry migration of only these 900 tables using the `--identifiers-from-file` option with _failed_identifiers.txt_. + +### B.5) executes the migration of 1000 tables, where all the 1000 tables were migrated successfully but deletion of 200 tables from the source catalog has failed due to network issues. + +Sample input: +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--delete-source-tables +``` + +The console output will be the same as in B.2) up to the summary because, even in case of failure, +migration will be attempted for all the identified tables. +The identifiers of the tables that failed to delete are stored in _failed_to_delete_at_source.txt_, and the user has to delete them manually or stop using them from the source catalog. (The console will print this warning.) + +``` +Summary: +- Successfully migrated 1000 tables from HADOOP catalog to NESSIE catalog. +- 200 tables were failed to delete from the source catalog due the reason captured in the logs.
These table names are written into the `failed_to_delete_at_source.txt` file. Do not operate these tables from the source catalog. + +Details: +- Successfully migrated these tables: + [foo.tbl-1, foo.tbl-2,…,bar.tbl-1000] +- [WARNING] Failed to delete these tables from source catalog: + [bar.tbl-201, foo.tbl-202, …, …,bar.tbl-400] +``` + +Users should manually drop the table entry from the source catalog in this case or stop using these tables from the source catalog. + + +### B.6) executes the migration of 1000 tables, but manually aborts the migration by killing the process. + +Users have to go through the log to figure out how many tables have been migrated so far. +Users can also call `listTables()` on the target catalog to see how many tables were migrated. +There is a chance that tables that were migrated to the target catalog were not cleaned up in the source catalog because of the abort. +Users should not operate on them from the source catalog and can manually drop them from the source catalog. +Users can also try bulk migration again, which will attempt to migrate all the tables in the source catalog. + +### B.7) Users need to move away from one catalog to another with selective tables (maybe they want to move only the production tables, test tables, etc.) + +Users can provide a selective list of identifiers to migrate using any of these 3 options: +`--identifiers`, `--identifiers-from-file`, `--identifiers-regex`. These options can also be used along with the `--dry-run` option.
+ +Sample input: (only migrate tables that start with "foo.") +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--delete-source-tables \ +--identifiers-regex '^foo\..*' +``` + +Sample input: (migrate all tables in the file ids.txt where each entry is delimited by a newline) +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--delete-source-tables \ +--identifiers-from-file ids.txt +``` + +Sample input: (migrate only two tables foo.tbl1, foo.tbl2) +```shell +java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +--source-catalog-type HADOOP \ +--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--delete-source-tables \ +--identifiers foo.tbl1,foo.tbl2 +``` + +The console will clearly print that only these identifiers are used for table migration. +The rest of the behavior will be the same as mentioned in the previous sections.
diff --git a/build.gradle.kts b/build.gradle.kts index d94a49f..82a3874 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -49,6 +49,7 @@ dependencies { testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) + testImplementation("org.assertj:assertj-core:3.24.2") // for integration tests testImplementation( "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java index fe65bc8..3a7209e 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java @@ -19,24 +19,21 @@ import com.google.common.collect.ImmutableList; import java.io.PrintWriter; import java.util.ArrayList; -import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Objects; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.ExecutorService; -import org.apache.commons.lang3.tuple.ImmutablePair; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Predicate; +import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.apache.iceberg.BaseTable; import org.apache.iceberg.TableOperations; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.exceptions.AlreadyExistsException; -import org.apache.iceberg.exceptions.NoSuchNamespaceException; -import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.HadoopCatalog; -import org.apache.iceberg.util.Tasks; -import org.apache.iceberg.util.ThreadPools; +import 
org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,21 +56,27 @@ private CatalogMigrateUtil() {} * migrated. If not specified, all the tables would be migrated * @param sourceCatalog Source {@link Catalog} from which the tables are chosen * @param targetCatalog Target {@link Catalog} to which the tables need to be migrated - * @param maxThreadPoolSize Size of the thread pool used for migrate tables (If set to 0, no - * thread pool is used) + * @param identifierRegex regular expression pattern used to migrate only the tables whose + * identifiers match this pattern. Can be provided instead of `tableIdentifiers`. + * @param isDryRun to execute as dry run. * @param printWriter to print regular updates on the console. - * @return Collection of successfully migrated and collection of failed to migrate table - * identifiers. + * @return List of successfully migrated and list of failed to migrate table identifiers. */ - public static ImmutablePair, Collection> - migrateTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - int maxThreadPoolSize, - PrintWriter printWriter) { + public static CatalogMigrationResult migrateTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + String identifierRegex, + boolean isDryRun, + PrintWriter printWriter) { return migrateTables( - tableIdentifiers, sourceCatalog, targetCatalog, maxThreadPoolSize, true, printWriter); + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifierRegex, + isDryRun, + true, + printWriter); } /** @@ -89,141 +92,211 @@ private CatalogMigrateUtil() {} * registered. 
If not specified, all the tables would be registered * @param sourceCatalog Source {@link Catalog} from which the tables are chosen * @param targetCatalog Target {@link Catalog} to which the tables need to be registered - * @param maxThreadPoolSize Size of the thread pool used for registering tables (If set to 0, no - * thread pool is used) + * @param identifierRegex regular expression pattern used to migrate only the tables whose + * identifiers match this pattern. Can be provided instead of `tableIdentifiers`. + * @param isDryRun to execute as dry run. * @param printWriter to print regular updates on the console. - * @return Collection of successfully migrated and collection of failed to migrate table - * identifiers. + * @return List of successfully migrated and list of failed to migrate table identifiers. */ - public static ImmutablePair, Collection> - registerTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - int maxThreadPoolSize, - PrintWriter printWriter) { + public static CatalogMigrationResult registerTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + String identifierRegex, + boolean isDryRun, + PrintWriter printWriter) { return migrateTables( - tableIdentifiers, sourceCatalog, targetCatalog, maxThreadPoolSize, false, printWriter); + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifierRegex, + isDryRun, + false, + printWriter); } - private static ImmutablePair, Collection> - migrateTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - int maxThreadPoolSize, - boolean deleteEntriesFromSourceCatalog, - PrintWriter printWriter) { - validate(sourceCatalog, targetCatalog, maxThreadPoolSize); + private static CatalogMigrationResult migrateTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + String identifierRegex, + boolean isDryRun, + boolean deleteEntriesFromSourceCatalog, + PrintWriter printWriter) { + 
validate(sourceCatalog, targetCatalog); String operation = deleteEntriesFromSourceCatalog ? "migration" : "registration"; List identifiers; if (tableIdentifiers == null || tableIdentifiers.isEmpty()) { - printWriter.println( - "\nUser has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog."); - - printWriter.println("Collecting all the namespaces from source catalog..."); - // fetch all the table identifiers from all the namespaces. - List namespaces = - (sourceCatalog instanceof SupportsNamespaces) - ? ((SupportsNamespaces) sourceCatalog).listNamespaces() - : ImmutableList.of(Namespace.empty()); - printWriter.println("Collecting all the tables from all the namespaces of source catalog..."); - - identifiers = getTableIdentifiers(sourceCatalog, maxThreadPoolSize, namespaces); + identifiers = getMatchingTableIdentifiers(sourceCatalog, identifierRegex, printWriter); } else { identifiers = tableIdentifiers; } - printWriter.printf("\nIdentified %d tables for %s.", identifiers.size(), operation); + printWriter.println( + String.format("\nIdentified %d tables for %s.", identifiers.size(), operation)); - printWriter.printf("\nStarted %s ...", operation); - - ExecutorService executorService = null; - if (maxThreadPoolSize > 0) { - executorService = ThreadPools.newWorkerPool("migrate-tables", maxThreadPoolSize); + if (isDryRun) { + return new CatalogMigrationResult( + identifiers, Collections.emptyList(), Collections.emptyList()); } + printWriter.println(String.format("\nStarted %s ...", operation)); + List registeredTableIdentifiers = new ArrayList<>(); + List failedToRegisterTableIdentifiers = new ArrayList<>(); + List failedToDeleteTableIdentifiers = new ArrayList<>(); + AtomicInteger counter = new AtomicInteger(); + identifiers.forEach( + tableIdentifier -> { + registerTable( + sourceCatalog, + targetCatalog, + registeredTableIdentifiers, + failedToRegisterTableIdentifiers, + tableIdentifier); + + // 
HadoopCatalog dropTable will delete the table files completely even when purge is + // false. + // So, skip dropTable for HadoopCatalog. + boolean deleteTableFromSourceCatalog = + deleteEntriesFromSourceCatalog && !(sourceCatalog instanceof HadoopCatalog); + + try { + if (deleteTableFromSourceCatalog) { + boolean failedToDelete = sourceCatalog.dropTable(tableIdentifier, false); + if (failedToDelete) { + failedToDeleteTableIdentifiers.add(tableIdentifier); + } + } + } catch (Exception exception) { + failedToDeleteTableIdentifiers.add(tableIdentifier); + LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); + } + + int count = counter.incrementAndGet(); + if (count % 100 == 0) { + printWriter.println( + String.format( + "\nAttempted %s for %d tables out of %d tables.", + operation, count, identifiers.size())); + } + }); + printWriter.println(String.format("\nFinished %s ...", operation)); + return new CatalogMigrationResult( + registeredTableIdentifiers, + failedToRegisterTableIdentifiers, + failedToDeleteTableIdentifiers); + } + + private static void registerTable( + Catalog sourceCatalog, + Catalog targetCatalog, + List registeredTableIdentifiers, + List failedToMigrateTableIdentifiers, + TableIdentifier tableIdentifier) { try { - Collection migratedTableIdentifiers = new ConcurrentLinkedQueue<>(); - Collection failedToMigrateTableIdentifiers = new ConcurrentLinkedQueue<>(); - Tasks.foreach(identifiers.stream().filter(Objects::nonNull)) - .retry(3) - .stopRetryOn( - NoSuchTableException.class, - NoSuchNamespaceException.class, - AlreadyExistsException.class) - .suppressFailureWhenFinished() - .executeWith(executorService) - .onFailure( - (tableIdentifier, exc) -> { - failedToMigrateTableIdentifiers.add(tableIdentifier); - LOG.warn("Unable to migrate table {}", tableIdentifier, exc); - }) - .run( - tableIdentifier -> { - migrate( - tableIdentifier, sourceCatalog, targetCatalog, deleteEntriesFromSourceCatalog); - 
migratedTableIdentifiers.add(tableIdentifier); - LOG.info("Successfully migrated the table {}", tableIdentifier); - }); - - printWriter.printf("\nFinished %s ...", operation); - return ImmutablePair.of(migratedTableIdentifiers, failedToMigrateTableIdentifiers); - } finally { - if (executorService != null) { - executorService.shutdown(); - } + // register the table to the target catalog + TableOperations ops = ((BaseTable) sourceCatalog.loadTable(tableIdentifier)).operations(); + targetCatalog.registerTable(tableIdentifier, ops.current().metadataFileLocation()); + + registeredTableIdentifiers.add(tableIdentifier); + LOG.info("Successfully migrated the table {}", tableIdentifier); + } catch (Exception ex) { + failedToMigrateTableIdentifiers.add(tableIdentifier); + LOG.warn("Unable to migrate table {}", tableIdentifier, ex); } } - private static List getTableIdentifiers( - Catalog sourceCatalog, int maxThreadPoolSize, List namespaces) { - ExecutorService executorService = null; - if (maxThreadPoolSize > 0) { - executorService = ThreadPools.newWorkerPool("list-tables", maxThreadPoolSize); + @NotNull + private static List getMatchingTableIdentifiers( + Catalog sourceCatalog, String identifierRegex, PrintWriter printWriter) { + if (identifierRegex == null) { + printWriter.println( + "\nUser has not specified the table identifiers." + + " Selecting all the tables from all the namespaces from the source catalog."); + } else { + printWriter.println( + "\nUser has not specified the table identifiers." 
+ + " Selecting all the tables from all the namespaces from the source catalog " + + "which matches the regex pattern:" + + identifierRegex); } - try { - Collection allIdentifiers = new ConcurrentLinkedQueue<>(); - Tasks.foreach(namespaces.stream().filter(Objects::nonNull)) - .retry(1) - .suppressFailureWhenFinished() - .executeWith(executorService) - .run(namespace -> allIdentifiers.addAll(sourceCatalog.listTables(namespace))); - return new ArrayList<>(allIdentifiers); - } finally { - if (executorService != null) { - executorService.shutdown(); - } + printWriter.println("Collecting all the namespaces from source catalog..."); + // fetch all the table identifiers from all the namespaces. + List namespaces = + (sourceCatalog instanceof SupportsNamespaces) + ? ((SupportsNamespaces) sourceCatalog).listNamespaces() + : ImmutableList.of(Namespace.empty()); + if (identifierRegex == null) { + printWriter.println("Collecting all the tables from all the namespaces of source catalog..."); + } else { + printWriter.println( + "Collecting all the tables from all the namespaces of source catalog" + + " which matches the regex pattern:" + + identifierRegex); + } + + Predicate matchedIdentifiersPredicate; + if (identifierRegex != null) { + Pattern pattern = Pattern.compile(identifierRegex); + matchedIdentifiersPredicate = + tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); + } else { + matchedIdentifiersPredicate = tableIdentifier -> true; } + return getMatchingTableIdentifiers(sourceCatalog, namespaces, matchedIdentifiersPredicate); } - private static void validate( - Catalog sourceCatalog, Catalog targetCatalog, int maxThreadPoolSize) { - Preconditions.checkArgument( - maxThreadPoolSize >= 0, - "maxThreadPoolSize should have value >= 0, value: " + maxThreadPoolSize); + private static List getMatchingTableIdentifiers( + Catalog sourceCatalog, + List namespaces, + Predicate matchedIdentifiersPredicate) { + List allIdentifiers = new ArrayList<>(); + 
namespaces.stream() + .filter(Objects::nonNull) + .forEach( + namespace -> { + List matchedIdentifiers = + sourceCatalog.listTables(namespace).stream() + .filter(matchedIdentifiersPredicate) + .collect(Collectors.toList()); + allIdentifiers.addAll(matchedIdentifiers); + }); + return allIdentifiers; + } + + private static void validate(Catalog sourceCatalog, Catalog targetCatalog) { Preconditions.checkArgument(sourceCatalog != null, "Invalid source catalog: null"); Preconditions.checkArgument(targetCatalog != null, "Invalid target catalog: null"); Preconditions.checkArgument( !targetCatalog.equals(sourceCatalog), "target catalog is same as source catalog"); } - private static void migrate( - TableIdentifier tableIdentifier, - Catalog sourceCatalog, - Catalog targetCatalog, - boolean deleteEntriesFromSourceCatalog) { - // register the table to the target catalog - TableOperations ops = ((BaseTable) sourceCatalog.loadTable(tableIdentifier)).operations(); - targetCatalog.registerTable(tableIdentifier, ops.current().metadataFileLocation()); - - if (deleteEntriesFromSourceCatalog && !(sourceCatalog instanceof HadoopCatalog)) { - // HadoopCatalog dropTable will delete the table files completely even when purge is false. - // So, skip dropTable for HadoopCatalog. 
- sourceCatalog.dropTable(tableIdentifier, false); + public static class CatalogMigrationResult { + private final List registeredTableIdentifiers; + private final List failedToRegisterTableIdentifiers; + private final List failedToDeleteTableIdentifiers; + + CatalogMigrationResult( + List registeredTableIdentifiers, + List failedToRegisterTableIdentifiers, + List failedToDeleteTableIdentifiers) { + this.registeredTableIdentifiers = registeredTableIdentifiers; + this.failedToRegisterTableIdentifiers = failedToRegisterTableIdentifiers; + this.failedToDeleteTableIdentifiers = failedToDeleteTableIdentifiers; + } + + public List registeredTableIdentifiers() { + return registeredTableIdentifiers; + } + + public List failedToRegisterTableIdentifiers() { + return failedToRegisterTableIdentifiers; + } + + public List failedToDeleteTableIdentifiers() { + return failedToDeleteTableIdentifiers; } } } diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java index 44d969b..34ab5c8 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java @@ -20,14 +20,13 @@ import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; -import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Scanner; import java.util.concurrent.Callable; import java.util.stream.Collectors; -import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; @@ -48,13 +47,28 @@ versionProvider = CLIVersionProvider.class, // As both source and target catalog has similar configurations, // documentation is easy to read if the target and source 
property is one after another instead - // of sorted. + // of sorted order. sortOptions = false, description = "\nBulk register the iceberg tables from source catalog to target catalog without data copy.\n") public class CatalogMigrationCLI implements Callable { @CommandLine.Spec CommandLine.Model.CommandSpec commandSpec; + @CommandLine.Option( + names = "--source-catalog-type", + required = true, + description = + "source catalog type. " + + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") + private CatalogType sourceCatalogType; + + @CommandLine.Option( + names = "--source-catalog-properties", + required = true, + split = ",", + description = "source catalog properties (like uri, warehouse, etc)") + private Map sourceCatalogProperties; + @CommandLine.Option( names = "--source-catalog-hadoop-conf", split = ",", @@ -63,6 +77,28 @@ public class CatalogMigrationCLI implements Callable { + "using an Iceberg FileIO.") Map sourceHadoopConf = new HashMap<>(); + @CommandLine.Option( + names = {"--source-custom-catalog-impl"}, + description = + "optional fully qualified class name of the custom catalog implementation of the source catalog. Required " + + "when the catalog type is CUSTOM.") + String sourceCustomCatalogImpl; + + @CommandLine.Option( + names = "--target-catalog-type", + required = true, + description = + "target catalog type. 
" + + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") + private CatalogType targetCatalogType; + + @CommandLine.Option( + names = "--target-catalog-properties", + required = true, + split = ",", + description = "target catalog properties (like uri, warehouse, etc)") + private Map targetCatalogProperties; + @CommandLine.Option( names = "--target-catalog-hadoop-conf", split = ",", @@ -72,69 +108,52 @@ public class CatalogMigrationCLI implements Callable { Map targetHadoopConf = new HashMap<>(); @CommandLine.Option( - names = {"-I", "--identifiers"}, + names = {"--target-custom-catalog-impl"}, + description = + "optional fully qualified class name of the custom catalog implementation of the target catalog. Required " + + "when the catalog type is CUSTOM.") + String targetCustomCatalogImpl; + + @CommandLine.Option( + names = {"--identifiers"}, split = ",", description = "optional selective list of identifiers to register. If not specified, all the tables will be registered. " + "Use this when there are few identifiers that need to be registered. For a large number of identifiers, " - + "use the `--identifiers-from-file` option.") + + "use the `--identifiers-from-file` or `--identifiers-regex` option.") List identifiers = new ArrayList<>(); @CommandLine.Option( names = {"--identifiers-from-file"}, description = "optional text file path that contains a list of table identifiers (one per line) to register. Should not be " - + "used with `--identifiers` option.") + + "used with `--identifiers` or `--identifiers-regex` option.") String identifiersFromFile; @CommandLine.Option( - names = {"-T", "--thread-pool-size"}, - defaultValue = "0", + names = {"--identifiers-regex"}, description = - "optional size of the thread pool used for register tables. 
Tables are migrated sequentially if " - + "not specified.") - int maxThreadPoolSize; + "optional regular expression pattern used to register only the tables whose identifiers match this pattern. " + + "Should not be used with `--identifiers` or '--identifiers-from-file' option.") + String identifiersRegEx; @CommandLine.Option( - names = {"--source-custom-catalog-impl"}, + names = {"--dry-run"}, description = - "optional fully qualified class name of the custom catalog implementation of the source catalog. Required " - + "when the catalog type is CUSTOM.") - String sourceCustomCatalogImpl; - - @CommandLine.Option( - names = {"--target-custom-catalog-impl"}, - description = - "optional fully qualified class name of the custom catalog implementation of the target catalog. Required " - + "when the catalog type is CUSTOM.") - String targetCustomCatalogImpl; + "optional configuration to simulate the registration without actually registering. Can learn about a list " + + "of the tables that will be registered by running this.") + private boolean isDryRun; @CommandLine.Option( names = {"--delete-source-tables"}, description = - "Optional configuration to delete the table entry from source catalog after successfully registering it " + "optional configuration to delete the table entry from source catalog after successfully registering it " + "to target catalog.") private boolean deleteSourceCatalogTables; - @CommandLine.Parameters( - index = "0", - description = - "source catalog type. " - + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") - private CatalogType sourceCatalogType; - - @CommandLine.Parameters(index = "1", split = ",", description = "source catalog properties") - private Map sourceCatalogProperties; - - @CommandLine.Parameters( - index = "2", - description = - "target catalog type. 
" - + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") - private CatalogType targetCatalogType; - - @CommandLine.Parameters(index = "3", split = ",", description = "target catalog properties") - private Map targetCatalogProperties; + static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; + static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; + static final String DRY_RUN_FILE = "dry_run_identifiers.txt"; public static void main(String... args) { CommandLine commandLine = new CommandLine(new CatalogMigrationCLI()); @@ -145,15 +164,7 @@ public static void main(String... args) { @Override public Integer call() { - if (identifiersFromFile != null && !identifiers.isEmpty()) { - throw new IllegalArgumentException( - "Both `--identifiers` and `--identifiers-from-file` options are configured. Please use only one of them."); - } else if (identifiersFromFile != null) { - if (!Files.exists(Paths.get(identifiersFromFile))) { - throw new IllegalArgumentException( - "File specified in `--identifiers-from-file` option does not exist."); - } - } + validateIdentifierOptions(); PrintWriter printWriter = commandSpec.commandLine().getOut(); Configuration sourceCatalogConf = new Configuration(); @@ -166,7 +177,7 @@ public Integer call() { "sourceCatalog", sourceCatalogProperties, sourceCatalogConf); - printWriter.printf("\nConfigured source catalog: %s\n", sourceCatalogType.name()); + printWriter.println(String.format("\nConfigured source catalog: %s", sourceCatalogType.name())); Configuration targetCatalogConf = new Configuration(); if (targetHadoopConf != null && !targetHadoopConf.isEmpty()) { @@ -178,12 +189,14 @@ public Integer call() { "targetCatalog", targetCatalogProperties, targetCatalogConf); - printWriter.printf("\nConfigured target catalog: %s\n", targetCatalogType.name()); + printWriter.println(String.format("\nConfigured target catalog: %s", targetCatalogType.name())); List 
tableIdentifiers = null; if (identifiersFromFile != null) { try { - printWriter.printf("\nCollecting identifiers from the file %s...\n", identifiersFromFile); + printWriter.println( + String.format("Collecting identifiers from the file %s...", identifiersFromFile)); + printWriter.println(); tableIdentifiers = Files.readAllLines(Paths.get(identifiersFromFile)).stream() .map(TableIdentifier::parse) @@ -196,68 +209,235 @@ public Integer call() { identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toList()); } - ImmutablePair, Collection> result; + CatalogMigrateUtil.CatalogMigrationResult result; if (deleteSourceCatalogTables) { + if (!isDryRun && !proceedForMigration(printWriter)) { + return 0; + } + result = CatalogMigrateUtil.migrateTables( - tableIdentifiers, sourceCatalog, targetCatalog, maxThreadPoolSize, printWriter); + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifiersRegEx, + isDryRun, + printWriter); if (sourceCatalogType == CatalogType.HADOOP) { printWriter.println( - "\n[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " + "[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " + "catalog. \nAvoid operating the migrated tables from the source catalog after migration. 
" + "Use the tables from target catalog."); } } else { + if (!isDryRun && !proceedForRegistration(printWriter)) { + return 0; + } + result = CatalogMigrateUtil.registerTables( - tableIdentifiers, sourceCatalog, targetCatalog, maxThreadPoolSize, printWriter); + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifiersRegEx, + isDryRun, + printWriter); + } + + if (isDryRun) { + printWriter.println("Dry run is completed."); + printDryRunResults(printWriter, result); + return 0; } + printSummary(printWriter, result); + + printDetails(printWriter, result); + + return 0; + } + + private void printSummary( + PrintWriter printWriter, CatalogMigrateUtil.CatalogMigrationResult result) { printWriter.println("\nSummary: "); - if (!result.left.isEmpty()) { - printWriter.printf( - "- Successfully %s %d tables from %s catalog to %s catalog. \n", - deleteSourceCatalogTables ? "migrated" : "registered", - result.left.size(), - sourceCatalogType.name(), - targetCatalogType.name()); + if (!result.registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Successfully %s %d tables from %s catalog to %s catalog.", + deleteSourceCatalogTables ? "migrated" : "registered", + result.registeredTableIdentifiers().size(), + sourceCatalogType.name(), + targetCatalogType.name())); } - if (!result.right.isEmpty()) { - List failedIdentifiers = - result.right.stream().map(TableIdentifier::toString).collect(Collectors.toList()); - try { - Files.write(Paths.get("failed_identifiers.txt"), failedIdentifiers); - } catch (IOException e) { - throw new RuntimeException("Failed to write the file:", e); - } - printWriter.printf( - "- Failed to %s %d tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. " - + "\n Failed Identifiers are written to `failed_identifiers.txt`. 
" - + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts.\n", - deleteSourceCatalogTables ? "migrate" : "register", - result.right.size(), - sourceCatalogType.name(), - targetCatalogType.name()); + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + writeToFile(FAILED_IDENTIFIERS_FILE, result.failedToRegisterTableIdentifiers()); + printWriter.println( + String.format( + "- Failed to %s %d tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "\nFailed identifiers are written into `%s`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + deleteSourceCatalogTables ? "migrate" : "register", + result.failedToRegisterTableIdentifiers().size(), + sourceCatalogType.name(), + targetCatalogType.name(), + FAILED_IDENTIFIERS_FILE)); } + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + writeToFile(FAILED_TO_DELETE_AT_SOURCE_FILE, result.failedToDeleteTableIdentifiers()); + printWriter.println( + String.format( + "- Failed to delete %d tables from %s catalog. " + + "Please check the `catalog_migration.log` file for the reason. " + + "\nFailed to delete identifiers are written into `%s`. ", + result.failedToDeleteTableIdentifiers().size(), + sourceCatalogType.name(), + FAILED_TO_DELETE_AT_SOURCE_FILE)); + } + } + + private void printDetails( + PrintWriter printWriter, CatalogMigrateUtil.CatalogMigrationResult result) { printWriter.println("\nDetails: "); - if (!result.left.isEmpty()) { - printWriter.printf( - "- Successfully %s these tables: \n", - deleteSourceCatalogTables ? "migrated" : "registered"); - printWriter.println(result.left); + if (!result.registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Successfully %s these tables:", + deleteSourceCatalogTables ? 
"migrated" : "registered")); + printWriter.println(result.registeredTableIdentifiers()); } - if (!result.right.isEmpty()) { - printWriter.printf( - "- Failed to %s these tables: \n", deleteSourceCatalogTables ? "migrate" : "register"); - printWriter.println(result.right); + + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to %s these tables:", deleteSourceCatalogTables ? "migrate" : "register")); + printWriter.println(result.failedToRegisterTableIdentifiers()); } - return 0; + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); + printWriter.println(result.failedToDeleteTableIdentifiers()); + } + } + + private void printDryRunResults( + PrintWriter printWriter, CatalogMigrateUtil.CatalogMigrationResult result) { + printWriter.println("\nSummary: "); + if (result.registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- No tables are identified for %s. Please check logs for more info.", + deleteSourceCatalogTables ? "migration" : "registration")); + return; + } + writeToFile(DRY_RUN_FILE, result.registeredTableIdentifiers()); + printWriter.println( + String.format( + "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. " + + "You can use this file with `--identifiers-from-file` option.", + result.registeredTableIdentifiers().size(), + deleteSourceCatalogTables ? "migration" : "registration", + DRY_RUN_FILE)); + + printWriter.println("\nDetails: "); + printWriter.println( + String.format( + "- Identified these tables for %s by dry-run:", + deleteSourceCatalogTables ? 
"migration" : "registration")); + printWriter.println(result.registeredTableIdentifiers()); + } + + private void writeToFile(String filePath, List identifiers) { + List identifiersString = + identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); + try { + Files.write(Paths.get(filePath), identifiersString); + } catch (IOException e) { + throw new RuntimeException("Failed to write the file:" + filePath, e); + } + } + + private void validateIdentifierOptions() { + if (identifiersFromFile != null && !identifiers.isEmpty() && identifiersRegEx != null) { + throw new IllegalArgumentException( + "All the three identifier options (`--identifiers`, `--identifiers-from-file`, " + + "`--identifiers-regex`) are configured. Please use only one of them."); + } else if (identifiersFromFile != null) { + if (!identifiers.isEmpty()) { + throw new IllegalArgumentException( + "Both `--identifiers` and `--identifiers-from-file` options are configured. Please use only one of them."); + } else if (identifiersRegEx != null) { + throw new IllegalArgumentException( + "Both `--identifiers-regex` and `--identifiers-from-file` options are configured. Please use only one of them."); + } else { + if (!Files.exists(Paths.get(identifiersFromFile))) { + throw new IllegalArgumentException( + "File specified in `--identifiers-from-file` option does not exist."); + } + } + } else if (!identifiers.isEmpty()) { + if (identifiersRegEx != null) { + throw new IllegalArgumentException( + "Both `--identifiers-regex` and `--identifiers` options are configured. Please use only one of them."); + } + } + } + + private static boolean proceedForRegistration(PrintWriter printWriter) { + String warning = + "\n[WARNING]\n" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "\n\tcan lead to a data loss as the in-progress commit will not be considered for migration. 
" + + "\n\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog\n" + + "\n" + + "\tb) After the registration, successfully registered tables will be present in both source and target " + + "catalog. " + + "\n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " + + "loss" + + " of data, and table corruption. " + + "\n\tUse `--delete-source-tables` option to automatically delete the table from source catalog after " + + "migration."; + return proceed(warning, printWriter); + } + + private static boolean proceedForMigration(PrintWriter printWriter) { + String warning = + "\n[WARNING]\n" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "\n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + + "\n\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog\n" + + "\n" + + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + + "\n\tand can only be accessed from the target catalog."; + return proceed(warning, printWriter); + } + + private static boolean proceed(String warning, PrintWriter printWriter) { + try (Scanner scanner = new Scanner(System.in)) { + printWriter.println(warning); + + while (true) { + printWriter.println( + "Have you read the above warnings and are you sure you want to continue? (yes/no):"); + String input = scanner.nextLine(); + + if (input.equalsIgnoreCase("yes")) { + printWriter.println("Continuing..."); + return true; + } else if (input.equalsIgnoreCase("no")) { + printWriter.println("Aborting..."); + return false; + } else { + printWriter.println("Invalid input. 
Please enter 'yes' or 'no'."); + } + } + } } - private String catalogImpl(CatalogType type, String customCatalogImpl) { + private static String catalogImpl(CatalogType type, String customCatalogImpl) { switch (type) { case CUSTOM: if (customCatalogImpl == null || customCatalogImpl.isEmpty()) { diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java index 0e8d0aa..4558e0a 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java @@ -22,7 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.stream.Stream; -import org.junit.jupiter.api.Assertions; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -36,19 +36,40 @@ static Stream optionErrors() { // no arguments arguments( Collections.emptyList(), - "Missing required parameters: '', ''"), - // invalid argument + "Missing required options: '--source-catalog-type=', " + + "'--source-catalog-properties=', '--target-catalog-type=', " + + "'--target-catalog-properties='"), + // missing required arguments arguments( singletonList(""), - "Invalid value for positional parameter at index 0 (): " - + "expected one of [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] (case-sensitive)"), + "Missing required options: '--source-catalog-type=', " + + "'--source-catalog-properties=', '--target-catalog-type=', " + + "'--target-catalog-properties='"), + // missing required arguments + arguments( + Arrays.asList("--source-catalog-type", "GLUE"), + "Missing required options: '--source-catalog-properties=', " + + "'--target-catalog-type=', '--target-catalog-properties='"), // missing required arguments - arguments(singletonList("GLUE"), "Missing required parameter: ''"), - 
// invalid argument arguments( - Arrays.asList("HIVE", "properties1=ab", "properties2=cd"), - "Invalid value for positional parameter at index 2 (): expected one of [CUSTOM, " - + "DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] (case-sensitive) but was 'properties2=cd'")); + Arrays.asList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "properties1=ab", + "--target-catalog-type", + "NESSIE"), + "Missing required option: '--target-catalog-properties='"), + // missing required arguments + arguments( + Arrays.asList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "properties1=ab", + "--target-catalog-properties", + "properties2=cd"), + "Missing required option: '--target-catalog-type='")); } @ParameterizedTest @@ -57,45 +78,127 @@ static Stream optionErrors() { public void testOptionErrors(List args, String expectedMessage) throws Exception { RunCLI run = RunCLI.run(args); - Assertions.assertEquals(2, run.getExitCode()); - Assertions.assertTrue(run.getErr().contains(expectedMessage)); + Assertions.assertThat(run.getExitCode()).isEqualTo(2); + Assertions.assertThat(run.getErr()).contains(expectedMessage); } @Test @Order(1) public void testInvalidArgs() throws Exception { - RunCLI run = RunCLI.run("HADOOP", "k1=v1,k2=v2", "HIVE", "k3=v3, k4=v4"); - Assertions.assertEquals(1, run.getExitCode()); - Assertions.assertTrue( - run.getErr() - .contains( - "java.lang.IllegalArgumentException: Cannot initialize HadoopCatalog " - + "because warehousePath must not be null or empty")); + RunCLI run = + RunCLI.run( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4"); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getErr()) + .contains( + "java.lang.IllegalArgumentException: Cannot initialize HadoopCatalog " + + "because warehousePath must not be null or empty"); run 
= RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "k1=v1,k2=v2", + "--target-catalog-type", "HIVE", + "--target-catalog-properties", "k3=v3, k4=v4", - "-I", + "--identifiers", "foo.tbl", "--identifiers-from-file", + "file.txt", + "--identifiers-regex", + "^foo\\."); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getErr()) + .contains( + "java.lang.IllegalArgumentException: All the three identifier options (`--identifiers`, " + + "`--identifiers-from-file`, `--identifiers-regex`) are configured. Please use only one of them."); + + run = + RunCLI.run( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers-from-file", "file.txt"); - Assertions.assertEquals(1, run.getExitCode()); - Assertions.assertTrue( - run.getErr() - .contains( - "java.lang.IllegalArgumentException: Both `--identifiers` and `--identifiers-from-file` " - + "options are configured. 
Please use only one of them.")); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getErr()) + .contains( + "java.lang.IllegalArgumentException: " + + "File specified in `--identifiers-from-file` option does not exist."); run = RunCLI.run( - "HADOOP", "k1=v1,k2=v2", "HIVE", "k3=v3, k4=v4", "--identifiers-from-file", "file.txt"); - Assertions.assertEquals(1, run.getExitCode()); - Assertions.assertTrue( - run.getErr() - .contains( - "java.lang.IllegalArgumentException: File specified in `--identifiers-from-file` option does not exist.")); + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers", + "foo.tbl", + "--identifiers-from-file", + "file.txt"); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getErr()) + .contains( + "java.lang.IllegalArgumentException: Both `--identifiers` and `--identifiers-from-file` " + + "options are configured. Please use only one of them."); + + run = + RunCLI.run( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers-regex", + "^foo\\.", + "--identifiers-from-file", + "file.txt"); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getErr()) + .contains( + "java.lang.IllegalArgumentException: Both `--identifiers-regex` " + + "and `--identifiers-from-file` options are configured. 
Please use only one of them."); + + run = + RunCLI.run( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers", + "foo.tbl", + "--identifiers-regex", + "^foo\\."); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getErr()) + .contains( + "java.lang.IllegalArgumentException: Both `--identifiers-regex` and " + + "`--identifiers` options are configured. Please use only one of them."); } } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java index c873599..b3d7ae1 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java @@ -16,7 +16,9 @@ package org.projectnessie.tools.catalog.migration; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_IDENTIFIERS_FILE; +import java.io.ByteArrayInputStream; import java.io.File; import java.nio.file.Files; import java.nio.file.Path; @@ -33,8 +35,8 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Order; @@ -78,6 +80,27 @@ protected void beforeEach() { // two tables in 'bar' namespace catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl-3"), schema); catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl-4"), schema); + + // to handle the user prompt + respondAsContinue(); + } + + private void respondAsContinue() { + String 
input = "yes\n"; + ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); + System.setIn(in); + } + + private void respondAsAbort() { + String input = "no\n"; + ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); + System.setIn(in); + } + + private void respondDummy() { + String input = "dummy\n"; + ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); + System.setIn(in); } @AfterEach @@ -101,30 +124,26 @@ private static Catalog createCatalog(String warehousePath, String name) { @Test @Order(0) public void testRegister() throws Exception { - RunCLI run = - RunCLI.run( - "HADOOP", - "warehouse=" + warehousePath1 + ",type=hadoop", - "HADOOP", - "warehouse=" + warehousePath2 + ",type=hadoop"); - - Assertions.assertEquals(0, run.getExitCode()); - Assertions.assertTrue( - run.getOut() - .contains( - "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog.")); - Assertions.assertTrue(run.getOut().contains("Identified 4 tables for registration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n- Successfully registered 4 tables from HADOOP catalog to" - + " HADOOP catalog.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Details: \n- Successfully registered these tables: \n" - + "[foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3]")); + RunCLI run = runWithDefaultArgs(); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. 
" + + "Selecting all the tables from all the namespaces from the source catalog."); + Assertions.assertThat(run.getOut()).contains("Identified 4 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully registered 4 tables from HADOOP catalog to" + + " HADOOP catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Successfully registered these tables:\n"); + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl-1"), TableIdentifier.parse("foo.tbl-2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl-3"), TableIdentifier.parse("bar.tbl-4")); } @Test @@ -132,32 +151,37 @@ public void testRegister() throws Exception { public void testMigrate() throws Exception { RunCLI run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", "--delete-source-tables"); - Assertions.assertEquals(0, run.getExitCode()); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); // note that keywords in output is "migrate" instead of "register". // If the catalog was not hadoop catalog, tables also should get deleted from the source catalog // after migration. - Assertions.assertTrue( - run.getOut() - .contains( - "User has not specified the table identifiers. 
" - + "Selecting all the tables from all the namespaces from the source catalog.")); - Assertions.assertTrue(run.getOut().contains("Identified 4 tables for migration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n- Successfully migrated 4 tables from HADOOP catalog to" - + " HADOOP catalog.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Details: \n- Successfully migrated these tables: \n" - + "[foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3]")); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + Assertions.assertThat(run.getOut()).contains("Identified 4 tables for migration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully migrated 4 tables from HADOOP catalog to" + + " HADOOP catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Successfully migrated these tables:\n"); + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl-1"), TableIdentifier.parse("foo.tbl-2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl-3"), TableIdentifier.parse("bar.tbl-4")); } @Test @@ -166,234 +190,337 @@ public void testRegisterSelectedTables() throws Exception { // using `--identifiers` option RunCLI run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", "--identifiers", "bar.tbl-3"); - Assertions.assertEquals(0, run.getExitCode()); - - Assertions.assertFalse( - run.getOut() - .contains( - "User has not specified the table identifiers. 
" - + "Selecting all the tables from all the namespaces from the source catalog.")); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .doesNotContain( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); - Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n- Successfully registered 1 tables from HADOOP catalog to" - + " HADOOP catalog.")); - Assertions.assertTrue( - run.getOut() - .contains("Details: \n- Successfully registered these tables: \n" + "[bar.tbl-3]")); + Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully registered 1 tables from HADOOP catalog to" + + " HADOOP catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n- Successfully registered these tables:\n" + "[bar.tbl-3]"); // using `--identifiers-from-file` option + respondAsContinue(); Path identifierFile = Paths.get("ids.txt"); - Files.write(identifierFile, Collections.singletonList("foo.tbl-2")); + Files.write(identifierFile, Collections.singletonList("bar.tbl-4")); run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", "--identifiers-from-file", "ids.txt"); Files.delete(identifierFile); - Assertions.assertEquals(0, run.getExitCode()); - - Assertions.assertTrue(run.getOut().contains("Collecting identifiers from the file ids.txt...")); - - Assertions.assertFalse( - run.getOut() - .contains( - "User has not specified the table identifiers. 
" - + "Selecting all the tables from all the namespaces from the source catalog.")); - - Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n- Successfully registered 1 tables from HADOOP catalog to" - + " HADOOP catalog.")); - Assertions.assertTrue( - run.getOut() - .contains("Details: \n- Successfully registered these tables: \n" + "[foo.tbl-2]")); - } + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()).contains("Collecting identifiers from the file ids.txt..."); - @Test - @Order(3) - public void testRegisterMultiThread() throws Exception { - RunCLI run = + Assertions.assertThat(run.getOut()) + .doesNotContain( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + + Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully registered 1 tables from HADOOP catalog to" + + " HADOOP catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n- Successfully registered these tables:\n" + "[bar.tbl-4]"); + + // using --identifiers-regex option which matches all the tables starts with "foo." + respondAsContinue(); + run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", - "-T", - "4"); - - Assertions.assertEquals(0, run.getExitCode()); - Assertions.assertTrue( - run.getOut() - .contains( - "User has not specified the table identifiers. 
" - + "Selecting all the tables from all the namespaces from the source catalog.")); - Assertions.assertTrue(run.getOut().contains("Identified 4 tables for registration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n- Successfully registered 4 tables from HADOOP catalog to" - + " HADOOP catalog.")); + "--identifiers-regex", + "^foo\\..*"); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. Selecting all the tables from all the namespaces " + + "from the source catalog which matches the regex pattern:^foo\\..*"); + + Assertions.assertThat(run.getOut()) + .contains( + "Collecting all the tables from all the namespaces of source catalog " + + "which matches the regex pattern:^foo\\..*"); + + Assertions.assertThat(run.getOut()).contains("Identified 2 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully registered 2 tables from HADOOP catalog to" + + " HADOOP catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Successfully registered these tables:\n"); + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl-1"), TableIdentifier.parse("foo.tbl-2")); } @Test - @Order(4) + @Order(3) public void testRegisterError() throws Exception { // use invalid namespace which leads to NoSuchTableException RunCLI run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", - "-I", + "--identifiers", "dummy.tbl-3"); - Assertions.assertEquals(0, run.getExitCode()); - Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: 
\n- Failed to register 1 tables from HADOOP catalog to HADOOP catalog." - + " Please check the `catalog_migration.log`")); - Assertions.assertTrue( - run.getOut().contains("Details: \n- Failed to register these tables: \n[dummy.tbl-3]")); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Failed to register 1 tables from HADOOP catalog to HADOOP catalog." + + " Please check the `catalog_migration.log`"); + Assertions.assertThat(run.getOut()) + .contains("Details: \n- Failed to register these tables:\n[dummy.tbl-3]"); // try to register same table twice which leads to AlreadyExistsException + respondAsContinue(); RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", - "-I", + "--identifiers", "foo.tbl-2"); + respondAsContinue(); run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", - "-I", + "--identifiers", "foo.tbl-2"); - Assertions.assertEquals(0, run.getExitCode()); - Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n- Failed to register 1 tables from HADOOP catalog to HADOOP catalog." 
- + " Please check the `catalog_migration.log`")); - Assertions.assertTrue( - run.getOut().contains("Details: \n- Failed to register these tables: \n[foo.tbl-2]")); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Failed to register 1 tables from HADOOP catalog to HADOOP catalog." + + " Please check the `catalog_migration.log`"); + Assertions.assertThat(run.getOut()) + .contains("Details: \n- Failed to register these tables:\n[foo.tbl-2]"); } @Test - @Order(5) - public void testRegisterPartialTables() throws Exception { + @Order(4) + public void testRegisterWithFewFailures() throws Exception { // register only foo.tbl-2 RunCLI run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", - "-I", + "--identifiers", "foo.tbl-2"); - Assertions.assertEquals(0, run.getExitCode()); - Assertions.assertTrue(run.getOut().contains("Identified 1 tables for registration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n- Successfully registered 1 tables from HADOOP catalog to HADOOP catalog.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Details: \n" + "- Successfully registered these tables: \n" + "[foo.tbl-2]")); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully registered 1 tables from HADOOP catalog to HADOOP catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Successfully registered these tables:\n" + "[foo.tbl-2]"); // register all the tables from source catalog again + 
respondAsContinue(); run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop"); - Assertions.assertEquals(0, run.getExitCode()); - Assertions.assertTrue(run.getOut().contains("Identified 4 tables for registration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n" - + "- Successfully registered 3 tables from HADOOP catalog to HADOOP catalog. \n" - + "- Failed to register 1 tables from HADOOP catalog to HADOOP catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. \n" - + " Failed Identifiers are written to `failed_identifiers.txt`. " - + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Details: \n" - + "- Successfully registered these tables: \n" - + "[foo.tbl-1, bar.tbl-4, bar.tbl-3]\n" - + "- Failed to register these tables: \n" - + "[foo.tbl-2]")); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()).contains("Identified 4 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n" + + "- Successfully registered 3 tables from HADOOP catalog to HADOOP catalog.\n" + + "- Failed to register 1 tables from HADOOP catalog to HADOOP catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. \n" + + "Failed identifiers are written into `failed_identifiers.txt`. 
" + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Successfully registered these tables:\n"); + Assertions.assertThat(run.getOut()).contains("- Failed to register these tables:\n[foo.tbl-2]"); + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl-1"), TableIdentifier.parse("foo.tbl-2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl-3"), TableIdentifier.parse("bar.tbl-4")); // retry the failed tables using --identifiers-from-file + respondAsContinue(); run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", "--identifiers-from-file", - "failed_identifiers.txt"); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n" - + "- Failed to register 1 tables from HADOOP catalog to HADOOP catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. \n" - + " Failed Identifiers are written to `failed_identifiers.txt`. " - + "Retry with that file using `--identifiers-from-file` option if the failure is because of network/connection timeouts.\n" - + "\n" - + "Details: \n" - + "- Failed to register these tables: \n" - + "[foo.tbl-2]")); + FAILED_IDENTIFIERS_FILE); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n" + + "- Failed to register 1 tables from HADOOP catalog to HADOOP catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. \n" + + "Failed identifiers are written into `failed_identifiers.txt`. 
" + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Failed to register these tables:\n" + "[foo.tbl-2]"); } @Test - @Order(6) + @Order(5) public void testRegisterNoTables() throws Exception { // source catalog is catalog2 which has no tables. RunCLI run = RunCLI.run( + "--source-catalog-type", "HADOOP", + "--source-catalog-properties", "warehouse=" + warehousePath2 + ",type=hadoop", + "--target-catalog-type", "HADOOP", + "--target-catalog-properties", "warehouse=" + warehousePath1 + ",type=hadoop"); - Assertions.assertEquals(0, run.getExitCode()); - Assertions.assertTrue(run.getOut().contains("Identified 0 tables for registration.")); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()).contains("Identified 0 tables for registration."); + } + + @Test + @Order(6) + public void testPrompt() throws Exception { + respondAsAbort(); + RunCLI run = runWithDefaultArgs(); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + // should abort + Assertions.assertThat(run.getOut()).contains("Aborting..."); + // should not have other messages + Assertions.assertThat(run.getOut()).doesNotContain("Summary"); + + respondDummy(); + run = runWithDefaultArgs(); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getOut()).contains("Invalid input. 
Please enter 'yes' or 'no'."); + + respondAsContinue(); + run = runWithDefaultArgs(); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + // should abort + Assertions.assertThat(run.getOut()).contains("Continuing..."); + Assertions.assertThat(run.getOut()).contains("Summary"); } @Test @Order(7) + public void testDryRun() throws Exception { + RunCLI run = + RunCLI.run( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", + "HADOOP", + "--target-catalog-properties", + "warehouse=" + warehousePath2 + ",type=hadoop", + "--dry-run"); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + // should not prompt for dry run + Assertions.assertThat(run.getOut()) + .doesNotContain( + "Have you read the above warnings and are you sure you want to continue? (yes/no):"); + Assertions.assertThat(run.getOut()).contains("Dry run is completed."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n" + + "- Identified 4 tables for registration by dry-run. " + + "These identifiers are also written into dry_run_identifiers.txt. 
" + + "You can use this file with `--identifiers-from-file` option."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Identified these tables for registration by dry-run:\n"); + } + + @Test + @Order(9) public void version() throws Exception { RunCLI run = RunCLI.run("--version"); - Assertions.assertEquals(0, run.getExitCode()); - Assertions.assertTrue(run.getOut().startsWith(System.getProperty("expectedCLIVersion"))); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()).startsWith(System.getProperty("expectedCLIVersion")); + } + + private RunCLI runWithDefaultArgs() throws Exception { + return RunCLI.run( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "warehouse=" + warehousePath1 + ",type=hadoop", + "--target-catalog-type", + "HADOOP", + "--target-catalog-properties", + "warehouse=" + warehousePath2 + ",type=hadoop"); } } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java index 825d346..9459994 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java @@ -17,6 +17,7 @@ import static org.apache.iceberg.types.Types.NestedField.required; +import java.io.ByteArrayInputStream; import java.io.File; import java.util.Arrays; import java.util.Collections; @@ -32,9 +33,9 @@ import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.hive.HiveMetastoreTest; import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Order; @@ -65,12 +66,12 @@ protected static void setup() { warehousePath1 = 
catalog.getConf().get("hive.metastore.warehouse.dir"); warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - // assign to hive catalog from the parent class - catalog1 = catalog; + catalog1 = createHadoopCatalog(warehousePath2, "catalog1"); ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("foo"), Collections.emptyMap()); ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("bar"), Collections.emptyMap()); - catalog2 = createHadoopCatalog(warehousePath2, "catalog2"); + // assign to hive catalog from the parent class + catalog2 = catalog; ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("foo"), Collections.emptyMap()); ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("bar"), Collections.emptyMap()); } @@ -92,6 +93,9 @@ protected void beforeEach() { // two tables in 'bar' namespace catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl3"), schema); catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); + + // one table in catalog2 + catalog2.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl5"), schema); } @AfterEach @@ -115,29 +119,82 @@ private static Catalog createHadoopCatalog(String warehousePath, String name) { @Test @Order(0) public void testRegister() throws Exception { + respondAsContinue(); + RunCLI run = + RunCLI.run( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "warehouse=" + warehousePath2 + ",type=hadoop", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "warehouse=" + warehousePath1 + ",uri=" + catalog.getConf().get("hive.metastore.uris")); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. 
" + + "Selecting all the tables from all the namespaces from the source catalog."); + Assertions.assertThat(run.getOut()).contains("Identified 4 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully registered 4 tables from HADOOP catalog to" + + " HIVE catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Successfully registered these tables:\n"); + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4"), + TableIdentifier.parse("bar.tbl5")); + } + + @Test + @Order(1) + public void testMigrate() throws Exception { + respondAsContinue(); RunCLI run = RunCLI.run( + "--source-catalog-type", "HIVE", + "--source-catalog-properties", "warehouse=" + warehousePath1 + ",uri=" + catalog.getConf().get("hive.metastore.uris"), + "--target-catalog-type", "HADOOP", - "warehouse=" + warehousePath2 + ",type=hadoop"); - - Assertions.assertEquals(0, run.getExitCode()); - Assertions.assertTrue( - run.getOut() - .contains( - "User has not specified the table identifiers. 
" - + "Selecting all the tables from all the namespaces from the source catalog.")); - Assertions.assertTrue(run.getOut().contains("Identified 4 tables for registration.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Summary: \n- Successfully registered 4 tables from HIVE catalog to" - + " HADOOP catalog.")); - Assertions.assertTrue( - run.getOut() - .contains( - "Details: \n- Successfully registered these tables: \n" - + "[bar.tbl3, bar.tbl4, foo.tbl1, foo.tbl2]")); + "--target-catalog-properties", + "warehouse=" + warehousePath2 + ",type=hadoop", + "--delete-source-tables"); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + Assertions.assertThat(run.getOut()).contains("Identified 1 tables for migration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully migrated 1 tables from HIVE catalog to" + + " HADOOP catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Successfully migrated these tables:\n"); + // migrated table should be present in the target catalog + Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl5"), + TableIdentifier.parse("bar.tbl4"), + TableIdentifier.parse("bar.tbl3")); + + // migrated table should not be there in the source catalog + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))).isEmpty(); + } + + private void respondAsContinue() { + String input = "yes\n"; + ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); + System.setIn(in); } } From 2052fbbf8922e870248f1aeb09cc4170f3136f11 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Mon, 6 Feb 2023 20:35:16 +0530 Subject: [PATCH 05/31] Nessie integration tests --- build.gradle.kts | 1 + .../catalog/migration/ITHiveAndHadoop.java 
| 2 +- .../catalog/migration/ITHiveAndNessie.java | 227 ++++++++++++++++++ 3 files changed, 229 insertions(+), 1 deletion(-) create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java diff --git a/build.gradle.kts b/build.gradle.kts index 82a3874..806dccc 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -81,6 +81,7 @@ dependencies { exclude("com.google.code.findbugs", "jsr305") } testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") + testImplementation("org.testcontainers:testcontainers:1.17.6") } group = "org.projectnessie" diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java index 9459994..a125d7c 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java @@ -184,7 +184,7 @@ public void testMigrate() throws Exception { // migrated table should be present in the target catalog Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl5"), + TableIdentifier.parse("bar.tbl5"), // tbl5 is the migrated table TableIdentifier.parse("bar.tbl4"), TableIdentifier.parse("bar.tbl3")); diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java new file mode 100644 index 0000000..e5e32ff --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import static org.apache.iceberg.types.Types.NestedField.required; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.Schema; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hive.HiveMetastoreTest; +import org.apache.iceberg.nessie.NessieCatalog; +import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.testcontainers.containers.GenericContainer; + +public class ITHiveAndNessie extends HiveMetastoreTest { + + private static String warehousePath1; + + private static @TempDir File warehouse2; + private static String warehousePath2; + + private static Catalog catalog1; + + private static Catalog catalog2; + + private static final Schema schema = + new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); + + private static final String IMAGE = 
"projectnessie/nessie:0.47.1"; + private static final int NESSIE_PORT = 19121; + + private static String nessieUri; + + private static GenericContainer container; + + @BeforeAll + protected static void setup() { + try { + startMetastore(); + } catch (Exception e) { + throw new RuntimeException(e); + } + warehousePath1 = catalog.getConf().get("hive.metastore.warehouse.dir"); + warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + + container = + new GenericContainer(IMAGE) + .withExposedPorts(NESSIE_PORT) + .withEnv("QUARKUS_HTTP_PORT", String.valueOf(NESSIE_PORT)); + + container.start(); + + nessieUri = + String.format( + "http://%s:%s/api/v1", container.getHost(), container.getMappedPort(NESSIE_PORT)); + + // assign to hive catalog from the parent class + catalog1 = catalog; + ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("foo"), Collections.emptyMap()); + ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("bar"), Collections.emptyMap()); + + catalog2 = createNessieCatalog(warehousePath2, nessieUri); + ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("foo"), Collections.emptyMap()); + ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("bar"), Collections.emptyMap()); + } + + @AfterAll + protected static void tearDown() { + try { + stopMetastore(); + } catch (Exception e) { + throw new RuntimeException(e); + } + container.stop(); + } + + @BeforeEach + protected void beforeEach() { + // two tables in 'foo' namespace + catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl1"), schema); + catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + // two tables in 'bar' namespace + catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl3"), schema); + catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); + + // one table in catalog2 + catalog2.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl5"), schema); + } + + @AfterEach + 
protected void afterEach() { + Arrays.asList(Namespace.of("foo"), Namespace.of("bar")) + .forEach( + namespace -> { + catalog1.listTables(namespace).forEach(catalog1::dropTable); + catalog2.listTables(namespace).forEach(catalog2::dropTable); + }); + } + + private static Catalog createNessieCatalog(String warehousePath, String uri) { + Map properties = new HashMap<>(); + properties.put("warehouse", warehousePath); + properties.put("ref", "main"); + properties.put("uri", uri); + return CatalogUtil.loadCatalog( + NessieCatalog.class.getName(), "nessie", properties, new Configuration()); + } + + @Test + @Order(0) + public void testRegister() throws Exception { + respondAsContinue(); + RunCLI run = + RunCLI.run( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "warehouse=" + warehousePath1 + ",uri=" + catalog.getConf().get("hive.metastore.uris"), + "--target-catalog-type", + "NESSIE", + "--target-catalog-properties", + "uri=" + nessieUri + ",ref=main,warehouse=" + warehousePath2); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + Assertions.assertThat(run.getOut()).contains("Identified 4 tables for registration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully registered 4 tables from HIVE catalog to" + + " NESSIE catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Successfully registered these tables:\n"); + // using the fresh instance of nessie catalog at client side to get the latest state of main + // branch. 
+ catalog2 = createNessieCatalog(warehousePath2, nessieUri); + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4"), + TableIdentifier.parse("bar.tbl5")); + } + + @Test + @Order(1) + public void testMigrate() throws Exception { + respondAsContinue(); + RunCLI run = + RunCLI.run( + "--source-catalog-type", + "NESSIE", + "--source-catalog-properties", + "uri=" + nessieUri + ",ref=main,warehouse=" + warehousePath2, + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "warehouse=" + warehousePath1 + ",uri=" + catalog.getConf().get("hive.metastore.uris"), + "--delete-source-tables"); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + Assertions.assertThat(run.getOut()).contains("Identified 1 tables for migration."); + Assertions.assertThat(run.getOut()) + .contains( + "Summary: \n- Successfully migrated 1 tables from NESSIE catalog to" + + " HIVE catalog."); + Assertions.assertThat(run.getOut()) + .contains("Details: \n" + "- Successfully migrated these tables:\n"); + // migrated table should be present in the target catalog + Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl5"), // tbl5 is the migrated table + TableIdentifier.parse("bar.tbl4"), + TableIdentifier.parse("bar.tbl3")); + + // migrated table should not be there in the source catalog + // using the fresh instance of nessie catalog at client side to get the latest state of main + // branch. 
+ catalog2 = createNessieCatalog(warehousePath2, nessieUri); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))).isEmpty(); + } + + private void respondAsContinue() { + String input = "yes\n"; + ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); + System.setIn(in); + } +} From 9ae2dac13f6203516955ed241d51089ffea2a762 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Tue, 7 Feb 2023 11:25:40 +0530 Subject: [PATCH 06/31] Self review --- README.md | 6 +-- build.gradle.kts | 11 ++-- gradle/libs.versions.toml | 29 ++++++----- .../catalog/migration/CatalogMigrateUtil.java | 16 ++---- .../migration/CatalogMigrationCLI.java | 5 +- .../tools/catalog/migration/CLITest.java | 25 ++++------ .../catalog/migration/ITHiveAndHadoop.java | 33 +++++------- .../catalog/migration/ITHiveAndNessie.java | 33 +++++------- .../tools/catalog/migration/TestUtil.java | 50 +++++++++++++++++++ 9 files changed, 117 insertions(+), 91 deletions(-) create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java diff --git a/README.md b/README.md index 4956eaf..ab123a3 100644 --- a/README.md +++ b/README.md @@ -74,15 +74,15 @@ Bulk register the iceberg tables from source catalog to target catalog without d -V, --version Print version information and exit. ``` +> :warning: **It is recommended to use this CLI tool when there is no in-progress commits for the tables in the source catalog.** +In-progress commits may not make it into the target catalog if used. + > :warning: By default this tool just registers the table. Which means the table will be present in both the catalogs after registering. Operating same table from more than one catalog can lead to missing updates, loss of data and table corruption. 
So, it is recommended to use the '--delete-source-tables' option in CLI to automatically delete the table from source catalog after registering or avoid operating tables from the source catalog after registering if '--delete-source-tables' option is not used. -> :warning: **It is recommended to use this CLI tool when there is no in-progress commits for the tables in the source catalog.** -In-progress commits may not make it into the target catalog if used. - # Sample Inputs ## Bulk migrating all the tables from Hadoop catalog to Nessie catalog (main branch) ```shell diff --git a/build.gradle.kts b/build.gradle.kts index 806dccc..f267ff6 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -49,12 +49,14 @@ dependencies { testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) - testImplementation("org.assertj:assertj-core:3.24.2") + testImplementation(libs.assertj) + // for integration tests testImplementation( "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" ) - testImplementation("org.apache.hive:hive-metastore:2.3.8") { + testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { + // these are taken from iceberg repo configurations exclude("org.apache.avro", "avro") exclude("org.slf4j", "slf4j-log4j12") exclude("org.pentaho") // missing dependency @@ -69,7 +71,8 @@ dependencies { exclude("javax.transaction", "transaction-api") exclude("com.zaxxer", "HikariCP") } - testImplementation("org.apache.hive:hive-exec:2.3.8:core") { + testImplementation("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") { + // these are taken from iceberg repo configurations exclude("org.apache.avro", "avro") exclude("org.slf4j", "slf4j-log4j12") exclude("org.pentaho") // missing dependency @@ -81,7 +84,7 @@ dependencies { exclude("com.google.code.findbugs", "jsr305") } 
testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") - testImplementation("org.testcontainers:testcontainers:1.17.6") + testImplementation(libs.test.containers) } group = "org.projectnessie" diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 0cdbf12..bd1b702 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,33 +1,38 @@ [versions] +assertj = "3.24.2" +aws = "1.7.4" googleJavaFormat = "1.15.0" guava = "31.1-jre" +hadoop = "3.2.4" +hive = "2.3.8" +iceberg = "1.1.0" junit = "5.9.1" +logback = "1.2.11" nessieBuildPlugins = "0.2.14" picocli = "4.7.0" shadowPlugin = "7.1.2" slf4j = "1.7.36" spotlessPlugin = "6.12.0" -logback = "1.2.11" -iceberg = "1.1.0" -hadoop = "3.2.4" -aws = "1.7.4" +testContainers = "1.17.6" [libraries] +assertj = { module = "org.assertj:assertj-core", version.ref = "assertj" } +aws-sdk = { module = "com.amazonaws:aws-java-sdk", version.ref = "aws" } google-java-format = { module = "com.google.googlejavaformat:google-java-format", version.ref = "googleJavaFormat" } +guava = { module = "com.google.guava:guava", version.ref = "guava" } +hadoop-aws = { module = "org.apache.hadoop:hadoop-aws", version.ref = "hadoop" } +hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" } +iceberg-dell = { module = "org.apache.iceberg:iceberg-dell", version.ref = "iceberg" } +iceberg-spark-runtime = { module = "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12", version.ref = "iceberg" } junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } -picocli = { module = "info.picocli:picocli", version.ref = "picocli" } -guava = { module = "com.google.guava:guava", version.ref = "guava" } -slf4j 
= { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } logback-core = { module = "ch.qos.logback:logback-core", version.ref = "logback" } logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } -iceberg-spark-runtime = { module = "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12", version.ref = "iceberg" } -iceberg-dell = { module = "org.apache.iceberg:iceberg-dell", version.ref = "iceberg" } -hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" } -hadoop-aws = { module = "org.apache.hadoop:hadoop-aws", version.ref = "hadoop" } -aws-sdk = { module = "com.amazonaws:aws-java-sdk", version.ref = "aws" } +picocli = { module = "info.picocli:picocli", version.ref = "picocli" } +slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } +test-containers = { module = "org.testcontainers:testcontainers", version.ref = "testContainers" } [plugins] nessie-build-spotless = { id = "org.projectnessie.buildsupport.spotless", version.ref = "nessieBuildPlugins" } diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java index 3a7209e..6cb6db7 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java @@ -47,8 +47,6 @@ private CatalogMigrateUtil() {} * successful migration, deletes the table entry from source catalog(not applicable for * HadoopCatalog). * - *

Supports bulk migrations with a multi-thread execution. - * *

Users must make sure that no in-progress commits on the tables of source catalog during * migration. * @@ -83,8 +81,6 @@ public static CatalogMigrationResult migrateTables( * Register tables from one catalog(source catalog) to another catalog(target catalog). User has * to take care of deleting the tables from source catalog after registration. * - *

Supports bulk registration with a multi-thread execution. - * *

Users must make sure that no in-progress commits on the tables of source catalog during * registration. * @@ -156,15 +152,13 @@ private static CatalogMigrationResult migrateTables( tableIdentifier); // HadoopCatalog dropTable will delete the table files completely even when purge is - // false. - // So, skip dropTable for HadoopCatalog. + // false. So, skip dropTable for HadoopCatalog. boolean deleteTableFromSourceCatalog = - deleteEntriesFromSourceCatalog && !(sourceCatalog instanceof HadoopCatalog); - + !(sourceCatalog instanceof HadoopCatalog) && deleteEntriesFromSourceCatalog; try { if (deleteTableFromSourceCatalog) { - boolean failedToDelete = sourceCatalog.dropTable(tableIdentifier, false); - if (failedToDelete) { + boolean isDropped = sourceCatalog.dropTable(tableIdentifier, false); + if (!isDropped) { failedToDeleteTableIdentifiers.add(tableIdentifier); } } @@ -203,7 +197,7 @@ private static void registerTable( LOG.info("Successfully migrated the table {}", tableIdentifier); } catch (Exception ex) { failedToMigrateTableIdentifiers.add(tableIdentifier); - LOG.warn("Unable to migrate table {}", tableIdentifier, ex); + LOG.warn("Unable to register the table {}", tableIdentifier, ex); } } diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java index 34ab5c8..5fc41b0 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java @@ -348,7 +348,7 @@ private void printDryRunResults( printWriter.println(result.registeredTableIdentifiers()); } - private void writeToFile(String filePath, List identifiers) { + private static void writeToFile(String filePath, List identifiers) { List identifiersString = identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); try { @@ -395,8 +395,7 @@ private static boolean 
proceedForRegistration(PrintWriter printWriter) { + "\tb) After the registration, successfully registered tables will be present in both source and target " + "catalog. " + "\n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " - + "loss" - + " of data, and table corruption. " + + "loss of data, and table corruption. " + "\n\tUse `--delete-source-tables` option to automatically delete the table from source catalog after " + "migration."; return proceed(warning, printWriter); diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java index b3d7ae1..4fc271f 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java @@ -16,7 +16,9 @@ package org.projectnessie.tools.catalog.migration; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.DRY_RUN_FILE; import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_TO_DELETE_AT_SOURCE_FILE; import java.io.ByteArrayInputStream; import java.io.File; @@ -85,24 +87,12 @@ protected void beforeEach() { respondAsContinue(); } - private void respondAsContinue() { + private static void respondAsContinue() { String input = "yes\n"; ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); System.setIn(in); } - private void respondAsAbort() { - String input = "no\n"; - ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); - System.setIn(in); - } - - private void respondDummy() { - String input = "dummy\n"; - ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); - System.setIn(in); - } - @AfterEach protected void afterEach() { 
Arrays.asList(Namespace.of("foo"), Namespace.of("bar")) @@ -111,6 +101,9 @@ protected void afterEach() { catalog1.listTables(namespace).forEach(catalog1::dropTable); catalog2.listTables(namespace).forEach(catalog2::dropTable); }); + TestUtil.deleteFileIfExists(FAILED_IDENTIFIERS_FILE); + TestUtil.deleteFileIfExists(FAILED_TO_DELETE_AT_SOURCE_FILE); + TestUtil.deleteFileIfExists(DRY_RUN_FILE); } private static Catalog createCatalog(String warehousePath, String name) { @@ -452,7 +445,7 @@ public void testRegisterNoTables() throws Exception { @Test @Order(6) public void testPrompt() throws Exception { - respondAsAbort(); + TestUtil.respondAsAbort(); RunCLI run = runWithDefaultArgs(); Assertions.assertThat(run.getExitCode()).isEqualTo(0); // should abort @@ -460,7 +453,7 @@ public void testPrompt() throws Exception { // should not have other messages Assertions.assertThat(run.getOut()).doesNotContain("Summary"); - respondDummy(); + TestUtil.respondDummy(); run = runWithDefaultArgs(); Assertions.assertThat(run.getExitCode()).isEqualTo(1); Assertions.assertThat(run.getOut()).contains("Invalid input. 
Please enter 'yes' or 'no'."); @@ -512,7 +505,7 @@ public void version() throws Exception { Assertions.assertThat(run.getOut()).startsWith(System.getProperty("expectedCLIVersion")); } - private RunCLI runWithDefaultArgs() throws Exception { + private static RunCLI runWithDefaultArgs() throws Exception { return RunCLI.run( "--source-catalog-type", "HADOOP", diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java index a125d7c..f95778f 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java @@ -16,8 +16,10 @@ package org.projectnessie.tools.catalog.migration; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_TO_DELETE_AT_SOURCE_FILE; -import java.io.ByteArrayInputStream; import java.io.File; import java.util.Arrays; import java.util.Collections; @@ -57,12 +59,8 @@ public class ITHiveAndHadoop extends HiveMetastoreTest { new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); @BeforeAll - protected static void setup() { - try { - startMetastore(); - } catch (Exception e) { - throw new RuntimeException(e); - } + protected static void setup() throws Exception { + startMetastore(); warehousePath1 = catalog.getConf().get("hive.metastore.warehouse.dir"); warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); @@ -77,12 +75,8 @@ protected static void setup() { } @AfterAll - protected static void tearDown() { - try { - stopMetastore(); - } catch (Exception e) { - throw new RuntimeException(e); - } + protected static 
void tearDown() throws Exception { + stopMetastore(); } @BeforeEach @@ -106,6 +100,9 @@ protected void afterEach() { catalog1.listTables(namespace).forEach(catalog1::dropTable); catalog2.listTables(namespace).forEach(catalog2::dropTable); }); + TestUtil.deleteFileIfExists(FAILED_IDENTIFIERS_FILE); + TestUtil.deleteFileIfExists(FAILED_TO_DELETE_AT_SOURCE_FILE); + TestUtil.deleteFileIfExists(DRY_RUN_FILE); } private static Catalog createHadoopCatalog(String warehousePath, String name) { @@ -119,7 +116,7 @@ private static Catalog createHadoopCatalog(String warehousePath, String name) { @Test @Order(0) public void testRegister() throws Exception { - respondAsContinue(); + TestUtil.respondAsContinue(); RunCLI run = RunCLI.run( "--source-catalog-type", @@ -156,7 +153,7 @@ public void testRegister() throws Exception { @Test @Order(1) public void testMigrate() throws Exception { - respondAsContinue(); + TestUtil.respondAsContinue(); RunCLI run = RunCLI.run( "--source-catalog-type", @@ -191,10 +188,4 @@ public void testMigrate() throws Exception { // migrated table should not be there in the source catalog Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))).isEmpty(); } - - private void respondAsContinue() { - String input = "yes\n"; - ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); - System.setIn(in); - } } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java index e5e32ff..f2e6475 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java @@ -16,8 +16,10 @@ package org.projectnessie.tools.catalog.migration; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.DRY_RUN_FILE; +import static 
org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_TO_DELETE_AT_SOURCE_FILE; -import java.io.ByteArrayInputStream; import java.io.File; import java.util.Arrays; import java.util.Collections; @@ -65,12 +67,8 @@ public class ITHiveAndNessie extends HiveMetastoreTest { private static GenericContainer container; @BeforeAll - protected static void setup() { - try { - startMetastore(); - } catch (Exception e) { - throw new RuntimeException(e); - } + protected static void setup() throws Exception { + startMetastore(); warehousePath1 = catalog.getConf().get("hive.metastore.warehouse.dir"); warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); @@ -96,12 +94,8 @@ protected static void setup() { } @AfterAll - protected static void tearDown() { - try { - stopMetastore(); - } catch (Exception e) { - throw new RuntimeException(e); - } + protected static void tearDown() throws Exception { + stopMetastore(); container.stop(); } @@ -126,6 +120,9 @@ protected void afterEach() { catalog1.listTables(namespace).forEach(catalog1::dropTable); catalog2.listTables(namespace).forEach(catalog2::dropTable); }); + TestUtil.deleteFileIfExists(FAILED_IDENTIFIERS_FILE); + TestUtil.deleteFileIfExists(FAILED_TO_DELETE_AT_SOURCE_FILE); + TestUtil.deleteFileIfExists(DRY_RUN_FILE); } private static Catalog createNessieCatalog(String warehousePath, String uri) { @@ -140,7 +137,7 @@ private static Catalog createNessieCatalog(String warehousePath, String uri) { @Test @Order(0) public void testRegister() throws Exception { - respondAsContinue(); + TestUtil.respondAsContinue(); RunCLI run = RunCLI.run( "--source-catalog-type", @@ -180,7 +177,7 @@ public void testRegister() throws Exception { @Test @Order(1) public void testMigrate() throws Exception { - respondAsContinue(); + TestUtil.respondAsContinue(); RunCLI run = RunCLI.run( "--source-catalog-type", @@ -218,10 
+215,4 @@ public void testMigrate() throws Exception { catalog2 = createNessieCatalog(warehousePath2, nessieUri); Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))).isEmpty(); } - - private void respondAsContinue() { - String input = "yes\n"; - ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); - System.setIn(in); - } } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java b/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java new file mode 100644 index 0000000..fce1cd7 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import java.io.ByteArrayInputStream; +import java.io.File; + +public final class TestUtil { + + private TestUtil() {} + ; + + static void respondAsContinue() { + String input = "yes\n"; + ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); + System.setIn(in); + } + + static void respondAsAbort() { + String input = "no\n"; + ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); + System.setIn(in); + } + + static void respondDummy() { + String input = "dummy\n"; + ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); + System.setIn(in); + } + + static void deleteFileIfExists(String filePath) { + File file = new File(filePath); + if (file.exists()) { + file.delete(); + } + } +} From 6c7200ebb09ca6fe8c2e126e5d51fd318289e6fe Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Tue, 7 Feb 2023 16:48:27 +0530 Subject: [PATCH 07/31] intTest gradle task --- build.gradle.kts | 44 +++++++++++++++++++ .../tools/catalog/migration/TestUtil.java | 1 - 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/build.gradle.kts b/build.gradle.kts index f267ff6..dd7f900 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -34,6 +34,8 @@ repositories { applyShadowJar() +testTasks() + dependencies { api(libs.guava) api(libs.slf4j) @@ -156,3 +158,45 @@ shadowJar { manifest { attributes["Main-Class"] = mainClassName } finalizedBy(unixExecutable) } + +fun Project.testTasks() { + if (projectDir.resolve("src/test").exists()) { + tasks.withType().configureEach { + useJUnitPlatform {} + val testJvmArgs: String? 
by project + if (testJvmArgs != null) { + jvmArgs((testJvmArgs as String).split(" ")) + } + + systemProperty("file.encoding", "UTF-8") + systemProperty("user.language", "en") + systemProperty("user.country", "US") + systemProperty("user.variant", "") + systemProperty("test.log.level", testLogLevel()) + environment("TESTCONTAINERS_REUSE_ENABLE", "true") + filter { + isFailOnNoMatchingTests = false + when (name) { + "test" -> { + includeTestsMatching("*Test") + includeTestsMatching("Test*") + excludeTestsMatching("Abstract*") + excludeTestsMatching("IT*") + } + "intTest" -> includeTestsMatching("IT*") + } + } + if (name != "test") { + mustRunAfter(tasks.named("test")) + } + } + val intTest = + tasks.register("intTest") { + group = "verification" + description = "Runs the integration tests." + } + tasks.named("check") { dependsOn(intTest) } + } +} + +fun testLogLevel() = System.getProperty("test.log.level", "WARN") diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java b/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java index fce1cd7..3a45516 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java @@ -21,7 +21,6 @@ public final class TestUtil { private TestUtil() {} - ; static void respondAsContinue() { String input = "yes\n"; From 383f4f0950248b3c8c8f2b04aa01b87514243d10 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Wed, 8 Feb 2023 11:14:15 +0530 Subject: [PATCH 08/31] Enhance test validation --- .../org/projectnessie/tools/catalog/migration/CLITest.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java index 4fc271f..4344c62 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java +++ 
b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java @@ -421,6 +421,9 @@ public void testRegisterWithFewFailures() throws Exception { + "if the failure is because of network/connection timeouts."); Assertions.assertThat(run.getOut()) .contains("Details: \n" + "- Failed to register these tables:\n" + "[foo.tbl-2]"); + Assertions.assertThat(new File(FAILED_IDENTIFIERS_FILE).exists()).isTrue(); + Assertions.assertThat(Files.readAllLines(Paths.get(FAILED_IDENTIFIERS_FILE))) + .containsExactly("foo.tbl-2"); } @Test @@ -495,6 +498,9 @@ public void testDryRun() throws Exception { + "You can use this file with `--identifiers-from-file` option."); Assertions.assertThat(run.getOut()) .contains("Details: \n" + "- Identified these tables for registration by dry-run:\n"); + Assertions.assertThat(new File(DRY_RUN_FILE).exists()).isTrue(); + Assertions.assertThat(Files.readAllLines(Paths.get(DRY_RUN_FILE))) + .containsExactlyInAnyOrder("foo.tbl-1", "foo.tbl-2", "bar.tbl-3", "bar.tbl-4"); } @Test From 702f3affa7e63b0500cb3f9309b64d0f4c20dbb0 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Fri, 10 Feb 2023 12:49:49 +0530 Subject: [PATCH 09/31] Address new review comments --- .gitignore | 5 - README.md | 5 +- build.gradle.kts | 79 +- ...ies.gradle.kts => BuildSupport.gradle.kts} | 13 - gradle/libs.versions.toml | 7 +- .../catalog/migration/CatalogMigrateUtil.java | 296 -------- .../migration/CatalogMigrationCLI.java | 226 +----- .../catalog/migration/CatalogMigrator.java | 545 ++++++++++++++ .../migration/AbstractCLIMigrationTest.java | 572 +++++++++++++++ .../tools/catalog/migration/AbstractTest.java | 129 ++++ .../AbstractTestCatalogMigrator.java | 672 ++++++++++++++++++ .../catalog/migration/CLIErrorsTest.java | 204 ------ .../tools/catalog/migration/CLITest.java | 633 ++++------------- .../migration/HadoopCLIMigrationTest.java | 48 ++ .../migration/HadoopCatalogMigratorTest.java | 43 ++ .../migration/HiveMetaStoreRunner.java | 27 + 
.../ITHadoopToHiveCLIMigrationTest.java | 54 ++ .../ITHadoopToHiveCatalogMigrator.java | 54 ++ .../catalog/migration/ITHiveAndHadoop.java | 191 ----- .../catalog/migration/ITHiveAndNessie.java | 218 ------ .../ITHiveToHadoopCLIMigrationTest.java | 54 ++ .../ITHiveToHadoopCatalogMigrator.java | 54 ++ .../ITHiveToNessieCLIMigrationTest.java | 58 ++ .../ITHiveToNessieCatalogMigrator.java | 58 ++ .../ITNessieToHiveCLIMigrationTest.java | 58 ++ .../ITNessieToHiveCatalogMigrator.java | 58 ++ .../tools/catalog/migration/RunCLI.java | 27 + .../tools/catalog/migration/TestUtil.java | 49 -- 28 files changed, 2737 insertions(+), 1700 deletions(-) rename buildSrc/src/main/kotlin/{Utilities.gradle.kts => BuildSupport.gradle.kts} (82%) delete mode 100644 src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java create mode 100644 src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java delete mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/HiveMetaStoreRunner.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java delete mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java delete mode 100644 
src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java delete mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java diff --git a/.gitignore b/.gitignore index aaca80c..a893541 100644 --- a/.gitignore +++ b/.gitignore @@ -72,8 +72,3 @@ site/site # Ignore Gradle build output directory build - -# test output -failed_identifiers.txt -failed_to_delete_at_source.txt -dry_run_identifiers.txt diff --git a/README.md b/README.md index ab123a3..4e9d1aa 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Below is the CLI syntax: ``` $ java -jar iceberg-catalog-migrator-0.1.0-SNAPSHOT.jar -h Usage: register [-hV] [--delete-source-tables] [--dry-run] [--identifiers-from-file=] [--identifiers-regex=] - --source-catalog-type= [--source-custom-catalog-impl=] + [--output-dir=] --source-catalog-type= [--source-custom-catalog-impl=] --target-catalog-type= [--target-custom-catalog-impl=] [--identifiers=[, ...]]... [--source-catalog-hadoop-conf=[,...]]... --source-catalog-properties=[,...] [--source-catalog-properties=[, @@ -70,6 +70,9 @@ Bulk register the iceberg tables from source catalog to target catalog without d registered by running this. 
--delete-source-tables optional configuration to delete the table entry from source catalog after successfully registering it to target catalog. + --output-dir= + optional local output directory path to write CLI output files like `failed_identifiers.txt`, `failed_to_delete_at_source.txt`, + `dry_run_identifiers.txt`. Uses the present working directory if not specified. -h, --help Show this help message and exit. -V, --version Print version information and exit. ``` diff --git a/build.gradle.kts b/build.gradle.kts index dd7f900..cb083e6 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -20,16 +20,8 @@ import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar plugins { `java-library` `maven-publish` - id("com.diffplug.spotless") - id("com.github.johnrengelman.shadow") - Utilities -} - -repositories { - if (System.getProperty("withMavenLocal").toBoolean()) { - mavenLocal() - } - mavenCentral() + alias(libs.plugins.nessie.run) + BuildSupport } applyShadowJar() @@ -37,21 +29,22 @@ applyShadowJar() testTasks() dependencies { - api(libs.guava) - api(libs.slf4j) - api(libs.picocli) - api(libs.logback.classic) - api(libs.logback.core) - api(libs.iceberg.spark.runtime) - api(libs.iceberg.dell) - api(libs.hadoop.aws) - api(libs.hadoop.common) - api(libs.aws.sdk) + implementation(libs.guava) + implementation(libs.slf4j) + implementation(libs.picocli) + implementation(libs.logback.classic) + implementation(libs.logback.core) + implementation(libs.iceberg.spark.runtime) + implementation(libs.iceberg.dell) + implementation(libs.hadoop.aws) + implementation(libs.hadoop.common) + implementation(libs.aws.sdk) testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) testImplementation(libs.assertj) + testImplementation(libs.system.lambda) // for integration tests testImplementation( @@ -86,7 +79,8 @@ dependencies { exclude("com.google.code.findbugs", "jsr305") } 
testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") - testImplementation(libs.test.containers) + + nessieQuarkusServer("org.projectnessie:nessie-quarkus:${libs.versions.nessie.get()}:runner") } group = "org.projectnessie" @@ -108,25 +102,6 @@ val processResources = tasks.named("test") { systemProperty("expectedCLIVersion", project.version) } -fun Project.applyShadowJar() { - plugins.apply(ShadowPlugin::class.java) - - plugins.withType().configureEach { - val shadowJar = - tasks.named("shadowJar") { - isZip64 = true // as the package has more than 65535 files - outputs.cacheIf { false } // do not cache uber/shaded jars - archiveClassifier.set("") - mergeServiceFiles() - } - - tasks.named("jar") { - dependsOn(shadowJar) - archiveClassifier.set("raw") - } - } -} - val mainClassName = "org.projectnessie.tools.catalog.migration.CatalogMigrationCLI" extra["versionGoogleJavaFormat"] = libs.versions.googleJavaFormat.get() @@ -159,6 +134,30 @@ shadowJar { finalizedBy(unixExecutable) } +nessieQuarkusApp { + includeTask(tasks.named("intTest")) + environmentNonInput.put("HTTP_ACCESS_LOG_LEVEL", testLogLevel()) +} + +fun Project.applyShadowJar() { + plugins.apply(ShadowPlugin::class.java) + + plugins.withType().configureEach { + val shadowJar = + tasks.named("shadowJar") { + isZip64 = true // as the package has more than 65535 files + outputs.cacheIf { false } // do not cache uber/shaded jars + archiveClassifier.set("") + mergeServiceFiles() + } + + tasks.named("jar") { + dependsOn(shadowJar) + archiveClassifier.set("raw") + } + } +} + fun Project.testTasks() { if (projectDir.resolve("src/test").exists()) { tasks.withType().configureEach { diff --git a/buildSrc/src/main/kotlin/Utilities.gradle.kts b/buildSrc/src/main/kotlin/BuildSupport.gradle.kts similarity index 82% rename from buildSrc/src/main/kotlin/Utilities.gradle.kts rename to buildSrc/src/main/kotlin/BuildSupport.gradle.kts index f8d677a..5f7ddd5 100644 --- 
a/buildSrc/src/main/kotlin/Utilities.gradle.kts +++ b/buildSrc/src/main/kotlin/BuildSupport.gradle.kts @@ -15,13 +15,11 @@ */ import org.gradle.api.JavaVersion -import org.gradle.api.file.DuplicatesStrategy import org.gradle.api.plugins.JavaPlugin import org.gradle.api.plugins.JavaPluginExtension import org.gradle.api.tasks.bundling.Jar import org.gradle.api.tasks.compile.JavaCompile import org.gradle.api.tasks.javadoc.Javadoc -import org.gradle.api.tasks.testing.Test import org.gradle.external.javadoc.CoreJavadocOptions import org.gradle.kotlin.dsl.configure import org.gradle.kotlin.dsl.repositories @@ -36,13 +34,6 @@ repositories { } } -if (project.projectDir.resolve("src/test/java").exists()) { - tasks.withType().configureEach { - useJUnitPlatform {} - maxParallelForks = Runtime.getRuntime().availableProcessors() - } -} - tasks.withType().configureEach { manifest { attributes["Implementation-Title"] = "iceberg-catalog-migrator" @@ -70,7 +61,3 @@ plugins.withType().configureEach { modularity.inferModulePath.set(true) } } - -if (project != rootProject) { - tasks.withType().configureEach { duplicatesStrategy = DuplicatesStrategy.WARN } -} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index bd1b702..64e8673 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -8,12 +8,14 @@ hive = "2.3.8" iceberg = "1.1.0" junit = "5.9.1" logback = "1.2.11" +nessie = "0.48.2" nessieBuildPlugins = "0.2.14" +nessieRunner = "0.28.1" picocli = "4.7.0" shadowPlugin = "7.1.2" slf4j = "1.7.36" spotlessPlugin = "6.12.0" -testContainers = "1.17.6" +systemLambda = "1.2.1" [libraries] assertj = { module = "org.assertj:assertj-core", version.ref = "assertj" } @@ -32,9 +34,10 @@ logback-core = { module = "ch.qos.logback:logback-core", version.ref = "logback" logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } slf4j = { module = 
"org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } -test-containers = { module = "org.testcontainers:testcontainers", version.ref = "testContainers" } +system-lambda = { module = "com.github.stefanbirkner:system-lambda", version.ref = "systemLambda" } [plugins] nessie-build-spotless = { id = "org.projectnessie.buildsupport.spotless", version.ref = "nessieBuildPlugins" } +nessie-run = { id = "org.projectnessie", version.ref = "nessieRunner" } shadow = { id = "com.github.johnrengelman.shadow", version.ref = "shadowPlugin" } spotless = { id = "com.diffplug.spotless", version.ref = "spotlessPlugin" } diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java deleted file mode 100644 index 6cb6db7..0000000 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrateUtil.java +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.projectnessie.tools.catalog.migration; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Objects; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Predicate; -import java.util.regex.Pattern; -import java.util.stream.Collectors; -import org.apache.iceberg.BaseTable; -import org.apache.iceberg.TableOperations; -import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.SupportsNamespaces; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.hadoop.HadoopCatalog; -import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class CatalogMigrateUtil { - private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrateUtil.class); - - private CatalogMigrateUtil() {} - - /** - * Migrates tables from one catalog(source catalog) to another catalog(target catalog). After - * successful migration, deletes the table entry from source catalog(not applicable for - * HadoopCatalog). - * - *

Users must make sure that no in-progress commits on the tables of source catalog during - * migration. - * - * @param tableIdentifiers a list of {@link TableIdentifier} for the tables required to be - * migrated. If not specified, all the tables would be migrated - * @param sourceCatalog Source {@link Catalog} from which the tables are chosen - * @param targetCatalog Target {@link Catalog} to which the tables need to be migrated - * @param identifierRegex regular expression pattern used to migrate only the tables whose - * identifiers match this pattern. Can be provided instead of `tableIdentifiers`. - * @param isDryRun to execute as dry run. - * @param printWriter to print regular updates on the console. - * @return List of successfully migrated and list of failed to migrate table identifiers. - */ - public static CatalogMigrationResult migrateTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - String identifierRegex, - boolean isDryRun, - PrintWriter printWriter) { - return migrateTables( - tableIdentifiers, - sourceCatalog, - targetCatalog, - identifierRegex, - isDryRun, - true, - printWriter); - } - - /** - * Register tables from one catalog(source catalog) to another catalog(target catalog). User has - * to take care of deleting the tables from source catalog after registration. - * - *

Users must make sure that no in-progress commits on the tables of source catalog during - * registration. - * - * @param tableIdentifiers a list of {@link TableIdentifier} for the tables required to be - * registered. If not specified, all the tables would be registered - * @param sourceCatalog Source {@link Catalog} from which the tables are chosen - * @param targetCatalog Target {@link Catalog} to which the tables need to be registered - * @param identifierRegex regular expression pattern used to migrate only the tables whose - * identifiers match this pattern. Can be provided instead of `tableIdentifiers`. - * @param isDryRun to execute as dry run. - * @param printWriter to print regular updates on the console. - * @return List of successfully migrated and list of failed to migrate table identifiers. - */ - public static CatalogMigrationResult registerTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - String identifierRegex, - boolean isDryRun, - PrintWriter printWriter) { - return migrateTables( - tableIdentifiers, - sourceCatalog, - targetCatalog, - identifierRegex, - isDryRun, - false, - printWriter); - } - - private static CatalogMigrationResult migrateTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - String identifierRegex, - boolean isDryRun, - boolean deleteEntriesFromSourceCatalog, - PrintWriter printWriter) { - validate(sourceCatalog, targetCatalog); - - String operation = deleteEntriesFromSourceCatalog ? 
"migration" : "registration"; - - List identifiers; - if (tableIdentifiers == null || tableIdentifiers.isEmpty()) { - identifiers = getMatchingTableIdentifiers(sourceCatalog, identifierRegex, printWriter); - } else { - identifiers = tableIdentifiers; - } - - printWriter.println( - String.format("\nIdentified %d tables for %s.", identifiers.size(), operation)); - - if (isDryRun) { - return new CatalogMigrationResult( - identifiers, Collections.emptyList(), Collections.emptyList()); - } - printWriter.println(String.format("\nStarted %s ...", operation)); - List registeredTableIdentifiers = new ArrayList<>(); - List failedToRegisterTableIdentifiers = new ArrayList<>(); - List failedToDeleteTableIdentifiers = new ArrayList<>(); - AtomicInteger counter = new AtomicInteger(); - identifiers.forEach( - tableIdentifier -> { - registerTable( - sourceCatalog, - targetCatalog, - registeredTableIdentifiers, - failedToRegisterTableIdentifiers, - tableIdentifier); - - // HadoopCatalog dropTable will delete the table files completely even when purge is - // false. So, skip dropTable for HadoopCatalog. 
- boolean deleteTableFromSourceCatalog = - !(sourceCatalog instanceof HadoopCatalog) && deleteEntriesFromSourceCatalog; - try { - if (deleteTableFromSourceCatalog) { - boolean isDropped = sourceCatalog.dropTable(tableIdentifier, false); - if (!isDropped) { - failedToDeleteTableIdentifiers.add(tableIdentifier); - } - } - } catch (Exception exception) { - failedToDeleteTableIdentifiers.add(tableIdentifier); - LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); - } - - int count = counter.incrementAndGet(); - if (count % 100 == 0) { - printWriter.println( - String.format( - "\nAttempted %s for %d tables out of %d tables.", - operation, count, identifiers.size())); - } - }); - printWriter.println(String.format("\nFinished %s ...", operation)); - return new CatalogMigrationResult( - registeredTableIdentifiers, - failedToRegisterTableIdentifiers, - failedToDeleteTableIdentifiers); - } - - private static void registerTable( - Catalog sourceCatalog, - Catalog targetCatalog, - List registeredTableIdentifiers, - List failedToMigrateTableIdentifiers, - TableIdentifier tableIdentifier) { - try { - // register the table to the target catalog - TableOperations ops = ((BaseTable) sourceCatalog.loadTable(tableIdentifier)).operations(); - targetCatalog.registerTable(tableIdentifier, ops.current().metadataFileLocation()); - - registeredTableIdentifiers.add(tableIdentifier); - LOG.info("Successfully migrated the table {}", tableIdentifier); - } catch (Exception ex) { - failedToMigrateTableIdentifiers.add(tableIdentifier); - LOG.warn("Unable to register the table {}", tableIdentifier, ex); - } - } - - @NotNull - private static List getMatchingTableIdentifiers( - Catalog sourceCatalog, String identifierRegex, PrintWriter printWriter) { - if (identifierRegex == null) { - printWriter.println( - "\nUser has not specified the table identifiers." 
- + " Selecting all the tables from all the namespaces from the source catalog."); - } else { - printWriter.println( - "\nUser has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog " - + "which matches the regex pattern:" - + identifierRegex); - } - - printWriter.println("Collecting all the namespaces from source catalog..."); - // fetch all the table identifiers from all the namespaces. - List namespaces = - (sourceCatalog instanceof SupportsNamespaces) - ? ((SupportsNamespaces) sourceCatalog).listNamespaces() - : ImmutableList.of(Namespace.empty()); - if (identifierRegex == null) { - printWriter.println("Collecting all the tables from all the namespaces of source catalog..."); - } else { - printWriter.println( - "Collecting all the tables from all the namespaces of source catalog" - + " which matches the regex pattern:" - + identifierRegex); - } - - Predicate matchedIdentifiersPredicate; - if (identifierRegex != null) { - Pattern pattern = Pattern.compile(identifierRegex); - matchedIdentifiersPredicate = - tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); - } else { - matchedIdentifiersPredicate = tableIdentifier -> true; - } - return getMatchingTableIdentifiers(sourceCatalog, namespaces, matchedIdentifiersPredicate); - } - - private static List getMatchingTableIdentifiers( - Catalog sourceCatalog, - List namespaces, - Predicate matchedIdentifiersPredicate) { - List allIdentifiers = new ArrayList<>(); - namespaces.stream() - .filter(Objects::nonNull) - .forEach( - namespace -> { - List matchedIdentifiers = - sourceCatalog.listTables(namespace).stream() - .filter(matchedIdentifiersPredicate) - .collect(Collectors.toList()); - allIdentifiers.addAll(matchedIdentifiers); - }); - return allIdentifiers; - } - - private static void validate(Catalog sourceCatalog, Catalog targetCatalog) { - Preconditions.checkArgument(sourceCatalog != null, "Invalid source catalog: null"); - 
Preconditions.checkArgument(targetCatalog != null, "Invalid target catalog: null"); - Preconditions.checkArgument( - !targetCatalog.equals(sourceCatalog), "target catalog is same as source catalog"); - } - - public static class CatalogMigrationResult { - private final List registeredTableIdentifiers; - private final List failedToRegisterTableIdentifiers; - private final List failedToDeleteTableIdentifiers; - - CatalogMigrationResult( - List registeredTableIdentifiers, - List failedToRegisterTableIdentifiers, - List failedToDeleteTableIdentifiers) { - this.registeredTableIdentifiers = registeredTableIdentifiers; - this.failedToRegisterTableIdentifiers = failedToRegisterTableIdentifiers; - this.failedToDeleteTableIdentifiers = failedToDeleteTableIdentifiers; - } - - public List registeredTableIdentifiers() { - return registeredTableIdentifiers; - } - - public List failedToRegisterTableIdentifiers() { - return failedToRegisterTableIdentifiers; - } - - public List failedToDeleteTableIdentifiers() { - return failedToDeleteTableIdentifiers; - } - } -} diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java index 5fc41b0..2ff001f 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java @@ -24,7 +24,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Scanner; import java.util.concurrent.Callable; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; @@ -151,9 +150,13 @@ public class CatalogMigrationCLI implements Callable { + "to target catalog.") private boolean deleteSourceCatalogTables; - static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; - static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; - static final 
String DRY_RUN_FILE = "dry_run_identifiers.txt"; + @CommandLine.Option( + names = {"--output-dir"}, + description = + "optional local output directory path to write CLI output files like `failed_identifiers.txt`, " + + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. " + + "Uses the present working directory if not specified.") + String outputDirPath; public static void main(String... args) { CommandLine commandLine = new CommandLine(new CatalogMigrationCLI()); @@ -174,7 +177,7 @@ public Integer call() { Catalog sourceCatalog = CatalogUtil.loadCatalog( Objects.requireNonNull(catalogImpl(sourceCatalogType, sourceCustomCatalogImpl)), - "sourceCatalog", + sourceCatalogType.name(), sourceCatalogProperties, sourceCatalogConf); printWriter.println(String.format("\nConfigured source catalog: %s", sourceCatalogType.name())); @@ -186,7 +189,7 @@ public Integer call() { Catalog targetCatalog = CatalogUtil.loadCatalog( Objects.requireNonNull(catalogImpl(targetCatalogType, targetCustomCatalogImpl)), - "targetCatalog", + targetCatalogType.name(), targetCatalogProperties, targetCatalogConf); printWriter.println(String.format("\nConfigured target catalog: %s", targetCatalogType.name())); @@ -209,155 +212,28 @@ public Integer call() { identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toList()); } - CatalogMigrateUtil.CatalogMigrationResult result; if (deleteSourceCatalogTables) { - if (!isDryRun && !proceedForMigration(printWriter)) { - return 0; - } - - result = - CatalogMigrateUtil.migrateTables( - tableIdentifiers, - sourceCatalog, - targetCatalog, - identifiersRegEx, - isDryRun, - printWriter); - if (sourceCatalogType == CatalogType.HADOOP) { - printWriter.println( - "[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " - + "catalog. \nAvoid operating the migrated tables from the source catalog after migration. 
" - + "Use the tables from target catalog."); - } + CatalogMigrator.migrateTables( + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifiersRegEx, + isDryRun, + printWriter, + outputDirPath); } else { - if (!isDryRun && !proceedForRegistration(printWriter)) { - return 0; - } - - result = - CatalogMigrateUtil.registerTables( - tableIdentifiers, - sourceCatalog, - targetCatalog, - identifiersRegEx, - isDryRun, - printWriter); + CatalogMigrator.registerTables( + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifiersRegEx, + isDryRun, + printWriter, + outputDirPath); } - - if (isDryRun) { - printWriter.println("Dry run is completed."); - printDryRunResults(printWriter, result); - return 0; - } - - printSummary(printWriter, result); - - printDetails(printWriter, result); - return 0; } - private void printSummary( - PrintWriter printWriter, CatalogMigrateUtil.CatalogMigrationResult result) { - printWriter.println("\nSummary: "); - if (!result.registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Successfully %s %d tables from %s catalog to %s catalog.", - deleteSourceCatalogTables ? "migrated" : "registered", - result.registeredTableIdentifiers().size(), - sourceCatalogType.name(), - targetCatalogType.name())); - } - if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - writeToFile(FAILED_IDENTIFIERS_FILE, result.failedToRegisterTableIdentifiers()); - printWriter.println( - String.format( - "- Failed to %s %d tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. " - + "\nFailed identifiers are written into `%s`. " - + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts.", - deleteSourceCatalogTables ? 
"migrate" : "register", - result.failedToRegisterTableIdentifiers().size(), - sourceCatalogType.name(), - targetCatalogType.name(), - FAILED_IDENTIFIERS_FILE)); - } - if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - writeToFile(FAILED_TO_DELETE_AT_SOURCE_FILE, result.failedToDeleteTableIdentifiers()); - printWriter.println( - String.format( - "- Failed to delete %d tables from %s catalog. " - + "Please check the `catalog_migration.log` file for the reason. " - + "\nFailed to delete identifiers are written into `%s`. ", - result.failedToDeleteTableIdentifiers().size(), - sourceCatalogType.name(), - FAILED_TO_DELETE_AT_SOURCE_FILE)); - } - } - - private void printDetails( - PrintWriter printWriter, CatalogMigrateUtil.CatalogMigrationResult result) { - printWriter.println("\nDetails: "); - if (!result.registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Successfully %s these tables:", - deleteSourceCatalogTables ? "migrated" : "registered")); - printWriter.println(result.registeredTableIdentifiers()); - } - - if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to %s these tables:", deleteSourceCatalogTables ? "migrate" : "register")); - printWriter.println(result.failedToRegisterTableIdentifiers()); - } - - if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); - printWriter.println(result.failedToDeleteTableIdentifiers()); - } - } - - private void printDryRunResults( - PrintWriter printWriter, CatalogMigrateUtil.CatalogMigrationResult result) { - printWriter.println("\nSummary: "); - if (result.registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- No tables are identified for %s. Please check logs for more info.", - deleteSourceCatalogTables ? 
"migration" : "registration")); - return; - } - writeToFile(DRY_RUN_FILE, result.registeredTableIdentifiers()); - printWriter.println( - String.format( - "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. " - + "You can use this file with `--identifiers-from-file` option.", - result.registeredTableIdentifiers().size(), - deleteSourceCatalogTables ? "migration" : "registration", - DRY_RUN_FILE)); - - printWriter.println("\nDetails: "); - printWriter.println( - String.format( - "- Identified these tables for %s by dry-run:", - deleteSourceCatalogTables ? "migration" : "registration")); - printWriter.println(result.registeredTableIdentifiers()); - } - - private static void writeToFile(String filePath, List identifiers) { - List identifiersString = - identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); - try { - Files.write(Paths.get(filePath), identifiersString); - } catch (IOException e) { - throw new RuntimeException("Failed to write the file:" + filePath, e); - } - } - private void validateIdentifierOptions() { if (identifiersFromFile != null && !identifiers.isEmpty() && identifiersRegEx != null) { throw new IllegalArgumentException( @@ -384,58 +260,6 @@ private void validateIdentifierOptions() { } } - private static boolean proceedForRegistration(PrintWriter printWriter) { - String warning = - "\n[WARNING]\n" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "\n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " - + "\n\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog\n" - + "\n" - + "\tb) After the registration, successfully registered tables will be present in both source and target " - + "catalog. " - + "\n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " - + "loss of data, and table corruption. 
" - + "\n\tUse `--delete-source-tables` option to automatically delete the table from source catalog after " - + "migration."; - return proceed(warning, printWriter); - } - - private static boolean proceedForMigration(PrintWriter printWriter) { - String warning = - "\n[WARNING]\n" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "\n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " - + "\n\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog\n" - + "\n" - + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " - + "\n\tand can only be accessed from the target catalog."; - return proceed(warning, printWriter); - } - - private static boolean proceed(String warning, PrintWriter printWriter) { - try (Scanner scanner = new Scanner(System.in)) { - printWriter.println(warning); - - while (true) { - printWriter.println( - "Have you read the above warnings and are you sure you want to continue? (yes/no):"); - String input = scanner.nextLine(); - - if (input.equalsIgnoreCase("yes")) { - printWriter.println("Continuing..."); - return true; - } else if (input.equalsIgnoreCase("no")) { - printWriter.println("Aborting..."); - return false; - } else { - printWriter.println("Invalid input. 
Please enter 'yes' or 'no'."); - } - } - } - } - private static String catalogImpl(CatalogType type, String customCatalogImpl) { switch (type) { case CUSTOM: diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java new file mode 100644 index 0000000..561c873 --- /dev/null +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java @@ -0,0 +1,545 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.Scanner; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Predicate; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.apache.iceberg.BaseTable; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CatalogMigrator { + private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrator.class); + + static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; + static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; + static final String DRY_RUN_FILE = "dry_run_identifiers.txt"; + + private CatalogMigrator() {} + + /** + * Migrates tables from one catalog(source catalog) to another catalog(target catalog). After + * successful migration, deletes the table entry from source catalog(not applicable for + * HadoopCatalog). + * + *

Users must make sure that no in-progress commits on the tables of source catalog during + * migration. + * + * @param tableIdentifiers a list of {@link TableIdentifier} for the tables required to be + * migrated. If not specified, all the tables would be migrated + * @param sourceCatalog Source {@link Catalog} from which the tables are chosen + * @param targetCatalog Target {@link Catalog} to which the tables need to be migrated + * @param identifierRegex regular expression pattern used to migrate only the tables whose + * identifiers match this pattern. Can be provided instead of `tableIdentifiers`. + * @param isDryRun to execute as dry run. + * @param printWriter to print regular updates on the console. + * @param outputDirPath optional path to store the result files. If null, uses present working + * directory. + * @return List of successfully migrated and list of failed to migrate table identifiers. + */ + public static CatalogMigrationResult migrateTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + String identifierRegex, + boolean isDryRun, + PrintWriter printWriter, + String outputDirPath) { + return registerTables( + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifierRegex, + isDryRun, + printWriter, + outputDirPath, + true); + } + + /** + * Register tables from one catalog(source catalog) to another catalog(target catalog). User has + * to take care of deleting the tables from source catalog after registration. + * + *

Users must make sure that no in-progress commits on the tables of source catalog during + * registration. + * + * @param tableIdentifiers a list of {@link TableIdentifier} for the tables required to be + * registered. If not specified, all the tables would be registered + * @param sourceCatalog Source {@link Catalog} from which the tables are chosen + * @param targetCatalog Target {@link Catalog} to which the tables need to be registered + * @param identifierRegex regular expression pattern used to migrate only the tables whose + * identifiers match this pattern. Can be provided instead of `tableIdentifiers`. + * @param isDryRun to execute as dry run. + * @param printWriter to print regular updates on the console. + * @param outputDirPath optional path to store the result files. If null, uses present working + * directory. + * @return List of successfully registered and list of failed to register table identifiers. + */ + public static CatalogMigrationResult registerTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + String identifierRegex, + boolean isDryRun, + PrintWriter printWriter, + String outputDirPath) { + return registerTables( + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifierRegex, + isDryRun, + printWriter, + outputDirPath, + false); + } + + private static CatalogMigrationResult registerTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + String identifierRegex, + boolean isDryRun, + PrintWriter printWriter, + String outputDirPath, + boolean deleteEntriesFromSourceCatalog) { + validate(sourceCatalog, targetCatalog); + Preconditions.checkArgument(printWriter != null, "printWriter is null"); + + if (identifierRegex != null && tableIdentifiers != null && !tableIdentifiers.isEmpty()) { + throw new IllegalArgumentException( + "Both the identifiers list and identifierRegex is configured."); + } + + if (!isDryRun) { + if (deleteEntriesFromSourceCatalog) { + if 
(!proceedForRegistration(printWriter)) { + return CatalogMigrator.CatalogMigrationResult.empty(); + } + } else { + if (!proceedForMigration(printWriter)) { + return CatalogMigrator.CatalogMigrationResult.empty(); + } + } + } + + String operation = deleteEntriesFromSourceCatalog ? "migration" : "registration"; + + List identifiers; + if (tableIdentifiers == null || tableIdentifiers.isEmpty()) { + identifiers = getMatchingTableIdentifiers(sourceCatalog, identifierRegex, printWriter); + } else { + identifiers = tableIdentifiers; + } + + printWriter.println( + String.format("\nIdentified %d tables for %s.", identifiers.size(), operation)); + + if (isDryRun) { + CatalogMigrationResult result = + new CatalogMigrationResult(identifiers, Collections.emptyList(), Collections.emptyList()); + printWriter.println("Dry run is completed."); + + writeToFile( + pathWithOutputDir(outputDirPath, DRY_RUN_FILE), result.registeredTableIdentifiers()); + printDryRunResults(printWriter, result, deleteEntriesFromSourceCatalog); + return result; + } + + if (deleteEntriesFromSourceCatalog && sourceCatalog instanceof HadoopCatalog) { + printWriter.println( + "[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " + + "catalog. \nAvoid operating the migrated tables from the source catalog after migration. " + + "Use the tables from target catalog."); + } + + printWriter.println(String.format("\nStarted %s ...", operation)); + List registeredTableIdentifiers = new ArrayList<>(); + List failedToRegisterTableIdentifiers = new ArrayList<>(); + List failedToDeleteTableIdentifiers = new ArrayList<>(); + AtomicInteger counter = new AtomicInteger(); + identifiers.forEach( + tableIdentifier -> { + boolean isRegistered = + registerTable( + sourceCatalog, + targetCatalog, + registeredTableIdentifiers, + failedToRegisterTableIdentifiers, + tableIdentifier); + + // HadoopCatalog dropTable will delete the table files completely even when purge is + // false. 
So, skip dropTable for HadoopCatalog. + boolean deleteTableFromSourceCatalog = + !(sourceCatalog instanceof HadoopCatalog) + && isRegistered + && deleteEntriesFromSourceCatalog; + try { + if (deleteTableFromSourceCatalog) { + boolean isDropped = sourceCatalog.dropTable(tableIdentifier, false); + if (!isDropped) { + failedToDeleteTableIdentifiers.add(tableIdentifier); + } + } + } catch (Exception exception) { + failedToDeleteTableIdentifiers.add(tableIdentifier); + LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); + } + + int count = counter.incrementAndGet(); + if (count % 100 == 0) { + printWriter.println( + String.format( + "\nAttempted %s for %d tables out of %d tables.", + operation, count, identifiers.size())); + } + }); + printWriter.println(String.format("\nFinished %s ...", operation)); + + CatalogMigrationResult result = + new CatalogMigrationResult( + registeredTableIdentifiers, + failedToRegisterTableIdentifiers, + failedToDeleteTableIdentifiers); + + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + writeToFile( + pathWithOutputDir(outputDirPath, FAILED_IDENTIFIERS_FILE), + result.failedToRegisterTableIdentifiers()); + } + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + writeToFile( + pathWithOutputDir(outputDirPath, FAILED_TO_DELETE_AT_SOURCE_FILE), + result.failedToDeleteTableIdentifiers()); + } + + printSummary( + printWriter, + result, + deleteEntriesFromSourceCatalog, + sourceCatalog.name(), + targetCatalog.name()); + + printDetails(printWriter, result, deleteEntriesFromSourceCatalog); + + return result; + } + + private static boolean registerTable( + Catalog sourceCatalog, + Catalog targetCatalog, + List registeredTableIdentifiers, + List failedToMigrateTableIdentifiers, + TableIdentifier tableIdentifier) { + try { + // register the table to the target catalog + TableOperations ops = ((BaseTable) sourceCatalog.loadTable(tableIdentifier)).operations(); + 
targetCatalog.registerTable(tableIdentifier, ops.current().metadataFileLocation()); + + registeredTableIdentifiers.add(tableIdentifier); + LOG.info("Successfully migrated the table {}", tableIdentifier); + return true; + } catch (Exception ex) { + failedToMigrateTableIdentifiers.add(tableIdentifier); + LOG.warn("Unable to register the table {}", tableIdentifier, ex); + return false; + } + } + + private static List getMatchingTableIdentifiers( + Catalog sourceCatalog, String identifierRegex, PrintWriter printWriter) { + if (identifierRegex == null) { + printWriter.println( + "\nUser has not specified the table identifiers." + + " Selecting all the tables from all the namespaces from the source catalog."); + } else { + printWriter.println( + "\nUser has not specified the table identifiers." + + " Selecting all the tables from all the namespaces from the source catalog " + + "which matches the regex pattern:" + + identifierRegex); + } + + printWriter.println("Collecting all the namespaces from source catalog..."); + // fetch all the table identifiers from all the namespaces. + List namespaces = + (sourceCatalog instanceof SupportsNamespaces) + ? 
((SupportsNamespaces) sourceCatalog).listNamespaces() + : ImmutableList.of(Namespace.empty()); + if (identifierRegex == null) { + printWriter.println("Collecting all the tables from all the namespaces of source catalog..."); + } else { + printWriter.println( + "Collecting all the tables from all the namespaces of source catalog" + + " which matches the regex pattern:" + + identifierRegex); + } + + Predicate matchedIdentifiersPredicate; + if (identifierRegex != null) { + Pattern pattern = Pattern.compile(identifierRegex); + matchedIdentifiersPredicate = + tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); + } else { + matchedIdentifiersPredicate = tableIdentifier -> true; + } + return getMatchingTableIdentifiers(sourceCatalog, namespaces, matchedIdentifiersPredicate); + } + + private static boolean proceedForRegistration(PrintWriter printWriter) { + String warning = + "\n[WARNING]\n" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "\n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + + "\n\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog\n" + + "\n" + + "\tb) After the registration, successfully registered tables will be present in both source and target " + + "catalog. " + + "\n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " + + "loss of data, and table corruption. " + + "\n\tUse `--delete-source-tables` option to automatically delete the table from source catalog after " + + "migration."; + return proceed(warning, printWriter); + } + + private static boolean proceedForMigration(PrintWriter printWriter) { + String warning = + "\n[WARNING]\n" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "\n\tcan lead to a data loss as the in-progress commit will not be considered for migration. 
" + + "\n\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog\n" + + "\n" + + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + + "\n\tand can only be accessed from the target catalog."; + return proceed(warning, printWriter); + } + + private static boolean proceed(String warning, PrintWriter printWriter) { + try (Scanner scanner = new Scanner(System.in)) { + printWriter.println(warning); + + while (true) { + printWriter.println( + "Have you read the above warnings and are you sure you want to continue? (yes/no):"); + String input = scanner.nextLine(); + + if (input.equalsIgnoreCase("yes")) { + printWriter.println("Continuing..."); + return true; + } else if (input.equalsIgnoreCase("no")) { + printWriter.println("Aborting..."); + return false; + } else { + printWriter.println("Invalid input. Please enter 'yes' or 'no'."); + } + } + } + } + + private static String pathWithOutputDir(String outputDirPath, String fileName) { + if (outputDirPath == null) { + return fileName; + } + if (outputDirPath.endsWith("/")) { + return outputDirPath + fileName; + } + return outputDirPath + "/" + fileName; + } + + private static List getMatchingTableIdentifiers( + Catalog sourceCatalog, + List namespaces, + Predicate matchedIdentifiersPredicate) { + List allIdentifiers = new ArrayList<>(); + namespaces.stream() + .filter(Objects::nonNull) + .forEach( + namespace -> { + List matchedIdentifiers = + sourceCatalog.listTables(namespace).stream() + .filter(matchedIdentifiersPredicate) + .collect(Collectors.toList()); + allIdentifiers.addAll(matchedIdentifiers); + }); + return allIdentifiers; + } + + private static void validate(Catalog sourceCatalog, Catalog targetCatalog) { + Preconditions.checkArgument(sourceCatalog != null, "Invalid source catalog: null"); + Preconditions.checkArgument(targetCatalog != null, "Invalid target catalog: null"); + Preconditions.checkArgument( + 
!targetCatalog.equals(sourceCatalog), "target catalog is same as source catalog"); + } + + private static void printSummary( + PrintWriter printWriter, + CatalogMigrator.CatalogMigrationResult result, + boolean deleteSourceCatalogTables, + String sourceCatalogType, + String targetCatalogType) { + printWriter.println("\nSummary: "); + if (!result.registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Successfully %s %d tables from %s catalog to %s catalog.", + deleteSourceCatalogTables ? "migrated" : "registered", + result.registeredTableIdentifiers().size(), + sourceCatalogType, + targetCatalogType)); + } + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to %s %d tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "\nFailed identifiers are written into `%s`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + deleteSourceCatalogTables ? "migrate" : "register", + result.failedToRegisterTableIdentifiers().size(), + sourceCatalogType, + targetCatalogType, + FAILED_IDENTIFIERS_FILE)); + } + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to delete %d tables from %s catalog. " + + "Please check the `catalog_migration.log` file for the reason. " + + "\nFailed to delete identifiers are written into `%s`. 
", + result.failedToDeleteTableIdentifiers().size(), + sourceCatalogType, + FAILED_TO_DELETE_AT_SOURCE_FILE)); + } + } + + private static void printDetails( + PrintWriter printWriter, + CatalogMigrator.CatalogMigrationResult result, + boolean deleteSourceCatalogTables) { + printWriter.println("\nDetails: "); + if (!result.registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Successfully %s these tables:", + deleteSourceCatalogTables ? "migrated" : "registered")); + printWriter.println(result.registeredTableIdentifiers()); + } + + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to %s these tables:", deleteSourceCatalogTables ? "migrate" : "register")); + printWriter.println(result.failedToRegisterTableIdentifiers()); + } + + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); + printWriter.println(result.failedToDeleteTableIdentifiers()); + } + } + + private static void printDryRunResults( + PrintWriter printWriter, + CatalogMigrator.CatalogMigrationResult result, + boolean deleteSourceCatalogTables) { + printWriter.println("\nSummary: "); + if (result.registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- No tables are identified for %s. Please check logs for more info.", + deleteSourceCatalogTables ? "migration" : "registration")); + return; + } + printWriter.println( + String.format( + "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. " + + "You can use this file with `--identifiers-from-file` option.", + result.registeredTableIdentifiers().size(), + deleteSourceCatalogTables ? "migration" : "registration", + DRY_RUN_FILE)); + + printWriter.println("\nDetails: "); + printWriter.println( + String.format( + "- Identified these tables for %s by dry-run:", + deleteSourceCatalogTables ? 
"migration" : "registration")); + printWriter.println(result.registeredTableIdentifiers()); + } + + private static void writeToFile(String filePath, List identifiers) { + List identifiersString = + identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); + try { + Files.write(Paths.get(filePath), identifiersString); + } catch (IOException e) { + throw new RuntimeException("Failed to write the file:" + filePath, e); + } + } + + public static class CatalogMigrationResult { + private final List registeredTableIdentifiers; + private final List failedToRegisterTableIdentifiers; + private final List failedToDeleteTableIdentifiers; + + CatalogMigrationResult( + List registeredTableIdentifiers, + List failedToRegisterTableIdentifiers, + List failedToDeleteTableIdentifiers) { + this.registeredTableIdentifiers = registeredTableIdentifiers; + this.failedToRegisterTableIdentifiers = failedToRegisterTableIdentifiers; + this.failedToDeleteTableIdentifiers = failedToDeleteTableIdentifiers; + } + + public List registeredTableIdentifiers() { + return registeredTableIdentifiers; + } + + public List failedToRegisterTableIdentifiers() { + return failedToRegisterTableIdentifiers; + } + + public List failedToDeleteTableIdentifiers() { + return failedToDeleteTableIdentifiers; + } + + public static CatalogMigrationResult empty() { + return new CatalogMigrationResult( + Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); + } + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java new file mode 100644 index 0000000..c154503 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java @@ -0,0 +1,572 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import com.google.common.collect.Lists; +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public abstract class AbstractCLIMigrationTest extends AbstractTest { + + protected static @TempDir File warehouse1; + + protected static @TempDir File warehouse2; + + protected static @TempDir File outputDir; + + protected static String dryRunFile; + protected static String failedIdentifiersFile; + + protected static String sourceCatalogProperties; + protected static String targetCatalogProperties; + + protected static String sourceCatalogType; + protected static String targetCatalogType; + + @BeforeEach + protected void beforeEach() { + createTables(); + } + + @AfterEach + protected void afterEach() { + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + // create table will call refresh internally. 
+ catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tblx"), schema).refresh(); + catalog2.createTable(TableIdentifier.of(Namespace.of("bar"), "tblx"), schema).refresh(); + + dropTables(); + deleteFileIfExists(dryRunFile); + deleteFileIfExists(failedIdentifiersFile); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegister(boolean deleteSourceTables) throws Exception { + RunCLI run = RunCLI.runWithContinue(registerAllTablesArgs(deleteSourceTables)); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 4 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n- Successfully %s 4 tables from %s catalog to %s catalog.", + operation, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + catalog2.loadTable(TableIdentifier.parse("foo.tbl1")).refresh(); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + } + + @Order(1) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterSelectedTables(boolean deleteSourceTables) 
throws Exception { + // using `--identifiers` option + RunCLI run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--identifiers", + "bar.tbl3", + "--output-dir", + outputDir.getAbsolutePath()); + + Assertions.assertThat(run.getOut()) + .doesNotContain( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", + operation, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Details: \n- Successfully %s these tables:\n" + "[bar.tbl3]", operation)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + catalog2.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactly(TableIdentifier.parse("bar.tbl3")); + + // using `--identifiers-from-file` option + Path identifierFile = Paths.get("ids.txt"); + Files.write(identifierFile, Collections.singletonList("bar.tbl4")); + run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + 
targetCatalogProperties, + "--identifiers-from-file", + "ids.txt", + "--output-dir", + outputDir.getAbsolutePath()); + Files.delete(identifierFile); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .doesNotContain( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", + operation, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + catalog2.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl4"), TableIdentifier.parse("bar.tbl3")); + + // using --identifiers-regex option which matches all the tables starts with "foo." 
+ run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--identifiers-regex", + "^foo\\..*", + "--output-dir", + outputDir.getAbsolutePath()); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. Selecting all the tables from all the namespaces " + + "from the source catalog which matches the regex pattern:^foo\\..*"); + Assertions.assertThat(run.getOut()) + .contains( + "Collecting all the tables from all the namespaces of source catalog " + + "which matches the regex pattern:^foo\\..*"); + operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 2 tables for %s.", operation)); + operation = deleteSourceTables ? 
"migrated" : "registered"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n- Successfully %s 2 tables from %s catalog to" + " %s catalog.", + operation, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + catalog2.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + } + + @Order(2) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterError(boolean deleteSourceTables) throws Exception { + // use invalid namespace which leads to NoSuchTableException + RunCLI run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--identifiers", + "dummy.tbl3", + "--output-dir", + outputDir.getAbsolutePath()); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)); + operation = deleteSourceTables ? "migrate" : "register"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n- Failed to %s 1 tables from %s catalog to %s catalog." 
+ + " Please check the `catalog_migration.log`", + operation, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains( + String.format("Details: \n- Failed to %s these tables:\n[dummy.tbl3]", operation)); + + // try to register same table twice which leads to AlreadyExistsException + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--identifiers", + "foo.tbl2", + "--output-dir", + outputDir.getAbsolutePath()); + run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--identifiers", + "foo.tbl2", + "--output-dir", + outputDir.getAbsolutePath()); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)); + operation = deleteSourceTables ? "migrate" : "register"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n- Failed to %s 1 tables from %s catalog to %s catalog." 
+ + " Please check the `catalog_migration.log`", + operation, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains(String.format("Details: \n- Failed to %s these tables:\n[foo.tbl2]", operation)); + } + + @Order(3) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Exception { + // register only foo.tbl2 + RunCLI run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--identifiers", + "foo.tbl2", + "--output-dir", + outputDir.getAbsolutePath()); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n- Successfully %s 1 tables from %s catalog to %s catalog.", + operation, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Details: \n" + "- Successfully %s these tables:\n" + "[foo.tbl2]", operation)); + + if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { + // create a table with the same name in source catalog which got deleted. 
+ catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + } + + // register all the tables from source catalog again + run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--output-dir", + outputDir.getAbsolutePath()); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 4 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + String ops = deleteSourceTables ? "migrate" : "register"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n" + + "- Successfully %s 3 tables from %s catalog to %s catalog.\n" + + "- Failed to %s 1 tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. \n" + + "Failed identifiers are written into `failed_identifiers.txt`. 
" + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + operation, + sourceCatalogType, + targetCatalogType, + ops, + sourceCatalogType, + targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + Assertions.assertThat(run.getOut()) + .contains(String.format("- Failed to %s these tables:\n[foo.tbl2]", ops)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + catalog2.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + + // retry the failed tables using --identifiers-from-file + run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--identifiers-from-file", + failedIdentifiersFile, + "--output-dir", + outputDir.getAbsolutePath()); + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n" + + "- Failed to %s 1 tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. \n" + + "Failed identifiers are written into `failed_identifiers.txt`. 
" + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + ops, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains( + String.format("Details: \n" + "- Failed to %s these tables:\n" + "[foo.tbl2]", ops)); + Assertions.assertThat(new File(failedIdentifiersFile).exists()).isTrue(); + Assertions.assertThat(Files.readAllLines(Paths.get(failedIdentifiersFile))) + .containsExactly("foo.tbl2"); + } + + @Order(4) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { + // source catalog is catalog2 which has no tables. + RunCLI run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + targetCatalogType, + "--source-catalog-properties", + targetCatalogProperties, + "--target-catalog-type", + sourceCatalogType, + "--target-catalog-properties", + sourceCatalogProperties, + "--output-dir", + outputDir.getAbsolutePath()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 0 tables for %s.", operation)); + } + + @Order(5) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testPrompt(boolean deleteSourceTables) throws Exception { + RunCLI run = RunCLI.runWithAbort(registerAllTablesArgs(deleteSourceTables)); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + // should abort + Assertions.assertThat(run.getOut()).contains("Aborting..."); + // should not have other messages + Assertions.assertThat(run.getOut()).doesNotContain("Summary"); + + run = RunCLI.runWithDummyInput(registerAllTablesArgs(deleteSourceTables)); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getOut()).contains("Invalid input. 
Please enter 'yes' or 'no'."); + + run = RunCLI.runWithContinue(registerAllTablesArgs(deleteSourceTables)); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + // should abort + Assertions.assertThat(run.getOut()).contains("Continuing..."); + Assertions.assertThat(run.getOut()).contains("Summary"); + } + + @Order(6) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testDryRun(boolean deleteSourceTables) throws Exception { + RunCLI run = + registerTablesCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--dry-run", + "--output-dir", + outputDir.getAbsolutePath()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + // should not prompt for dry run + Assertions.assertThat(run.getOut()) + .doesNotContain( + "Have you read the above warnings and are you sure you want to continue? (yes/no):"); + Assertions.assertThat(run.getOut()).contains("Dry run is completed."); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: \n" + + "- Identified 4 tables for %s by dry-run. " + + "These identifiers are also written into dry_run_identifiers.txt. 
" + + "You can use this file with `--identifiers-from-file` option.", + operation)); + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Details: \n" + "- Identified these tables for %s by dry-run:\n", operation)); + Assertions.assertThat(new File(dryRunFile).exists()).isTrue(); + Assertions.assertThat(Files.readAllLines(Paths.get(dryRunFile))) + .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); + } + + private static String[] registerAllTablesArgs(boolean deleteSourceTables) { + ArrayList args = + Lists.newArrayList( + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--output-dir", + outputDir.getAbsolutePath()); + if (deleteSourceTables) { + args.add("--delete-source-tables"); + } + return args.toArray(new String[0]); + } + + private static RunCLI registerTablesCLI(boolean deleteSourceTables, String... args) + throws Exception { + if (!deleteSourceTables) { + return RunCLI.runWithContinue(args); + } + List argsList = Lists.newArrayList(args); + argsList.add("--delete-source-tables"); + return RunCLI.runWithContinue(argsList.toArray(new String[0])); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java new file mode 100644 index 0000000..15de31c --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import static org.apache.iceberg.types.Types.NestedField.required; + +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.Schema; +import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; +import org.apache.iceberg.aws.glue.GlueCatalog; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.dell.ecs.EcsCatalog; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.hive.HiveCatalog; +import org.apache.iceberg.jdbc.JdbcCatalog; +import org.apache.iceberg.nessie.NessieCatalog; +import org.apache.iceberg.rest.RESTCatalog; +import org.apache.iceberg.types.Types; + +public abstract class AbstractTest { + + protected static Catalog catalog1; + + protected static Catalog catalog2; + protected static final Schema schema = + new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); + + protected static void createTables() { + // two tables in 'foo' namespace + catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl1"), schema); + catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + // two tables in 'bar' namespace + 
catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl3"), schema); + catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); + } + + protected static void createNamespaces() { + ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("foo"), Collections.emptyMap()); + ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("bar"), Collections.emptyMap()); + + ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("foo"), Collections.emptyMap()); + ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("bar"), Collections.emptyMap()); + } + + protected static void dropNamespaces() { + ((SupportsNamespaces) catalog1).dropNamespace(Namespace.of("foo")); + ((SupportsNamespaces) catalog1).dropNamespace(Namespace.of("bar")); + + ((SupportsNamespaces) catalog2).dropNamespace(Namespace.of("foo")); + ((SupportsNamespaces) catalog2).dropNamespace(Namespace.of("bar")); + } + + protected static void deleteFileIfExists(String filePath) { + File file = new File(filePath); + if (file.exists()) { + file.delete(); + } + } + + protected static String catalogType(Catalog catalog) { + if (catalog instanceof DynamoDbCatalog) { + return CatalogMigrationCLI.CatalogType.DYNAMODB.name(); + } else if (catalog instanceof EcsCatalog) { + return CatalogMigrationCLI.CatalogType.ECS.name(); + } else if (catalog instanceof GlueCatalog) { + return CatalogMigrationCLI.CatalogType.GLUE.name(); + } else if (catalog instanceof HadoopCatalog) { + return CatalogMigrationCLI.CatalogType.HADOOP.name(); + } else if (catalog instanceof HiveCatalog) { + return CatalogMigrationCLI.CatalogType.HIVE.name(); + } else if (catalog instanceof JdbcCatalog) { + return CatalogMigrationCLI.CatalogType.JDBC.name(); + } else if (catalog instanceof NessieCatalog) { + return CatalogMigrationCLI.CatalogType.NESSIE.name(); + } else if (catalog instanceof RESTCatalog) { + return CatalogMigrationCLI.CatalogType.REST.name(); + } else { + return 
CatalogMigrationCLI.CatalogType.CUSTOM.name(); + } + } + + protected static Catalog createHadoopCatalog(String warehousePath, String name) { + Map properties = new HashMap<>(); + properties.put("warehouse", warehousePath); + properties.put("type", "hadoop"); + return CatalogUtil.loadCatalog( + HadoopCatalog.class.getName(), name, properties, new Configuration()); + } + + protected static Catalog createNessieCatalog(String warehousePath, String uri) { + Map properties = new HashMap<>(); + properties.put("warehouse", warehousePath); + properties.put("ref", "main"); + properties.put("uri", uri); + return CatalogUtil.loadCatalog( + NessieCatalog.class.getName(), "nessie", properties, new Configuration()); + } + + protected static void dropTables() { + Arrays.asList(Namespace.of("foo"), Namespace.of("bar")) + .forEach( + namespace -> { + catalog1.listTables(namespace).forEach(catalog1::dropTable); + catalog2.listTables(namespace).forEach(catalog2::dropTable); + }); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java new file mode 100644 index 0000000..85d881a --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java @@ -0,0 +1,672 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import static com.github.stefanbirkner.systemlambda.SystemLambda.withTextFromSystemIn; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.List; +import java.util.stream.IntStream; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public abstract class AbstractTestCatalogMigrator extends AbstractTest { + + protected static @TempDir File warehouse1; + + protected static @TempDir File warehouse2; + + protected static @TempDir File outputDir; + + protected static String dryRunFile; + protected static String failedIdentifiersFile; + + private static StringWriter stringWriter; + private static PrintWriter printWriter; + + @BeforeEach + protected void beforeEach() { + createTables(); + + stringWriter = new StringWriter(); + printWriter = new PrintWriter(stringWriter); + } + + @AfterEach + protected void afterEach() throws IOException { + dropTables(); + deleteFileIfExists(dryRunFile); + deleteFileIfExists(failedIdentifiersFile); + stringWriter.close(); + printWriter.close(); + } + + @Order(0) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegister(boolean deleteSourceTables) throws Exception { + + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result; + result = registerAllTables(deleteSourceTables); + + 
Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), + TableIdentifier.parse("foo.tbl2"), + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + + String output = stringWriter.toString(); + Assertions.assertThat(output) + .contains( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(output).contains(String.format("Identified 4 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + Assertions.assertThat(output) + .contains( + String.format( + "Summary: \n- Successfully %s 4 tables from %s catalog to" + " %s catalog.", + operation, catalog1.name(), catalog2.name())); + Assertions.assertThat(output) + .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + } + + @Order(1) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterSelectedTables(boolean deleteSourceTables) throws Exception { + // using `--identifiers` option + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = + registerTables( + Collections.singletonList(TableIdentifier.parse("bar.tbl3")), + catalog1, + catalog2, + null, + false, + printWriter, + outputDir.getAbsolutePath(), + 
deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactly(TableIdentifier.parse("bar.tbl3")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + + String output = stringWriter.toString(); + Assertions.assertThat(output) + .doesNotContain( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog."); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + Assertions.assertThat(output) + .contains( + String.format( + "Summary: \n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", + operation, catalog1.name(), catalog2.name())); + Assertions.assertThat(output) + .contains( + String.format( + "Details: \n- Successfully %s these tables:\n" + "[bar.tbl3]", operation)); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactly(TableIdentifier.parse("bar.tbl3")); + + // using --identifiers-regex option which matches all the tables starts with "foo." 
+ withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = + registerTables( + null, + catalog1, + catalog2, + "^foo\\..*", + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + + output = stringWriter.toString(); + Assertions.assertThat(output) + .contains( + "User has not specified the table identifiers. Selecting all the tables from all the namespaces " + + "from the source catalog which matches the regex pattern:^foo\\..*"); + Assertions.assertThat(output) + .contains( + "Collecting all the tables from all the namespaces of source catalog " + + "which matches the regex pattern:^foo\\..*"); + operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(output).contains(String.format("Identified 2 tables for %s.", operation)); + operation = deleteSourceTables ? 
"migrated" : "registered"; + Assertions.assertThat(output) + .contains( + String.format( + "Summary: \n- Successfully %s 2 tables from %s catalog to" + " %s catalog.", + operation, catalog1.name(), catalog2.name())); + Assertions.assertThat(output) + .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactly(TableIdentifier.parse("bar.tbl3")); + } + + @Order(2) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterError(boolean deleteSourceTables) throws Exception { + // use invalid namespace which leads to NoSuchTableException + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = + registerTables( + Collections.singletonList(TableIdentifier.parse("dummy.tbl3")), + catalog1, + catalog2, + null, + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .containsExactly(TableIdentifier.parse("dummy.tbl3")); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + + String output = stringWriter.toString(); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); + operation = deleteSourceTables ? "migrate" : "register"; + Assertions.assertThat(output) + .contains( + String.format( + "Summary: \n- Failed to %s 1 tables from %s catalog to %s catalog." 
+ + " Please check the `catalog_migration.log`", + operation, catalog1.name(), catalog2.name())); + Assertions.assertThat(output) + .contains( + String.format("Details: \n- Failed to %s these tables:\n[dummy.tbl3]", operation)); + + // try to register same table twice which leads to AlreadyExistsException + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = + registerTables( + Collections.singletonList(TableIdentifier.parse("foo.tbl2")), + catalog1, + catalog2, + null, + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactly(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = + registerTables( + Collections.singletonList(TableIdentifier.parse("foo.tbl2")), + catalog1, + catalog2, + null, + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .contains(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + output = stringWriter.toString(); + operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); + operation = deleteSourceTables ? "migrate" : "register"; + Assertions.assertThat(output) + .contains( + String.format( + "Summary: \n- Failed to %s 1 tables from %s catalog to %s catalog." 
+ + " Please check the `catalog_migration.log`", + operation, catalog1.name(), catalog2.name())); + Assertions.assertThat(output) + .contains(String.format("Details: \n- Failed to %s these tables:\n[foo.tbl2]", operation)); + } + + @Order(3) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Exception { + // register only foo.tbl2 + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = + registerTables( + Collections.singletonList(TableIdentifier.parse("foo.tbl2")), + catalog1, + catalog2, + null, + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactly(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + String output = stringWriter.toString(); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + Assertions.assertThat(output) + .contains( + String.format( + "Summary: \n- Successfully %s 1 tables from %s catalog to %s catalog.", + operation, catalog1.name(), catalog2.name())); + Assertions.assertThat(output) + .contains( + String.format( + "Details: \n" + "- Successfully %s these tables:\n" + "[foo.tbl2]", operation)); + + if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { + // create a table with the same name in source catalog which got deleted. 
+ catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + } + + // register all the tables from source catalog again + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = registerAllTables(deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .contains(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + + output = stringWriter.toString(); + operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(output).contains(String.format("Identified 4 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + String ops = deleteSourceTables ? "migrate" : "register"; + Assertions.assertThat(output) + .contains( + String.format( + "Summary: \n" + + "- Successfully %s 3 tables from %s catalog to %s catalog.\n" + + "- Failed to %s 1 tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. \n" + + "Failed identifiers are written into `failed_identifiers.txt`. 
" + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + operation, + catalog1.name(), + catalog2.name(), + ops, + catalog1.name(), + catalog2.name())); + Assertions.assertThat(output) + .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + Assertions.assertThat(output) + .contains(String.format("- Failed to %s these tables:\n[foo.tbl2]", ops)); + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + } + + @Order(4) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { + // source catalog is catalog2 which has no tables. + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = + registerTables( + null, + catalog2, + catalog1, + null, + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + + String output = stringWriter.toString(); + String operation = deleteSourceTables ? 
"migration" : "registration"; + Assertions.assertThat(output).contains(String.format("Identified 0 tables for %s.", operation)); + } + + @Order(5) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testPrompt(boolean deleteSourceTables) throws Exception { + withTextFromSystemIn("no") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = registerAllTables(deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + String output = stringWriter.toString(); + // should abort + Assertions.assertThat(output).contains("Aborting..."); + // should not have other messages + Assertions.assertThat(output).doesNotContain("Summary"); + + withTextFromSystemIn("dummy", "yes").execute(() -> registerAllTables(deleteSourceTables)); + output = stringWriter.toString(); + Assertions.assertThat(output).contains("Invalid input. 
Please enter 'yes' or 'no'."); + Assertions.assertThat(output).contains("Continuing..."); + } + + @Order(6) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testDryRun(boolean deleteSourceTables) throws Exception { + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result = + registerTables( + null, + catalog1, + catalog2, + null, + true, // enable dry-run + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), + TableIdentifier.parse("foo.tbl2"), + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + + String output = stringWriter.toString(); + // should not prompt for dry run + Assertions.assertThat(output) + .doesNotContain( + "Have you read the above warnings and are you sure you want to continue? (yes/no):"); + Assertions.assertThat(output).contains("Dry run is completed."); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(output) + .contains( + String.format( + "Summary: \n" + + "- Identified 4 tables for %s by dry-run. " + + "These identifiers are also written into dry_run_identifiers.txt. 
" + + "You can use this file with `--identifiers-from-file` option.", + operation)); + Assertions.assertThat(output) + .contains( + String.format( + "Details: \n" + "- Identified these tables for %s by dry-run:\n", operation)); + Assertions.assertThat(new File(dryRunFile).exists()).isTrue(); + Assertions.assertThat(Files.readAllLines(Paths.get(dryRunFile))) + .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); + } + + @Order(7) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testInvalidInputs(boolean deleteSourceTables) throws Exception { + withTextFromSystemIn("yes") + .execute( + () -> + Assertions.assertThatThrownBy( + () -> + registerTables( + null, + catalog1, + null, // target-catalog is null + null, + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Invalid target catalog: null")); + + withTextFromSystemIn("yes") + .execute( + () -> + Assertions.assertThatThrownBy( + () -> + registerTables( + null, + null, // source-catalog is null + catalog2, + null, + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Invalid source catalog: null")); + + withTextFromSystemIn("yes") + .execute( + () -> + Assertions.assertThatThrownBy( + () -> + registerTables( + null, + catalog2, // source-catalog is same as target catalog + catalog2, + null, + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("target catalog is same as source catalog")); + + withTextFromSystemIn("yes") + .execute( + () -> + Assertions.assertThatThrownBy( + () -> + registerTables( + Collections.singletonList(TableIdentifier.parse("foo.abc")), + catalog1, + catalog2, + ".*", // both the identifiers and regex is configured. 
+ false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining( + "Both the identifiers list and identifierRegex is configured.")); + + withTextFromSystemIn("yes") + .execute( + () -> + Assertions.assertThatThrownBy( + () -> + registerTables( + Collections.singletonList(TableIdentifier.parse("foo.abc")), + catalog1, + catalog2, + null, + false, + null, // printWriter is null. + outputDir.getAbsolutePath(), + deleteSourceTables)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("printWriter is null")); + } + + @Order(8) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + // additionally create 240 tables along with 4 tables created in beforeEach() + IntStream.range(0, 240) + .forEach( + val -> + catalog1.createTable( + TableIdentifier.of(Namespace.of("foo"), "tblx" + val), schema)); + + withTextFromSystemIn("yes") + .execute( + () -> { + CatalogMigrator.CatalogMigrationResult result; + result = registerAllTables(deleteSourceTables); + + Assertions.assertThat(result.registeredTableIdentifiers()).hasSize(244); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + }); + + String operation = deleteSourceTables ? "migration" : "registration"; + String output = stringWriter.toString(); + Assertions.assertThat(output) + .contains(String.format("Identified 244 tables for %s.", operation)); + operation = deleteSourceTables ? 
"migrated" : "registered"; + Assertions.assertThat(output) + .contains( + String.format( + "Summary: \n- Successfully %s 244 tables from %s catalog to" + " %s catalog.", + operation, catalog1.name(), catalog2.name())); + Assertions.assertThat(output) + .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + + operation = deleteSourceTables ? "migration" : "registration"; + // validate intermediate output + Assertions.assertThat(output) + .contains(String.format("Attempted %s for 100 tables out of 244 tables.", operation)); + Assertions.assertThat(output) + .contains(String.format("Attempted %s for 200 tables out of 244 tables.", operation)); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).hasSize(242); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + } + + private CatalogMigrator.CatalogMigrationResult registerAllTables(boolean deleteSourceTables) { + return registerTables( + null, + catalog1, + catalog2, + null, + false, + printWriter, + outputDir.getAbsolutePath(), + deleteSourceTables); + } + + private static CatalogMigrator.CatalogMigrationResult registerTables( + List tableIdentifiers, + Catalog sourceCatalog, + Catalog targetCatalog, + String identifierRegex, + boolean isDryRun, + PrintWriter printWriter, + String outputDirPath, + boolean deleteSourceTables) { + if (deleteSourceTables) { + return CatalogMigrator.migrateTables( + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifierRegex, + isDryRun, + printWriter, + outputDirPath); + } + return CatalogMigrator.registerTables( + tableIdentifiers, + sourceCatalog, + targetCatalog, + identifierRegex, + isDryRun, + printWriter, + outputDirPath); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java deleted file 
mode 100644 index 4558e0a..0000000 --- a/src/test/java/org/projectnessie/tools/catalog/migration/CLIErrorsTest.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.projectnessie.tools.catalog.migration; - -import static java.util.Collections.singletonList; -import static org.junit.jupiter.params.provider.Arguments.arguments; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.stream.Stream; -import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.Order; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class CLIErrorsTest { - - static Stream optionErrors() { - return Stream.of( - // no arguments - arguments( - Collections.emptyList(), - "Missing required options: '--source-catalog-type=', " - + "'--source-catalog-properties=', '--target-catalog-type=', " - + "'--target-catalog-properties='"), - // missing required arguments - arguments( - singletonList(""), - "Missing required options: '--source-catalog-type=', " - + "'--source-catalog-properties=', '--target-catalog-type=', " - + "'--target-catalog-properties='"), - // missing required arguments - arguments( - Arrays.asList("--source-catalog-type", "GLUE"), - "Missing required options: 
'--source-catalog-properties=', " - + "'--target-catalog-type=', '--target-catalog-properties='"), - // missing required arguments - arguments( - Arrays.asList( - "--source-catalog-type", - "HIVE", - "--source-catalog-properties", - "properties1=ab", - "--target-catalog-type", - "NESSIE"), - "Missing required option: '--target-catalog-properties='"), - // missing required arguments - arguments( - Arrays.asList( - "--source-catalog-type", - "HIVE", - "--source-catalog-properties", - "properties1=ab", - "--target-catalog-properties", - "properties2=cd"), - "Missing required option: '--target-catalog-type='")); - } - - @ParameterizedTest - @MethodSource("optionErrors") - @Order(0) - public void testOptionErrors(List args, String expectedMessage) throws Exception { - RunCLI run = RunCLI.run(args); - - Assertions.assertThat(run.getExitCode()).isEqualTo(2); - Assertions.assertThat(run.getErr()).contains(expectedMessage); - } - - @Test - @Order(1) - public void testInvalidArgs() throws Exception { - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "k1=v1,k2=v2", - "--target-catalog-type", - "HIVE", - "--target-catalog-properties", - "k3=v3, k4=v4"); - Assertions.assertThat(run.getExitCode()).isEqualTo(1); - Assertions.assertThat(run.getErr()) - .contains( - "java.lang.IllegalArgumentException: Cannot initialize HadoopCatalog " - + "because warehousePath must not be null or empty"); - - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "k1=v1,k2=v2", - "--target-catalog-type", - "HIVE", - "--target-catalog-properties", - "k3=v3, k4=v4", - "--identifiers", - "foo.tbl", - "--identifiers-from-file", - "file.txt", - "--identifiers-regex", - "^foo\\."); - Assertions.assertThat(run.getExitCode()).isEqualTo(1); - Assertions.assertThat(run.getErr()) - .contains( - "java.lang.IllegalArgumentException: All the three identifier options (`--identifiers`, " - + "`--identifiers-from-file`, 
`--identifiers-regex`) are configured. Please use only one of them."); - - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "k1=v1,k2=v2", - "--target-catalog-type", - "HIVE", - "--target-catalog-properties", - "k3=v3, k4=v4", - "--identifiers-from-file", - "file.txt"); - Assertions.assertThat(run.getExitCode()).isEqualTo(1); - Assertions.assertThat(run.getErr()) - .contains( - "java.lang.IllegalArgumentException: " - + "File specified in `--identifiers-from-file` option does not exist."); - - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "k1=v1,k2=v2", - "--target-catalog-type", - "HIVE", - "--target-catalog-properties", - "k3=v3, k4=v4", - "--identifiers", - "foo.tbl", - "--identifiers-from-file", - "file.txt"); - Assertions.assertThat(run.getExitCode()).isEqualTo(1); - Assertions.assertThat(run.getErr()) - .contains( - "java.lang.IllegalArgumentException: Both `--identifiers` and `--identifiers-from-file` " - + "options are configured. Please use only one of them."); - - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "k1=v1,k2=v2", - "--target-catalog-type", - "HIVE", - "--target-catalog-properties", - "k3=v3, k4=v4", - "--identifiers-regex", - "^foo\\.", - "--identifiers-from-file", - "file.txt"); - Assertions.assertThat(run.getExitCode()).isEqualTo(1); - Assertions.assertThat(run.getErr()) - .contains( - "java.lang.IllegalArgumentException: Both `--identifiers-regex` " - + "and `--identifiers-from-file` options are configured. 
Please use only one of them."); - - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "k1=v1,k2=v2", - "--target-catalog-type", - "HIVE", - "--target-catalog-properties", - "k3=v3, k4=v4", - "--identifiers", - "foo.tbl", - "--identifiers-regex", - "^foo\\."); - Assertions.assertThat(run.getExitCode()).isEqualTo(1); - Assertions.assertThat(run.getErr()) - .contains( - "java.lang.IllegalArgumentException: Both `--identifiers-regex` and " - + "`--identifiers` options are configured. Please use only one of them."); - } -} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java index 4344c62..4315269 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java @@ -15,511 +15,184 @@ */ package org.projectnessie.tools.catalog.migration; -import static org.apache.iceberg.types.Types.NestedField.required; -import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_IDENTIFIERS_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_TO_DELETE_AT_SOURCE_FILE; +import static java.util.Collections.singletonList; +import static org.junit.jupiter.params.provider.Arguments.arguments; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import org.apache.hadoop.conf.Configuration; -import org.apache.iceberg.CatalogUtil; -import org.apache.iceberg.Schema; -import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.TableIdentifier; -import 
org.apache.iceberg.hadoop.HadoopCatalog; -import org.apache.iceberg.types.Types; +import java.util.List; +import java.util.stream.Stream; import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; public class CLITest { - private static @TempDir File warehouse1; - private static String warehousePath1; - - private static @TempDir File warehouse2; - private static String warehousePath2; - - private static Catalog catalog1; - - private static Catalog catalog2; - - private static final Schema schema = - new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); - - @BeforeAll - protected static void setup() { - warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); - warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - - catalog1 = createCatalog(warehousePath1, "catalog1"); - ((HadoopCatalog) catalog1).createNamespace(Namespace.of("foo"), Collections.emptyMap()); - ((HadoopCatalog) catalog1).createNamespace(Namespace.of("bar"), Collections.emptyMap()); - - catalog2 = createCatalog(warehousePath2, "catalog2"); - ((HadoopCatalog) catalog2).createNamespace(Namespace.of("foo"), Collections.emptyMap()); - ((HadoopCatalog) catalog2).createNamespace(Namespace.of("bar"), Collections.emptyMap()); - } - - @BeforeEach - protected void beforeEach() { - // two tables in 'foo' namespace - catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl-1"), schema); - catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl-2"), schema); - // two tables in 'bar' namespace - catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), 
"tbl-3"), schema); - catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl-4"), schema); - - // to handle the user prompt - respondAsContinue(); - } - - private static void respondAsContinue() { - String input = "yes\n"; - ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); - System.setIn(in); - } - - @AfterEach - protected void afterEach() { - Arrays.asList(Namespace.of("foo"), Namespace.of("bar")) - .forEach( - namespace -> { - catalog1.listTables(namespace).forEach(catalog1::dropTable); - catalog2.listTables(namespace).forEach(catalog2::dropTable); - }); - TestUtil.deleteFileIfExists(FAILED_IDENTIFIERS_FILE); - TestUtil.deleteFileIfExists(FAILED_TO_DELETE_AT_SOURCE_FILE); - TestUtil.deleteFileIfExists(DRY_RUN_FILE); + private static Stream optionErrors() { + return Stream.of( + // no arguments + arguments( + Collections.emptyList(), + "Missing required options: '--source-catalog-type=', " + + "'--source-catalog-properties=', '--target-catalog-type=', " + + "'--target-catalog-properties='"), + // missing required arguments + arguments( + singletonList(""), + "Missing required options: '--source-catalog-type=', " + + "'--source-catalog-properties=', '--target-catalog-type=', " + + "'--target-catalog-properties='"), + // missing required arguments + arguments( + Arrays.asList("--source-catalog-type", "GLUE"), + "Missing required options: '--source-catalog-properties=', " + + "'--target-catalog-type=', '--target-catalog-properties='"), + // missing required arguments + arguments( + Arrays.asList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "properties1=ab", + "--target-catalog-type", + "NESSIE"), + "Missing required option: '--target-catalog-properties='"), + // missing required arguments + arguments( + Arrays.asList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "properties1=ab", + "--target-catalog-properties", + "properties2=cd"), + "Missing required option: 
'--target-catalog-type='")); } - private static Catalog createCatalog(String warehousePath, String name) { - Map properties = new HashMap<>(); - properties.put("warehouse", warehousePath); - properties.put("type", "hadoop"); - return CatalogUtil.loadCatalog( - HadoopCatalog.class.getName(), name, properties, new Configuration()); - } - - @Test + @ParameterizedTest + @MethodSource("optionErrors") @Order(0) - public void testRegister() throws Exception { - RunCLI run = runWithDefaultArgs(); + public void testOptionErrors(List args, String expectedMessage) throws Exception { + RunCLI run = RunCLI.run(args); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()) - .contains( - "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - Assertions.assertThat(run.getOut()).contains("Identified 4 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully registered 4 tables from HADOOP catalog to" - + " HADOOP catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Successfully registered these tables:\n"); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl-1"), TableIdentifier.parse("foo.tbl-2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl-3"), TableIdentifier.parse("bar.tbl-4")); + Assertions.assertThat(run.getExitCode()).isEqualTo(2); + Assertions.assertThat(run.getErr()).contains(expectedMessage); } - @Test - @Order(1) - public void testMigrate() throws Exception { - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + 
",type=hadoop", - "--delete-source-tables"); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - // note that keywords in output is "migrate" instead of "register". - // If the catalog was not hadoop catalog, tables also should get deleted from the source catalog - // after migration. - Assertions.assertThat(run.getOut()) - .contains( - "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - Assertions.assertThat(run.getOut()).contains("Identified 4 tables for migration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully migrated 4 tables from HADOOP catalog to" - + " HADOOP catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Successfully migrated these tables:\n"); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl-1"), TableIdentifier.parse("foo.tbl-2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl-3"), TableIdentifier.parse("bar.tbl-4")); + private static Stream invalidArgs() { + return Stream.of( + arguments( + Arrays.asList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4"), + "java.lang.IllegalArgumentException: Cannot initialize HadoopCatalog " + + "because warehousePath must not be null or empty"), + arguments( + Arrays.asList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers", + "foo.tbl", + "--identifiers-from-file", + "file.txt", + "--identifiers-regex", + "^foo\\."), + "java.lang.IllegalArgumentException: All the three identifier options (`--identifiers`, " + + 
"`--identifiers-from-file`, `--identifiers-regex`) are configured. Please use only one of them."), + arguments( + Arrays.asList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers-from-file", + "file.txt"), + "java.lang.IllegalArgumentException: " + + "File specified in `--identifiers-from-file` option does not exist."), + arguments( + Arrays.asList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers", + "foo.tbl", + "--identifiers-from-file", + "file.txt"), + "java.lang.IllegalArgumentException: Both `--identifiers` and `--identifiers-from-file` " + + "options are configured. Please use only one of them."), + arguments( + Arrays.asList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers-regex", + "^foo\\.", + "--identifiers-from-file", + "file.txt"), + "java.lang.IllegalArgumentException: Both `--identifiers-regex` " + + "and `--identifiers-from-file` options are configured. Please use only one of them."), + arguments( + Arrays.asList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers", + "foo.tbl", + "--identifiers-regex", + "^foo\\."), + "java.lang.IllegalArgumentException: Both `--identifiers-regex` and " + + "`--identifiers` options are configured. 
Please use only one of them.")); } - @Test - @Order(2) - public void testRegisterSelectedTables() throws Exception { - // using `--identifiers` option - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--identifiers", - "bar.tbl-3"); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()) - .doesNotContain( - "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - - Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully registered 1 tables from HADOOP catalog to" - + " HADOOP catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n- Successfully registered these tables:\n" + "[bar.tbl-3]"); - - // using `--identifiers-from-file` option - respondAsContinue(); - Path identifierFile = Paths.get("ids.txt"); - Files.write(identifierFile, Collections.singletonList("bar.tbl-4")); - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--identifiers-from-file", - "ids.txt"); - Files.delete(identifierFile); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()).contains("Collecting identifiers from the file ids.txt..."); - - Assertions.assertThat(run.getOut()) - .doesNotContain( - "User has not specified the table identifiers. 
" - + "Selecting all the tables from all the namespaces from the source catalog."); - - Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully registered 1 tables from HADOOP catalog to" - + " HADOOP catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n- Successfully registered these tables:\n" + "[bar.tbl-4]"); - - // using --identifiers-regex option which matches all the tables starts with "foo." - respondAsContinue(); - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--identifiers-regex", - "^foo\\..*"); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()) - .contains( - "User has not specified the table identifiers. 
Selecting all the tables from all the namespaces " - + "from the source catalog which matches the regex pattern:^foo\\..*"); - - Assertions.assertThat(run.getOut()) - .contains( - "Collecting all the tables from all the namespaces of source catalog " - + "which matches the regex pattern:^foo\\..*"); - - Assertions.assertThat(run.getOut()).contains("Identified 2 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully registered 2 tables from HADOOP catalog to" - + " HADOOP catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Successfully registered these tables:\n"); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl-1"), TableIdentifier.parse("foo.tbl-2")); - } - - @Test - @Order(3) - public void testRegisterError() throws Exception { - // use invalid namespace which leads to NoSuchTableException - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--identifiers", - "dummy.tbl-3"); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Failed to register 1 tables from HADOOP catalog to HADOOP catalog." 
- + " Please check the `catalog_migration.log`"); - Assertions.assertThat(run.getOut()) - .contains("Details: \n- Failed to register these tables:\n[dummy.tbl-3]"); - - // try to register same table twice which leads to AlreadyExistsException - respondAsContinue(); - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--identifiers", - "foo.tbl-2"); - respondAsContinue(); - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--identifiers", - "foo.tbl-2"); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Failed to register 1 tables from HADOOP catalog to HADOOP catalog." 
- + " Please check the `catalog_migration.log`"); - Assertions.assertThat(run.getOut()) - .contains("Details: \n- Failed to register these tables:\n[foo.tbl-2]"); - } - - @Test - @Order(4) - public void testRegisterWithFewFailures() throws Exception { - // register only foo.tbl-2 - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--identifiers", - "foo.tbl-2"); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()).contains("Identified 1 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully registered 1 tables from HADOOP catalog to HADOOP catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Successfully registered these tables:\n" + "[foo.tbl-2]"); - - // register all the tables from source catalog again - respondAsContinue(); - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop"); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()).contains("Identified 4 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n" - + "- Successfully registered 3 tables from HADOOP catalog to HADOOP catalog.\n" - + "- Failed to register 1 tables from HADOOP catalog to HADOOP catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. \n" - + "Failed identifiers are written into `failed_identifiers.txt`. 
" - + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Successfully registered these tables:\n"); - Assertions.assertThat(run.getOut()).contains("- Failed to register these tables:\n[foo.tbl-2]"); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl-1"), TableIdentifier.parse("foo.tbl-2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl-3"), TableIdentifier.parse("bar.tbl-4")); - - // retry the failed tables using --identifiers-from-file - respondAsContinue(); - run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--identifiers-from-file", - FAILED_IDENTIFIERS_FILE); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n" - + "- Failed to register 1 tables from HADOOP catalog to HADOOP catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. \n" - + "Failed identifiers are written into `failed_identifiers.txt`. " - + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Failed to register these tables:\n" + "[foo.tbl-2]"); - Assertions.assertThat(new File(FAILED_IDENTIFIERS_FILE).exists()).isTrue(); - Assertions.assertThat(Files.readAllLines(Paths.get(FAILED_IDENTIFIERS_FILE))) - .containsExactly("foo.tbl-2"); - } - - @Test - @Order(5) - public void testRegisterNoTables() throws Exception { - // source catalog is catalog2 which has no tables. 
- RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop"); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()).contains("Identified 0 tables for registration."); - } - - @Test - @Order(6) - public void testPrompt() throws Exception { - TestUtil.respondAsAbort(); - RunCLI run = runWithDefaultArgs(); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - // should abort - Assertions.assertThat(run.getOut()).contains("Aborting..."); - // should not have other messages - Assertions.assertThat(run.getOut()).doesNotContain("Summary"); + @ParameterizedTest + @Order(1) + @MethodSource("invalidArgs") + public void testInvalidArgs(List args, String expectedMessage) throws Exception { + RunCLI run = RunCLI.run(args); - TestUtil.respondDummy(); - run = runWithDefaultArgs(); Assertions.assertThat(run.getExitCode()).isEqualTo(1); - Assertions.assertThat(run.getOut()).contains("Invalid input. 
Please enter 'yes' or 'no'."); - - respondAsContinue(); - run = runWithDefaultArgs(); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - // should abort - Assertions.assertThat(run.getOut()).contains("Continuing..."); - Assertions.assertThat(run.getOut()).contains("Summary"); + Assertions.assertThat(run.getErr()).contains(expectedMessage); } @Test - @Order(7) - public void testDryRun() throws Exception { - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--dry-run"); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - // should not prompt for dry run - Assertions.assertThat(run.getOut()) - .doesNotContain( - "Have you read the above warnings and are you sure you want to continue? (yes/no):"); - Assertions.assertThat(run.getOut()).contains("Dry run is completed."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n" - + "- Identified 4 tables for registration by dry-run. " - + "These identifiers are also written into dry_run_identifiers.txt. 
" - + "You can use this file with `--identifiers-from-file` option."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Identified these tables for registration by dry-run:\n"); - Assertions.assertThat(new File(DRY_RUN_FILE).exists()).isTrue(); - Assertions.assertThat(Files.readAllLines(Paths.get(DRY_RUN_FILE))) - .containsExactlyInAnyOrder("foo.tbl-1", "foo.tbl-2", "bar.tbl-3", "bar.tbl-4"); - } - - @Test - @Order(9) + @Order(2) public void version() throws Exception { RunCLI run = RunCLI.run("--version"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()).startsWith(System.getProperty("expectedCLIVersion")); } - - private static RunCLI runWithDefaultArgs() throws Exception { - return RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",type=hadoop", - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop"); - } } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java new file mode 100644 index 0000000..2c93a37 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +public class HadoopCLIMigrationTest extends AbstractCLIMigrationTest { + + @BeforeAll + protected static void setup() { + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + sourceCatalogProperties = "warehouse=" + warehousePath1 + ",type=hadoop"; + targetCatalogProperties = "warehouse=" + warehousePath2 + ",type=hadoop"; + + catalog1 = createHadoopCatalog(warehousePath1, "catalog1"); + catalog2 = createHadoopCatalog(warehousePath2, "catalog2"); + + sourceCatalogType = catalogType(catalog1); + targetCatalogType = catalogType(catalog2); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() { + dropNamespaces(); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java new file mode 100644 index 0000000..90e7815 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +public class HadoopCatalogMigratorTest extends AbstractTestCatalogMigrator { + + @BeforeAll + protected static void setup() { + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + + catalog1 = createHadoopCatalog(warehousePath1, "catalog1"); + catalog2 = createHadoopCatalog(warehousePath2, "catalog2"); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() { + dropNamespaces(); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/HiveMetaStoreRunner.java b/src/test/java/org/projectnessie/tools/catalog/migration/HiveMetaStoreRunner.java new file mode 100644 index 0000000..a68c935 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/HiveMetaStoreRunner.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import org.apache.iceberg.hive.HiveCatalog; +import org.apache.iceberg.hive.HiveMetastoreTest; + +public class HiveMetaStoreRunner extends HiveMetastoreTest { + + // Expose the catalog for tests + public static HiveCatalog hiveCatalog() { + return catalog; + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java new file mode 100644 index 0000000..3ee8903 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +public class ITHadoopToHiveCLIMigrationTest extends AbstractCLIMigrationTest { + + @BeforeAll + protected static void setup() throws Exception { + HiveMetaStoreRunner.startMetastore(); + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + sourceCatalogProperties = "warehouse=" + warehousePath1 + ",type=hadoop"; + targetCatalogProperties = + "warehouse=" + + warehousePath2 + + ",uri=" + + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); + + catalog1 = createHadoopCatalog(warehousePath1, "catalog1"); + catalog2 = HiveMetaStoreRunner.hiveCatalog(); + + sourceCatalogType = catalogType(catalog1); + targetCatalogType = catalogType(catalog2); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java new file mode 100644 index 0000000..df4cee2 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; + +public class ITHadoopToHiveCatalogMigrator extends AbstractTestCatalogMigrator { + + @BeforeAll + protected static void setup() throws Exception { + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + + HiveMetaStoreRunner.startMetastore(); + + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + + catalog1 = createHadoopCatalog(warehousePath2, "hadoop"); + catalog2 = HiveMetaStoreRunner.hiveCatalog(); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } + + // disable large table test for IT to save CI time. It will be executed only for UT. 
+ @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java deleted file mode 100644 index f95778f..0000000 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndHadoop.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.projectnessie.tools.catalog.migration; - -import static org.apache.iceberg.types.Types.NestedField.required; -import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_IDENTIFIERS_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_TO_DELETE_AT_SOURCE_FILE; - -import java.io.File; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import org.apache.hadoop.conf.Configuration; -import org.apache.iceberg.CatalogUtil; -import org.apache.iceberg.Schema; -import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.SupportsNamespaces; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.hadoop.HadoopCatalog; -import org.apache.iceberg.hive.HiveMetastoreTest; -import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Order; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class ITHiveAndHadoop extends HiveMetastoreTest { - - private static String warehousePath1; - - private static @TempDir File warehouse2; - private static String warehousePath2; - - private static Catalog catalog1; - - private static Catalog catalog2; - - private static final Schema schema = - new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); - - @BeforeAll - protected static void setup() throws Exception { - startMetastore(); - warehousePath1 = catalog.getConf().get("hive.metastore.warehouse.dir"); - warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - - catalog1 = 
createHadoopCatalog(warehousePath2, "catalog1"); - ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("foo"), Collections.emptyMap()); - ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("bar"), Collections.emptyMap()); - - // assign to hive catalog from the parent class - catalog2 = catalog; - ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("foo"), Collections.emptyMap()); - ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("bar"), Collections.emptyMap()); - } - - @AfterAll - protected static void tearDown() throws Exception { - stopMetastore(); - } - - @BeforeEach - protected void beforeEach() { - // two tables in 'foo' namespace - catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl1"), schema); - catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); - // two tables in 'bar' namespace - catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl3"), schema); - catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); - - // one table in catalog2 - catalog2.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl5"), schema); - } - - @AfterEach - protected void afterEach() { - Arrays.asList(Namespace.of("foo"), Namespace.of("bar")) - .forEach( - namespace -> { - catalog1.listTables(namespace).forEach(catalog1::dropTable); - catalog2.listTables(namespace).forEach(catalog2::dropTable); - }); - TestUtil.deleteFileIfExists(FAILED_IDENTIFIERS_FILE); - TestUtil.deleteFileIfExists(FAILED_TO_DELETE_AT_SOURCE_FILE); - TestUtil.deleteFileIfExists(DRY_RUN_FILE); - } - - private static Catalog createHadoopCatalog(String warehousePath, String name) { - Map properties = new HashMap<>(); - properties.put("warehouse", warehousePath); - properties.put("type", "hadoop"); - return CatalogUtil.loadCatalog( - HadoopCatalog.class.getName(), name, properties, new Configuration()); - } - - @Test - @Order(0) - public void testRegister() throws Exception { - 
TestUtil.respondAsContinue(); - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HADOOP", - "--source-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--target-catalog-type", - "HIVE", - "--target-catalog-properties", - "warehouse=" + warehousePath1 + ",uri=" + catalog.getConf().get("hive.metastore.uris")); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()) - .contains( - "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - Assertions.assertThat(run.getOut()).contains("Identified 4 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully registered 4 tables from HADOOP catalog to" - + " HIVE catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Successfully registered these tables:\n"); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4"), - TableIdentifier.parse("bar.tbl5")); - } - - @Test - @Order(1) - public void testMigrate() throws Exception { - TestUtil.respondAsContinue(); - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HIVE", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",uri=" + catalog.getConf().get("hive.metastore.uris"), - "--target-catalog-type", - "HADOOP", - "--target-catalog-properties", - "warehouse=" + warehousePath2 + ",type=hadoop", - "--delete-source-tables"); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()) - .contains( - "User has not specified the table identifiers. 
" - + "Selecting all the tables from all the namespaces from the source catalog."); - Assertions.assertThat(run.getOut()).contains("Identified 1 tables for migration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully migrated 1 tables from HIVE catalog to" - + " HADOOP catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Successfully migrated these tables:\n"); - // migrated table should be present in the target catalog - Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl5"), // tbl5 is the migrated table - TableIdentifier.parse("bar.tbl4"), - TableIdentifier.parse("bar.tbl3")); - - // migrated table should not be there in the source catalog - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))).isEmpty(); - } -} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java deleted file mode 100644 index f2e6475..0000000 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveAndNessie.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.projectnessie.tools.catalog.migration; - -import static org.apache.iceberg.types.Types.NestedField.required; -import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_IDENTIFIERS_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrationCLI.FAILED_TO_DELETE_AT_SOURCE_FILE; - -import java.io.File; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import org.apache.hadoop.conf.Configuration; -import org.apache.iceberg.CatalogUtil; -import org.apache.iceberg.Schema; -import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.SupportsNamespaces; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.hive.HiveMetastoreTest; -import org.apache.iceberg.nessie.NessieCatalog; -import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Order; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; -import org.testcontainers.containers.GenericContainer; - -public class ITHiveAndNessie extends HiveMetastoreTest { - - private static String warehousePath1; - - private static @TempDir File warehouse2; - private static String warehousePath2; - - private static Catalog catalog1; - - private static Catalog catalog2; - - private static final Schema schema = - new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); - - private static final String IMAGE = "projectnessie/nessie:0.47.1"; - private static final int NESSIE_PORT = 19121; - - private static String nessieUri; - - private static GenericContainer container; - - @BeforeAll 
- protected static void setup() throws Exception { - startMetastore(); - warehousePath1 = catalog.getConf().get("hive.metastore.warehouse.dir"); - warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - - container = - new GenericContainer(IMAGE) - .withExposedPorts(NESSIE_PORT) - .withEnv("QUARKUS_HTTP_PORT", String.valueOf(NESSIE_PORT)); - - container.start(); - - nessieUri = - String.format( - "http://%s:%s/api/v1", container.getHost(), container.getMappedPort(NESSIE_PORT)); - - // assign to hive catalog from the parent class - catalog1 = catalog; - ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("foo"), Collections.emptyMap()); - ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("bar"), Collections.emptyMap()); - - catalog2 = createNessieCatalog(warehousePath2, nessieUri); - ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("foo"), Collections.emptyMap()); - ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("bar"), Collections.emptyMap()); - } - - @AfterAll - protected static void tearDown() throws Exception { - stopMetastore(); - container.stop(); - } - - @BeforeEach - protected void beforeEach() { - // two tables in 'foo' namespace - catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl1"), schema); - catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); - // two tables in 'bar' namespace - catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl3"), schema); - catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); - - // one table in catalog2 - catalog2.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl5"), schema); - } - - @AfterEach - protected void afterEach() { - Arrays.asList(Namespace.of("foo"), Namespace.of("bar")) - .forEach( - namespace -> { - catalog1.listTables(namespace).forEach(catalog1::dropTable); - catalog2.listTables(namespace).forEach(catalog2::dropTable); - }); - 
TestUtil.deleteFileIfExists(FAILED_IDENTIFIERS_FILE); - TestUtil.deleteFileIfExists(FAILED_TO_DELETE_AT_SOURCE_FILE); - TestUtil.deleteFileIfExists(DRY_RUN_FILE); - } - - private static Catalog createNessieCatalog(String warehousePath, String uri) { - Map properties = new HashMap<>(); - properties.put("warehouse", warehousePath); - properties.put("ref", "main"); - properties.put("uri", uri); - return CatalogUtil.loadCatalog( - NessieCatalog.class.getName(), "nessie", properties, new Configuration()); - } - - @Test - @Order(0) - public void testRegister() throws Exception { - TestUtil.respondAsContinue(); - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "HIVE", - "--source-catalog-properties", - "warehouse=" + warehousePath1 + ",uri=" + catalog.getConf().get("hive.metastore.uris"), - "--target-catalog-type", - "NESSIE", - "--target-catalog-properties", - "uri=" + nessieUri + ",ref=main,warehouse=" + warehousePath2); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()) - .contains( - "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - Assertions.assertThat(run.getOut()).contains("Identified 4 tables for registration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully registered 4 tables from HIVE catalog to" - + " NESSIE catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Successfully registered these tables:\n"); - // using the fresh instance of nessie catalog at client side to get the latest state of main - // branch. 
- catalog2 = createNessieCatalog(warehousePath2, nessieUri); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4"), - TableIdentifier.parse("bar.tbl5")); - } - - @Test - @Order(1) - public void testMigrate() throws Exception { - TestUtil.respondAsContinue(); - RunCLI run = - RunCLI.run( - "--source-catalog-type", - "NESSIE", - "--source-catalog-properties", - "uri=" + nessieUri + ",ref=main,warehouse=" + warehousePath2, - "--target-catalog-type", - "HIVE", - "--target-catalog-properties", - "warehouse=" + warehousePath1 + ",uri=" + catalog.getConf().get("hive.metastore.uris"), - "--delete-source-tables"); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - Assertions.assertThat(run.getOut()) - .contains( - "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - Assertions.assertThat(run.getOut()).contains("Identified 1 tables for migration."); - Assertions.assertThat(run.getOut()) - .contains( - "Summary: \n- Successfully migrated 1 tables from NESSIE catalog to" - + " HIVE catalog."); - Assertions.assertThat(run.getOut()) - .contains("Details: \n" + "- Successfully migrated these tables:\n"); - // migrated table should be present in the target catalog - Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl5"), // tbl5 is the migrated table - TableIdentifier.parse("bar.tbl4"), - TableIdentifier.parse("bar.tbl3")); - - // migrated table should not be there in the source catalog - // using the fresh instance of nessie catalog at client side to get the latest state of main - // branch. 
- catalog2 = createNessieCatalog(warehousePath2, nessieUri); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))).isEmpty(); - } -} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java new file mode 100644 index 0000000..c16be16 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +public class ITHiveToHadoopCLIMigrationTest extends AbstractCLIMigrationTest { + + @BeforeAll + protected static void setup() throws Exception { + HiveMetaStoreRunner.startMetastore(); + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + sourceCatalogProperties = + "warehouse=" + + warehousePath1 + + ",uri=" + + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); + targetCatalogProperties = "warehouse=" + warehousePath2 + ",type=hadoop"; + + catalog1 = HiveMetaStoreRunner.hiveCatalog(); + catalog2 = createHadoopCatalog(warehousePath2, "hadoop"); + + sourceCatalogType = catalogType(catalog1); + targetCatalogType = catalogType(catalog2); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java new file mode 100644 index 0000000..630d6cb --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; + +public class ITHiveToHadoopCatalogMigrator extends AbstractTestCatalogMigrator { + + @BeforeAll + protected static void setup() throws Exception { + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + + HiveMetaStoreRunner.startMetastore(); + + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + + catalog1 = HiveMetaStoreRunner.hiveCatalog(); + catalog2 = createHadoopCatalog(warehousePath2, "hadoop"); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } + + // disable large table test for IT to save CI time. It will be executed only for UT. 
+ @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java new file mode 100644 index 0000000..94ae58c --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +public class ITHiveToNessieCLIMigrationTest extends AbstractCLIMigrationTest { + + protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); + + protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); + + @BeforeAll + protected static void setup() throws Exception { + HiveMetaStoreRunner.startMetastore(); + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + sourceCatalogProperties = + "warehouse=" + + warehousePath1 + + ",uri=" + + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); + targetCatalogProperties = "uri=" + nessieUri + ",ref=main,warehouse=" + warehousePath2; + + catalog1 = HiveMetaStoreRunner.hiveCatalog(); + catalog2 = createNessieCatalog(warehousePath2, nessieUri); + + sourceCatalogType = catalogType(catalog1); + targetCatalogType = catalogType(catalog2); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java new file mode 100644 index 0000000..f10a239 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java @@ -0,0 
+1,58 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; + +public class ITHiveToNessieCatalogMigrator extends AbstractTestCatalogMigrator { + + protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); + + protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); + + @BeforeAll + protected static void setup() throws Exception { + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + + HiveMetaStoreRunner.startMetastore(); + + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + + catalog1 = HiveMetaStoreRunner.hiveCatalog(); + catalog2 = createNessieCatalog(warehousePath2, nessieUri); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } + + // disable large table test for IT to save CI time. It will be executed only for UT. 
+ @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java new file mode 100644 index 0000000..56b37ab --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +public class ITNessieToHiveCLIMigrationTest extends AbstractCLIMigrationTest { + + protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); + + protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); + + @BeforeAll + protected static void setup() throws Exception { + HiveMetaStoreRunner.startMetastore(); + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + sourceCatalogProperties = "uri=" + nessieUri + ",ref=main,warehouse=" + warehousePath1; + targetCatalogProperties = + "warehouse=" + + warehousePath2 + + ",uri=" + + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); + + catalog1 = createNessieCatalog(warehousePath1, nessieUri); + catalog2 = HiveMetaStoreRunner.hiveCatalog(); + + sourceCatalogType = catalogType(catalog1); + targetCatalogType = catalogType(catalog2); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java new file mode 100644 index 0000000..a5b9e56 --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java @@ -0,0 
+1,58 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; + +public class ITNessieToHiveCatalogMigrator extends AbstractTestCatalogMigrator { + + protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); + + protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); + + @BeforeAll + protected static void setup() throws Exception { + dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; + failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + + HiveMetaStoreRunner.startMetastore(); + + String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + + catalog1 = createNessieCatalog(warehousePath2, nessieUri); + catalog2 = HiveMetaStoreRunner.hiveCatalog(); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } + + // disable large table test for IT to save CI time. It will be executed only for UT. 
+ @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java b/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java index 4f1c272..606d06a 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java @@ -15,10 +15,13 @@ */ package org.projectnessie.tools.catalog.migration; +import static com.github.stefanbirkner.systemlambda.SystemLambda.withTextFromSystemIn; + import java.io.PrintWriter; import java.io.StringWriter; import java.util.Arrays; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; import picocli.CommandLine; /** Helper class for tests. */ @@ -74,6 +77,30 @@ public static RunCLI run(String... args) throws Exception { } } + static RunCLI runWithContinue(String... args) throws Exception { + return runWithSystemInput(args, "yes"); + } + + static RunCLI runWithAbort(String... args) throws Exception { + return runWithSystemInput(args, "no"); + } + + static RunCLI runWithDummyInput(String... 
args) throws Exception { + return runWithSystemInput(args, "dummy"); + } + + private static RunCLI runWithSystemInput(String[] args, String input) throws Exception { + try (StringWriter out = new StringWriter(); + PrintWriter outWriter = new PrintWriter(out); + StringWriter err = new StringWriter(); + PrintWriter errWriter = new PrintWriter(err)) { + + AtomicInteger exitCode = new AtomicInteger(); + withTextFromSystemIn(input).execute(() -> exitCode.set(runMain(outWriter, errWriter, args))); + return new RunCLI(exitCode.get(), out.toString(), err.toString(), args); + } + } + public int getExitCode() { return exitCode; } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java b/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java deleted file mode 100644 index 3a45516..0000000 --- a/src/test/java/org/projectnessie/tools/catalog/migration/TestUtil.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.projectnessie.tools.catalog.migration; - -import java.io.ByteArrayInputStream; -import java.io.File; - -public final class TestUtil { - - private TestUtil() {} - - static void respondAsContinue() { - String input = "yes\n"; - ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); - System.setIn(in); - } - - static void respondAsAbort() { - String input = "no\n"; - ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); - System.setIn(in); - } - - static void respondDummy() { - String input = "dummy\n"; - ByteArrayInputStream in = new ByteArrayInputStream(input.getBytes()); - System.setIn(in); - } - - static void deleteFileIfExists(String filePath) { - File file = new File(filePath); - if (file.exists()) { - file.delete(); - } - } -} From ecf02a05c6b4cd73d71d04264b826a3206fede4b Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Tue, 21 Feb 2023 07:01:37 +0530 Subject: [PATCH 10/31] Address comments --- build.gradle.kts | 7 +- gradle/libs.versions.toml | 6 +- .../migration/CatalogMigrationCLI.java | 47 +- .../migration/CatalogMigrationResult.java | 124 +++++ .../catalog/migration/CatalogMigrator.java | 398 +++----------- .../migration/CatalogMigratorParams.java | 74 +++ .../tools/catalog/migration/PromptUtil.java | 78 +++ src/main/resources/logback.xml | 4 +- .../migration/AbstractCLIMigrationTest.java | 131 ++--- .../tools/catalog/migration/AbstractTest.java | 20 +- .../AbstractTestCatalogMigrator.java | 517 +++++------------- .../tools/catalog/migration/CLITest.java | 10 + .../migration/CatalogMigratorParamsTest.java | 108 ++++ .../migration/HadoopCLIMigrationTest.java | 16 +- .../migration/HadoopCatalogMigratorTest.java | 10 +- .../ITHadoopToHiveCLIMigrationTest.java | 12 +- .../ITHadoopToHiveCatalogMigrator.java | 8 +- .../ITHiveToHadoopCLIMigrationTest.java | 12 +- .../ITHiveToHadoopCatalogMigrator.java | 8 +- .../ITHiveToNessieCLIMigrationTest.java | 13 +- .../ITHiveToNessieCatalogMigrator.java | 8 +- 
.../ITNessieToHiveCLIMigrationTest.java | 12 +- .../ITNessieToHiveCatalogMigrator.java | 8 +- .../tools/catalog/migration/RunCLI.java | 41 +- 24 files changed, 765 insertions(+), 907 deletions(-) create mode 100644 src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationResult.java create mode 100644 src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParams.java create mode 100644 src/main/java/org/projectnessie/tools/catalog/migration/PromptUtil.java create mode 100644 src/test/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParamsTest.java diff --git a/build.gradle.kts b/build.gradle.kts index cb083e6..c4209d8 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -40,11 +40,14 @@ dependencies { implementation(libs.hadoop.common) implementation(libs.aws.sdk) + annotationProcessor(libs.immutables) + compileOnly(libs.immutables) + testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) testImplementation(libs.assertj) - testImplementation(libs.system.lambda) + testImplementation(libs.mockito) // for integration tests testImplementation( @@ -83,7 +86,7 @@ dependencies { nessieQuarkusServer("org.projectnessie:nessie-quarkus:${libs.versions.nessie.get()}:runner") } -group = "org.projectnessie" +group = "org.projectnessie.tools.catalog.migration" version = file("version.txt").readText().trim() diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 64e8673..7e80f1d 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -6,8 +6,10 @@ guava = "31.1-jre" hadoop = "3.2.4" hive = "2.3.8" iceberg = "1.1.0" +immutables = "2.9.3" junit = "5.9.1" logback = "1.2.11" +mockito = "5.1.1" nessie = "0.48.2" nessieBuildPlugins = "0.2.14" nessieRunner = "0.28.1" @@ -15,7 +17,6 @@ picocli = "4.7.0" shadowPlugin = "7.1.2" slf4j = "1.7.36" spotlessPlugin = "6.12.0" -systemLambda = "1.2.1" [libraries] assertj = { module = 
"org.assertj:assertj-core", version.ref = "assertj" } @@ -26,15 +27,16 @@ hadoop-aws = { module = "org.apache.hadoop:hadoop-aws", version.ref = "hadoop" } hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" } iceberg-dell = { module = "org.apache.iceberg:iceberg-dell", version.ref = "iceberg" } iceberg-spark-runtime = { module = "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12", version.ref = "iceberg" } +immutables = { module = "org.immutables:value", version.ref = "immutables" } junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } logback-core = { module = "ch.qos.logback:logback-core", version.ref = "logback" } logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } +mockito = { module = "org.mockito:mockito-inline", version.ref = "mockito" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } -system-lambda = { module = "com.github.stefanbirkner:system-lambda", version.ref = "systemLambda" } [plugins] nessie-build-spotless = { id = "org.projectnessie.buildsupport.spotless", version.ref = "nessieBuildPlugins" } diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java index 2ff001f..ce3897e 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java @@ -49,7 +49,7 @@ // of sorted order. 
sortOptions = false, description = - "\nBulk register the iceberg tables from source catalog to target catalog without data copy.\n") + "Bulk register the iceberg tables from source catalog to target catalog without data copy.") public class CatalogMigrationCLI implements Callable { @CommandLine.Spec CommandLine.Model.CommandSpec commandSpec; @@ -180,7 +180,7 @@ public Integer call() { sourceCatalogType.name(), sourceCatalogProperties, sourceCatalogConf); - printWriter.println(String.format("\nConfigured source catalog: %s", sourceCatalogType.name())); + printWriter.println(String.format("%nConfigured source catalog: %s", sourceCatalogType.name())); Configuration targetCatalogConf = new Configuration(); if (targetHadoopConf != null && !targetHadoopConf.isEmpty()) { @@ -192,7 +192,7 @@ public Integer call() { targetCatalogType.name(), targetCatalogProperties, targetCatalogConf); - printWriter.println(String.format("\nConfigured target catalog: %s", targetCatalogType.name())); + printWriter.println(String.format("%nConfigured target catalog: %s", targetCatalogType.name())); List tableIdentifiers = null; if (identifiersFromFile != null) { @@ -212,25 +212,30 @@ public Integer call() { identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toList()); } - if (deleteSourceCatalogTables) { - CatalogMigrator.migrateTables( - tableIdentifiers, - sourceCatalog, - targetCatalog, - identifiersRegEx, - isDryRun, - printWriter, - outputDirPath); - } else { - CatalogMigrator.registerTables( - tableIdentifiers, - sourceCatalog, - targetCatalog, - identifiersRegEx, - isDryRun, - printWriter, - outputDirPath); + if (!isDryRun) { + if (deleteSourceCatalogTables) { + if (!PromptUtil.proceedForMigration(printWriter)) { + return 0; + } + } else { + if (!PromptUtil.proceedForRegistration(printWriter)) { + return 0; + } + } } + + ImmutableCatalogMigratorParams params = + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + 
.tableIdentifiers(tableIdentifiers) + .identifierRegex(identifiersRegEx) + .deleteEntriesFromSourceCatalog(deleteSourceCatalogTables) + .isDryRun(isDryRun) + .outputDirPath(outputDirPath) + .printWriter(printWriter) + .build(); + CatalogMigrator.registerTables(params); return 0; } diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationResult.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationResult.java new file mode 100644 index 0000000..3386194 --- /dev/null +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationResult.java @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_TO_DELETE_AT_SOURCE_FILE; + +import java.io.PrintWriter; +import java.util.List; +import org.apache.iceberg.catalog.TableIdentifier; +import org.immutables.value.Value; + +@Value.Immutable +public abstract class CatalogMigrationResult { + + abstract List registeredTableIdentifiers(); + + abstract List failedToRegisterTableIdentifiers(); + + abstract List failedToDeleteTableIdentifiers(); + + public void printSummary( + PrintWriter printWriter, + boolean deleteSourceCatalogTables, + String sourceCatalogType, + String targetCatalogType) { + printWriter.println(String.format("%nSummary: ")); + if (!registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Successfully %s %d tables from %s catalog to %s catalog.", + deleteSourceCatalogTables ? "migrated" : "registered", + registeredTableIdentifiers().size(), + sourceCatalogType, + targetCatalogType)); + } + if (!failedToRegisterTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to %s %d tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "%nFailed identifiers are written into `%s`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + deleteSourceCatalogTables ? "migrate" : "register", + failedToRegisterTableIdentifiers().size(), + sourceCatalogType, + targetCatalogType, + FAILED_IDENTIFIERS_FILE)); + } + if (!failedToDeleteTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to delete %d tables from %s catalog. 
" + + "Please check the `catalog_migration.log` file for the reason. " + + "%nFailed to delete identifiers are written into `%s`. ", + failedToDeleteTableIdentifiers().size(), + sourceCatalogType, + FAILED_TO_DELETE_AT_SOURCE_FILE)); + } + } + + public void printDetails(PrintWriter printWriter, boolean deleteSourceCatalogTables) { + printWriter.println(String.format("%nDetails: ")); + if (!registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Successfully %s these tables:", + deleteSourceCatalogTables ? "migrated" : "registered")); + printWriter.println(registeredTableIdentifiers()); + } + + if (!failedToRegisterTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to %s these tables:", deleteSourceCatalogTables ? "migrate" : "register")); + printWriter.println(failedToRegisterTableIdentifiers()); + } + + if (!failedToDeleteTableIdentifiers().isEmpty()) { + printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); + printWriter.println(failedToDeleteTableIdentifiers()); + } + } + + public void printDryRunResults(PrintWriter printWriter, boolean deleteSourceCatalogTables) { + printWriter.println(String.format("%nSummary: ")); + if (registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- No tables are identified for %s. Please check logs for more info.", + deleteSourceCatalogTables ? "migration" : "registration")); + return; + } + printWriter.println( + String.format( + "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. " + + "You can use this file with `--identifiers-from-file` option.", + registeredTableIdentifiers().size(), + deleteSourceCatalogTables ? "migration" : "registration", + DRY_RUN_FILE)); + + printWriter.println(String.format("%nDetails: ")); + printWriter.println( + String.format( + "- Identified these tables for %s by dry-run:", + deleteSourceCatalogTables ? 
"migration" : "registration")); + printWriter.println(registeredTableIdentifiers()); + } +} diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java index 561c873..de9a263 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java @@ -15,17 +15,16 @@ */ package org.projectnessie.tools.catalog.migration; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import java.io.IOException; import java.io.PrintWriter; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Objects; -import java.util.Scanner; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; import java.util.regex.Pattern; @@ -50,142 +49,62 @@ public class CatalogMigrator { private CatalogMigrator() {} /** - * Migrates tables from one catalog(source catalog) to another catalog(target catalog). After - * successful migration, deletes the table entry from source catalog(not applicable for - * HadoopCatalog). - * - *

Users must make sure that no in-progress commits on the tables of source catalog during - * migration. - * - * @param tableIdentifiers a list of {@link TableIdentifier} for the tables required to be - * migrated. If not specified, all the tables would be migrated - * @param sourceCatalog Source {@link Catalog} from which the tables are chosen - * @param targetCatalog Target {@link Catalog} to which the tables need to be migrated - * @param identifierRegex regular expression pattern used to migrate only the tables whose - * identifiers match this pattern. Can be provided instead of `tableIdentifiers`. - * @param isDryRun to execute as dry run. - * @param printWriter to print regular updates on the console. - * @param outputDirPath optional path to store the result files. If null, uses present working - * directory. - * @return List of successfully migrated and list of failed to migrate table identifiers. - */ - public static CatalogMigrationResult migrateTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - String identifierRegex, - boolean isDryRun, - PrintWriter printWriter, - String outputDirPath) { - return registerTables( - tableIdentifiers, - sourceCatalog, - targetCatalog, - identifierRegex, - isDryRun, - printWriter, - outputDirPath, - true); - } - - /** - * Register tables from one catalog(source catalog) to another catalog(target catalog). User has - * to take care of deleting the tables from source catalog after registration. + * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). * *

Users must make sure that no in-progress commits on the tables of source catalog during * registration. * - * @param tableIdentifiers a list of {@link TableIdentifier} for the tables required to be - * registered. If not specified, all the tables would be registered - * @param sourceCatalog Source {@link Catalog} from which the tables are chosen - * @param targetCatalog Target {@link Catalog} to which the tables need to be registered - * @param identifierRegex regular expression pattern used to migrate only the tables whose - * identifiers match this pattern. Can be provided instead of `tableIdentifiers`. - * @param isDryRun to execute as dry run. - * @param printWriter to print regular updates on the console. - * @param outputDirPath optional path to store the result files. If null, uses present working - * directory. - * @return List of successfully registered and list of failed to register table identifiers. + * @param catalogMigratorParams configuration params + * @return List of successfully registered/migrated and list of failed to register/migrate table + * identifiers. 
*/ - public static CatalogMigrationResult registerTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - String identifierRegex, - boolean isDryRun, - PrintWriter printWriter, - String outputDirPath) { - return registerTables( - tableIdentifiers, - sourceCatalog, - targetCatalog, - identifierRegex, - isDryRun, - printWriter, - outputDirPath, - false); - } - - private static CatalogMigrationResult registerTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - String identifierRegex, - boolean isDryRun, - PrintWriter printWriter, - String outputDirPath, - boolean deleteEntriesFromSourceCatalog) { - validate(sourceCatalog, targetCatalog); - Preconditions.checkArgument(printWriter != null, "printWriter is null"); - - if (identifierRegex != null && tableIdentifiers != null && !tableIdentifiers.isEmpty()) { - throw new IllegalArgumentException( - "Both the identifiers list and identifierRegex is configured."); - } - - if (!isDryRun) { - if (deleteEntriesFromSourceCatalog) { - if (!proceedForRegistration(printWriter)) { - return CatalogMigrator.CatalogMigrationResult.empty(); - } - } else { - if (!proceedForMigration(printWriter)) { - return CatalogMigrator.CatalogMigrationResult.empty(); - } - } - } + public static CatalogMigrationResult registerTables(CatalogMigratorParams catalogMigratorParams) { + PrintWriter printWriter = catalogMigratorParams.printWriter(); + boolean deleteEntriesFromSourceCatalog = catalogMigratorParams.deleteEntriesFromSourceCatalog(); String operation = deleteEntriesFromSourceCatalog ? 
"migration" : "registration"; List identifiers; - if (tableIdentifiers == null || tableIdentifiers.isEmpty()) { - identifiers = getMatchingTableIdentifiers(sourceCatalog, identifierRegex, printWriter); + if (catalogMigratorParams.tableIdentifiers() == null + || catalogMigratorParams.tableIdentifiers().isEmpty()) { + identifiers = + getMatchingTableIdentifiers( + catalogMigratorParams.sourceCatalog(), + catalogMigratorParams.identifierRegex(), + printWriter); } else { - identifiers = tableIdentifiers; + identifiers = catalogMigratorParams.tableIdentifiers(); } printWriter.println( - String.format("\nIdentified %d tables for %s.", identifiers.size(), operation)); + String.format("%nIdentified %d tables for %s.", identifiers.size(), operation)); - if (isDryRun) { + if (catalogMigratorParams.isDryRun()) { CatalogMigrationResult result = - new CatalogMigrationResult(identifiers, Collections.emptyList(), Collections.emptyList()); + ImmutableCatalogMigrationResult.builder() + .registeredTableIdentifiers(identifiers) + .failedToRegisterTableIdentifiers(Collections.emptyList()) + .failedToDeleteTableIdentifiers(Collections.emptyList()) + .build(); printWriter.println("Dry run is completed."); writeToFile( - pathWithOutputDir(outputDirPath, DRY_RUN_FILE), result.registeredTableIdentifiers()); - printDryRunResults(printWriter, result, deleteEntriesFromSourceCatalog); + pathWithOutputDir(catalogMigratorParams.outputDirPath(), DRY_RUN_FILE), + result.registeredTableIdentifiers()); + result.printDryRunResults(printWriter, deleteEntriesFromSourceCatalog); return result; } - if (deleteEntriesFromSourceCatalog && sourceCatalog instanceof HadoopCatalog) { + if (deleteEntriesFromSourceCatalog + && catalogMigratorParams.sourceCatalog() instanceof HadoopCatalog) { printWriter.println( - "[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " - + "catalog. \nAvoid operating the migrated tables from the source catalog after migration. 
" - + "Use the tables from target catalog."); + String.format( + "[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " + + "catalog. %nAvoid operating the migrated tables from the source catalog after migration. " + + "Use the tables from target catalog.")); } - printWriter.println(String.format("\nStarted %s ...", operation)); + printWriter.println(String.format("%nStarted %s ...", operation)); List registeredTableIdentifiers = new ArrayList<>(); List failedToRegisterTableIdentifiers = new ArrayList<>(); List failedToDeleteTableIdentifiers = new ArrayList<>(); @@ -194,8 +113,8 @@ private static CatalogMigrationResult registerTables( tableIdentifier -> { boolean isRegistered = registerTable( - sourceCatalog, - targetCatalog, + catalogMigratorParams.sourceCatalog(), + catalogMigratorParams.targetCatalog(), registeredTableIdentifiers, failedToRegisterTableIdentifiers, tableIdentifier); @@ -203,12 +122,13 @@ private static CatalogMigrationResult registerTables( // HadoopCatalog dropTable will delete the table files completely even when purge is // false. So, skip dropTable for HadoopCatalog. 
boolean deleteTableFromSourceCatalog = - !(sourceCatalog instanceof HadoopCatalog) + !(catalogMigratorParams.sourceCatalog() instanceof HadoopCatalog) && isRegistered && deleteEntriesFromSourceCatalog; try { if (deleteTableFromSourceCatalog) { - boolean isDropped = sourceCatalog.dropTable(tableIdentifier, false); + boolean isDropped = + catalogMigratorParams.sourceCatalog().dropTable(tableIdentifier, false); if (!isDropped) { failedToDeleteTableIdentifiers.add(tableIdentifier); } @@ -222,37 +142,37 @@ private static CatalogMigrationResult registerTables( if (count % 100 == 0) { printWriter.println( String.format( - "\nAttempted %s for %d tables out of %d tables.", + "%nAttempted %s for %d tables out of %d tables.", operation, count, identifiers.size())); } }); - printWriter.println(String.format("\nFinished %s ...", operation)); + printWriter.println(String.format("%nFinished %s ...", operation)); CatalogMigrationResult result = - new CatalogMigrationResult( - registeredTableIdentifiers, - failedToRegisterTableIdentifiers, - failedToDeleteTableIdentifiers); + ImmutableCatalogMigrationResult.builder() + .registeredTableIdentifiers(registeredTableIdentifiers) + .failedToRegisterTableIdentifiers(failedToRegisterTableIdentifiers) + .failedToDeleteTableIdentifiers(failedToDeleteTableIdentifiers) + .build(); if (!result.failedToRegisterTableIdentifiers().isEmpty()) { writeToFile( - pathWithOutputDir(outputDirPath, FAILED_IDENTIFIERS_FILE), + pathWithOutputDir(catalogMigratorParams.outputDirPath(), FAILED_IDENTIFIERS_FILE), result.failedToRegisterTableIdentifiers()); } if (!result.failedToDeleteTableIdentifiers().isEmpty()) { writeToFile( - pathWithOutputDir(outputDirPath, FAILED_TO_DELETE_AT_SOURCE_FILE), + pathWithOutputDir(catalogMigratorParams.outputDirPath(), FAILED_TO_DELETE_AT_SOURCE_FILE), result.failedToDeleteTableIdentifiers()); } - printSummary( + result.printSummary( printWriter, - result, deleteEntriesFromSourceCatalog, - sourceCatalog.name(), - 
targetCatalog.name()); + catalogMigratorParams.sourceCatalog().name(), + catalogMigratorParams.targetCatalog().name()); - printDetails(printWriter, result, deleteEntriesFromSourceCatalog); + result.printDetails(printWriter, deleteEntriesFromSourceCatalog); return result; } @@ -282,14 +202,16 @@ private static List getMatchingTableIdentifiers( Catalog sourceCatalog, String identifierRegex, PrintWriter printWriter) { if (identifierRegex == null) { printWriter.println( - "\nUser has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog."); + String.format( + "%nUser has not specified the table identifiers." + + " Selecting all the tables from all the namespaces from the source catalog.")); } else { printWriter.println( - "\nUser has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog " - + "which matches the regex pattern:" - + identifierRegex); + String.format( + "%nUser has not specified the table identifiers." + + " Selecting all the tables from all the namespaces from the source catalog " + + "which matches the regex pattern:" + + identifierRegex)); } printWriter.println("Collecting all the namespaces from source catalog..."); @@ -318,68 +240,6 @@ private static List getMatchingTableIdentifiers( return getMatchingTableIdentifiers(sourceCatalog, namespaces, matchedIdentifiersPredicate); } - private static boolean proceedForRegistration(PrintWriter printWriter) { - String warning = - "\n[WARNING]\n" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "\n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " - + "\n\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog\n" - + "\n" - + "\tb) After the registration, successfully registered tables will be present in both source and target " - + "catalog. 
" - + "\n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " - + "loss of data, and table corruption. " - + "\n\tUse `--delete-source-tables` option to automatically delete the table from source catalog after " - + "migration."; - return proceed(warning, printWriter); - } - - private static boolean proceedForMigration(PrintWriter printWriter) { - String warning = - "\n[WARNING]\n" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "\n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " - + "\n\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog\n" - + "\n" - + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " - + "\n\tand can only be accessed from the target catalog."; - return proceed(warning, printWriter); - } - - private static boolean proceed(String warning, PrintWriter printWriter) { - try (Scanner scanner = new Scanner(System.in)) { - printWriter.println(warning); - - while (true) { - printWriter.println( - "Have you read the above warnings and are you sure you want to continue? (yes/no):"); - String input = scanner.nextLine(); - - if (input.equalsIgnoreCase("yes")) { - printWriter.println("Continuing..."); - return true; - } else if (input.equalsIgnoreCase("no")) { - printWriter.println("Aborting..."); - return false; - } else { - printWriter.println("Invalid input. 
Please enter 'yes' or 'no'."); - } - } - } - } - - private static String pathWithOutputDir(String outputDirPath, String fileName) { - if (outputDirPath == null) { - return fileName; - } - if (outputDirPath.endsWith("/")) { - return outputDirPath + fileName; - } - return outputDirPath + "/" + fileName; - } - private static List getMatchingTableIdentifiers( Catalog sourceCatalog, List namespaces, @@ -398,148 +258,20 @@ private static List getMatchingTableIdentifiers( return allIdentifiers; } - private static void validate(Catalog sourceCatalog, Catalog targetCatalog) { - Preconditions.checkArgument(sourceCatalog != null, "Invalid source catalog: null"); - Preconditions.checkArgument(targetCatalog != null, "Invalid target catalog: null"); - Preconditions.checkArgument( - !targetCatalog.equals(sourceCatalog), "target catalog is same as source catalog"); - } - - private static void printSummary( - PrintWriter printWriter, - CatalogMigrator.CatalogMigrationResult result, - boolean deleteSourceCatalogTables, - String sourceCatalogType, - String targetCatalogType) { - printWriter.println("\nSummary: "); - if (!result.registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Successfully %s %d tables from %s catalog to %s catalog.", - deleteSourceCatalogTables ? "migrated" : "registered", - result.registeredTableIdentifiers().size(), - sourceCatalogType, - targetCatalogType)); - } - if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to %s %d tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. " - + "\nFailed identifiers are written into `%s`. " - + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts.", - deleteSourceCatalogTables ? 
"migrate" : "register", - result.failedToRegisterTableIdentifiers().size(), - sourceCatalogType, - targetCatalogType, - FAILED_IDENTIFIERS_FILE)); - } - if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to delete %d tables from %s catalog. " - + "Please check the `catalog_migration.log` file for the reason. " - + "\nFailed to delete identifiers are written into `%s`. ", - result.failedToDeleteTableIdentifiers().size(), - sourceCatalogType, - FAILED_TO_DELETE_AT_SOURCE_FILE)); - } - } - - private static void printDetails( - PrintWriter printWriter, - CatalogMigrator.CatalogMigrationResult result, - boolean deleteSourceCatalogTables) { - printWriter.println("\nDetails: "); - if (!result.registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Successfully %s these tables:", - deleteSourceCatalogTables ? "migrated" : "registered")); - printWriter.println(result.registeredTableIdentifiers()); - } - - if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to %s these tables:", deleteSourceCatalogTables ? 
"migrate" : "register")); - printWriter.println(result.failedToRegisterTableIdentifiers()); - } - - if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); - printWriter.println(result.failedToDeleteTableIdentifiers()); + private static Path pathWithOutputDir(String outputDirPath, String fileName) { + if (outputDirPath == null) { + return Paths.get(fileName); } + return Paths.get(outputDirPath, fileName).toAbsolutePath(); } - private static void printDryRunResults( - PrintWriter printWriter, - CatalogMigrator.CatalogMigrationResult result, - boolean deleteSourceCatalogTables) { - printWriter.println("\nSummary: "); - if (result.registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- No tables are identified for %s. Please check logs for more info.", - deleteSourceCatalogTables ? "migration" : "registration")); - return; - } - printWriter.println( - String.format( - "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. " - + "You can use this file with `--identifiers-from-file` option.", - result.registeredTableIdentifiers().size(), - deleteSourceCatalogTables ? "migration" : "registration", - DRY_RUN_FILE)); - - printWriter.println("\nDetails: "); - printWriter.println( - String.format( - "- Identified these tables for %s by dry-run:", - deleteSourceCatalogTables ? 
"migration" : "registration")); - printWriter.println(result.registeredTableIdentifiers()); - } - - private static void writeToFile(String filePath, List identifiers) { + private static void writeToFile(Path filePath, List identifiers) { List identifiersString = identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); try { - Files.write(Paths.get(filePath), identifiersString); + Files.write(filePath, identifiersString); } catch (IOException e) { throw new RuntimeException("Failed to write the file:" + filePath, e); } } - - public static class CatalogMigrationResult { - private final List registeredTableIdentifiers; - private final List failedToRegisterTableIdentifiers; - private final List failedToDeleteTableIdentifiers; - - CatalogMigrationResult( - List registeredTableIdentifiers, - List failedToRegisterTableIdentifiers, - List failedToDeleteTableIdentifiers) { - this.registeredTableIdentifiers = registeredTableIdentifiers; - this.failedToRegisterTableIdentifiers = failedToRegisterTableIdentifiers; - this.failedToDeleteTableIdentifiers = failedToDeleteTableIdentifiers; - } - - public List registeredTableIdentifiers() { - return registeredTableIdentifiers; - } - - public List failedToRegisterTableIdentifiers() { - return failedToRegisterTableIdentifiers; - } - - public List failedToDeleteTableIdentifiers() { - return failedToDeleteTableIdentifiers; - } - - public static CatalogMigrationResult empty() { - return new CatalogMigrationResult( - Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); - } - } } diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParams.java b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParams.java new file mode 100644 index 0000000..5fc7c27 --- /dev/null +++ b/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParams.java @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import com.google.common.base.Preconditions; +import java.io.PrintWriter; +import java.util.List; +import javax.annotation.Nullable; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.immutables.value.Value; + +@Value.Immutable +public interface CatalogMigratorParams { + + /** Source {@link Catalog} from which the tables are chosen. */ + Catalog sourceCatalog(); + + /** Target {@link Catalog} to which the tables need to be migrated. */ + Catalog targetCatalog(); + + /** + * Optional List of {@link TableIdentifier} for the tables required to be migrated. If not + * specified, all the tables would be migrated. + */ + @Nullable + List tableIdentifiers(); + + /** + * Optional Regular expression pattern used to migrate only the tables whose identifiers match + * this pattern. Can be provided instead of `tableIdentifiers`. + */ + @Nullable + String identifierRegex(); + + /** To execute as dry run. */ + boolean isDryRun(); + + /** Delete the table entries from source catalog after successful migration. */ + boolean deleteEntriesFromSourceCatalog(); + + /** To print the regular updates on the console. */ + PrintWriter printWriter(); + + /** optional path to store the result files. If null, uses present working directory. 
*/ + @Nullable + String outputDirPath(); + + @Value.Check + default void validate() { + Preconditions.checkArgument(sourceCatalog() != null, "Invalid source catalog: null"); + Preconditions.checkArgument(targetCatalog() != null, "Invalid target catalog: null"); + Preconditions.checkArgument( + !targetCatalog().equals(sourceCatalog()), "target catalog is same as source catalog"); + + if (identifierRegex() != null && tableIdentifiers() != null && !tableIdentifiers().isEmpty()) { + throw new IllegalArgumentException( + "Both the identifiers list and identifierRegex is configured."); + } + } +} diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/PromptUtil.java b/src/main/java/org/projectnessie/tools/catalog/migration/PromptUtil.java new file mode 100644 index 0000000..5a43460 --- /dev/null +++ b/src/main/java/org/projectnessie/tools/catalog/migration/PromptUtil.java @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration; + +import java.io.PrintWriter; +import java.util.Scanner; + +public final class PromptUtil { + + private PromptUtil() {} + + static boolean proceedForRegistration(PrintWriter printWriter) { + String warning = + String.format( + "%n[WARNING]%n" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog%n" + + "%n" + + "\tb) After the registration, successfully registered tables will be present in both source and target " + + "catalog. " + + "%n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " + + "loss of data, and table corruption. " + + "%n\tUse `--delete-source-tables` option to automatically delete the table from source catalog after " + + "migration."); + return proceed(warning, printWriter); + } + + static boolean proceedForMigration(PrintWriter printWriter) { + String warning = + String.format( + "%n[WARNING]%n" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog%n" + + "%n" + + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + + "%n\tand can only be accessed from the target catalog."); + return proceed(warning, printWriter); + } + + private static boolean proceed(String warning, PrintWriter printWriter) { + try (Scanner scanner = new Scanner(System.in)) { + printWriter.println(warning); + + while (true) { + printWriter.println( + "Have you read the above warnings and are you sure you want to continue? 
(yes/no):"); + String input = scanner.nextLine(); + + if (input.equalsIgnoreCase("yes")) { + printWriter.println("Continuing..."); + return true; + } else if (input.equalsIgnoreCase("no")) { + printWriter.println("Aborting..."); + return false; + } else { + printWriter.println("Invalid input. Please enter 'yes' or 'no'."); + } + } + } + } +} diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index e3ee191..cb1446a 100644 --- a/src/main/resources/logback.xml +++ b/src/main/resources/logback.xml @@ -17,9 +17,9 @@ --> - + - catalog_migration.log + ${catalog.migration.log.dir}/catalog_migration.log true %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java index c154503..ce9569f 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java @@ -16,10 +16,9 @@ package org.projectnessie.tools.catalog.migration; import com.google.common.collect.Lists; -import java.io.File; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -36,14 +35,14 @@ public abstract class AbstractCLIMigrationTest extends AbstractTest { - protected static @TempDir File warehouse1; + protected static @TempDir Path warehouse1; - protected static @TempDir File warehouse2; + protected static @TempDir Path warehouse2; - protected static @TempDir File outputDir; + protected static @TempDir Path outputDir; - protected static String dryRunFile; - protected static String failedIdentifiersFile; + protected static Path dryRunFile; + protected static Path failedIdentifiersFile; protected static String sourceCatalogProperties; 
protected static String targetCatalogProperties; @@ -57,7 +56,7 @@ protected void beforeEach() { } @AfterEach - protected void afterEach() { + protected void afterEach() throws IOException { // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 // create table will call refresh internally. @@ -72,7 +71,7 @@ protected void afterEach() { @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegister(boolean deleteSourceTables) throws Exception { - RunCLI run = RunCLI.runWithContinue(registerAllTablesArgs(deleteSourceTables)); + RunCLI run = RunCLI.runWithMockedPrompts(registerAllTablesArgs(deleteSourceTables)); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) @@ -86,10 +85,10 @@ public void testRegister(boolean deleteSourceTables) throws Exception { Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n- Successfully %s 4 tables from %s catalog to %s catalog.", + "Summary: %n- Successfully %s 4 tables from %s catalog to %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -122,7 +121,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except "--identifiers", "bar.tbl3", "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Assertions.assertThat(run.getOut()) .doesNotContain( @@ -135,12 +134,12 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n- Successfully %s 1 tables from %s catalog to" + " %s 
catalog.", + "Summary: %n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) .contains( String.format( - "Details: \n- Successfully %s these tables:\n" + "[bar.tbl3]", operation)); + "Details: %n- Successfully %s these tables:%n" + "[bar.tbl3]", operation)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -150,8 +149,9 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) .containsExactly(TableIdentifier.parse("bar.tbl3")); + Path identifierFile = outputDir.resolve("ids.txt"); + // using `--identifiers-from-file` option - Path identifierFile = Paths.get("ids.txt"); Files.write(identifierFile, Collections.singletonList("bar.tbl4")); run = registerTablesCLI( @@ -165,9 +165,9 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except "--target-catalog-properties", targetCatalogProperties, "--identifiers-from-file", - "ids.txt", + identifierFile.toAbsolutePath().toString(), "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Files.delete(identifierFile); Assertions.assertThat(run.getExitCode()).isEqualTo(0); @@ -182,10 +182,10 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", + "Summary: %n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); // manually refreshing catalog due to missing refresh 
in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -211,7 +211,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except "--identifiers-regex", "^foo\\..*", "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) .contains( @@ -228,10 +228,10 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n- Successfully %s 2 tables from %s catalog to" + " %s catalog.", + "Summary: %n- Successfully %s 2 tables from %s catalog to" + " %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -264,7 +264,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "--identifiers", "dummy.tbl3", "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -273,12 +273,12 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n- Failed to %s 1 tables from %s catalog to %s catalog." + "Summary: %n- Failed to %s 1 tables from %s catalog to %s catalog." 
+ " Please check the `catalog_migration.log`", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) .contains( - String.format("Details: \n- Failed to %s these tables:\n[dummy.tbl3]", operation)); + String.format("Details: %n- Failed to %s these tables:%n[dummy.tbl3]", operation)); // try to register same table twice which leads to AlreadyExistsException registerTablesCLI( @@ -294,7 +294,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "--identifiers", "foo.tbl2", "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); run = registerTablesCLI( deleteSourceTables, @@ -309,7 +309,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "--identifiers", "foo.tbl2", "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -318,11 +318,11 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n- Failed to %s 1 tables from %s catalog to %s catalog." + "Summary: %n- Failed to %s 1 tables from %s catalog to %s catalog." + " Please check the `catalog_migration.log`", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: \n- Failed to %s these tables:\n[foo.tbl2]", operation)); + .contains(String.format("Details: %n- Failed to %s these tables:%n[foo.tbl2]", operation)); } @Order(3) @@ -344,7 +344,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep "--identifiers", "foo.tbl2", "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? 
"migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -353,12 +353,12 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n- Successfully %s 1 tables from %s catalog to %s catalog.", + "Summary: %n- Successfully %s 1 tables from %s catalog to %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) .contains( String.format( - "Details: \n" + "- Successfully %s these tables:\n" + "[foo.tbl2]", operation)); + "Details: %n" + "- Successfully %s these tables:%n" + "[foo.tbl2]", operation)); if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { // create a table with the same name in source catalog which got deleted. @@ -378,7 +378,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep "--target-catalog-properties", targetCatalogProperties, "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -388,10 +388,10 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n" - + "- Successfully %s 3 tables from %s catalog to %s catalog.\n" + "Summary: %n" + + "- Successfully %s 3 tables from %s catalog to %s catalog.%n" + "- Failed to %s 1 tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. \n" + + "Please check the `catalog_migration.log` file for the failure reason. %n" + "Failed identifiers are written into `failed_identifiers.txt`. 
" + "Retry with that file using `--identifiers-from-file` option " + "if the failure is because of network/connection timeouts.", @@ -402,9 +402,9 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); Assertions.assertThat(run.getOut()) - .contains(String.format("- Failed to %s these tables:\n[foo.tbl2]", ops)); + .contains(String.format("- Failed to %s these tables:%n[foo.tbl2]", ops)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -430,25 +430,24 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep "--target-catalog-properties", targetCatalogProperties, "--identifiers-from-file", - failedIdentifiersFile, + failedIdentifiersFile.toAbsolutePath().toString(), "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n" + "Summary: %n" + "- Failed to %s 1 tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. \n" + + "Please check the `catalog_migration.log` file for the failure reason. %n" + "Failed identifiers are written into `failed_identifiers.txt`. 
" + "Retry with that file using `--identifiers-from-file` option " + "if the failure is because of network/connection timeouts.", ops, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) .contains( - String.format("Details: \n" + "- Failed to %s these tables:\n" + "[foo.tbl2]", ops)); - Assertions.assertThat(new File(failedIdentifiersFile).exists()).isTrue(); - Assertions.assertThat(Files.readAllLines(Paths.get(failedIdentifiersFile))) - .containsExactly("foo.tbl2"); + String.format("Details: %n" + "- Failed to %s these tables:%n" + "[foo.tbl2]", ops)); + Assertions.assertThat(Files.exists(failedIdentifiersFile)).isTrue(); + Assertions.assertThat(Files.readAllLines(failedIdentifiersFile)).containsExactly("foo.tbl2"); } @Order(4) @@ -468,7 +467,7 @@ public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { "--target-catalog-properties", sourceCatalogProperties, "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? "migration" : "registration"; @@ -479,28 +478,6 @@ public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { @Order(5) @ParameterizedTest @ValueSource(booleans = {true, false}) - public void testPrompt(boolean deleteSourceTables) throws Exception { - RunCLI run = RunCLI.runWithAbort(registerAllTablesArgs(deleteSourceTables)); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - // should abort - Assertions.assertThat(run.getOut()).contains("Aborting..."); - // should not have other messages - Assertions.assertThat(run.getOut()).doesNotContain("Summary"); - - run = RunCLI.runWithDummyInput(registerAllTablesArgs(deleteSourceTables)); - Assertions.assertThat(run.getExitCode()).isEqualTo(1); - Assertions.assertThat(run.getOut()).contains("Invalid input. 
Please enter 'yes' or 'no'."); - - run = RunCLI.runWithContinue(registerAllTablesArgs(deleteSourceTables)); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - // should abort - Assertions.assertThat(run.getOut()).contains("Continuing..."); - Assertions.assertThat(run.getOut()).contains("Summary"); - } - - @Order(6) - @ParameterizedTest - @ValueSource(booleans = {true, false}) public void testDryRun(boolean deleteSourceTables) throws Exception { RunCLI run = registerTablesCLI( @@ -515,7 +492,7 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { targetCatalogProperties, "--dry-run", "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); // should not prompt for dry run @@ -527,7 +504,7 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: \n" + "Summary: %n" + "- Identified 4 tables for %s by dry-run. " + "These identifiers are also written into dry_run_identifiers.txt. 
" + "You can use this file with `--identifiers-from-file` option.", @@ -535,9 +512,9 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { Assertions.assertThat(run.getOut()) .contains( String.format( - "Details: \n" + "- Identified these tables for %s by dry-run:\n", operation)); - Assertions.assertThat(new File(dryRunFile).exists()).isTrue(); - Assertions.assertThat(Files.readAllLines(Paths.get(dryRunFile))) + "Details: %n" + "- Identified these tables for %s by dry-run:%n", operation)); + Assertions.assertThat(Files.exists(dryRunFile)).isTrue(); + Assertions.assertThat(Files.readAllLines(dryRunFile)) .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); } @@ -553,7 +530,7 @@ private static String[] registerAllTablesArgs(boolean deleteSourceTables) { "--target-catalog-properties", targetCatalogProperties, "--output-dir", - outputDir.getAbsolutePath()); + outputDir.toAbsolutePath().toString()); if (deleteSourceTables) { args.add("--delete-source-tables"); } @@ -563,10 +540,10 @@ private static String[] registerAllTablesArgs(boolean deleteSourceTables) { private static RunCLI registerTablesCLI(boolean deleteSourceTables, String... 
args) throws Exception { if (!deleteSourceTables) { - return RunCLI.runWithContinue(args); + return RunCLI.runWithMockedPrompts(args); } List argsList = Lists.newArrayList(args); argsList.add("--delete-source-tables"); - return RunCLI.runWithContinue(argsList.toArray(new String[0])); + return RunCLI.runWithMockedPrompts(argsList.toArray(new String[0])); } } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java index 15de31c..edcfa87 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java @@ -17,7 +17,9 @@ import static org.apache.iceberg.types.Types.NestedField.required; -import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -38,9 +40,18 @@ import org.apache.iceberg.nessie.NessieCatalog; import org.apache.iceberg.rest.RESTCatalog; import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.io.TempDir; public abstract class AbstractTest { + protected static @TempDir Path logDir; + + @BeforeAll + protected static void initLogDir() { + System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); + } + protected static Catalog catalog1; protected static Catalog catalog2; @@ -72,10 +83,9 @@ protected static void dropNamespaces() { ((SupportsNamespaces) catalog2).dropNamespace(Namespace.of("bar")); } - protected static void deleteFileIfExists(String filePath) { - File file = new File(filePath); - if (file.exists()) { - file.delete(); + protected static void deleteFileIfExists(Path filePath) throws IOException { + if (Files.exists(filePath)) { + Files.delete(filePath); } } diff --git 
a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java index 85d881a..8806734 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java @@ -15,18 +15,13 @@ */ package org.projectnessie.tools.catalog.migration; -import static com.github.stefanbirkner.systemlambda.SystemLambda.withTextFromSystemIn; - -import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.nio.file.Files; -import java.nio.file.Paths; +import java.nio.file.Path; import java.util.Collections; -import java.util.List; import java.util.stream.IntStream; -import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; @@ -40,14 +35,14 @@ public abstract class AbstractTestCatalogMigrator extends AbstractTest { - protected static @TempDir File warehouse1; + protected static @TempDir Path warehouse1; - protected static @TempDir File warehouse2; + protected static @TempDir Path warehouse2; - protected static @TempDir File outputDir; + protected static @TempDir Path outputDir; - protected static String dryRunFile; - protected static String failedIdentifiersFile; + protected static Path dryRunFile; + protected static Path failedIdentifiersFile; private static StringWriter stringWriter; private static PrintWriter printWriter; @@ -72,23 +67,19 @@ protected void afterEach() throws IOException { @Order(0) @ParameterizedTest @ValueSource(booleans = {true, false}) - public void testRegister(boolean deleteSourceTables) throws Exception { - - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result; - result = 
registerAllTables(deleteSourceTables); - - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), - TableIdentifier.parse("foo.tbl2"), - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4")); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + public void testRegister(boolean deleteSourceTables) { + + CatalogMigrationResult result; + result = registerAllTables(deleteSourceTables); + + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), + TableIdentifier.parse("foo.tbl2"), + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); String output = stringWriter.toString(); Assertions.assertThat(output) @@ -101,10 +92,10 @@ public void testRegister(boolean deleteSourceTables) throws Exception { Assertions.assertThat(output) .contains( String.format( - "Summary: \n- Successfully %s 4 tables from %s catalog to" + " %s catalog.", + "Summary: %n- Successfully %s 4 tables from %s catalog to" + " %s catalog.", operation, catalog1.name(), catalog2.name())); Assertions.assertThat(output) - .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( @@ -117,26 +108,15 @@ public void testRegister(boolean deleteSourceTables) throws Exception { @Order(1) @ParameterizedTest @ValueSource(booleans = {true, false}) - public void testRegisterSelectedTables(boolean deleteSourceTables) throws Exception { + public void 
testRegisterSelectedTables(boolean deleteSourceTables) { // using `--identifiers` option - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = - registerTables( - Collections.singletonList(TableIdentifier.parse("bar.tbl3")), - catalog1, - catalog2, - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactly(TableIdentifier.parse("bar.tbl3")); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); + builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("bar.tbl3"))); + CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactly(TableIdentifier.parse("bar.tbl3")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); String output = stringWriter.toString(); Assertions.assertThat(output) @@ -149,37 +129,26 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(output) .contains( String.format( - "Summary: \n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", + "Summary: %n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", operation, catalog1.name(), catalog2.name())); Assertions.assertThat(output) .contains( String.format( - "Details: \n- Successfully %s these tables:\n" + "[bar.tbl3]", operation)); + "Details: %n- Successfully %s these tables:%n" + "[bar.tbl3]", operation)); Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).isEmpty(); Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) 
.containsExactly(TableIdentifier.parse("bar.tbl3")); // using --identifiers-regex option which matches all the tables starts with "foo." - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = - registerTables( - null, - catalog1, - catalog2, - "^foo\\..*", - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + builder = builderWithDefaultArgs(deleteSourceTables); + builder.identifierRegex("^foo\\..*"); + result = CatalogMigrator.registerTables(builder.build()); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); output = stringWriter.toString(); Assertions.assertThat(output) @@ -196,10 +165,10 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(output) .contains( String.format( - "Summary: \n- Successfully %s 2 tables from %s catalog to" + " %s catalog.", + "Summary: %n- Successfully %s 2 tables from %s catalog to" + " %s catalog.", operation, catalog1.name(), catalog2.name())); Assertions.assertThat(output) - .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( @@ -211,26 +180,15 @@ public void testRegisterSelectedTables(boolean 
deleteSourceTables) throws Except @Order(2) @ParameterizedTest @ValueSource(booleans = {true, false}) - public void testRegisterError(boolean deleteSourceTables) throws Exception { + public void testRegisterError(boolean deleteSourceTables) { // use invalid namespace which leads to NoSuchTableException - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = - registerTables( - Collections.singletonList(TableIdentifier.parse("dummy.tbl3")), - catalog1, - catalog2, - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()) - .containsExactly(TableIdentifier.parse("dummy.tbl3")); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); + builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("dummy.tbl3"))); + CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .containsExactly(TableIdentifier.parse("dummy.tbl3")); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); String output = stringWriter.toString(); String operation = deleteSourceTables ? "migration" : "registration"; @@ -239,50 +197,30 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { Assertions.assertThat(output) .contains( String.format( - "Summary: \n- Failed to %s 1 tables from %s catalog to %s catalog." + "Summary: %n- Failed to %s 1 tables from %s catalog to %s catalog." 
+ " Please check the `catalog_migration.log`", operation, catalog1.name(), catalog2.name())); Assertions.assertThat(output) .contains( - String.format("Details: \n- Failed to %s these tables:\n[dummy.tbl3]", operation)); + String.format("Details: %n- Failed to %s these tables:%n[dummy.tbl3]", operation)); // try to register same table twice which leads to AlreadyExistsException - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = - registerTables( - Collections.singletonList(TableIdentifier.parse("foo.tbl2")), - catalog1, - catalog2, - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactly(TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = - registerTables( - Collections.singletonList(TableIdentifier.parse("foo.tbl2")), - catalog1, - catalog2, - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()) - .contains(TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + builder = builderWithDefaultArgs(deleteSourceTables); + builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); + result = CatalogMigrator.registerTables(builder.build()); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactly(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + + 
builder = builderWithDefaultArgs(deleteSourceTables); + builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); + result = CatalogMigrator.registerTables(builder.build()); + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .contains(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + output = stringWriter.toString(); operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); @@ -290,36 +228,25 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { Assertions.assertThat(output) .contains( String.format( - "Summary: \n- Failed to %s 1 tables from %s catalog to %s catalog." + "Summary: %n- Failed to %s 1 tables from %s catalog to %s catalog." + " Please check the `catalog_migration.log`", operation, catalog1.name(), catalog2.name())); Assertions.assertThat(output) - .contains(String.format("Details: \n- Failed to %s these tables:\n[foo.tbl2]", operation)); + .contains(String.format("Details: %n- Failed to %s these tables:%n[foo.tbl2]", operation)); } @Order(3) @ParameterizedTest @ValueSource(booleans = {true, false}) - public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Exception { + public void testRegisterWithFewFailures(boolean deleteSourceTables) { // register only foo.tbl2 - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = - registerTables( - Collections.singletonList(TableIdentifier.parse("foo.tbl2")), - catalog1, - catalog2, - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactly(TableIdentifier.parse("foo.tbl2")); - 
Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); + builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); + CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactly(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); String output = stringWriter.toString(); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); @@ -327,12 +254,12 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep Assertions.assertThat(output) .contains( String.format( - "Summary: \n- Successfully %s 1 tables from %s catalog to %s catalog.", + "Summary: %n- Successfully %s 1 tables from %s catalog to %s catalog.", operation, catalog1.name(), catalog2.name())); Assertions.assertThat(output) .contains( String.format( - "Details: \n" + "- Successfully %s these tables:\n" + "[foo.tbl2]", operation)); + "Details: %n" + "- Successfully %s these tables:%n" + "[foo.tbl2]", operation)); if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { // create a table with the same name in source catalog which got deleted. 
@@ -340,19 +267,15 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep } // register all the tables from source catalog again - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = registerAllTables(deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4")); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()) - .contains(TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + result = registerAllTables(deleteSourceTables); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .contains(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); output = stringWriter.toString(); operation = deleteSourceTables ? "migration" : "registration"; @@ -362,10 +285,10 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep Assertions.assertThat(output) .contains( String.format( - "Summary: \n" - + "- Successfully %s 3 tables from %s catalog to %s catalog.\n" + "Summary: %n" + + "- Successfully %s 3 tables from %s catalog to %s catalog.%n" + "- Failed to %s 1 tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. \n" + + "Please check the `catalog_migration.log` file for the failure reason. %n" + "Failed identifiers are written into `failed_identifiers.txt`. 
" + "Retry with that file using `--identifiers-from-file` option " + "if the failure is because of network/connection timeouts.", @@ -376,9 +299,9 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep catalog1.name(), catalog2.name())); Assertions.assertThat(output) - .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); Assertions.assertThat(output) - .contains(String.format("- Failed to %s these tables:\n[foo.tbl2]", ops)); + .contains(String.format("- Failed to %s these tables:%n[foo.tbl2]", ops)); Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); @@ -390,25 +313,20 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep @Order(4) @ParameterizedTest @ValueSource(booleans = {true, false}) - public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { + public void testRegisterNoTables(boolean deleteSourceTables) { // source catalog is catalog2 which has no tables. 
- withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = - registerTables( - null, - catalog2, - catalog1, - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + ImmutableCatalogMigratorParams.Builder builder = + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog2) + .targetCatalog(catalog1) + .isDryRun(false) + .printWriter(printWriter) + .outputDirPath(outputDir.toAbsolutePath().toString()) + .deleteEntriesFromSourceCatalog(deleteSourceTables); + CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); String output = stringWriter.toString(); String operation = deleteSourceTables ? 
"migration" : "registration"; @@ -418,53 +336,18 @@ public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { @Order(5) @ParameterizedTest @ValueSource(booleans = {true, false}) - public void testPrompt(boolean deleteSourceTables) throws Exception { - withTextFromSystemIn("no") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = registerAllTables(deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); - String output = stringWriter.toString(); - // should abort - Assertions.assertThat(output).contains("Aborting..."); - // should not have other messages - Assertions.assertThat(output).doesNotContain("Summary"); - - withTextFromSystemIn("dummy", "yes").execute(() -> registerAllTables(deleteSourceTables)); - output = stringWriter.toString(); - Assertions.assertThat(output).contains("Invalid input. 
Please enter 'yes' or 'no'."); - Assertions.assertThat(output).contains("Continuing..."); - } - - @Order(6) - @ParameterizedTest - @ValueSource(booleans = {true, false}) public void testDryRun(boolean deleteSourceTables) throws Exception { - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result = - registerTables( - null, - catalog1, - catalog2, - null, - true, // enable dry-run - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), - TableIdentifier.parse("foo.tbl2"), - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4")); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); + builder.isDryRun(true); + CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), + TableIdentifier.parse("foo.tbl2"), + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); String output = stringWriter.toString(); // should not prompt for dry run @@ -476,7 +359,7 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { Assertions.assertThat(output) .contains( String.format( - "Summary: \n" + "Summary: %n" + "- Identified 4 tables for %s by dry-run. " + "These identifiers are also written into dry_run_identifiers.txt. 
" + "You can use this file with `--identifiers-from-file` option.", @@ -484,104 +367,13 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { Assertions.assertThat(output) .contains( String.format( - "Details: \n" + "- Identified these tables for %s by dry-run:\n", operation)); - Assertions.assertThat(new File(dryRunFile).exists()).isTrue(); - Assertions.assertThat(Files.readAllLines(Paths.get(dryRunFile))) + "Details: %n" + "- Identified these tables for %s by dry-run:%n", operation)); + Assertions.assertThat(Files.exists(dryRunFile)).isTrue(); + Assertions.assertThat(Files.readAllLines(dryRunFile)) .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); } - @Order(7) - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testInvalidInputs(boolean deleteSourceTables) throws Exception { - withTextFromSystemIn("yes") - .execute( - () -> - Assertions.assertThatThrownBy( - () -> - registerTables( - null, - catalog1, - null, // target-catalog is null - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Invalid target catalog: null")); - - withTextFromSystemIn("yes") - .execute( - () -> - Assertions.assertThatThrownBy( - () -> - registerTables( - null, - null, // source-catalog is null - catalog2, - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Invalid source catalog: null")); - - withTextFromSystemIn("yes") - .execute( - () -> - Assertions.assertThatThrownBy( - () -> - registerTables( - null, - catalog2, // source-catalog is same as target catalog - catalog2, - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("target catalog is same as source catalog")); - - withTextFromSystemIn("yes") - 
.execute( - () -> - Assertions.assertThatThrownBy( - () -> - registerTables( - Collections.singletonList(TableIdentifier.parse("foo.abc")), - catalog1, - catalog2, - ".*", // both the identifiers and regex is configured. - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining( - "Both the identifiers list and identifierRegex is configured.")); - - withTextFromSystemIn("yes") - .execute( - () -> - Assertions.assertThatThrownBy( - () -> - registerTables( - Collections.singletonList(TableIdentifier.parse("foo.abc")), - catalog1, - catalog2, - null, - false, - null, // printWriter is null. - outputDir.getAbsolutePath(), - deleteSourceTables)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("printWriter is null")); - } - - @Order(8) + @Order(6) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { @@ -592,16 +384,12 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E catalog1.createTable( TableIdentifier.of(Namespace.of("foo"), "tblx" + val), schema)); - withTextFromSystemIn("yes") - .execute( - () -> { - CatalogMigrator.CatalogMigrationResult result; - result = registerAllTables(deleteSourceTables); + CatalogMigrationResult result; + result = registerAllTables(deleteSourceTables); - Assertions.assertThat(result.registeredTableIdentifiers()).hasSize(244); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - }); + Assertions.assertThat(result.registeredTableIdentifiers()).hasSize(244); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); String operation = deleteSourceTables ? 
"migration" : "registration"; String output = stringWriter.toString(); @@ -611,10 +399,10 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E Assertions.assertThat(output) .contains( String.format( - "Summary: \n- Successfully %s 244 tables from %s catalog to" + " %s catalog.", + "Summary: %n- Successfully %s 244 tables from %s catalog to" + " %s catalog.", operation, catalog1.name(), catalog2.name())); Assertions.assertThat(output) - .contains(String.format("Details: \n" + "- Successfully %s these tables:\n", operation)); + .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); operation = deleteSourceTables ? "migration" : "registration"; // validate intermediate output @@ -629,44 +417,19 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } - private CatalogMigrator.CatalogMigrationResult registerAllTables(boolean deleteSourceTables) { - return registerTables( - null, - catalog1, - catalog2, - null, - false, - printWriter, - outputDir.getAbsolutePath(), - deleteSourceTables); + private CatalogMigrationResult registerAllTables(boolean deleteSourceTables) { + ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); + return CatalogMigrator.registerTables(builder.build()); } - private static CatalogMigrator.CatalogMigrationResult registerTables( - List tableIdentifiers, - Catalog sourceCatalog, - Catalog targetCatalog, - String identifierRegex, - boolean isDryRun, - PrintWriter printWriter, - String outputDirPath, + private ImmutableCatalogMigratorParams.Builder builderWithDefaultArgs( boolean deleteSourceTables) { - if (deleteSourceTables) { - return CatalogMigrator.migrateTables( - tableIdentifiers, - sourceCatalog, - targetCatalog, - identifierRegex, - isDryRun, - printWriter, - outputDirPath); - } - return CatalogMigrator.registerTables( - tableIdentifiers, - 
sourceCatalog, - targetCatalog, - identifierRegex, - isDryRun, - printWriter, - outputDirPath); + return ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog1) + .targetCatalog(catalog2) + .isDryRun(false) + .printWriter(printWriter) + .outputDirPath(outputDir.toAbsolutePath().toString()) + .deleteEntriesFromSourceCatalog(deleteSourceTables); } } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java index 4315269..5e99f2d 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java @@ -18,19 +18,29 @@ import static java.util.Collections.singletonList; import static org.junit.jupiter.params.provider.Arguments.arguments; +import java.nio.file.Path; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Stream; import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; public class CLITest { + protected static @TempDir Path logDir; + + @BeforeAll + protected static void initLogDir() { + System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); + } + private static Stream optionErrors() { return Stream.of( // no arguments diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParamsTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParamsTest.java new file mode 100644 index 0000000..c3091ca --- /dev/null +++ b/src/test/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParamsTest.java @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2023 
Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.Collections; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +public class CatalogMigratorParamsTest { + + @Test + public void testInvalidArgs() throws IOException { + Catalog catalog1 = new HadoopCatalog(); + Catalog catalog2 = new HadoopCatalog(); + + StringWriter stringWriter = new StringWriter(); + PrintWriter printWriter = new PrintWriter(stringWriter); + + try { + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog2) // source-catalog is same as target catalog + .targetCatalog(catalog2) + .isDryRun(false) + .printWriter(printWriter) + .outputDirPath("temp") + .deleteEntriesFromSourceCatalog(true) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("target catalog is same as source catalog"); + + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog1) + .targetCatalog(catalog2) + .isDryRun(false) + .printWriter(printWriter) + .deleteEntriesFromSourceCatalog(true) + .identifierRegex(".*") + 
.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("foo.abc"))) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Both the identifiers list and identifierRegex is configured."); + + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog1) + .targetCatalog(null) // target-catalog is null + .isDryRun(false) + .printWriter(printWriter) + .outputDirPath("temp") + .deleteEntriesFromSourceCatalog(true) + .build()) + .isInstanceOf(NullPointerException.class) + .hasMessageContaining("targetCatalog"); + + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(null) // source-catalog is null + .targetCatalog(catalog2) + .isDryRun(false) + .printWriter(printWriter) + .outputDirPath("temp") + .deleteEntriesFromSourceCatalog(true) + .build()) + .isInstanceOf(NullPointerException.class) + .hasMessageContaining("sourceCatalog"); + + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog1) + .targetCatalog(catalog2) + .isDryRun(false) + .printWriter(null) + .deleteEntriesFromSourceCatalog(true) + .build()) + .isInstanceOf(NullPointerException.class) + .hasMessageContaining("printWriter"); + } finally { + stringWriter.close(); + printWriter.close(); + } + } +} diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java index 2c93a37..7e99990 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java @@ -25,15 +25,13 @@ public class HadoopCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() { - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = 
outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; - String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - sourceCatalogProperties = "warehouse=" + warehousePath1 + ",type=hadoop"; - targetCatalogProperties = "warehouse=" + warehousePath2 + ",type=hadoop"; - - catalog1 = createHadoopCatalog(warehousePath1, "catalog1"); - catalog2 = createHadoopCatalog(warehousePath2, "catalog2"); + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); + sourceCatalogProperties = "warehouse=" + warehouse1.toAbsolutePath() + ",type=hadoop"; + targetCatalogProperties = "warehouse=" + warehouse2.toAbsolutePath() + ",type=hadoop"; + + catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "catalog1"); + catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "catalog2"); sourceCatalogType = catalogType(catalog1); targetCatalogType = catalogType(catalog2); diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java index 90e7815..359967f 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java @@ -25,13 +25,11 @@ public class HadoopCatalogMigratorTest extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() { - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; - String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile 
= outputDir.resolve(FAILED_IDENTIFIERS_FILE); - catalog1 = createHadoopCatalog(warehousePath1, "catalog1"); - catalog2 = createHadoopCatalog(warehousePath2, "catalog2"); + catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "catalog1"); + catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "catalog2"); createNamespaces(); } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java index 3ee8903..bc99ad2 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java @@ -26,18 +26,16 @@ public class ITHadoopToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; - String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - sourceCatalogProperties = "warehouse=" + warehousePath1 + ",type=hadoop"; + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); + sourceCatalogProperties = "warehouse=" + warehouse1.toAbsolutePath() + ",type=hadoop"; targetCatalogProperties = "warehouse=" - + warehousePath2 + + warehouse2.toAbsolutePath() + ",uri=" + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - catalog1 = createHadoopCatalog(warehousePath1, "catalog1"); + catalog1 = createHadoopCatalog(warehouse1.toString(), "catalog1"); catalog2 = HiveMetaStoreRunner.hiveCatalog(); sourceCatalogType = catalogType(catalog1); diff --git 
a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java index df4cee2..9058c27 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java @@ -26,14 +26,12 @@ public class ITHadoopToHiveCatalogMigrator extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() throws Exception { - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); HiveMetaStoreRunner.startMetastore(); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - - catalog1 = createHadoopCatalog(warehousePath2, "hadoop"); + catalog1 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); catalog2 = HiveMetaStoreRunner.hiveCatalog(); createNamespaces(); diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java index c16be16..10573da 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java @@ -26,19 +26,17 @@ public class ITHiveToHadoopCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; - String warehousePath1 = 
String.format("file://%s", warehouse1.getAbsolutePath()); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); sourceCatalogProperties = "warehouse=" - + warehousePath1 + + warehouse1.toAbsolutePath() + ",uri=" + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - targetCatalogProperties = "warehouse=" + warehousePath2 + ",type=hadoop"; + targetCatalogProperties = "warehouse=" + warehouse2.toAbsolutePath() + ",type=hadoop"; catalog1 = HiveMetaStoreRunner.hiveCatalog(); - catalog2 = createHadoopCatalog(warehousePath2, "hadoop"); + catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); sourceCatalogType = catalogType(catalog1); targetCatalogType = catalogType(catalog2); diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java index 630d6cb..587cbad 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java @@ -26,15 +26,13 @@ public class ITHiveToHadoopCatalogMigrator extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() throws Exception { - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); HiveMetaStoreRunner.startMetastore(); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - catalog1 = HiveMetaStoreRunner.hiveCatalog(); - catalog2 = createHadoopCatalog(warehousePath2, "hadoop"); + catalog2 = 
createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); createNamespaces(); } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java index 94ae58c..df92611 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java @@ -30,19 +30,18 @@ public class ITHiveToNessieCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; - String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); sourceCatalogProperties = "warehouse=" - + warehousePath1 + + warehouse1.toAbsolutePath() + ",uri=" + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - targetCatalogProperties = "uri=" + nessieUri + ",ref=main,warehouse=" + warehousePath2; + targetCatalogProperties = + "uri=" + nessieUri + ",ref=main,warehouse=" + warehouse2.toAbsolutePath(); catalog1 = HiveMetaStoreRunner.hiveCatalog(); - catalog2 = createNessieCatalog(warehousePath2, nessieUri); + catalog2 = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); sourceCatalogType = catalogType(catalog1); targetCatalogType = catalogType(catalog2); diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java 
index f10a239..2181587 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java @@ -30,15 +30,13 @@ public class ITHiveToNessieCatalogMigrator extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() throws Exception { - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); HiveMetaStoreRunner.startMetastore(); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - catalog1 = HiveMetaStoreRunner.hiveCatalog(); - catalog2 = createNessieCatalog(warehousePath2, nessieUri); + catalog2 = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); createNamespaces(); } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java index 56b37ab..81e1cb8 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java @@ -30,18 +30,16 @@ public class ITNessieToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; - String warehousePath1 = String.format("file://%s", warehouse1.getAbsolutePath()); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - sourceCatalogProperties = "uri=" + nessieUri + 
",ref=main,warehouse=" + warehousePath1; + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); + sourceCatalogProperties = "uri=" + nessieUri + ",ref=main,warehouse=" + warehouse1; targetCatalogProperties = "warehouse=" - + warehousePath2 + + warehouse2 + ",uri=" + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - catalog1 = createNessieCatalog(warehousePath1, nessieUri); + catalog1 = createNessieCatalog(warehouse1.toAbsolutePath().toString(), nessieUri); catalog2 = HiveMetaStoreRunner.hiveCatalog(); sourceCatalogType = catalogType(catalog1); diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java b/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java index a5b9e56..c2c0b2f 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java @@ -30,14 +30,12 @@ public class ITNessieToHiveCatalogMigrator extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() throws Exception { - dryRunFile = outputDir.getAbsolutePath() + "/" + DRY_RUN_FILE; - failedIdentifiersFile = outputDir.getAbsolutePath() + "/" + FAILED_IDENTIFIERS_FILE; + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); HiveMetaStoreRunner.startMetastore(); - String warehousePath2 = String.format("file://%s", warehouse2.getAbsolutePath()); - - catalog1 = createNessieCatalog(warehousePath2, nessieUri); + catalog1 = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); catalog2 = HiveMetaStoreRunner.hiveCatalog(); createNamespaces(); diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java b/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java index 606d06a..9a9bb41 
100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java +++ b/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java @@ -15,13 +15,14 @@ */ package org.projectnessie.tools.catalog.migration; -import static com.github.stefanbirkner.systemlambda.SystemLambda.withTextFromSystemIn; +import static org.mockito.Mockito.mockStatic; import java.io.PrintWriter; import java.io.StringWriter; import java.util.Arrays; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; +import org.mockito.MockedStatic; import picocli.CommandLine; /** Helper class for tests. */ @@ -77,27 +78,22 @@ public static RunCLI run(String... args) throws Exception { } } - static RunCLI runWithContinue(String... args) throws Exception { - return runWithSystemInput(args, "yes"); - } - - static RunCLI runWithAbort(String... args) throws Exception { - return runWithSystemInput(args, "no"); - } - - static RunCLI runWithDummyInput(String... args) throws Exception { - return runWithSystemInput(args, "dummy"); - } - - private static RunCLI runWithSystemInput(String[] args, String input) throws Exception { + static RunCLI runWithMockedPrompts(String... 
args) throws Exception { try (StringWriter out = new StringWriter(); PrintWriter outWriter = new PrintWriter(out); StringWriter err = new StringWriter(); PrintWriter errWriter = new PrintWriter(err)) { AtomicInteger exitCode = new AtomicInteger(); - withTextFromSystemIn(input).execute(() -> exitCode.set(runMain(outWriter, errWriter, args))); - return new RunCLI(exitCode.get(), out.toString(), err.toString(), args); + try (MockedStatic mocked = mockStatic(PromptUtil.class)) { + + // To avoid manipulating `System.in`, mock the APIs that use `System.in` + mocked.when(() -> PromptUtil.proceedForMigration(outWriter)).thenReturn(true); + mocked.when(() -> PromptUtil.proceedForRegistration(outWriter)).thenReturn(true); + + exitCode.set(runMain(outWriter, errWriter, args)); + return new RunCLI(exitCode.get(), out.toString(), err.toString(), args); + } } } @@ -115,14 +111,9 @@ public String getErr() { @Override public String toString() { - return "org.projectnessie.tools.catalog.migration.RunCLI{" - + "args=" - + Arrays.toString(args) - + "\nexitCode=" - + exitCode - + "\n\nstdout:\n" - + out - + "\n\nstderr:\n" - + err; + return String.format( + "org.projectnessie.tools.catalog.migration" + + ".RunCLI{args=%s%nexitCode=%d%n%nstdout:%n%s%n%nstderr:%n%s", + Arrays.toString(args), exitCode, out, err); } } From 27f487b7ad171e9c9d9871f164fd327bb511258e Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Wed, 22 Feb 2023 19:37:06 +0530 Subject: [PATCH 11/31] module refactoring --- README.md | 2 +- api-test/build.gradle.kts | 74 +++++++ .../migration/api/test}/AbstractTest.java | 35 +--- .../api/test}/HiveMetaStoreRunner.java | 4 +- .../src}/main/resources/logback.xml | 0 .../test}/AbstractTestCatalogMigrator.java | 5 +- .../api/test}/HadoopCatalogMigratorTest.java | 6 +- .../test}/ITHadoopToHiveCatalogMigrator.java | 6 +- .../test}/ITHiveToHadoopCatalogMigrator.java | 6 +- .../test}/ITHiveToNessieCatalogMigrator.java | 6 +- .../test}/ITNessieToHiveCatalogMigrator.java | 6 +- 
api/build.gradle.kts | 38 ++++ .../api}/CatalogMigrationResult.java | 14 +- .../migration/api}/CatalogMigrator.java | 8 +- .../migration/api}/CatalogMigratorParams.java | 2 +- api/src/main/resources/logback.xml | 34 ++++ .../api}/CatalogMigratorParamsTest.java | 2 +- build.gradle.kts | 187 ------------------ .../src/main/kotlin/BuildSupport.gradle.kts | 44 +++++ cli/build.gradle.kts | 148 ++++++++++++++ {src => cli/src}/exec/exec-preamble.sh | 0 .../migration/cli}/CLIVersionProvider.java | 2 +- .../migration/cli}/CatalogMigrationCLI.java | 4 +- .../catalog/migration/cli}/PromptUtil.java | 6 +- cli/src/main/resources/logback.xml | 34 ++++ .../catalog/migration/cli}/version.properties | 0 .../cli}/AbstractCLIMigrationTest.java | 34 +++- .../tools/catlog/migration/cli}/CLITest.java | 2 +- .../cli}/HadoopCLIMigrationTest.java | 6 +- .../cli}/ITHadoopToHiveCLIMigrationTest.java | 7 +- .../cli}/ITHiveToHadoopCLIMigrationTest.java | 7 +- .../cli}/ITHiveToNessieCLIMigrationTest.java | 7 +- .../cli}/ITNessieToHiveCLIMigrationTest.java | 7 +- .../tools/catlog/migration/cli}/RunCLI.java | 4 +- settings.gradle.kts | 23 +++ 35 files changed, 496 insertions(+), 274 deletions(-) create mode 100644 api-test/build.gradle.kts rename {src/test/java/org/projectnessie/tools/catalog/migration => api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test}/AbstractTest.java (73%) rename {src/test/java/org/projectnessie/tools/catalog/migration => api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test}/HiveMetaStoreRunner.java (89%) rename {src => api-test/src}/main/resources/logback.xml (100%) rename {src/test/java/org/projectnessie/tools/catalog/migration => api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test}/AbstractTestCatalogMigrator.java (98%) rename {src/test/java/org/projectnessie/tools/catalog/migration => api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test}/HadoopCatalogMigratorTest.java (82%) rename 
{src/test/java/org/projectnessie/tools/catalog/migration => api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test}/ITHadoopToHiveCatalogMigrator.java (86%) rename {src/test/java/org/projectnessie/tools/catalog/migration => api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test}/ITHiveToHadoopCatalogMigrator.java (86%) rename {src/test/java/org/projectnessie/tools/catalog/migration => api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test}/ITHiveToNessieCatalogMigrator.java (87%) rename {src/test/java/org/projectnessie/tools/catalog/migration => api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test}/ITNessieToHiveCatalogMigrator.java (87%) create mode 100644 api/build.gradle.kts rename {src/main/java/org/projectnessie/tools/catalog/migration => api/src/main/java/org/projectnessie/tools/catalog/migration/api}/CatalogMigrationResult.java (88%) rename {src/main/java/org/projectnessie/tools/catalog/migration => api/src/main/java/org/projectnessie/tools/catalog/migration/api}/CatalogMigrator.java (97%) rename {src/main/java/org/projectnessie/tools/catalog/migration => api/src/main/java/org/projectnessie/tools/catalog/migration/api}/CatalogMigratorParams.java (97%) create mode 100644 api/src/main/resources/logback.xml rename {src/test/java/org/projectnessie/tools/catalog/migration => api/src/test/java/org/projectnessie/tools/catalog/migration/api}/CatalogMigratorParamsTest.java (98%) create mode 100644 cli/build.gradle.kts rename {src => cli/src}/exec/exec-preamble.sh (100%) rename {src/main/java/org/projectnessie/tools/catalog/migration => cli/src/main/java/org/projectnessie/tools/catalog/migration/cli}/CLIVersionProvider.java (95%) rename {src/main/java/org/projectnessie/tools/catalog/migration => cli/src/main/java/org/projectnessie/tools/catalog/migration/cli}/CatalogMigrationCLI.java (98%) rename {src/main/java/org/projectnessie/tools/catalog/migration => 
cli/src/main/java/org/projectnessie/tools/catalog/migration/cli}/PromptUtil.java (93%) create mode 100644 cli/src/main/resources/logback.xml rename {src/main/resources/org/projectnessie/tools/catalog/migration => cli/src/main/resources/org/projectnessie/tools/catalog/migration/cli}/version.properties (100%) rename {src/test/java/org/projectnessie/tools/catalog/migration => cli/src/test/java/org/projectnessie/tools/catlog/migration/cli}/AbstractCLIMigrationTest.java (93%) rename {src/test/java/org/projectnessie/tools/catalog/migration => cli/src/test/java/org/projectnessie/tools/catlog/migration/cli}/CLITest.java (99%) rename {src/test/java/org/projectnessie/tools/catalog/migration => cli/src/test/java/org/projectnessie/tools/catlog/migration/cli}/HadoopCLIMigrationTest.java (85%) rename {src/test/java/org/projectnessie/tools/catalog/migration => cli/src/test/java/org/projectnessie/tools/catlog/migration/cli}/ITHadoopToHiveCLIMigrationTest.java (83%) rename {src/test/java/org/projectnessie/tools/catalog/migration => cli/src/test/java/org/projectnessie/tools/catlog/migration/cli}/ITHiveToHadoopCLIMigrationTest.java (83%) rename {src/test/java/org/projectnessie/tools/catalog/migration => cli/src/test/java/org/projectnessie/tools/catlog/migration/cli}/ITHiveToNessieCLIMigrationTest.java (85%) rename {src/test/java/org/projectnessie/tools/catalog/migration => cli/src/test/java/org/projectnessie/tools/catlog/migration/cli}/ITNessieToHiveCLIMigrationTest.java (85%) rename {src/test/java/org/projectnessie/tools/catalog/migration => cli/src/test/java/org/projectnessie/tools/catlog/migration/cli}/RunCLI.java (95%) diff --git a/README.md b/README.md index 4e9d1aa..ee01fd0 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Need to have java installed in your machine(JDK11 or later version) to use this Below is the CLI syntax: ``` -$ java -jar iceberg-catalog-migrator-0.1.0-SNAPSHOT.jar -h +$ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar -h Usage: register 
[-hV] [--delete-source-tables] [--dry-run] [--identifiers-from-file=] [--identifiers-regex=] [--output-dir=] --source-catalog-type= [--source-custom-catalog-impl=] --target-catalog-type= [--target-custom-catalog-impl=] [--identifiers=[, diff --git a/api-test/build.gradle.kts b/api-test/build.gradle.kts new file mode 100644 index 0000000..4073a11 --- /dev/null +++ b/api-test/build.gradle.kts @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +plugins { + `java-library` + `maven-publish` + alias(libs.plugins.nessie.run) + BuildSupport +} + +dependencies { + implementation(libs.slf4j) + implementation(libs.picocli) + implementation(libs.logback.classic) + implementation(libs.logback.core) + implementation(libs.hadoop.common) + implementation(libs.iceberg.spark.runtime) + implementation(libs.junit.jupiter.api) + implementation("org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests") + + testImplementation(project(":iceberg-catalog-migrator-api")) + + testImplementation(libs.assertj) + testImplementation(libs.junit.jupiter.params) + implementation(libs.junit.jupiter.engine) + + // for integration tests + testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hbase") + exclude("org.apache.logging.log4j") + exclude("co.cask.tephra") + exclude("com.google.code.findbugs", "jsr305") + exclude("org.eclipse.jetty.aggregate", "jetty-all") + exclude("org.eclipse.jetty.orbit", "javax.servlet") + exclude("org.apache.parquet", "parquet-hadoop-bundle") + exclude("com.tdunning", "json") + exclude("javax.transaction", "transaction-api") + exclude("com.zaxxer", "HikariCP") + } + testImplementation("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hive", "hive-llap-tez") + exclude("org.apache.logging.log4j") + exclude("com.google.protobuf", "protobuf-java") + exclude("org.apache.calcite") + exclude("org.apache.calcite.avatica") + exclude("com.google.code.findbugs", "jsr305") + } + 
testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") + + nessieQuarkusServer("org.projectnessie:nessie-quarkus:${libs.versions.nessie.get()}:runner") +} + +nessieQuarkusApp { includeTask(tasks.named("intTest")) } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java similarity index 73% rename from src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java rename to api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java index edcfa87..c9934ea 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTest.java +++ b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java @@ -13,9 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catalog.migration; - -import static org.apache.iceberg.types.Types.NestedField.required; +package org.projectnessie.tools.catalog.migration.api.test; import java.io.IOException; import java.nio.file.Files; @@ -27,18 +25,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.Schema; -import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; -import org.apache.iceberg.aws.glue.GlueCatalog; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.dell.ecs.EcsCatalog; import org.apache.iceberg.hadoop.HadoopCatalog; -import org.apache.iceberg.hive.HiveCatalog; -import org.apache.iceberg.jdbc.JdbcCatalog; import org.apache.iceberg.nessie.NessieCatalog; -import org.apache.iceberg.rest.RESTCatalog; import org.apache.iceberg.types.Types; import 
org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.io.TempDir; @@ -56,7 +48,8 @@ protected static void initLogDir() { protected static Catalog catalog2; protected static final Schema schema = - new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); + new Schema( + Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())).fields()); protected static void createTables() { // two tables in 'foo' namespace @@ -89,28 +82,6 @@ protected static void deleteFileIfExists(Path filePath) throws IOException { } } - protected static String catalogType(Catalog catalog) { - if (catalog instanceof DynamoDbCatalog) { - return CatalogMigrationCLI.CatalogType.DYNAMODB.name(); - } else if (catalog instanceof EcsCatalog) { - return CatalogMigrationCLI.CatalogType.ECS.name(); - } else if (catalog instanceof GlueCatalog) { - return CatalogMigrationCLI.CatalogType.GLUE.name(); - } else if (catalog instanceof HadoopCatalog) { - return CatalogMigrationCLI.CatalogType.HADOOP.name(); - } else if (catalog instanceof HiveCatalog) { - return CatalogMigrationCLI.CatalogType.HIVE.name(); - } else if (catalog instanceof JdbcCatalog) { - return CatalogMigrationCLI.CatalogType.JDBC.name(); - } else if (catalog instanceof NessieCatalog) { - return CatalogMigrationCLI.CatalogType.NESSIE.name(); - } else if (catalog instanceof RESTCatalog) { - return CatalogMigrationCLI.CatalogType.REST.name(); - } else { - return CatalogMigrationCLI.CatalogType.CUSTOM.name(); - } - } - protected static Catalog createHadoopCatalog(String warehousePath, String name) { Map properties = new HashMap<>(); properties.put("warehouse", warehousePath); diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/HiveMetaStoreRunner.java b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/HiveMetaStoreRunner.java similarity index 89% rename from src/test/java/org/projectnessie/tools/catalog/migration/HiveMetaStoreRunner.java rename to 
api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/HiveMetaStoreRunner.java index a68c935..e072f93 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/HiveMetaStoreRunner.java +++ b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/HiveMetaStoreRunner.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api.test; import org.apache.iceberg.hive.HiveCatalog; import org.apache.iceberg.hive.HiveMetastoreTest; @@ -22,6 +22,6 @@ public class HiveMetaStoreRunner extends HiveMetastoreTest { // Expose the catalog for tests public static HiveCatalog hiveCatalog() { - return catalog; + return HiveMetastoreTest.catalog; } } diff --git a/src/main/resources/logback.xml b/api-test/src/main/resources/logback.xml similarity index 100% rename from src/main/resources/logback.xml rename to api-test/src/main/resources/logback.xml diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java similarity index 98% rename from src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java rename to api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java index 8806734..4de13d1 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractTestCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api.test; import java.io.IOException; import java.io.PrintWriter; @@ -32,6 +32,9 @@ import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; +import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; public abstract class AbstractTestCatalogMigrator extends AbstractTest { diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java similarity index 82% rename from src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java rename to api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java index 359967f..545123b 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCatalogMigratorTest.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api.test; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java similarity index 86% rename from src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java rename to api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java index 9058c27..2f77c31 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api.test; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java similarity index 86% rename from src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java rename to api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java index 587cbad..ba7f176 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api.test; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java similarity index 87% rename from src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java rename to api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java index 2181587..6486997 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api.test; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java similarity index 87% rename from src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java rename to api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java index c2c0b2f..0055888 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api.test; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/api/build.gradle.kts b/api/build.gradle.kts new file mode 100644 index 0000000..442a3bd --- /dev/null +++ b/api/build.gradle.kts @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +plugins { + `java-library` + `maven-publish` + BuildSupport +} + +dependencies { + implementation(libs.guava) + implementation(libs.slf4j) + implementation(libs.logback.classic) + implementation(libs.logback.core) + implementation(libs.iceberg.spark.runtime) + + annotationProcessor(libs.immutables) + compileOnly(libs.immutables) + + testImplementation(libs.junit.jupiter.params) + testImplementation(libs.junit.jupiter.api) + testImplementation(libs.junit.jupiter.engine) + testImplementation(libs.assertj) + testImplementation(libs.hadoop.common) +} diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationResult.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java similarity index 88% rename from src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationResult.java rename to api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java index 3386194..760d7ca 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationResult.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java @@ -13,11 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_TO_DELETE_AT_SOURCE_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_TO_DELETE_AT_SOURCE_FILE; import java.io.PrintWriter; import java.util.List; @@ -27,11 +27,11 @@ @Value.Immutable public abstract class CatalogMigrationResult { - abstract List registeredTableIdentifiers(); + public abstract List registeredTableIdentifiers(); - abstract List failedToRegisterTableIdentifiers(); + public abstract List failedToRegisterTableIdentifiers(); - abstract List failedToDeleteTableIdentifiers(); + public abstract List failedToDeleteTableIdentifiers(); public void printSummary( PrintWriter printWriter, diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java similarity index 97% rename from src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java rename to api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index de9a263..c4933f8 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api; import com.google.common.collect.ImmutableList; import java.io.IOException; @@ -42,9 +42,9 @@ public class CatalogMigrator { private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrator.class); - static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; - static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; - static final String DRY_RUN_FILE = "dry_run_identifiers.txt"; + public static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; + public static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; + public static final String DRY_RUN_FILE = "dry_run_identifiers.txt"; private CatalogMigrator() {} diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParams.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java similarity index 97% rename from src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParams.java rename to api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java index 5fc7c27..6d83467 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParams.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api; import com.google.common.base.Preconditions; import java.io.PrintWriter; diff --git a/api/src/main/resources/logback.xml b/api/src/main/resources/logback.xml new file mode 100644 index 0000000..cb1446a --- /dev/null +++ b/api/src/main/resources/logback.xml @@ -0,0 +1,34 @@ + + + + + + ${catalog.migration.log.dir}/catalog_migration.log + true + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParamsTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java similarity index 98% rename from src/test/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParamsTest.java rename to api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java index c3091ca..abaa8da 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/CatalogMigratorParamsTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.api; import java.io.IOException; import java.io.PrintWriter; diff --git a/build.gradle.kts b/build.gradle.kts index c4209d8..231cc69 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -14,191 +14,4 @@ * limitations under the License. 
*/ -import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin -import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar - -plugins { - `java-library` - `maven-publish` - alias(libs.plugins.nessie.run) - BuildSupport -} - -applyShadowJar() - -testTasks() - -dependencies { - implementation(libs.guava) - implementation(libs.slf4j) - implementation(libs.picocli) - implementation(libs.logback.classic) - implementation(libs.logback.core) - implementation(libs.iceberg.spark.runtime) - implementation(libs.iceberg.dell) - implementation(libs.hadoop.aws) - implementation(libs.hadoop.common) - implementation(libs.aws.sdk) - - annotationProcessor(libs.immutables) - compileOnly(libs.immutables) - - testImplementation(libs.junit.jupiter.params) - testImplementation(libs.junit.jupiter.api) - testImplementation(libs.junit.jupiter.engine) - testImplementation(libs.assertj) - testImplementation(libs.mockito) - - // for integration tests - testImplementation( - "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" - ) - testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { - // these are taken from iceberg repo configurations - exclude("org.apache.avro", "avro") - exclude("org.slf4j", "slf4j-log4j12") - exclude("org.pentaho") // missing dependency - exclude("org.apache.hbase") - exclude("org.apache.logging.log4j") - exclude("co.cask.tephra") - exclude("com.google.code.findbugs", "jsr305") - exclude("org.eclipse.jetty.aggregate", "jetty-all") - exclude("org.eclipse.jetty.orbit", "javax.servlet") - exclude("org.apache.parquet", "parquet-hadoop-bundle") - exclude("com.tdunning", "json") - exclude("javax.transaction", "transaction-api") - exclude("com.zaxxer", "HikariCP") - } - testImplementation("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") { - // these are taken from iceberg repo configurations - exclude("org.apache.avro", "avro") - exclude("org.slf4j", "slf4j-log4j12") - exclude("org.pentaho") // 
missing dependency - exclude("org.apache.hive", "hive-llap-tez") - exclude("org.apache.logging.log4j") - exclude("com.google.protobuf", "protobuf-java") - exclude("org.apache.calcite") - exclude("org.apache.calcite.avatica") - exclude("com.google.code.findbugs", "jsr305") - } - testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") - - nessieQuarkusServer("org.projectnessie:nessie-quarkus:${libs.versions.nessie.get()}:runner") -} - -group = "org.projectnessie.tools.catalog.migration" - -version = file("version.txt").readText().trim() - -description = "iceberg-catalog-migrator" - -java.sourceCompatibility = JavaVersion.VERSION_1_8 - -val processResources = - tasks.named("processResources") { - inputs.property("projectVersion", project.version) - filter( - org.apache.tools.ant.filters.ReplaceTokens::class, - mapOf("tokens" to mapOf("projectVersion" to project.version)) - ) - } - -tasks.named("test") { systemProperty("expectedCLIVersion", project.version) } - -val mainClassName = "org.projectnessie.tools.catalog.migration.CatalogMigrationCLI" - extra["versionGoogleJavaFormat"] = libs.versions.googleJavaFormat.get() - -val shadowJar = tasks.named("shadowJar") - -val unixExecutable by - tasks.registering { - group = "build" - description = "Generates the Unix executable" - - dependsOn(shadowJar) - val dir = buildDir.resolve("executable") - val executable = dir.resolve("iceberg-catalog-migrator") - inputs.files(shadowJar.get().archiveFile).withPathSensitivity(PathSensitivity.RELATIVE) - outputs.files(executable) - outputs.cacheIf { false } // very big file - doFirst { - dir.mkdirs() - executable.outputStream().use { out -> - projectDir.resolve("src/exec/exec-preamble.sh").inputStream().use { i -> i.transferTo(out) } - shadowJar.get().archiveFile.get().asFile.inputStream().use { i -> i.transferTo(out) } - } - executable.setExecutable(true) - } - } - -shadowJar { - manifest { attributes["Main-Class"] = mainClassName } - 
finalizedBy(unixExecutable) -} - -nessieQuarkusApp { - includeTask(tasks.named("intTest")) - environmentNonInput.put("HTTP_ACCESS_LOG_LEVEL", testLogLevel()) -} - -fun Project.applyShadowJar() { - plugins.apply(ShadowPlugin::class.java) - - plugins.withType().configureEach { - val shadowJar = - tasks.named("shadowJar") { - isZip64 = true // as the package has more than 65535 files - outputs.cacheIf { false } // do not cache uber/shaded jars - archiveClassifier.set("") - mergeServiceFiles() - } - - tasks.named("jar") { - dependsOn(shadowJar) - archiveClassifier.set("raw") - } - } -} - -fun Project.testTasks() { - if (projectDir.resolve("src/test").exists()) { - tasks.withType().configureEach { - useJUnitPlatform {} - val testJvmArgs: String? by project - if (testJvmArgs != null) { - jvmArgs((testJvmArgs as String).split(" ")) - } - - systemProperty("file.encoding", "UTF-8") - systemProperty("user.language", "en") - systemProperty("user.country", "US") - systemProperty("user.variant", "") - systemProperty("test.log.level", testLogLevel()) - environment("TESTCONTAINERS_REUSE_ENABLE", "true") - filter { - isFailOnNoMatchingTests = false - when (name) { - "test" -> { - includeTestsMatching("*Test") - includeTestsMatching("Test*") - excludeTestsMatching("Abstract*") - excludeTestsMatching("IT*") - } - "intTest" -> includeTestsMatching("IT*") - } - } - if (name != "test") { - mustRunAfter(tasks.named("test")) - } - } - val intTest = - tasks.register("intTest") { - group = "verification" - description = "Runs the integration tests." 
- } - tasks.named("check") { dependsOn(intTest) } - } -} - -fun testLogLevel() = System.getProperty("test.log.level", "WARN") diff --git a/buildSrc/src/main/kotlin/BuildSupport.gradle.kts b/buildSrc/src/main/kotlin/BuildSupport.gradle.kts index 5f7ddd5..10c9a90 100644 --- a/buildSrc/src/main/kotlin/BuildSupport.gradle.kts +++ b/buildSrc/src/main/kotlin/BuildSupport.gradle.kts @@ -34,6 +34,8 @@ repositories { } } +testTasks() + tasks.withType().configureEach { manifest { attributes["Implementation-Title"] = "iceberg-catalog-migrator" @@ -61,3 +63,45 @@ plugins.withType().configureEach { modularity.inferModulePath.set(true) } } + +fun Project.testTasks() { + if (projectDir.resolve("src/test").exists()) { + tasks.withType().configureEach { + useJUnitPlatform {} + val testJvmArgs: String? by project + if (testJvmArgs != null) { + jvmArgs((testJvmArgs as String).split(" ")) + } + + systemProperty("file.encoding", "UTF-8") + systemProperty("user.language", "en") + systemProperty("user.country", "US") + systemProperty("user.variant", "") + systemProperty("test.log.level", testLogLevel()) + environment("TESTCONTAINERS_REUSE_ENABLE", "true") + filter { + isFailOnNoMatchingTests = false + when (name) { + "test" -> { + includeTestsMatching("*Test") + includeTestsMatching("Test*") + excludeTestsMatching("Abstract*") + excludeTestsMatching("IT*") + } + "intTest" -> includeTestsMatching("IT*") + } + } + if (name != "test") { + mustRunAfter(tasks.named("test")) + } + } + val intTest = + tasks.register("intTest") { + group = "verification" + description = "Runs the integration tests." 
+ } + tasks.named("check") { dependsOn(intTest) } + } +} + +fun testLogLevel() = System.getProperty("test.log.level", "WARN") diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts new file mode 100644 index 0000000..64bf06c --- /dev/null +++ b/cli/build.gradle.kts @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { + `java-library` + `maven-publish` + alias(libs.plugins.nessie.run) + BuildSupport +} + +java.sourceCompatibility = JavaVersion.VERSION_1_8 + +applyShadowJar() + +dependencies { + implementation(project(":iceberg-catalog-migrator-api")) + implementation(libs.slf4j) + implementation(libs.logback.classic) + implementation(libs.logback.core) + implementation(libs.picocli) + implementation(libs.iceberg.spark.runtime) + implementation(libs.iceberg.dell) + implementation(libs.hadoop.aws) + implementation(libs.hadoop.common) + implementation(libs.aws.sdk) + + testImplementation(libs.junit.jupiter.params) + testImplementation(libs.junit.jupiter.api) + testImplementation(libs.junit.jupiter.engine) + testImplementation(libs.assertj) + testImplementation(libs.mockito) + + testImplementation(project(":iceberg-catalog-migrator-api-test")) + + // for integration tests + testImplementation( + 
"org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" + ) + testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hbase") + exclude("org.apache.logging.log4j") + exclude("co.cask.tephra") + exclude("com.google.code.findbugs", "jsr305") + exclude("org.eclipse.jetty.aggregate", "jetty-all") + exclude("org.eclipse.jetty.orbit", "javax.servlet") + exclude("org.apache.parquet", "parquet-hadoop-bundle") + exclude("com.tdunning", "json") + exclude("javax.transaction", "transaction-api") + exclude("com.zaxxer", "HikariCP") + } + testImplementation("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hive", "hive-llap-tez") + exclude("org.apache.logging.log4j") + exclude("com.google.protobuf", "protobuf-java") + exclude("org.apache.calcite") + exclude("org.apache.calcite.avatica") + exclude("com.google.code.findbugs", "jsr305") + } + testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") + + nessieQuarkusServer("org.projectnessie:nessie-quarkus:${libs.versions.nessie.get()}:runner") +} + +nessieQuarkusApp { includeTask(tasks.named("intTest")) } + +tasks.named("test") { systemProperty("expectedCLIVersion", project.version) } + +val processResources = + tasks.named("processResources") { + inputs.property("projectVersion", project.version) + filter( + org.apache.tools.ant.filters.ReplaceTokens::class, + mapOf("tokens" to mapOf("projectVersion" to project.version)) + ) + } + +val mainClassName = "org.projectnessie.tools.catalog.migration.CatalogMigrationCLI" + +val shadowJar = 
tasks.named("shadowJar") + +val unixExecutable by + tasks.registering { + group = "build" + description = "Generates the Unix executable" + + dependsOn(shadowJar) + val dir = buildDir.resolve("executable") + val executable = dir.resolve("iceberg-catalog-migrator") + inputs.files(shadowJar.get().archiveFile).withPathSensitivity(PathSensitivity.RELATIVE) + outputs.files(executable) + outputs.cacheIf { false } // very big file + doFirst { + dir.mkdirs() + executable.outputStream().use { out -> + projectDir.resolve("src/exec/exec-preamble.sh").inputStream().use { i -> i.transferTo(out) } + shadowJar.get().archiveFile.get().asFile.inputStream().use { i -> i.transferTo(out) } + } + executable.setExecutable(true) + } + } + +shadowJar { + manifest { attributes["Main-Class"] = mainClassName } + finalizedBy(unixExecutable) +} + +fun Project.applyShadowJar() { + plugins.apply(ShadowPlugin::class.java) + + plugins.withType().configureEach { + val shadowJar = + tasks.named("shadowJar") { + isZip64 = true // as the package has more than 65535 files + outputs.cacheIf { false } // do not cache uber/shaded jars + archiveClassifier.set("") + mergeServiceFiles() + } + + tasks.named("jar") { + dependsOn(shadowJar) + archiveClassifier.set("raw") + } + } +} diff --git a/src/exec/exec-preamble.sh b/cli/src/exec/exec-preamble.sh similarity index 100% rename from src/exec/exec-preamble.sh rename to cli/src/exec/exec-preamble.sh diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CLIVersionProvider.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CLIVersionProvider.java similarity index 95% rename from src/main/java/org/projectnessie/tools/catalog/migration/CLIVersionProvider.java rename to cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CLIVersionProvider.java index 5e71dff..0c4223a 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CLIVersionProvider.java +++ 
b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CLIVersionProvider.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.cli; import java.io.InputStream; import java.util.Properties; diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java similarity index 98% rename from src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java rename to cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java index ce3897e..dbb342a 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/CatalogMigrationCLI.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.cli; import java.io.IOException; import java.io.PrintWriter; @@ -38,6 +38,8 @@ import org.apache.iceberg.jdbc.JdbcCatalog; import org.apache.iceberg.nessie.NessieCatalog; import org.apache.iceberg.rest.RESTCatalog; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; +import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; import picocli.CommandLine; @CommandLine.Command( diff --git a/src/main/java/org/projectnessie/tools/catalog/migration/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java similarity index 93% rename from src/main/java/org/projectnessie/tools/catalog/migration/PromptUtil.java rename to cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java index 5a43460..caf1055 100644 --- a/src/main/java/org/projectnessie/tools/catalog/migration/PromptUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catalog.migration.cli; import java.io.PrintWriter; import java.util.Scanner; @@ -22,7 +22,7 @@ public final class PromptUtil { private PromptUtil() {} - static boolean proceedForRegistration(PrintWriter printWriter) { + public static boolean proceedForRegistration(PrintWriter printWriter) { String warning = String.format( "%n[WARNING]%n" @@ -40,7 +40,7 @@ static boolean proceedForRegistration(PrintWriter printWriter) { return proceed(warning, printWriter); } - static boolean proceedForMigration(PrintWriter printWriter) { + public static boolean proceedForMigration(PrintWriter printWriter) { String warning = String.format( "%n[WARNING]%n" diff --git a/cli/src/main/resources/logback.xml b/cli/src/main/resources/logback.xml new file mode 100644 index 0000000..cb1446a --- /dev/null +++ b/cli/src/main/resources/logback.xml @@ -0,0 +1,34 @@ + + + + + + ${catalog.migration.log.dir}/catalog_migration.log + true + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + diff --git a/src/main/resources/org/projectnessie/tools/catalog/migration/version.properties b/cli/src/main/resources/org/projectnessie/tools/catalog/migration/cli/version.properties similarity index 100% rename from src/main/resources/org/projectnessie/tools/catalog/migration/version.properties rename to cli/src/main/resources/org/projectnessie/tools/catalog/migration/cli/version.properties diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java similarity index 93% rename from src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java index ce9569f..5332ad9 100644 --- 
a/src/test/java/org/projectnessie/tools/catalog/migration/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catlog.migration.cli; import com.google.common.collect.Lists; import java.io.IOException; @@ -22,9 +22,17 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; +import org.apache.iceberg.aws.glue.GlueCatalog; +import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.dell.ecs.EcsCatalog; import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.hive.HiveCatalog; +import org.apache.iceberg.jdbc.JdbcCatalog; +import org.apache.iceberg.nessie.NessieCatalog; +import org.apache.iceberg.rest.RESTCatalog; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -32,6 +40,8 @@ import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import org.projectnessie.tools.catalog.migration.api.test.AbstractTest; +import org.projectnessie.tools.catalog.migration.cli.CatalogMigrationCLI; public abstract class AbstractCLIMigrationTest extends AbstractTest { @@ -546,4 +556,26 @@ private static RunCLI registerTablesCLI(boolean deleteSourceTables, String... 
ar argsList.add("--delete-source-tables"); return RunCLI.runWithMockedPrompts(argsList.toArray(new String[0])); } + + protected static String catalogType(Catalog catalog) { + if (catalog instanceof DynamoDbCatalog) { + return CatalogMigrationCLI.CatalogType.DYNAMODB.name(); + } else if (catalog instanceof EcsCatalog) { + return CatalogMigrationCLI.CatalogType.ECS.name(); + } else if (catalog instanceof GlueCatalog) { + return CatalogMigrationCLI.CatalogType.GLUE.name(); + } else if (catalog instanceof HadoopCatalog) { + return CatalogMigrationCLI.CatalogType.HADOOP.name(); + } else if (catalog instanceof HiveCatalog) { + return CatalogMigrationCLI.CatalogType.HIVE.name(); + } else if (catalog instanceof JdbcCatalog) { + return CatalogMigrationCLI.CatalogType.JDBC.name(); + } else if (catalog instanceof NessieCatalog) { + return CatalogMigrationCLI.CatalogType.NESSIE.name(); + } else if (catalog instanceof RESTCatalog) { + return CatalogMigrationCLI.CatalogType.REST.name(); + } else { + return CatalogMigrationCLI.CatalogType.CUSTOM.name(); + } + } } diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java similarity index 99% rename from src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java rename to cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java index 5e99f2d..91ff956 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/CLITest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catlog.migration.cli; import static java.util.Collections.singletonList; import static org.junit.jupiter.params.provider.Arguments.arguments; diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java similarity index 85% rename from src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java index 7e99990..366b399 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/HadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java similarity index 83% rename from src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java index 
bc99ad2..fe1ca23 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHadoopToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java @@ -13,13 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHadoopToHiveCLIMigrationTest extends AbstractCLIMigrationTest { diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java similarity index 83% rename from src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java index 10573da..fd8736f 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToHadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java @@ -13,13 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToHadoopCLIMigrationTest extends AbstractCLIMigrationTest { diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java similarity index 85% rename from src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java index df92611..806ffed 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITHiveToNessieCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java @@ -13,13 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToNessieCLIMigrationTest extends AbstractCLIMigrationTest { diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java similarity index 85% rename from src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java index 81e1cb8..34ea3bb 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/ITNessieToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java @@ -13,13 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITNessieToHiveCLIMigrationTest extends AbstractCLIMigrationTest { diff --git a/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java similarity index 95% rename from src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java rename to cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java index 9a9bb41..dcf18c8 100644 --- a/src/test/java/org/projectnessie/tools/catalog/migration/RunCLI.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catalog.migration; +package org.projectnessie.tools.catlog.migration.cli; import static org.mockito.Mockito.mockStatic; @@ -23,6 +23,8 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.mockito.MockedStatic; +import org.projectnessie.tools.catalog.migration.cli.CatalogMigrationCLI; +import org.projectnessie.tools.catalog.migration.cli.PromptUtil; import picocli.CommandLine; /** Helper class for tests. 
*/ diff --git a/settings.gradle.kts b/settings.gradle.kts index fa146f0..71b23b6 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -14,4 +14,27 @@ * limitations under the License. */ +val baseVersion = file("version.txt").readText().trim() + rootProject.name = "iceberg-catalog-migrator" + +gradle.beforeProject { + group = "org.projectnessie.tools.catalog.migration" + version = baseVersion + description = + when (name) { + "api" -> "Iceberg catalog migrator - api implementation" + "api-test" -> "Iceberg catalog migrator - common test implementation" + "cli" -> "Iceberg catalog migrator - CLI implementation" + else -> name + } +} + +fun catalogMigratorProject(name: String) { + include("iceberg-catalog-migrator-$name") + project(":iceberg-catalog-migrator-$name").projectDir = file(name) +} + +catalogMigratorProject("api") +catalogMigratorProject("api-test") +catalogMigratorProject("cli") From 71b587f6d3fcde621fdd8c3b8ba03bed88bab9de Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Thu, 23 Feb 2023 13:46:26 +0530 Subject: [PATCH 12/31] no more mocks and fix few self review findings --- cli/build.gradle.kts | 3 +- .../migration/cli/CatalogMigrationCLI.java | 33 +++++--- .../catalog/migration/cli/PromptUtil.java | 63 ++++++++------- .../cli/AbstractCLIMigrationTest.java | 23 +++--- .../catlog/migration/cli/PromptUtilTest.java | 81 +++++++++++++++++++ .../tools/catlog/migration/cli/RunCLI.java | 54 +++++-------- gradle/libs.versions.toml | 2 - 7 files changed, 168 insertions(+), 91 deletions(-) create mode 100644 cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/PromptUtilTest.java diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index 64bf06c..4d4bce8 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -44,7 +44,6 @@ dependencies { testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) testImplementation(libs.assertj) - testImplementation(libs.mockito) 
testImplementation(project(":iceberg-catalog-migrator-api-test")) @@ -98,7 +97,7 @@ val processResources = ) } -val mainClassName = "org.projectnessie.tools.catalog.migration.CatalogMigrationCLI" +val mainClassName = "org.projectnessie.tools.catalog.migration.cli.CatalogMigrationCLI" val shadowJar = tasks.named("shadowJar") diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java index dbb342a..6d1adb4 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java @@ -16,6 +16,7 @@ package org.projectnessie.tools.catalog.migration.cli; import java.io.IOException; +import java.io.InputStream; import java.io.PrintWriter; import java.nio.file.Files; import java.nio.file.Paths; @@ -76,14 +77,14 @@ public class CatalogMigrationCLI implements Callable { description = "optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " + "using an Iceberg FileIO.") - Map sourceHadoopConf = new HashMap<>(); + private Map sourceHadoopConf = new HashMap<>(); @CommandLine.Option( names = {"--source-custom-catalog-impl"}, description = "optional fully qualified class name of the custom catalog implementation of the source catalog. 
Required " + "when the catalog type is CUSTOM.") - String sourceCustomCatalogImpl; + private String sourceCustomCatalogImpl; @CommandLine.Option( names = "--target-catalog-type", @@ -106,14 +107,14 @@ public class CatalogMigrationCLI implements Callable { description = "optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " + "using an Iceberg FileIO.") - Map targetHadoopConf = new HashMap<>(); + private Map targetHadoopConf = new HashMap<>(); @CommandLine.Option( names = {"--target-custom-catalog-impl"}, description = "optional fully qualified class name of the custom catalog implementation of the target catalog. Required " + "when the catalog type is CUSTOM.") - String targetCustomCatalogImpl; + private String targetCustomCatalogImpl; @CommandLine.Option( names = {"--identifiers"}, @@ -122,21 +123,21 @@ public class CatalogMigrationCLI implements Callable { "optional selective list of identifiers to register. If not specified, all the tables will be registered. " + "Use this when there are few identifiers that need to be registered. For a large number of identifiers, " + "use the `--identifiers-from-file` or `--identifiers-regex` option.") - List identifiers = new ArrayList<>(); + private List identifiers = new ArrayList<>(); @CommandLine.Option( names = {"--identifiers-from-file"}, description = "optional text file path that contains a list of table identifiers (one per line) to register. Should not be " + "used with `--identifiers` or `--identifiers-regex` option.") - String identifiersFromFile; + private String identifiersFromFile; @CommandLine.Option( names = {"--identifiers-regex"}, description = "optional regular expression pattern used to register only the tables whose identifiers match this pattern. 
" + "Should not be used with `--identifiers` or '--identifiers-from-file' option.") - String identifiersRegEx; + private String identifiersRegEx; @CommandLine.Option( names = {"--dry-run"}, @@ -158,10 +159,20 @@ public class CatalogMigrationCLI implements Callable { "optional local output directory path to write CLI output files like `failed_identifiers.txt`, " + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. " + "Uses the present working directory if not specified.") - String outputDirPath; + private String outputDirPath; + + private final InputStream input; + + public CatalogMigrationCLI(InputStream input) { + this.input = input; + } public static void main(String... args) { - CommandLine commandLine = new CommandLine(new CatalogMigrationCLI()); + runWithInput(System.in, args); + } + + public static void runWithInput(InputStream input, String... args) { + CommandLine commandLine = new CommandLine(new CatalogMigrationCLI(input)); commandLine.setUsageHelpWidth(150); int exitCode = commandLine.execute(args); System.exit(exitCode); @@ -216,11 +227,11 @@ public Integer call() { if (!isDryRun) { if (deleteSourceCatalogTables) { - if (!PromptUtil.proceedForMigration(printWriter)) { + if (!PromptUtil.proceedForMigration(input, printWriter)) { return 0; } } else { - if (!PromptUtil.proceedForRegistration(printWriter)) { + if (!PromptUtil.proceedForRegistration(input, printWriter)) { return 0; } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java index caf1055..29d0071 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java @@ -15,6 +15,7 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import java.io.InputStream; import java.io.PrintWriter; import java.util.Scanner; @@ -22,40 +23,42 @@ public final 
class PromptUtil { private PromptUtil() {} - public static boolean proceedForRegistration(PrintWriter printWriter) { - String warning = - String.format( - "%n[WARNING]%n" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " - + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog%n" - + "%n" - + "\tb) After the registration, successfully registered tables will be present in both source and target " - + "catalog. " - + "%n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " - + "loss of data, and table corruption. " - + "%n\tUse `--delete-source-tables` option to automatically delete the table from source catalog after " - + "migration."); - return proceed(warning, printWriter); + public static final String WARNING_FOR_REGISTRATION = + String.format( + "%n[WARNING]%n" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog%n" + + "%n" + + "\tb) After the registration, successfully registered tables will be present in both source and target " + + "catalog. " + + "%n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " + + "loss of data, and table corruption. 
" + + "%n\tUse `--delete-source-tables` option to automatically delete the table from source catalog after " + + "migration."); + + public static final String WARNING_FOR_MIGRATION = + String.format( + "%n[WARNING]%n" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog%n" + + "%n" + + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + + "%n\tand can only be accessed from the target catalog."); + + public static boolean proceedForRegistration(InputStream input, PrintWriter printWriter) { + return proceed(input, WARNING_FOR_REGISTRATION, printWriter); } - public static boolean proceedForMigration(PrintWriter printWriter) { - String warning = - String.format( - "%n[WARNING]%n" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. 
" - + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog%n" - + "%n" - + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " - + "%n\tand can only be accessed from the target catalog."); - return proceed(warning, printWriter); + public static boolean proceedForMigration(InputStream input, PrintWriter printWriter) { + return proceed(input, WARNING_FOR_MIGRATION, printWriter); } - private static boolean proceed(String warning, PrintWriter printWriter) { - try (Scanner scanner = new Scanner(System.in)) { + private static boolean proceed(InputStream inputStream, String warning, PrintWriter printWriter) { + try (Scanner scanner = new Scanner(inputStream)) { printWriter.println(warning); while (true) { diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java index 5332ad9..759f918 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java @@ -16,6 +16,7 @@ package org.projectnessie.tools.catlog.migration.cli; import com.google.common.collect.Lists; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -81,7 +82,7 @@ protected void afterEach() throws IOException { @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegister(boolean deleteSourceTables) throws Exception { - RunCLI run = RunCLI.runWithMockedPrompts(registerAllTablesArgs(deleteSourceTables)); + RunCLI run = registerTablesCLI(deleteSourceTables, registerAllTablesArgs()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) @@ -528,7 +529,7 @@ public void testDryRun(boolean deleteSourceTables) 
throws Exception { .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); } - private static String[] registerAllTablesArgs(boolean deleteSourceTables) { + private static String[] registerAllTablesArgs() { ArrayList args = Lists.newArrayList( "--source-catalog-type", @@ -541,20 +542,22 @@ private static String[] registerAllTablesArgs(boolean deleteSourceTables) { targetCatalogProperties, "--output-dir", outputDir.toAbsolutePath().toString()); - if (deleteSourceTables) { - args.add("--delete-source-tables"); - } return args.toArray(new String[0]); } private static RunCLI registerTablesCLI(boolean deleteSourceTables, String... args) throws Exception { - if (!deleteSourceTables) { - return RunCLI.runWithMockedPrompts(args); + ByteArrayInputStream input = new ByteArrayInputStream("yes\n".getBytes()); + try { + if (!deleteSourceTables) { + return RunCLI.runWithInput(input, args); + } + List argsList = Lists.newArrayList(args); + argsList.add("--delete-source-tables"); + return RunCLI.runWithInput(input, argsList.toArray(new String[0])); + } finally { + input.close(); } - List argsList = Lists.newArrayList(args); - argsList.add("--delete-source-tables"); - return RunCLI.runWithMockedPrompts(argsList.toArray(new String[0])); } protected static String catalogType(Catalog catalog) { diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/PromptUtilTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/PromptUtilTest.java new file mode 100644 index 0000000..5b2036a --- /dev/null +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/PromptUtilTest.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catlog.migration.cli; + +import static org.projectnessie.tools.catalog.migration.cli.PromptUtil.WARNING_FOR_MIGRATION; +import static org.projectnessie.tools.catalog.migration.cli.PromptUtil.WARNING_FOR_REGISTRATION; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.NoSuchElementException; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.projectnessie.tools.catalog.migration.cli.PromptUtil; + +public class PromptUtilTest { + + @ParameterizedTest + @CsvSource({"yes, false", "yes, true", "no, false", "no, true", "dummy, false", "dummy, true"}) + public void testPrompts(String input, boolean deleteSourceTables) throws Exception { + String warning = deleteSourceTables ? WARNING_FOR_MIGRATION : WARNING_FOR_REGISTRATION; + StringWriter stringWriter = new StringWriter(); + PrintWriter printWriter = new PrintWriter(stringWriter); + ByteArrayInputStream inputStream = new ByteArrayInputStream(input.getBytes()); + try { + switch (input) { + case "yes": + Assertions.assertThat(callPrompt(deleteSourceTables, inputStream, printWriter)).isTrue(); + Assertions.assertThat(stringWriter.toString()).contains(warning); + Assertions.assertThat(stringWriter.toString()) + .contains( + "Have you read the above warnings and are you sure you want to continue? 
(yes/no):"); + Assertions.assertThat(stringWriter.toString()).contains("Continuing..."); + break; + case "no": + Assertions.assertThat(callPrompt(deleteSourceTables, inputStream, printWriter)).isFalse(); + Assertions.assertThat(stringWriter.toString()).contains(warning); + Assertions.assertThat(stringWriter.toString()) + .contains( + "Have you read the above warnings and are you sure you want to continue? (yes/no):"); + Assertions.assertThat(stringWriter.toString()).contains("Aborting..."); + break; + case "dummy": + Assertions.assertThatThrownBy( + () -> callPrompt(deleteSourceTables, inputStream, printWriter)) + .isInstanceOf(NoSuchElementException.class) + .hasMessageContaining("No line found"); + Assertions.assertThat(stringWriter.toString()) + .contains("Invalid input. Please enter 'yes' or 'no'."); + break; + default: + } + } finally { + inputStream.close(); + stringWriter.close(); + printWriter.close(); + } + } + + private boolean callPrompt( + boolean deleteSourceTables, InputStream inputStream, PrintWriter printWriter) { + return deleteSourceTables + ? 
PromptUtil.proceedForMigration(inputStream, printWriter) + : PromptUtil.proceedForRegistration(inputStream, printWriter); + } +} diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java index dcf18c8..bc8ecc9 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java @@ -15,16 +15,12 @@ */ package org.projectnessie.tools.catlog.migration.cli; -import static org.mockito.Mockito.mockStatic; - +import java.io.InputStream; import java.io.PrintWriter; import java.io.StringWriter; import java.util.Arrays; import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; -import org.mockito.MockedStatic; import org.projectnessie.tools.catalog.migration.cli.CatalogMigrationCLI; -import org.projectnessie.tools.catalog.migration.cli.PromptUtil; import picocli.CommandLine; /** Helper class for tests. */ @@ -46,9 +42,24 @@ public static RunCLI run(List args) throws Exception { return run(args.toArray(new String[0])); } - private static int runMain(PrintWriter out, PrintWriter err, String... arguments) { + public static RunCLI run(String... args) throws Exception { + return runWithInput(System.in, args); + } + + public static RunCLI runWithInput(InputStream inputStream, String... args) throws Exception { + try (StringWriter out = new StringWriter(); + PrintWriter outWriter = new PrintWriter(out); + StringWriter err = new StringWriter(); + PrintWriter errWriter = new PrintWriter(err)) { + int exitCode = runMain(outWriter, errWriter, inputStream, args); + return new RunCLI(exitCode, out.toString(), err.toString(), args); + } + } + + private static int runMain( + PrintWriter out, PrintWriter err, InputStream inputStream, String... 
arguments) { CommandLine commandLine = - new CommandLine(new CatalogMigrationCLI()) + new CommandLine(new CatalogMigrationCLI(inputStream)) .setExecutionExceptionHandler( (ex, cmd, parseResult) -> { cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); @@ -70,35 +81,6 @@ private static int runMain(PrintWriter out, PrintWriter err, String... arguments } } - public static RunCLI run(String... args) throws Exception { - try (StringWriter out = new StringWriter(); - PrintWriter outWriter = new PrintWriter(out); - StringWriter err = new StringWriter(); - PrintWriter errWriter = new PrintWriter(err)) { - int exitCode = runMain(outWriter, errWriter, args); - return new RunCLI(exitCode, out.toString(), err.toString(), args); - } - } - - static RunCLI runWithMockedPrompts(String... args) throws Exception { - try (StringWriter out = new StringWriter(); - PrintWriter outWriter = new PrintWriter(out); - StringWriter err = new StringWriter(); - PrintWriter errWriter = new PrintWriter(err)) { - - AtomicInteger exitCode = new AtomicInteger(); - try (MockedStatic mocked = mockStatic(PromptUtil.class)) { - - // To avoid manipulating `System.in`, mock the APIs that use `System.in` - mocked.when(() -> PromptUtil.proceedForMigration(outWriter)).thenReturn(true); - mocked.when(() -> PromptUtil.proceedForRegistration(outWriter)).thenReturn(true); - - exitCode.set(runMain(outWriter, errWriter, args)); - return new RunCLI(exitCode.get(), out.toString(), err.toString(), args); - } - } - } - public int getExitCode() { return exitCode; } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 7e80f1d..c7b30d6 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -9,7 +9,6 @@ iceberg = "1.1.0" immutables = "2.9.3" junit = "5.9.1" logback = "1.2.11" -mockito = "5.1.1" nessie = "0.48.2" nessieBuildPlugins = "0.2.14" nessieRunner = "0.28.1" @@ -34,7 +33,6 @@ junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } 
junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } logback-core = { module = "ch.qos.logback:logback-core", version.ref = "logback" } logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } -mockito = { module = "org.mockito:mockito-inline", version.ref = "mockito" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } From eebf26eb280e7874517b702b4e66725babcd12ec Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Mon, 27 Feb 2023 23:30:41 +0530 Subject: [PATCH 13/31] Address review comments from 27th feb --- api-test/build.gradle.kts | 3 +- .../migration/api/test/AbstractTest.java | 8 - .../api/test/AbstractTestCatalogMigrator.java | 247 ++------------- .../api/test/HadoopCatalogMigratorTest.java | 6 - .../test/ITHadoopToHiveCatalogMigrator.java | 6 - .../test/ITHiveToHadoopCatalogMigrator.java | 6 - .../test/ITHiveToNessieCatalogMigrator.java | 6 - .../test/ITNessieToHiveCatalogMigrator.java | 6 - api/build.gradle.kts | 3 +- .../migration/api/CatalogMigrationResult.java | 94 ------ .../migration/api/CatalogMigrator.java | 281 ++++++------------ .../migration/api/CatalogMigratorParams.java | 35 --- .../api/CatalogMigratorParamsTest.java | 114 +++---- buildSrc/build.gradle.kts | 5 + cli/build.gradle.kts | 2 - .../migration/cli/CatalogMigrationCLI.java | 262 +++++++++++++--- .../catalog/migration/cli/PromptUtil.java | 42 ++- .../cli/AbstractCLIMigrationTest.java | 64 ++-- .../cli/ITHadoopToHiveCLIMigrationTest.java | 8 + .../cli/ITHiveToHadoopCLIMigrationTest.java | 8 + .../cli/ITHiveToNessieCLIMigrationTest.java | 8 + .../cli/ITNessieToHiveCLIMigrationTest.java | 8 + .../catlog/migration/cli/PromptUtilTest.java | 81 ----- .../tools/catlog/migration/cli/RunCLI.java | 18 +- gradle/libs.versions.toml | 19 +- gradle/wrapper/gradle-wrapper.properties | 4 +- 26 files changed, 494 insertions(+), 850 deletions(-) delete mode 
100644 cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/PromptUtilTest.java diff --git a/api-test/build.gradle.kts b/api-test/build.gradle.kts index 4073a11..ac7547b 100644 --- a/api-test/build.gradle.kts +++ b/api-test/build.gradle.kts @@ -24,8 +24,7 @@ plugins { dependencies { implementation(libs.slf4j) implementation(libs.picocli) - implementation(libs.logback.classic) - implementation(libs.logback.core) + runtimeOnly(libs.logback.classic) implementation(libs.hadoop.common) implementation(libs.iceberg.spark.runtime) implementation(libs.junit.jupiter.api) diff --git a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java index c9934ea..d12d20c 100644 --- a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java +++ b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java @@ -15,8 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.api.test; -import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.Collections; @@ -76,12 +74,6 @@ protected static void dropNamespaces() { ((SupportsNamespaces) catalog2).dropNamespace(Namespace.of("bar")); } - protected static void deleteFileIfExists(Path filePath) throws IOException { - if (Files.exists(filePath)) { - Files.delete(filePath); - } - } - protected static Catalog createHadoopCatalog(String warehousePath, String name) { Map properties = new HashMap<>(); properties.put("warehouse", warehousePath); diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java index 4de13d1..78ae04a 100644 --- 
a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java @@ -15,10 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.api.test; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.nio.file.Files; import java.nio.file.Path; import java.util.Collections; import java.util.stream.IntStream; @@ -42,29 +38,14 @@ public abstract class AbstractTestCatalogMigrator extends AbstractTest { protected static @TempDir Path warehouse2; - protected static @TempDir Path outputDir; - - protected static Path dryRunFile; - protected static Path failedIdentifiersFile; - - private static StringWriter stringWriter; - private static PrintWriter printWriter; - @BeforeEach protected void beforeEach() { createTables(); - - stringWriter = new StringWriter(); - printWriter = new PrintWriter(stringWriter); } @AfterEach - protected void afterEach() throws IOException { + protected void afterEach() { dropTables(); - deleteFileIfExists(dryRunFile); - deleteFileIfExists(failedIdentifiersFile); - stringWriter.close(); - printWriter.close(); } @Order(0) @@ -84,22 +65,6 @@ public void testRegister(boolean deleteSourceTables) { Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - String output = stringWriter.toString(); - Assertions.assertThat(output) - .contains( - "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(output).contains(String.format("Identified 4 tables for %s.", operation)); - operation = deleteSourceTables ? 
"migrated" : "registered"; - Assertions.assertThat(output) - .contains( - String.format( - "Summary: %n- Successfully %s 4 tables from %s catalog to" + " %s catalog.", - operation, catalog1.name(), catalog2.name())); - Assertions.assertThat(output) - .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); @@ -114,65 +79,29 @@ public void testRegister(boolean deleteSourceTables) { public void testRegisterSelectedTables(boolean deleteSourceTables) { // using `--identifiers` option ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); - builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("bar.tbl3"))); - CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); + CatalogMigrationResult result = + new CatalogMigrator(builder.build()) + .registerTables(Collections.singletonList(TableIdentifier.parse("bar.tbl3"))); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactly(TableIdentifier.parse("bar.tbl3")); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - String output = stringWriter.toString(); - Assertions.assertThat(output) - .doesNotContain( - "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); - operation = deleteSourceTables ? 
"migrated" : "registered"; - Assertions.assertThat(output) - .contains( - String.format( - "Summary: %n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", - operation, catalog1.name(), catalog2.name())); - Assertions.assertThat(output) - .contains( - String.format( - "Details: %n- Successfully %s these tables:%n" + "[bar.tbl3]", operation)); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).isEmpty(); Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) .containsExactly(TableIdentifier.parse("bar.tbl3")); // using --identifiers-regex option which matches all the tables starts with "foo." builder = builderWithDefaultArgs(deleteSourceTables); - builder.identifierRegex("^foo\\..*"); - result = CatalogMigrator.registerTables(builder.build()); + CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); + result = + catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers("^foo\\..*")); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - output = stringWriter.toString(); - Assertions.assertThat(output) - .contains( - "User has not specified the table identifiers. Selecting all the tables from all the namespaces " - + "from the source catalog which matches the regex pattern:^foo\\..*"); - Assertions.assertThat(output) - .contains( - "Collecting all the tables from all the namespaces of source catalog " - + "which matches the regex pattern:^foo\\..*"); - operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(output).contains(String.format("Identified 2 tables for %s.", operation)); - operation = deleteSourceTables ? 
"migrated" : "registered"; - Assertions.assertThat(output) - .contains( - String.format( - "Summary: %n- Successfully %s 2 tables from %s catalog to" + " %s catalog.", - operation, catalog1.name(), catalog2.name())); - Assertions.assertThat(output) - .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); @@ -186,56 +115,32 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { public void testRegisterError(boolean deleteSourceTables) { // use invalid namespace which leads to NoSuchTableException ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); - builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("dummy.tbl3"))); - CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); + CatalogMigrationResult result = + new CatalogMigrator(builder.build()) + .registerTables(Collections.singletonList(TableIdentifier.parse("dummy.tbl3"))); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToRegisterTableIdentifiers()) .containsExactly(TableIdentifier.parse("dummy.tbl3")); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - String output = stringWriter.toString(); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); - operation = deleteSourceTables ? "migrate" : "register"; - Assertions.assertThat(output) - .contains( - String.format( - "Summary: %n- Failed to %s 1 tables from %s catalog to %s catalog." 
- + " Please check the `catalog_migration.log`", - operation, catalog1.name(), catalog2.name())); - Assertions.assertThat(output) - .contains( - String.format("Details: %n- Failed to %s these tables:%n[dummy.tbl3]", operation)); - // try to register same table twice which leads to AlreadyExistsException builder = builderWithDefaultArgs(deleteSourceTables); - builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); - result = CatalogMigrator.registerTables(builder.build()); + result = + new CatalogMigrator(builder.build()) + .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactly(TableIdentifier.parse("foo.tbl2")); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); builder = builderWithDefaultArgs(deleteSourceTables); - builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); - result = CatalogMigrator.registerTables(builder.build()); + result = + new CatalogMigrator(builder.build()) + .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToRegisterTableIdentifiers()) .contains(TableIdentifier.parse("foo.tbl2")); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - - output = stringWriter.toString(); - operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); - operation = deleteSourceTables ? "migrate" : "register"; - Assertions.assertThat(output) - .contains( - String.format( - "Summary: %n- Failed to %s 1 tables from %s catalog to %s catalog." 
- + " Please check the `catalog_migration.log`", - operation, catalog1.name(), catalog2.name())); - Assertions.assertThat(output) - .contains(String.format("Details: %n- Failed to %s these tables:%n[foo.tbl2]", operation)); } @Order(3) @@ -244,25 +149,13 @@ public void testRegisterError(boolean deleteSourceTables) { public void testRegisterWithFewFailures(boolean deleteSourceTables) { // register only foo.tbl2 ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); - builder.tableIdentifiers(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); - CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); + CatalogMigrationResult result = + new CatalogMigrator(builder.build()) + .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactly(TableIdentifier.parse("foo.tbl2")); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - String output = stringWriter.toString(); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(output).contains(String.format("Identified 1 tables for %s.", operation)); - operation = deleteSourceTables ? "migrated" : "registered"; - Assertions.assertThat(output) - .contains( - String.format( - "Summary: %n- Successfully %s 1 tables from %s catalog to %s catalog.", - operation, catalog1.name(), catalog2.name())); - Assertions.assertThat(output) - .contains( - String.format( - "Details: %n" + "- Successfully %s these tables:%n" + "[foo.tbl2]", operation)); if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { // create a table with the same name in source catalog which got deleted. 
@@ -280,31 +173,6 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { .contains(TableIdentifier.parse("foo.tbl2")); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - output = stringWriter.toString(); - operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(output).contains(String.format("Identified 4 tables for %s.", operation)); - operation = deleteSourceTables ? "migrated" : "registered"; - String ops = deleteSourceTables ? "migrate" : "register"; - Assertions.assertThat(output) - .contains( - String.format( - "Summary: %n" - + "- Successfully %s 3 tables from %s catalog to %s catalog.%n" - + "- Failed to %s 1 tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. %n" - + "Failed identifiers are written into `failed_identifiers.txt`. " - + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts.", - operation, - catalog1.name(), - catalog2.name(), - ops, - catalog1.name(), - catalog2.name())); - Assertions.assertThat(output) - .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); - Assertions.assertThat(output) - .contains(String.format("- Failed to %s these tables:%n[foo.tbl2]", ops)); Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); @@ -322,63 +190,18 @@ public void testRegisterNoTables(boolean deleteSourceTables) { ImmutableCatalogMigratorParams.builder() .sourceCatalog(catalog2) .targetCatalog(catalog1) - .isDryRun(false) - .printWriter(printWriter) - .outputDirPath(outputDir.toAbsolutePath().toString()) .deleteEntriesFromSourceCatalog(deleteSourceTables); - CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); + CatalogMigrator catalogMigrator = new 
CatalogMigrator(builder.build()); + CatalogMigrationResult result = + catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers(null)); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - - String output = stringWriter.toString(); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(output).contains(String.format("Identified 0 tables for %s.", operation)); } @Order(5) @ParameterizedTest @ValueSource(booleans = {true, false}) - public void testDryRun(boolean deleteSourceTables) throws Exception { - ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); - builder.isDryRun(true); - CatalogMigrationResult result = CatalogMigrator.registerTables(builder.build()); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), - TableIdentifier.parse("foo.tbl2"), - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4")); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - - String output = stringWriter.toString(); - // should not prompt for dry run - Assertions.assertThat(output) - .doesNotContain( - "Have you read the above warnings and are you sure you want to continue? (yes/no):"); - Assertions.assertThat(output).contains("Dry run is completed."); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(output) - .contains( - String.format( - "Summary: %n" - + "- Identified 4 tables for %s by dry-run. " - + "These identifiers are also written into dry_run_identifiers.txt. 
" - + "You can use this file with `--identifiers-from-file` option.", - operation)); - Assertions.assertThat(output) - .contains( - String.format( - "Details: %n" + "- Identified these tables for %s by dry-run:%n", operation)); - Assertions.assertThat(Files.exists(dryRunFile)).isTrue(); - Assertions.assertThat(Files.readAllLines(dryRunFile)) - .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); - } - - @Order(6) - @ParameterizedTest - @ValueSource(booleans = {true, false}) public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { // additionally create 240 tables along with 4 tables created in beforeEach() IntStream.range(0, 240) @@ -394,26 +217,6 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - String operation = deleteSourceTables ? "migration" : "registration"; - String output = stringWriter.toString(); - Assertions.assertThat(output) - .contains(String.format("Identified 244 tables for %s.", operation)); - operation = deleteSourceTables ? "migrated" : "registered"; - Assertions.assertThat(output) - .contains( - String.format( - "Summary: %n- Successfully %s 244 tables from %s catalog to" + " %s catalog.", - operation, catalog1.name(), catalog2.name())); - Assertions.assertThat(output) - .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); - - operation = deleteSourceTables ? 
"migration" : "registration"; - // validate intermediate output - Assertions.assertThat(output) - .contains(String.format("Attempted %s for 100 tables out of 244 tables.", operation)); - Assertions.assertThat(output) - .contains(String.format("Attempted %s for 200 tables out of 244 tables.", operation)); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).hasSize(242); Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( @@ -422,7 +225,8 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E private CatalogMigrationResult registerAllTables(boolean deleteSourceTables) { ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); - return CatalogMigrator.registerTables(builder.build()); + CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); + return catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers(null)); } private ImmutableCatalogMigratorParams.Builder builderWithDefaultArgs( @@ -430,9 +234,6 @@ private ImmutableCatalogMigratorParams.Builder builderWithDefaultArgs( return ImmutableCatalogMigratorParams.builder() .sourceCatalog(catalog1) .targetCatalog(catalog2) - .isDryRun(false) - .printWriter(printWriter) - .outputDirPath(outputDir.toAbsolutePath().toString()) .deleteEntriesFromSourceCatalog(deleteSourceTables); } } diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java index 545123b..c27fb92 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java @@ -15,9 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.api.test; -import static 
org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -25,9 +22,6 @@ public class HadoopCatalogMigratorTest extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() { - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); - catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "catalog1"); catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "catalog2"); diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java index 2f77c31..7f441ef 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java @@ -15,9 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.api.test; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; @@ -26,9 +23,6 @@ public class ITHadoopToHiveCatalogMigrator extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() throws Exception { - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); - HiveMetaStoreRunner.startMetastore(); catalog1 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); diff --git 
a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java index ba7f176..39d8d9f 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java @@ -15,9 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.api.test; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; @@ -26,9 +23,6 @@ public class ITHiveToHadoopCatalogMigrator extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() throws Exception { - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); - HiveMetaStoreRunner.startMetastore(); catalog1 = HiveMetaStoreRunner.hiveCatalog(); diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java index 6486997..56a68f9 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java @@ -15,9 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.api.test; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static 
org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; @@ -30,9 +27,6 @@ public class ITHiveToNessieCatalogMigrator extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() throws Exception { - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); - HiveMetaStoreRunner.startMetastore(); catalog1 = HiveMetaStoreRunner.hiveCatalog(); diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java index 0055888..da249af 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java @@ -15,9 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.api.test; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; @@ -30,9 +27,6 @@ public class ITNessieToHiveCatalogMigrator extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() throws Exception { - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); - HiveMetaStoreRunner.startMetastore(); catalog1 = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); diff --git a/api/build.gradle.kts b/api/build.gradle.kts index 442a3bd..633ab9e 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts 
@@ -23,8 +23,7 @@ plugins { dependencies { implementation(libs.guava) implementation(libs.slf4j) - implementation(libs.logback.classic) - implementation(libs.logback.core) + runtimeOnly(libs.logback.classic) implementation(libs.iceberg.spark.runtime) annotationProcessor(libs.immutables) diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java index 760d7ca..7edc147 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java @@ -15,11 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.api; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_TO_DELETE_AT_SOURCE_FILE; - -import java.io.PrintWriter; import java.util.List; import org.apache.iceberg.catalog.TableIdentifier; import org.immutables.value.Value; @@ -32,93 +27,4 @@ public abstract class CatalogMigrationResult { public abstract List failedToRegisterTableIdentifiers(); public abstract List failedToDeleteTableIdentifiers(); - - public void printSummary( - PrintWriter printWriter, - boolean deleteSourceCatalogTables, - String sourceCatalogType, - String targetCatalogType) { - printWriter.println(String.format("%nSummary: ")); - if (!registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Successfully %s %d tables from %s catalog to %s catalog.", - deleteSourceCatalogTables ? 
"migrated" : "registered", - registeredTableIdentifiers().size(), - sourceCatalogType, - targetCatalogType)); - } - if (!failedToRegisterTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to %s %d tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. " - + "%nFailed identifiers are written into `%s`. " - + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts.", - deleteSourceCatalogTables ? "migrate" : "register", - failedToRegisterTableIdentifiers().size(), - sourceCatalogType, - targetCatalogType, - FAILED_IDENTIFIERS_FILE)); - } - if (!failedToDeleteTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to delete %d tables from %s catalog. " - + "Please check the `catalog_migration.log` file for the reason. " - + "%nFailed to delete identifiers are written into `%s`. ", - failedToDeleteTableIdentifiers().size(), - sourceCatalogType, - FAILED_TO_DELETE_AT_SOURCE_FILE)); - } - } - - public void printDetails(PrintWriter printWriter, boolean deleteSourceCatalogTables) { - printWriter.println(String.format("%nDetails: ")); - if (!registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Successfully %s these tables:", - deleteSourceCatalogTables ? "migrated" : "registered")); - printWriter.println(registeredTableIdentifiers()); - } - - if (!failedToRegisterTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to %s these tables:", deleteSourceCatalogTables ? 
"migrate" : "register")); - printWriter.println(failedToRegisterTableIdentifiers()); - } - - if (!failedToDeleteTableIdentifiers().isEmpty()) { - printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); - printWriter.println(failedToDeleteTableIdentifiers()); - } - } - - public void printDryRunResults(PrintWriter printWriter, boolean deleteSourceCatalogTables) { - printWriter.println(String.format("%nSummary: ")); - if (registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- No tables are identified for %s. Please check logs for more info.", - deleteSourceCatalogTables ? "migration" : "registration")); - return; - } - printWriter.println( - String.format( - "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. " - + "You can use this file with `--identifiers-from-file` option.", - registeredTableIdentifiers().size(), - deleteSourceCatalogTables ? "migration" : "registration", - DRY_RUN_FILE)); - - printWriter.println(String.format("%nDetails: ")); - printWriter.println( - String.format( - "- Identified these tables for %s by dry-run:", - deleteSourceCatalogTables ? 
"migration" : "registration")); - printWriter.println(registeredTableIdentifiers()); - } } diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index c4933f8..6086c6e 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -15,17 +15,10 @@ */ package org.projectnessie.tools.catalog.migration.api; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; -import java.io.IOException; -import java.io.PrintWriter; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Objects; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -46,232 +39,124 @@ public class CatalogMigrator { public static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; public static final String DRY_RUN_FILE = "dry_run_identifiers.txt"; - private CatalogMigrator() {} + private final Catalog sourceCatalog; + private final Catalog targetCatalog; + private final boolean deleteEntriesFromSourceCatalog; + + public CatalogMigrator(CatalogMigratorParams catalogMigratorParams) { + this.sourceCatalog = catalogMigratorParams.sourceCatalog(); + this.targetCatalog = catalogMigratorParams.targetCatalog(); + this.deleteEntriesFromSourceCatalog = catalogMigratorParams.deleteEntriesFromSourceCatalog(); + } /** - * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). - * - *

Users must make sure that no in-progress commits on the tables of source catalog during - * registration. + * Get the table identifiers which matches the regular expression pattern input from all the + * namespaces. * - * @param catalogMigratorParams configuration params - * @return List of successfully registered/migrated and list of failed to register/migrate table - * identifiers. + * @param identifierRegex regular expression pattern. If null, fetches all the table identifiers + * from all the namespaces. + * @return List of table identifiers. */ - public static CatalogMigrationResult registerTables(CatalogMigratorParams catalogMigratorParams) { - - PrintWriter printWriter = catalogMigratorParams.printWriter(); - boolean deleteEntriesFromSourceCatalog = catalogMigratorParams.deleteEntriesFromSourceCatalog(); - String operation = deleteEntriesFromSourceCatalog ? "migration" : "registration"; + public List getMatchingTableIdentifiers(String identifierRegex) { + LOG.info("Collecting all the namespaces from source catalog..."); + // fetch all the table identifiers from all the namespaces. + List namespaces = + (sourceCatalog instanceof SupportsNamespaces) + ? 
((SupportsNamespaces) sourceCatalog).listNamespaces() + : ImmutableList.of(Namespace.empty()); - List identifiers; - if (catalogMigratorParams.tableIdentifiers() == null - || catalogMigratorParams.tableIdentifiers().isEmpty()) { - identifiers = - getMatchingTableIdentifiers( - catalogMigratorParams.sourceCatalog(), - catalogMigratorParams.identifierRegex(), - printWriter); + Predicate matchedIdentifiersPredicate; + if (identifierRegex == null) { + LOG.info("Collecting all the tables from all the namespaces of source catalog..."); + matchedIdentifiersPredicate = tableIdentifier -> true; } else { - identifiers = catalogMigratorParams.tableIdentifiers(); + LOG.info( + "Collecting all the tables from all the namespaces of source catalog" + + " which matches the regex pattern:" + + identifierRegex); + Pattern pattern = Pattern.compile(identifierRegex); + matchedIdentifiersPredicate = + tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); } + return namespaces.stream() + .filter(Objects::nonNull) + .flatMap( + namespace -> + sourceCatalog.listTables(namespace).stream().filter(matchedIdentifiersPredicate)) + .collect(Collectors.toList()); + } - printWriter.println( - String.format("%nIdentified %d tables for %s.", identifiers.size(), operation)); - - if (catalogMigratorParams.isDryRun()) { - CatalogMigrationResult result = - ImmutableCatalogMigrationResult.builder() - .registeredTableIdentifiers(identifiers) - .failedToRegisterTableIdentifiers(Collections.emptyList()) - .failedToDeleteTableIdentifiers(Collections.emptyList()) - .build(); - printWriter.println("Dry run is completed."); - - writeToFile( - pathWithOutputDir(catalogMigratorParams.outputDirPath(), DRY_RUN_FILE), - result.registeredTableIdentifiers()); - result.printDryRunResults(printWriter, deleteEntriesFromSourceCatalog); - return result; - } + /** + * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). + * + *

Users must make sure that no in-progress commits on the tables of source catalog during + * registration. + * + * @param identifiers List of table identifiers to register or migrate + * @return {@link CatalogMigrationResult} instance + */ + public CatalogMigrationResult registerTables(List identifiers) { + ImmutableCatalogMigrationResult.Builder resultBuilder = + ImmutableCatalogMigrationResult.builder(); + registerTables(identifiers, resultBuilder); + return resultBuilder.build(); + } - if (deleteEntriesFromSourceCatalog - && catalogMigratorParams.sourceCatalog() instanceof HadoopCatalog) { - printWriter.println( - String.format( - "[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " - + "catalog. %nAvoid operating the migrated tables from the source catalog after migration. " - + "Use the tables from target catalog.")); + /** + * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). + * + *

Users must make sure that no in-progress commits on the tables of source catalog during + * registration. + * + * @param identifiers List of table identifiers to register or migrate + * @param resultBuilder result builder to collect the results + */ + public void registerTables( + List identifiers, ImmutableCatalogMigrationResult.Builder resultBuilder) { + Preconditions.checkArgument(identifiers != null, "Identifiers list is null"); + Preconditions.checkArgument(resultBuilder != null, "result builder is null"); + + if (identifiers.isEmpty()) { + LOG.info("Identifiers list is empty"); + return; } - printWriter.println(String.format("%nStarted %s ...", operation)); - List registeredTableIdentifiers = new ArrayList<>(); - List failedToRegisterTableIdentifiers = new ArrayList<>(); - List failedToDeleteTableIdentifiers = new ArrayList<>(); - AtomicInteger counter = new AtomicInteger(); identifiers.forEach( tableIdentifier -> { - boolean isRegistered = - registerTable( - catalogMigratorParams.sourceCatalog(), - catalogMigratorParams.targetCatalog(), - registeredTableIdentifiers, - failedToRegisterTableIdentifiers, - tableIdentifier); + boolean isRegistered = registerTable(tableIdentifier); + if (isRegistered) { + resultBuilder.addRegisteredTableIdentifiers(tableIdentifier); + } else { + resultBuilder.addFailedToRegisterTableIdentifiers(tableIdentifier); + } // HadoopCatalog dropTable will delete the table files completely even when purge is // false. So, skip dropTable for HadoopCatalog. 
boolean deleteTableFromSourceCatalog = - !(catalogMigratorParams.sourceCatalog() instanceof HadoopCatalog) + !(sourceCatalog instanceof HadoopCatalog) && isRegistered && deleteEntriesFromSourceCatalog; try { - if (deleteTableFromSourceCatalog) { - boolean isDropped = - catalogMigratorParams.sourceCatalog().dropTable(tableIdentifier, false); - if (!isDropped) { - failedToDeleteTableIdentifiers.add(tableIdentifier); - } + if (deleteTableFromSourceCatalog && !sourceCatalog.dropTable(tableIdentifier, false)) { + resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); } } catch (Exception exception) { - failedToDeleteTableIdentifiers.add(tableIdentifier); + resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); } - - int count = counter.incrementAndGet(); - if (count % 100 == 0) { - printWriter.println( - String.format( - "%nAttempted %s for %d tables out of %d tables.", - operation, count, identifiers.size())); - } }); - printWriter.println(String.format("%nFinished %s ...", operation)); - - CatalogMigrationResult result = - ImmutableCatalogMigrationResult.builder() - .registeredTableIdentifiers(registeredTableIdentifiers) - .failedToRegisterTableIdentifiers(failedToRegisterTableIdentifiers) - .failedToDeleteTableIdentifiers(failedToDeleteTableIdentifiers) - .build(); - - if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - writeToFile( - pathWithOutputDir(catalogMigratorParams.outputDirPath(), FAILED_IDENTIFIERS_FILE), - result.failedToRegisterTableIdentifiers()); - } - if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - writeToFile( - pathWithOutputDir(catalogMigratorParams.outputDirPath(), FAILED_TO_DELETE_AT_SOURCE_FILE), - result.failedToDeleteTableIdentifiers()); - } - - result.printSummary( - printWriter, - deleteEntriesFromSourceCatalog, - catalogMigratorParams.sourceCatalog().name(), - catalogMigratorParams.targetCatalog().name()); - - 
result.printDetails(printWriter, deleteEntriesFromSourceCatalog); - - return result; } - private static boolean registerTable( - Catalog sourceCatalog, - Catalog targetCatalog, - List registeredTableIdentifiers, - List failedToMigrateTableIdentifiers, - TableIdentifier tableIdentifier) { + private boolean registerTable(TableIdentifier tableIdentifier) { try { // register the table to the target catalog TableOperations ops = ((BaseTable) sourceCatalog.loadTable(tableIdentifier)).operations(); targetCatalog.registerTable(tableIdentifier, ops.current().metadataFileLocation()); - - registeredTableIdentifiers.add(tableIdentifier); LOG.info("Successfully migrated the table {}", tableIdentifier); return true; } catch (Exception ex) { - failedToMigrateTableIdentifiers.add(tableIdentifier); LOG.warn("Unable to register the table {}", tableIdentifier, ex); return false; } } - - private static List getMatchingTableIdentifiers( - Catalog sourceCatalog, String identifierRegex, PrintWriter printWriter) { - if (identifierRegex == null) { - printWriter.println( - String.format( - "%nUser has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog.")); - } else { - printWriter.println( - String.format( - "%nUser has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog " - + "which matches the regex pattern:" - + identifierRegex)); - } - - printWriter.println("Collecting all the namespaces from source catalog..."); - // fetch all the table identifiers from all the namespaces. - List namespaces = - (sourceCatalog instanceof SupportsNamespaces) - ? 
((SupportsNamespaces) sourceCatalog).listNamespaces() - : ImmutableList.of(Namespace.empty()); - if (identifierRegex == null) { - printWriter.println("Collecting all the tables from all the namespaces of source catalog..."); - } else { - printWriter.println( - "Collecting all the tables from all the namespaces of source catalog" - + " which matches the regex pattern:" - + identifierRegex); - } - - Predicate matchedIdentifiersPredicate; - if (identifierRegex != null) { - Pattern pattern = Pattern.compile(identifierRegex); - matchedIdentifiersPredicate = - tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); - } else { - matchedIdentifiersPredicate = tableIdentifier -> true; - } - return getMatchingTableIdentifiers(sourceCatalog, namespaces, matchedIdentifiersPredicate); - } - - private static List getMatchingTableIdentifiers( - Catalog sourceCatalog, - List namespaces, - Predicate matchedIdentifiersPredicate) { - List allIdentifiers = new ArrayList<>(); - namespaces.stream() - .filter(Objects::nonNull) - .forEach( - namespace -> { - List matchedIdentifiers = - sourceCatalog.listTables(namespace).stream() - .filter(matchedIdentifiersPredicate) - .collect(Collectors.toList()); - allIdentifiers.addAll(matchedIdentifiers); - }); - return allIdentifiers; - } - - private static Path pathWithOutputDir(String outputDirPath, String fileName) { - if (outputDirPath == null) { - return Paths.get(fileName); - } - return Paths.get(outputDirPath, fileName).toAbsolutePath(); - } - - private static void writeToFile(Path filePath, List identifiers) { - List identifiersString = - identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); - try { - Files.write(filePath, identifiersString); - } catch (IOException e) { - throw new RuntimeException("Failed to write the file:" + filePath, e); - } - } } diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java 
b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java index 6d83467..2dfdc33 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java @@ -16,11 +16,7 @@ package org.projectnessie.tools.catalog.migration.api; import com.google.common.base.Preconditions; -import java.io.PrintWriter; -import java.util.List; -import javax.annotation.Nullable; import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.TableIdentifier; import org.immutables.value.Value; @Value.Immutable @@ -32,43 +28,12 @@ public interface CatalogMigratorParams { /** Target {@link Catalog} to which the tables need to be migrated. */ Catalog targetCatalog(); - /** - * Optional List of {@link TableIdentifier} for the tables required to be migrated. If not - * specified, all the tables would be migrated. - */ - @Nullable - List tableIdentifiers(); - - /** - * Optional Regular expression pattern used to migrate only the tables whose identifiers match - * this pattern. Can be provided instead of `tableIdentifiers`. - */ - @Nullable - String identifierRegex(); - - /** To execute as dry run. */ - boolean isDryRun(); - /** Delete the table entries from source catalog after successful migration. */ boolean deleteEntriesFromSourceCatalog(); - /** To print the regular updates on the console. */ - PrintWriter printWriter(); - - /** optional path to store the result files. If null, uses present working directory. 
*/ - @Nullable - String outputDirPath(); - @Value.Check default void validate() { - Preconditions.checkArgument(sourceCatalog() != null, "Invalid source catalog: null"); - Preconditions.checkArgument(targetCatalog() != null, "Invalid target catalog: null"); Preconditions.checkArgument( !targetCatalog().equals(sourceCatalog()), "target catalog is same as source catalog"); - - if (identifierRegex() != null && tableIdentifiers() != null && !tableIdentifiers().isEmpty()) { - throw new IllegalArgumentException( - "Both the identifiers list and identifierRegex is configured."); - } } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java index abaa8da..3d4c995 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java @@ -15,94 +15,56 @@ */ package org.projectnessie.tools.catalog.migration.api; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.Collections; +import java.nio.file.Path; import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class CatalogMigratorParamsTest { + protected static @TempDir Path logDir; + + @BeforeAll + protected static void initLogDir() { + System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); + } + @Test - public void testInvalidArgs() throws IOException { + public void testInvalidArgs() { Catalog catalog1 = new HadoopCatalog(); Catalog catalog2 = new HadoopCatalog(); - StringWriter stringWriter = new 
StringWriter(); - PrintWriter printWriter = new PrintWriter(stringWriter); - - try { - Assertions.assertThatThrownBy( - () -> - ImmutableCatalogMigratorParams.builder() - .sourceCatalog(catalog2) // source-catalog is same as target catalog - .targetCatalog(catalog2) - .isDryRun(false) - .printWriter(printWriter) - .outputDirPath("temp") - .deleteEntriesFromSourceCatalog(true) - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("target catalog is same as source catalog"); - - Assertions.assertThatThrownBy( - () -> - ImmutableCatalogMigratorParams.builder() - .sourceCatalog(catalog1) - .targetCatalog(catalog2) - .isDryRun(false) - .printWriter(printWriter) - .deleteEntriesFromSourceCatalog(true) - .identifierRegex(".*") - .tableIdentifiers(Collections.singletonList(TableIdentifier.parse("foo.abc"))) - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Both the identifiers list and identifierRegex is configured."); - - Assertions.assertThatThrownBy( - () -> - ImmutableCatalogMigratorParams.builder() - .sourceCatalog(catalog1) - .targetCatalog(null) // target-catalog is null - .isDryRun(false) - .printWriter(printWriter) - .outputDirPath("temp") - .deleteEntriesFromSourceCatalog(true) - .build()) - .isInstanceOf(NullPointerException.class) - .hasMessageContaining("targetCatalog"); + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog2) // source-catalog is same as target catalog + .targetCatalog(catalog2) + .deleteEntriesFromSourceCatalog(true) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("target catalog is same as source catalog"); - Assertions.assertThatThrownBy( - () -> - ImmutableCatalogMigratorParams.builder() - .sourceCatalog(null) // source-catalog is null - .targetCatalog(catalog2) - .isDryRun(false) - .printWriter(printWriter) - .outputDirPath("temp") - .deleteEntriesFromSourceCatalog(true) - .build()) - 
.isInstanceOf(NullPointerException.class) - .hasMessageContaining("sourceCatalog"); + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog1) + .targetCatalog(null) // target-catalog is null + .deleteEntriesFromSourceCatalog(true) + .build()) + .isInstanceOf(NullPointerException.class) + .hasMessageContaining("targetCatalog"); - Assertions.assertThatThrownBy( - () -> - ImmutableCatalogMigratorParams.builder() - .sourceCatalog(catalog1) - .targetCatalog(catalog2) - .isDryRun(false) - .printWriter(null) - .deleteEntriesFromSourceCatalog(true) - .build()) - .isInstanceOf(NullPointerException.class) - .hasMessageContaining("printWriter"); - } finally { - stringWriter.close(); - printWriter.close(); - } + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(null) // source-catalog is null + .targetCatalog(catalog2) + .deleteEntriesFromSourceCatalog(true) + .build()) + .isInstanceOf(NullPointerException.class) + .hasMessageContaining("sourceCatalog"); } } diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts index b004931..4543001 100644 --- a/buildSrc/build.gradle.kts +++ b/buildSrc/build.gradle.kts @@ -33,4 +33,9 @@ dependencies { implementation("org.projectnessie.buildsupport:spotless:$nessieVer") } +java { + sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_11 +} + kotlinDslPluginOptions { jvmTarget.set(JavaVersion.VERSION_11.toString()) } diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index 4d4bce8..e023e9c 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -31,8 +31,6 @@ applyShadowJar() dependencies { implementation(project(":iceberg-catalog-migrator-api")) implementation(libs.slf4j) - implementation(libs.logback.classic) - implementation(libs.logback.core) implementation(libs.picocli) implementation(libs.iceberg.spark.runtime) implementation(libs.iceberg.dell) diff --git 
a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java index 6d1adb4..ee028fa 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java @@ -15,17 +15,24 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_TO_DELETE_AT_SOURCE_FILE; + +import com.google.common.collect.Lists; import java.io.IOException; -import java.io.InputStream; import java.io.PrintWriter; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.CatalogUtil; @@ -39,7 +46,9 @@ import org.apache.iceberg.jdbc.JdbcCatalog; import org.apache.iceberg.nessie.NessieCatalog; import org.apache.iceberg.rest.RESTCatalog; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; +import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrationResult; import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; import picocli.CommandLine; @@ -159,25 +168,25 @@ public class CatalogMigrationCLI implements Callable { "optional local output directory path to write CLI output 
files like `failed_identifiers.txt`, " + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. " + "Uses the present working directory if not specified.") - private String outputDirPath; + private Path outputDirPath; - private final InputStream input; + private boolean disablePrompts; - public CatalogMigrationCLI(InputStream input) { - this.input = input; - } + private static final int BATCH_SIZE = 100; - public static void main(String... args) { - runWithInput(System.in, args); - } + public CatalogMigrationCLI() {} - public static void runWithInput(InputStream input, String... args) { - CommandLine commandLine = new CommandLine(new CatalogMigrationCLI(input)); + public static void main(String... args) { + CommandLine commandLine = new CommandLine(new CatalogMigrationCLI()); commandLine.setUsageHelpWidth(150); int exitCode = commandLine.execute(args); System.exit(exitCode); } + public void disablePrompts() { + this.disablePrompts = true; + } + @Override public Integer call() { validateIdentifierOptions(); @@ -207,7 +216,101 @@ public Integer call() { targetCatalogConf); printWriter.println(String.format("%nConfigured target catalog: %s", targetCatalogType.name())); - List tableIdentifiers = null; + List tableIdentifiers = processIdentifiersInput(printWriter); + + if (!canProceed(printWriter, sourceCatalog)) { + return 0; + } + + ImmutableCatalogMigratorParams params = + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(deleteSourceCatalogTables) + .build(); + CatalogMigrator catalogMigrator = new CatalogMigrator(params); + + List identifiers; + if (tableIdentifiers.isEmpty()) { + if (identifiersRegEx == null) { + printWriter.println( + String.format( + "%nUser has not specified the table identifiers." 
+ + " Selecting all the tables from all the namespaces from the source catalog.")); + } else { + printWriter.println( + String.format( + "%nUser has not specified the table identifiers." + + " Selecting all the tables from all the namespaces from the source catalog " + + "which matches the regex pattern:" + + identifiersRegEx)); + } + identifiers = catalogMigrator.getMatchingTableIdentifiers(identifiersRegEx); + } else { + identifiers = tableIdentifiers; + } + + String operation = deleteSourceCatalogTables ? "migration" : "registration"; + printWriter.println( + String.format("%nIdentified %d tables for %s.", identifiers.size(), operation)); + + ImmutableCatalogMigrationResult.Builder resultBuilder = + ImmutableCatalogMigrationResult.builder(); + if (isDryRun) { + CatalogMigrationResult result = + resultBuilder.addAllRegisteredTableIdentifiers(identifiers).build(); + writeToFile(pathWithOutputDir(DRY_RUN_FILE), result.registeredTableIdentifiers()); + printWriter.println("Dry run is completed."); + printDryRunResults(result, printWriter); + return 0; + } + + printWriter.println(String.format("%nStarted %s ...", operation)); + + List> IdentifierBatches = Lists.partition(identifiers, BATCH_SIZE); + AtomicInteger counter = new AtomicInteger(); + IdentifierBatches.forEach( + identifierBatch -> { + catalogMigrator.registerTables(identifierBatch, resultBuilder); + printWriter.println( + String.format( + "%nAttempted %s for %d tables out of %d tables.", + operation, counter.incrementAndGet() * BATCH_SIZE, identifiers.size())); + }); + + CatalogMigrationResult result = resultBuilder.build(); + writeToFile( + pathWithOutputDir(FAILED_IDENTIFIERS_FILE), result.failedToRegisterTableIdentifiers()); + writeToFile( + pathWithOutputDir(FAILED_TO_DELETE_AT_SOURCE_FILE), + result.failedToDeleteTableIdentifiers()); + + printWriter.println(String.format("%nFinished %s ...", operation)); + printSummary(result, printWriter, sourceCatalog.name(), targetCatalog.name()); + 
printDetails(result, printWriter); + return 0; + } + + private boolean canProceed(PrintWriter printWriter, Catalog sourceCatalog) { + if (isDryRun || disablePrompts) { + return true; + } + if (deleteSourceCatalogTables) { + if (sourceCatalog instanceof HadoopCatalog) { + printWriter.println( + String.format( + "[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " + + "catalog. %nAvoid operating the migrated tables from the source catalog after migration. " + + "Use the tables from target catalog.")); + } + return PromptUtil.proceedForMigration(printWriter); + } else { + return PromptUtil.proceedForRegistration(printWriter); + } + } + + private List processIdentifiersInput(PrintWriter printWriter) { + List tableIdentifiers; if (identifiersFromFile != null) { try { printWriter.println( @@ -223,33 +326,10 @@ public Integer call() { } else if (!identifiers.isEmpty()) { tableIdentifiers = identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toList()); + } else { + tableIdentifiers = Collections.emptyList(); } - - if (!isDryRun) { - if (deleteSourceCatalogTables) { - if (!PromptUtil.proceedForMigration(input, printWriter)) { - return 0; - } - } else { - if (!PromptUtil.proceedForRegistration(input, printWriter)) { - return 0; - } - } - } - - ImmutableCatalogMigratorParams params = - ImmutableCatalogMigratorParams.builder() - .sourceCatalog(sourceCatalog) - .targetCatalog(targetCatalog) - .tableIdentifiers(tableIdentifiers) - .identifierRegex(identifiersRegEx) - .deleteEntriesFromSourceCatalog(deleteSourceCatalogTables) - .isDryRun(isDryRun) - .outputDirPath(outputDirPath) - .printWriter(printWriter) - .build(); - CatalogMigrator.registerTables(params); - return 0; + return tableIdentifiers; } private void validateIdentifierOptions() { @@ -278,6 +358,112 @@ private void validateIdentifierOptions() { } } + private void printSummary( + CatalogMigrationResult result, + PrintWriter printWriter, + String sourceCatalogType, 
+ String targetCatalogType) { + printWriter.println(String.format("%nSummary: ")); + if (!result.registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Successfully %s %d tables from %s catalog to %s catalog.", + deleteSourceCatalogTables ? "migrated" : "registered", + result.registeredTableIdentifiers().size(), + sourceCatalogType, + targetCatalogType)); + } + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to %s %d tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "%nFailed identifiers are written into `%s`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + deleteSourceCatalogTables ? "migrate" : "register", + result.failedToRegisterTableIdentifiers().size(), + sourceCatalogType, + targetCatalogType, + FAILED_IDENTIFIERS_FILE)); + } + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to delete %d tables from %s catalog. " + + "Please check the `catalog_migration.log` file for the reason. " + + "%nFailed to delete identifiers are written into `%s`. ", + result.failedToDeleteTableIdentifiers().size(), + sourceCatalogType, + FAILED_TO_DELETE_AT_SOURCE_FILE)); + } + } + + private void printDetails(CatalogMigrationResult result, PrintWriter printWriter) { + printWriter.println(String.format("%nDetails: ")); + if (!result.registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Successfully %s these tables:", + deleteSourceCatalogTables ? "migrated" : "registered")); + printWriter.println(result.registeredTableIdentifiers()); + } + + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- Failed to %s these tables:", deleteSourceCatalogTables ? 
"migrate" : "register")); + printWriter.println(result.failedToRegisterTableIdentifiers()); + } + + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); + printWriter.println(result.failedToDeleteTableIdentifiers()); + } + } + + private void printDryRunResults(CatalogMigrationResult result, PrintWriter printWriter) { + printWriter.println(String.format("%nSummary: ")); + if (result.registeredTableIdentifiers().isEmpty()) { + printWriter.println( + String.format( + "- No tables are identified for %s. Please check logs for more info.", + deleteSourceCatalogTables ? "migration" : "registration")); + return; + } + printWriter.println( + String.format( + "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. " + + "You can use this file with `--identifiers-from-file` option.", + result.registeredTableIdentifiers().size(), + deleteSourceCatalogTables ? "migration" : "registration", + DRY_RUN_FILE)); + + printWriter.println(String.format("%nDetails: ")); + printWriter.println( + String.format( + "- Identified these tables for %s by dry-run:", + deleteSourceCatalogTables ? 
"migration" : "registration")); + printWriter.println(result.registeredTableIdentifiers()); + } + + private static void writeToFile(Path filePath, List identifiers) { + List identifiersString = + identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); + try { + Files.write(filePath, identifiersString); + } catch (IOException e) { + throw new RuntimeException("Failed to write the file:" + filePath, e); + } + } + + private Path pathWithOutputDir(String fileName) { + if (outputDirPath == null) { + return Paths.get(fileName); + } + return outputDirPath.resolve(fileName); + } + private static String catalogImpl(CatalogType type, String customCatalogImpl) { switch (type) { case CUSTOM: diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java index 29d0071..0ceccea 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java @@ -15,9 +15,8 @@ */ package org.projectnessie.tools.catalog.migration.cli; -import java.io.InputStream; +import java.io.Console; import java.io.PrintWriter; -import java.util.Scanner; public final class PromptUtil { @@ -49,32 +48,31 @@ private PromptUtil() {} + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + "%n\tand can only be accessed from the target catalog."); - public static boolean proceedForRegistration(InputStream input, PrintWriter printWriter) { - return proceed(input, WARNING_FOR_REGISTRATION, printWriter); + public static boolean proceedForRegistration(PrintWriter printWriter) { + return proceed(WARNING_FOR_REGISTRATION, printWriter); } - public static boolean proceedForMigration(InputStream input, PrintWriter printWriter) { - return proceed(input, WARNING_FOR_MIGRATION, printWriter); + public static boolean 
proceedForMigration(PrintWriter printWriter) { + return proceed(WARNING_FOR_MIGRATION, printWriter); } - private static boolean proceed(InputStream inputStream, String warning, PrintWriter printWriter) { - try (Scanner scanner = new Scanner(inputStream)) { - printWriter.println(warning); + private static boolean proceed(String warning, PrintWriter printWriter) { + printWriter.println(warning); - while (true) { - printWriter.println( - "Have you read the above warnings and are you sure you want to continue? (yes/no):"); - String input = scanner.nextLine(); + Console console = System.console(); + while (true) { + printWriter.println( + "Have you read the above warnings and are you sure you want to continue? (yes/no):"); + String input = console.readLine(); - if (input.equalsIgnoreCase("yes")) { - printWriter.println("Continuing..."); - return true; - } else if (input.equalsIgnoreCase("no")) { - printWriter.println("Aborting..."); - return false; - } else { - printWriter.println("Invalid input. Please enter 'yes' or 'no'."); - } + if (input.equalsIgnoreCase("yes")) { + printWriter.println("Continuing..."); + return true; + } else if (input.equalsIgnoreCase("no")) { + printWriter.println("Aborting..."); + return false; + } else { + printWriter.println("Invalid input. 
Please enter 'yes' or 'no'."); } } } diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java index 759f918..13829d4 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java @@ -16,13 +16,13 @@ package org.projectnessie.tools.catlog.migration.cli; import com.google.common.collect.Lists; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.stream.IntStream; import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; import org.apache.iceberg.aws.glue.GlueCatalog; import org.apache.iceberg.catalog.Catalog; @@ -75,8 +75,8 @@ protected void afterEach() throws IOException { catalog2.createTable(TableIdentifier.of(Namespace.of("bar"), "tblx"), schema).refresh(); dropTables(); - deleteFileIfExists(dryRunFile); - deleteFileIfExists(failedIdentifiersFile); + Files.deleteIfExists(dryRunFile); + Files.deleteIfExists(failedIdentifiersFile); } @ParameterizedTest @@ -228,10 +228,6 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except .contains( "User has not specified the table identifiers. Selecting all the tables from all the namespaces " + "from the source catalog which matches the regex pattern:^foo\\..*"); - Assertions.assertThat(run.getOut()) - .contains( - "Collecting all the tables from all the namespaces of source catalog " - + "which matches the regex pattern:^foo\\..*"); operation = deleteSourceTables ? 
"migration" : "registration"; Assertions.assertThat(run.getOut()) .contains(String.format("Identified 2 tables for %s.", operation)); @@ -529,6 +525,45 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); } + @Order(6) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + // additionally create 240 tables along with 4 tables created in beforeEach() + IntStream.range(0, 240) + .forEach( + val -> + catalog1.createTable( + TableIdentifier.of(Namespace.of("foo"), "tblx" + val), schema)); + + RunCLI run = registerTablesCLI(deleteSourceTables, registerAllTablesArgs()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 244 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: %n- Successfully %s 244 tables from %s catalog to" + " %s catalog.", + operation, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); + + operation = deleteSourceTables ? 
"migration" : "registration"; + // validate intermediate output + Assertions.assertThat(run.getOut()) + .contains(String.format("Attempted %s for 100 tables out of 244 tables.", operation)); + Assertions.assertThat(run.getOut()) + .contains(String.format("Attempted %s for 200 tables out of 244 tables.", operation)); + + Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).hasSize(242); + Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + } + private static String[] registerAllTablesArgs() { ArrayList args = Lists.newArrayList( @@ -547,17 +582,12 @@ private static String[] registerAllTablesArgs() { private static RunCLI registerTablesCLI(boolean deleteSourceTables, String... args) throws Exception { - ByteArrayInputStream input = new ByteArrayInputStream("yes\n".getBytes()); - try { - if (!deleteSourceTables) { - return RunCLI.runWithInput(input, args); - } - List argsList = Lists.newArrayList(args); - argsList.add("--delete-source-tables"); - return RunCLI.runWithInput(input, argsList.toArray(new String[0])); - } finally { - input.close(); + if (!deleteSourceTables) { + return RunCLI.run(args); } + List argsList = Lists.newArrayList(args); + argsList.add("--delete-source-tables"); + return RunCLI.run(argsList.toArray(new String[0])); } protected static String catalogType(Catalog catalog) { diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java index fe1ca23..fdffe5d 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java @@ -20,6 +20,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import 
org.junit.jupiter.api.Disabled; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHadoopToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @@ -50,4 +51,11 @@ protected static void tearDown() throws Exception { dropNamespaces(); HiveMetaStoreRunner.stopMetastore(); } + + // disable large table test for IT to save CI time. It will be executed only for UT. + @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } } diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java index fd8736f..7b70f01 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java @@ -20,6 +20,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToHadoopCLIMigrationTest extends AbstractCLIMigrationTest { @@ -50,4 +51,11 @@ protected static void tearDown() throws Exception { dropNamespaces(); HiveMetaStoreRunner.stopMetastore(); } + + // disable large table test for IT to save CI time. It will be executed only for UT. 
+ @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } } diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java index 806ffed..44525b1 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java @@ -20,6 +20,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToNessieCLIMigrationTest extends AbstractCLIMigrationTest { @@ -55,4 +56,11 @@ protected static void tearDown() throws Exception { dropNamespaces(); HiveMetaStoreRunner.stopMetastore(); } + + // disable large table test for IT to save CI time. It will be executed only for UT. 
+ @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } } diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java index 34ea3bb..4ff203f 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java @@ -20,6 +20,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITNessieToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @@ -54,4 +55,11 @@ protected static void tearDown() throws Exception { dropNamespaces(); HiveMetaStoreRunner.stopMetastore(); } + + // disable large table test for IT to save CI time. It will be executed only for UT. + @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } } diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/PromptUtilTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/PromptUtilTest.java deleted file mode 100644 index 5b2036a..0000000 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/PromptUtilTest.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.projectnessie.tools.catlog.migration.cli; - -import static org.projectnessie.tools.catalog.migration.cli.PromptUtil.WARNING_FOR_MIGRATION; -import static org.projectnessie.tools.catalog.migration.cli.PromptUtil.WARNING_FOR_REGISTRATION; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.NoSuchElementException; -import org.assertj.core.api.Assertions; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; -import org.projectnessie.tools.catalog.migration.cli.PromptUtil; - -public class PromptUtilTest { - - @ParameterizedTest - @CsvSource({"yes, false", "yes, true", "no, false", "no, true", "dummy, false", "dummy, true"}) - public void testPrompts(String input, boolean deleteSourceTables) throws Exception { - String warning = deleteSourceTables ? WARNING_FOR_MIGRATION : WARNING_FOR_REGISTRATION; - StringWriter stringWriter = new StringWriter(); - PrintWriter printWriter = new PrintWriter(stringWriter); - ByteArrayInputStream inputStream = new ByteArrayInputStream(input.getBytes()); - try { - switch (input) { - case "yes": - Assertions.assertThat(callPrompt(deleteSourceTables, inputStream, printWriter)).isTrue(); - Assertions.assertThat(stringWriter.toString()).contains(warning); - Assertions.assertThat(stringWriter.toString()) - .contains( - "Have you read the above warnings and are you sure you want to continue? 
(yes/no):"); - Assertions.assertThat(stringWriter.toString()).contains("Continuing..."); - break; - case "no": - Assertions.assertThat(callPrompt(deleteSourceTables, inputStream, printWriter)).isFalse(); - Assertions.assertThat(stringWriter.toString()).contains(warning); - Assertions.assertThat(stringWriter.toString()) - .contains( - "Have you read the above warnings and are you sure you want to continue? (yes/no):"); - Assertions.assertThat(stringWriter.toString()).contains("Aborting..."); - break; - case "dummy": - Assertions.assertThatThrownBy( - () -> callPrompt(deleteSourceTables, inputStream, printWriter)) - .isInstanceOf(NoSuchElementException.class) - .hasMessageContaining("No line found"); - Assertions.assertThat(stringWriter.toString()) - .contains("Invalid input. Please enter 'yes' or 'no'."); - break; - default: - } - } finally { - inputStream.close(); - stringWriter.close(); - printWriter.close(); - } - } - - private boolean callPrompt( - boolean deleteSourceTables, InputStream inputStream, PrintWriter printWriter) { - return deleteSourceTables - ? PromptUtil.proceedForMigration(inputStream, printWriter) - : PromptUtil.proceedForRegistration(inputStream, printWriter); - } -} diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java index bc8ecc9..b97d31a 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java @@ -15,7 +15,6 @@ */ package org.projectnessie.tools.catlog.migration.cli; -import java.io.InputStream; import java.io.PrintWriter; import java.io.StringWriter; import java.util.Arrays; @@ -43,23 +42,22 @@ public static RunCLI run(List args) throws Exception { } public static RunCLI run(String... 
args) throws Exception { - return runWithInput(System.in, args); - } - - public static RunCLI runWithInput(InputStream inputStream, String... args) throws Exception { try (StringWriter out = new StringWriter(); PrintWriter outWriter = new PrintWriter(out); StringWriter err = new StringWriter(); PrintWriter errWriter = new PrintWriter(err)) { - int exitCode = runMain(outWriter, errWriter, inputStream, args); + int exitCode = runMain(outWriter, errWriter, args); return new RunCLI(exitCode, out.toString(), err.toString(), args); } } - private static int runMain( - PrintWriter out, PrintWriter err, InputStream inputStream, String... arguments) { + private static int runMain(PrintWriter out, PrintWriter err, String... arguments) { + CatalogMigrationCLI cli = new CatalogMigrationCLI(); + // disable prompts for tests + cli.disablePrompts(); + CommandLine commandLine = - new CommandLine(new CatalogMigrationCLI(inputStream)) + new CommandLine(cli) .setExecutionExceptionHandler( (ex, cmd, parseResult) -> { cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); @@ -96,7 +94,7 @@ public String getErr() { @Override public String toString() { return String.format( - "org.projectnessie.tools.catalog.migration" + "org.projectnessie.tools.catalog.migration.cli" + ".RunCLI{args=%s%nexitCode=%d%n%nstdout:%n%s%n%nstderr:%n%s", Arrays.toString(args), exitCode, out, err); } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index c7b30d6..ae7265c 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,21 +1,21 @@ [versions] assertj = "3.24.2" -aws = "1.7.4" -googleJavaFormat = "1.15.0" +aws = "1.12.415" +googleJavaFormat = "1.16.0" guava = "31.1-jre" hadoop = "3.2.4" -hive = "2.3.8" +hive = "2.3.8" # this is in mapping with iceberg repo. 
Later versions have junit depedency problem iceberg = "1.1.0" immutables = "2.9.3" -junit = "5.9.1" +junit = "5.9.2" logback = "1.2.11" -nessie = "0.48.2" -nessieBuildPlugins = "0.2.14" -nessieRunner = "0.28.1" -picocli = "4.7.0" +nessie = "0.50.0" +nessieBuildPlugins = "0.2.19" +nessieRunner = "0.29.0" +picocli = "4.7.1" shadowPlugin = "7.1.2" slf4j = "1.7.36" -spotlessPlugin = "6.12.0" +spotlessPlugin = "6.16.0" [libraries] assertj = { module = "org.assertj:assertj-core", version.ref = "assertj" } @@ -31,7 +31,6 @@ junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } -logback-core = { module = "ch.qos.logback:logback-core", version.ref = "logback" } logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index bc073f6..f396aaa 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=312eb12875e1747e05c2f81a4789902d7e4ec5defbd1eefeaccc08acf096505d -distributionUrl=https\://services.gradle.org/distributions/gradle-7.6-all.zip +distributionSha256Sum=1b6b558be93f29438d3df94b7dfee02e794b94d9aca4611a92cdb79b6b88e909 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.1-bin.zip networkTimeout=10000 zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists From bcaff7be2f582453f01cbadb0da3b390cec02ee0 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Fri, 3 Mar 2023 13:17:19 
+0530 Subject: [PATCH 14/31] Address review comments from 2nd March --- README.md | 150 +++--- api-test/build.gradle.kts | 2 +- .../api/test/AbstractTestCatalogMigrator.java | 69 ++- .../api/test/CustomCatalogMigratorTest.java | 105 ++++ .../src/{main => test}/resources/logback.xml | 0 api/build.gradle.kts | 2 +- .../api/AbstractCatalogMigrator.java | 144 ++++++ .../migration/api/CatalogMigrationResult.java | 8 +- .../migration/api/CatalogMigrator.java | 146 +----- api/src/main/resources/logback.xml | 34 -- .../api/UnsupportedNamespaceTest.java | 85 ++++ cli/build.gradle.kts | 1 + .../migration/cli/BaseRegisterCommand.java | 275 ++++++++++ .../migration/cli/CatalogMigrationCLI.java | 478 +----------------- .../catalog/migration/cli/CatalogUtil.java | 89 ++++ .../migration/cli/IdentifierOptions.java | 81 +++ .../catalog/migration/cli/MigrateCommand.java | 38 ++ .../catalog/migration/cli/PromptUtil.java | 10 +- .../migration/cli/RegisterCommand.java | 36 ++ .../migration/cli/SourceCatalogOptions.java | 58 +++ .../migration/cli/TargetCatalogOptions.java | 58 +++ .../cli/AbstractCLIMigrationTest.java | 92 ++-- .../tools/catlog/migration/cli/CLITest.java | 181 ++++--- .../migration/cli/HadoopCLIMigrationTest.java | 4 +- .../cli/ITHadoopToHiveCLIMigrationTest.java | 4 +- .../cli/ITHiveToHadoopCLIMigrationTest.java | 4 +- .../cli/ITHiveToNessieCLIMigrationTest.java | 4 +- .../cli/ITNessieToHiveCLIMigrationTest.java | 4 +- .../tools/catlog/migration/cli/RunCLI.java | 6 +- gradle/wrapper/gradle-wrapper.jar | Bin 61574 -> 61608 bytes gradle/wrapper/gradle-wrapper.properties | 4 +- gradlew | 4 +- 32 files changed, 1308 insertions(+), 868 deletions(-) create mode 100644 api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java rename api-test/src/{main => test}/resources/logback.xml (100%) create mode 100644 api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java delete mode 100644 
api/src/main/resources/logback.xml create mode 100644 api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java create mode 100644 cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java create mode 100644 cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java create mode 100644 cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java create mode 100644 cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java create mode 100644 cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java create mode 100644 cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java create mode 100644 cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java diff --git a/README.md b/README.md index ee01fd0..c64a4d3 100644 --- a/README.md +++ b/README.md @@ -27,74 +27,87 @@ Need to have java installed in your machine(JDK11 or later version) to use this Below is the CLI syntax: ``` -$ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar -h -Usage: register [-hV] [--delete-source-tables] [--dry-run] [--identifiers-from-file=] [--identifiers-regex=] - [--output-dir=] --source-catalog-type= [--source-custom-catalog-impl=] - --target-catalog-type= [--target-custom-catalog-impl=] [--identifiers=[, - ...]]... [--source-catalog-hadoop-conf=[,...]]... - --source-catalog-properties=[,...] [--source-catalog-properties=[, - ...]]... [--target-catalog-hadoop-conf=[,...]]... - --target-catalog-properties=[,...] [--target-catalog-properties=[,...]]... +$ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar -h +Usage: iceberg-catalog-migrator [-hV] [COMMAND] +-h, --help Show this help message and exit. +-V, --version Print version information and exit. 
+``` +``` +$ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register -h +Usage: iceberg-catalog-migrator register [-hV] [--disable-prompts] [--dry-run] --output-dir= (--source-catalog-type= + --source-catalog-properties=[,...] [--source-catalog-properties= + [,...]]... [--source-catalog-hadoop-conf=[,...]]... + [--source-custom-catalog-impl=]) (--target-catalog-type= + --target-catalog-properties=[,...] [--target-catalog-properties= + [,...]]... [--target-catalog-hadoop-conf=[,...]]... + [--target-custom-catalog-impl=]) [--identifiers=[,...] + [--identifiers=[,...]]... | --identifiers-from-file= | + --identifiers-regex=] Bulk register the iceberg tables from source catalog to target catalog without data copy. - - --source-catalog-type= - source catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] + --output-dir= + local output directory path to write CLI output files like `failed_identifiers.txt`, `failed_to_delete_at_source.txt`, + `dry_run_identifiers.txt`. + --dry-run optional configuration to simulate the registration without actually registering. Can learn about a list of the tables that + will be registered by running this. + --disable-prompts optional configuration to disable warning prompts which needs console input. + -h, --help Show this help message and exit. + -V, --version Print version information and exit. +source catalog options: + --source-catalog-type= + source catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] --source-catalog-properties=[,...] - source catalog properties (like uri, warehouse, etc) + source catalog properties (like uri, warehouse, etc) --source-catalog-hadoop-conf=[,...] - optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. - --source-custom-catalog-impl= - optional fully qualified class name of the custom catalog implementation of the source catalog. 
Required when the catalog type is - CUSTOM. - --target-catalog-type= - target catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] + optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg + FileIO. + --source-custom-catalog-impl= + optional fully qualified class name of the custom catalog implementation of the source catalog. Required when the catalog + type is CUSTOM. +target catalog options: + --target-catalog-type= + target catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] --target-catalog-properties=[,...] - target catalog properties (like uri, warehouse, etc) + target catalog properties (like uri, warehouse, etc) --target-catalog-hadoop-conf=[,...] - optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. - --target-custom-catalog-impl= - optional fully qualified class name of the custom catalog implementation of the target catalog. Required when the catalog type is - CUSTOM. + optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg + FileIO. + --target-custom-catalog-impl= + optional fully qualified class name of the custom catalog implementation of the target catalog. Required when the catalog + type is CUSTOM. +identifier options: --identifiers=[,...] - optional selective list of identifiers to register. If not specified, all the tables will be registered. Use this when there are - few identifiers that need to be registered. For a large number of identifiers, use the `--identifiers-from-file` or - `--identifiers-regex` option. + optional selective list of identifiers to register. If not specified, all the tables will be registered. Use this when + there are few identifiers that need to be registered. 
For a large number of identifiers, use the + `--identifiers-from-file` or `--identifiers-regex` option. --identifiers-from-file= - optional text file path that contains a list of table identifiers (one per line) to register. Should not be used with - `--identifiers` or `--identifiers-regex` option. + optional text file path that contains a list of table identifiers (one per line) to register. Should not be used with + `--identifiers` or `--identifiers-regex` option. --identifiers-regex= - optional regular expression pattern used to register only the tables whose identifiers match this pattern. Should not be used with - `--identifiers` or '--identifiers-from-file' option. - --dry-run optional configuration to simulate the registration without actually registering. Can learn about a list of the tables that will be - registered by running this. - --delete-source-tables - optional configuration to delete the table entry from source catalog after successfully registering it to target catalog. - --output-dir= - optional local output directory path to write CLI output files like `failed_identifiers.txt`, `failed_to_delete_at_source.txt`, - `dry_run_identifiers.txt`. Uses the present working directory if not specified. - -h, --help Show this help message and exit. - -V, --version Print version information and exit. + optional regular expression pattern used to register only the tables whose identifiers match this pattern. Should not be + used with `--identifiers` or '--identifiers-from-file' option. ``` +Note: options for migrate command is exactly same as register command. + > :warning: **It is recommended to use this CLI tool when there is no in-progress commits for the tables in the source catalog.** In-progress commits may not make it into the target catalog if used. -> :warning: By default this tool just registers the table. +> :warning: `register` command just registers the table. Which means the table will be present in both the catalogs after registering. 
Operating same table from more than one catalog can lead to missing updates, loss of data and table corruption. -So, it is recommended to use the '--delete-source-tables' option in CLI to automatically delete the table from source catalog after registering -or avoid operating tables from the source catalog after registering if '--delete-source-tables' option is not used. +So, it is recommended to use the 'migrate' command in CLI to automatically delete the table from source catalog after registering +or avoid operating tables from the source catalog after registering if 'migrate' command is not used. # Sample Inputs ## Bulk migrating all the tables from Hadoop catalog to Nessie catalog (main branch) ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---delete-source-tables +--output-dir $PWD/output ``` ## Register all the tables from Hadoop catalog to Arctic catalog (main branch) @@ -106,12 +119,13 @@ export ACCESSKEY=xxxxxxx ``` ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ ---target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +--target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY \ +--output-dir $PWD/output ``` ## Migrate selected tables (t1,t2 in namespace foo) from Arctic catalog 
(main branch) to Hadoop catalog. @@ -123,13 +137,13 @@ export ACCESSKEY=xxxxxxx ``` ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type NESSIE \ --source-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ --target-catalog-type HADOOP \ --target-catalog-properties warehouse=/tmp/warehouse,type=hadoop --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY \ --identifiers foo.t1,foo.t2 \ ---delete-source-tables +--output-dir $PWD/output ``` # Scenarios @@ -143,12 +157,13 @@ No need for a catalog migration tool. Sample input: ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---dry-run +--dry-run \ +--output-dir $PWD/output ``` All the inputs will be validated and a list of identified table identifiers for migration will be printed on the console @@ -159,12 +174,12 @@ which can be used for actual migration using the `--identifiers-from-file` optio Sample input: ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---delete-source-tables +--output-dir $PWD/output ``` Once the input validations are done, users will be prompted with this message. 
@@ -222,12 +237,12 @@ Note: a log file will also be generated which prints “successfully migrated ta Sample input: ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---delete-source-tables +--output-dir $PWD/output ``` Console output will be same as B.2) till summary because even in case of failure, @@ -257,12 +272,12 @@ Users can rename the tables in the source catalog and migrate only these 10 tabl Sample input: ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---delete-source-tables +--output-dir $PWD/output ``` Console output will be same as B.2) till summary because even in case of failure, @@ -291,12 +306,12 @@ As these were timeout exceptions, users can retry migration of only these 900 ta Sample input: ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---delete-source-tables +--output-dir $PWD/output ``` Console output will be same as B.2) till summary because even in case of failure, @@ -333,35 +348,36 @@ Users can provide the selective list of identifiers to migrate using any of thes Sample input: (only migrate tables that starts with "foo.") 
```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---delete-source-tables \ ---identifiers-regex ^foo\..* +--identifiers-regex ^foo\..* \ +--output-dir $PWD/output + ``` Sample input: (migrate all tables in the file ids.txt where each entry is delimited by newline) ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---delete-source-tables \ ---identifiers-from-file ids.txt +--identifiers-from-file ids.txt \ +--output-dir $PWD/output ``` Sample input: (migrate only two tables foo.tbl1, foo.tbl2) ```shell -java -jar catalog-migration-tool-1.0-SNAPSHOT.jar \ +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---delete-source-tables \ ---identifiers foo.tbl1,foo.tbl2 +--identifiers foo.tbl1,foo.tbl2 \ +--output-dir $PWD/output ``` Console will clearly print that only these identifiers are used for table migration. 
diff --git a/api-test/build.gradle.kts b/api-test/build.gradle.kts index ac7547b..d86287f 100644 --- a/api-test/build.gradle.kts +++ b/api-test/build.gradle.kts @@ -24,7 +24,6 @@ plugins { dependencies { implementation(libs.slf4j) implementation(libs.picocli) - runtimeOnly(libs.logback.classic) implementation(libs.hadoop.common) implementation(libs.iceberg.spark.runtime) implementation(libs.junit.jupiter.api) @@ -32,6 +31,7 @@ dependencies { testImplementation(project(":iceberg-catalog-migrator-api")) + testRuntimeOnly(libs.logback.classic) testImplementation(libs.assertj) testImplementation(libs.junit.jupiter.params) implementation(libs.junit.jupiter.engine) diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java index 78ae04a..f15d84c 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java @@ -17,6 +17,7 @@ import java.nio.file.Path; import java.util.Collections; +import java.util.List; import java.util.stream.IntStream; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; @@ -52,9 +53,7 @@ protected void afterEach() { @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegister(boolean deleteSourceTables) { - - CatalogMigrationResult result; - result = registerAllTables(deleteSourceTables); + CatalogMigrationResult result = registerAllTables(deleteSourceTables); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactlyInAnyOrder( @@ -71,6 +70,20 @@ public void testRegister(boolean deleteSourceTables) { Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), 
TableIdentifier.parse("bar.tbl4")); + + if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { + // table should be deleted after migration from source catalog + Assertions.assertThat(catalog1.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))).isEmpty(); + return; + } + // tables should be present in source catalog. + Assertions.assertThat(catalog1.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } @Order(1) @@ -81,7 +94,8 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); CatalogMigrationResult result = new CatalogMigrator(builder.build()) - .registerTables(Collections.singletonList(TableIdentifier.parse("bar.tbl3"))); + .registerTables(Collections.singletonList(TableIdentifier.parse("bar.tbl3"))) + .result(); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactly(TableIdentifier.parse("bar.tbl3")); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); @@ -95,7 +109,9 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { builder = builderWithDefaultArgs(deleteSourceTables); CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); result = - catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers("^foo\\..*")); + catalogMigrator + .registerTables(catalogMigrator.getMatchingTableIdentifiers("^foo\\..*")) + .result(); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); @@ -117,7 +133,8 @@ public void testRegisterError(boolean 
deleteSourceTables) { ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); CatalogMigrationResult result = new CatalogMigrator(builder.build()) - .registerTables(Collections.singletonList(TableIdentifier.parse("dummy.tbl3"))); + .registerTables(Collections.singletonList(TableIdentifier.parse("dummy.tbl3"))) + .result(); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToRegisterTableIdentifiers()) .containsExactly(TableIdentifier.parse("dummy.tbl3")); @@ -127,7 +144,8 @@ public void testRegisterError(boolean deleteSourceTables) { builder = builderWithDefaultArgs(deleteSourceTables); result = new CatalogMigrator(builder.build()) - .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); + .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))) + .result(); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactly(TableIdentifier.parse("foo.tbl2")); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); @@ -136,7 +154,8 @@ public void testRegisterError(boolean deleteSourceTables) { builder = builderWithDefaultArgs(deleteSourceTables); result = new CatalogMigrator(builder.build()) - .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); + .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))) + .result(); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToRegisterTableIdentifiers()) .contains(TableIdentifier.parse("foo.tbl2")); @@ -151,7 +170,8 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); CatalogMigrationResult result = new CatalogMigrator(builder.build()) - .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))); + 
.registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))) + .result(); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactly(TableIdentifier.parse("foo.tbl2")); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); @@ -192,8 +212,11 @@ public void testRegisterNoTables(boolean deleteSourceTables) { .targetCatalog(catalog1) .deleteEntriesFromSourceCatalog(deleteSourceTables); CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); + List matchingTableIdentifiers = + catalogMigrator.getMatchingTableIdentifiers(null); + Assertions.assertThat(matchingTableIdentifiers).isEmpty(); CatalogMigrationResult result = - catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers(null)); + catalogMigrator.registerTables(matchingTableIdentifiers).result(); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); @@ -223,10 +246,34 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } + @Order(6) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testListingTableIdentifiers(boolean deleteSourceTables) { + ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); + CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); + + List matchingTableIdentifiers = + catalogMigrator.getMatchingTableIdentifiers(null); + Assertions.assertThat(matchingTableIdentifiers) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), + TableIdentifier.parse("foo.tbl2"), + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4")); + + matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers("^foo\\..*"); + 
Assertions.assertThat(matchingTableIdentifiers) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + } + private CatalogMigrationResult registerAllTables(boolean deleteSourceTables) { ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); - return catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers(null)); + return catalogMigrator + .registerTables(catalogMigrator.getMatchingTableIdentifiers(null)) + .result(); } private ImmutableCatalogMigratorParams.Builder builderWithDefaultArgs( diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java new file mode 100644 index 0000000..a407518 --- /dev/null +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration.api.test; + +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; +import org.projectnessie.tools.catalog.migration.api.CatalogMigratorParams; +import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; + +public class CustomCatalogMigratorTest extends AbstractTest { + + protected static @TempDir Path warehouse1; + protected static @TempDir Path warehouse2; + + @BeforeAll + protected static void setup() { + catalog1 = createCustomCatalog(warehouse1.toAbsolutePath().toString(), "catalog1"); + catalog2 = createCustomCatalog(warehouse2.toAbsolutePath().toString(), "catalog2"); + + createNamespaces(); + } + + @BeforeEach + protected void beforeEach() { + createTables(); + } + + @AfterEach + protected void afterEach() { + dropTables(); + } + + @AfterAll + protected static void tearDown() { + dropNamespaces(); + } + + @Test + public void testRegister() { + CatalogMigratorParams params = + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog1) + .targetCatalog(catalog2) + .deleteEntriesFromSourceCatalog(true) + .build(); + + CatalogMigrator catalogMigrator = new CatalogMigrator(params); + // should fail to register as catalog doesn't support register table operations. 
+ CatalogMigrationResult result = + catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers(null)).result(); + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), + TableIdentifier.parse("foo.tbl2"), + TableIdentifier.parse("bar.tbl3"), + TableIdentifier.parse("bar.tbl4")); + } + + private static Catalog createCustomCatalog(String warehousePath, String name) { + + class TestCatalog extends HadoopCatalog { + @Override + public Table registerTable(TableIdentifier identifier, String metadataFileLocation) { + throw new UnsupportedOperationException("This catalog doesn't support register table"); + } + } + + Map properties = new HashMap<>(); + properties.put("warehouse", warehousePath); + properties.put("type", "hadoop"); + TestCatalog testCatalog = new TestCatalog(); + testCatalog.setConf(new Configuration()); + testCatalog.initialize(name, properties); + return testCatalog; + } +} diff --git a/api-test/src/main/resources/logback.xml b/api-test/src/test/resources/logback.xml similarity index 100% rename from api-test/src/main/resources/logback.xml rename to api-test/src/test/resources/logback.xml diff --git a/api/build.gradle.kts b/api/build.gradle.kts index 633ab9e..0b26d71 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts @@ -23,12 +23,12 @@ plugins { dependencies { implementation(libs.guava) implementation(libs.slf4j) - runtimeOnly(libs.logback.classic) implementation(libs.iceberg.spark.runtime) annotationProcessor(libs.immutables) compileOnly(libs.immutables) + testRuntimeOnly(libs.logback.classic) testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java 
b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java new file mode 100644 index 0000000..7934e20 --- /dev/null +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration.api; + +import com.google.common.base.Preconditions; +import java.util.List; +import java.util.Objects; +import java.util.function.Predicate; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.apache.iceberg.BaseTable; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractCatalogMigrator { + private static final Logger LOG = LoggerFactory.getLogger(AbstractCatalogMigrator.class); + private final ImmutableCatalogMigrationResult.Builder resultBuilder = + ImmutableCatalogMigrationResult.builder(); + + public abstract CatalogMigratorParams getParams(); + + /** + * Get the table identifiers which matches the regular expression pattern input from all the + * namespaces. 
+ * + * @param identifierRegex regular expression pattern. If null, fetches all the table identifiers + * from all the namespaces. + * @return List of table identifiers. + */ + public List getMatchingTableIdentifiers(String identifierRegex) { + Catalog sourceCatalog = getParams().sourceCatalog(); + if (!(sourceCatalog instanceof SupportsNamespaces)) { + throw new UnsupportedOperationException( + String.format( + "source catalog %s doesn't implement SupportsNamespaces to list all namespaces.", + sourceCatalog.name())); + } + LOG.info("Collecting all the namespaces from source catalog..."); + List namespaces = ((SupportsNamespaces) sourceCatalog).listNamespaces(); + Predicate matchedIdentifiersPredicate; + if (identifierRegex == null) { + LOG.info("Collecting all the tables from all the namespaces of source catalog..."); + matchedIdentifiersPredicate = tableIdentifier -> true; + } else { + LOG.info( + "Collecting all the tables from all the namespaces of source catalog" + + " which matches the regex pattern:" + + identifierRegex); + Pattern pattern = Pattern.compile(identifierRegex); + matchedIdentifiersPredicate = + tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); + } + return namespaces.stream() + .filter(Objects::nonNull) + .flatMap( + namespace -> + sourceCatalog.listTables(namespace).stream().filter(matchedIdentifiersPredicate)) + .collect(Collectors.toList()); + } + + /** + * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). + * + *

Users must make sure that no in-progress commits on the tables of source catalog during + * registration. + * + * @param identifiers List of table identifiers to register or migrate + * @return {@code this} for use in a chained invocation + */ + public AbstractCatalogMigrator registerTables(List identifiers) { + Preconditions.checkArgument(identifiers != null, "Identifiers list is null"); + + if (identifiers.isEmpty()) { + LOG.info("Identifiers list is empty"); + return this; + } + + identifiers.forEach( + tableIdentifier -> { + boolean isRegistered = registerTable(tableIdentifier); + if (isRegistered) { + resultBuilder.addRegisteredTableIdentifiers(tableIdentifier); + } else { + resultBuilder.addFailedToRegisterTableIdentifiers(tableIdentifier); + } + + // HadoopCatalog dropTable will delete the table files completely even when purge is + // false. So, skip dropTable for HadoopCatalog. + boolean deleteTableFromSourceCatalog = + !(getParams().sourceCatalog() instanceof HadoopCatalog) + && isRegistered + && getParams().deleteEntriesFromSourceCatalog(); + try { + if (deleteTableFromSourceCatalog + && !getParams().sourceCatalog().dropTable(tableIdentifier, false)) { + resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); + } + } catch (Exception exception) { + resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); + LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); + } + }); + return this; + } + + public CatalogMigrationResult result() { + return resultBuilder.build(); + } + + private boolean registerTable(TableIdentifier tableIdentifier) { + try { + // register the table to the target catalog + TableOperations ops = + ((BaseTable) getParams().sourceCatalog().loadTable(tableIdentifier)).operations(); + getParams() + .targetCatalog() + .registerTable(tableIdentifier, ops.current().metadataFileLocation()); + LOG.info("Successfully migrated the table {}", tableIdentifier); + return true; + } catch (Exception ex) 
{ + LOG.warn("Unable to register the table {}", tableIdentifier, ex); + return false; + } + } +} diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java index 7edc147..81494ff 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationResult.java @@ -20,11 +20,11 @@ import org.immutables.value.Value; @Value.Immutable -public abstract class CatalogMigrationResult { +public interface CatalogMigrationResult { - public abstract List registeredTableIdentifiers(); + List registeredTableIdentifiers(); - public abstract List failedToRegisterTableIdentifiers(); + List failedToRegisterTableIdentifiers(); - public abstract List failedToDeleteTableIdentifiers(); + List failedToDeleteTableIdentifiers(); } diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index 6086c6e..aa1df19 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -15,148 +15,16 @@ */ package org.projectnessie.tools.catalog.migration.api; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import java.util.List; -import java.util.Objects; -import java.util.function.Predicate; -import java.util.regex.Pattern; -import java.util.stream.Collectors; -import org.apache.iceberg.BaseTable; -import org.apache.iceberg.TableOperations; -import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.SupportsNamespaces; -import org.apache.iceberg.catalog.TableIdentifier; -import 
org.apache.iceberg.hadoop.HadoopCatalog; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +public class CatalogMigrator extends AbstractCatalogMigrator { -public class CatalogMigrator { - private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrator.class); + private final CatalogMigratorParams params; - public static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; - public static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; - public static final String DRY_RUN_FILE = "dry_run_identifiers.txt"; - - private final Catalog sourceCatalog; - private final Catalog targetCatalog; - private final boolean deleteEntriesFromSourceCatalog; - - public CatalogMigrator(CatalogMigratorParams catalogMigratorParams) { - this.sourceCatalog = catalogMigratorParams.sourceCatalog(); - this.targetCatalog = catalogMigratorParams.targetCatalog(); - this.deleteEntriesFromSourceCatalog = catalogMigratorParams.deleteEntriesFromSourceCatalog(); - } - - /** - * Get the table identifiers which matches the regular expression pattern input from all the - * namespaces. - * - * @param identifierRegex regular expression pattern. If null, fetches all the table identifiers - * from all the namespaces. - * @return List of table identifiers. - */ - public List getMatchingTableIdentifiers(String identifierRegex) { - LOG.info("Collecting all the namespaces from source catalog..."); - // fetch all the table identifiers from all the namespaces. - List namespaces = - (sourceCatalog instanceof SupportsNamespaces) - ? 
((SupportsNamespaces) sourceCatalog).listNamespaces() - : ImmutableList.of(Namespace.empty()); - - Predicate matchedIdentifiersPredicate; - if (identifierRegex == null) { - LOG.info("Collecting all the tables from all the namespaces of source catalog..."); - matchedIdentifiersPredicate = tableIdentifier -> true; - } else { - LOG.info( - "Collecting all the tables from all the namespaces of source catalog" - + " which matches the regex pattern:" - + identifierRegex); - Pattern pattern = Pattern.compile(identifierRegex); - matchedIdentifiersPredicate = - tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); - } - return namespaces.stream() - .filter(Objects::nonNull) - .flatMap( - namespace -> - sourceCatalog.listTables(namespace).stream().filter(matchedIdentifiersPredicate)) - .collect(Collectors.toList()); - } - - /** - * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). - * - *

Users must make sure that no in-progress commits on the tables of source catalog during - * registration. - * - * @param identifiers List of table identifiers to register or migrate - * @return {@link CatalogMigrationResult} instance - */ - public CatalogMigrationResult registerTables(List identifiers) { - ImmutableCatalogMigrationResult.Builder resultBuilder = - ImmutableCatalogMigrationResult.builder(); - registerTables(identifiers, resultBuilder); - return resultBuilder.build(); - } - - /** - * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). - * - *

Users must make sure that no in-progress commits on the tables of source catalog during - * registration. - * - * @param identifiers List of table identifiers to register or migrate - * @param resultBuilder result builder to collect the results - */ - public void registerTables( - List identifiers, ImmutableCatalogMigrationResult.Builder resultBuilder) { - Preconditions.checkArgument(identifiers != null, "Identifiers list is null"); - Preconditions.checkArgument(resultBuilder != null, "result builder is null"); - - if (identifiers.isEmpty()) { - LOG.info("Identifiers list is empty"); - return; - } - - identifiers.forEach( - tableIdentifier -> { - boolean isRegistered = registerTable(tableIdentifier); - if (isRegistered) { - resultBuilder.addRegisteredTableIdentifiers(tableIdentifier); - } else { - resultBuilder.addFailedToRegisterTableIdentifiers(tableIdentifier); - } - - // HadoopCatalog dropTable will delete the table files completely even when purge is - // false. So, skip dropTable for HadoopCatalog. 
- boolean deleteTableFromSourceCatalog = - !(sourceCatalog instanceof HadoopCatalog) - && isRegistered - && deleteEntriesFromSourceCatalog; - try { - if (deleteTableFromSourceCatalog && !sourceCatalog.dropTable(tableIdentifier, false)) { - resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); - } - } catch (Exception exception) { - resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); - LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); - } - }); + public CatalogMigrator(CatalogMigratorParams params) { + this.params = params; } - private boolean registerTable(TableIdentifier tableIdentifier) { - try { - // register the table to the target catalog - TableOperations ops = ((BaseTable) sourceCatalog.loadTable(tableIdentifier)).operations(); - targetCatalog.registerTable(tableIdentifier, ops.current().metadataFileLocation()); - LOG.info("Successfully migrated the table {}", tableIdentifier); - return true; - } catch (Exception ex) { - LOG.warn("Unable to register the table {}", tableIdentifier, ex); - return false; - } + @Override + public CatalogMigratorParams getParams() { + return params; } } diff --git a/api/src/main/resources/logback.xml b/api/src/main/resources/logback.xml deleted file mode 100644 index cb1446a..0000000 --- a/api/src/main/resources/logback.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - ${catalog.migration.log.dir}/catalog_migration.log - true - - %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java new file mode 100644 index 0000000..3af857d --- /dev/null +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration.api; + +import java.nio.file.Path; +import java.util.List; +import org.apache.iceberg.BaseMetastoreCatalog; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class UnsupportedNamespaceTest { + + protected static @TempDir Path tempDir; + + @BeforeAll + protected static void initLogDir() { + System.setProperty("catalog.migration.log.dir", tempDir.toAbsolutePath().toString()); + } + + @Test + public void testUnsupportedNamespace() { + + class TestCatalog extends BaseMetastoreCatalog { + // doesn't support namespaces + @Override + protected TableOperations newTableOps(TableIdentifier tableIdentifier) { + return null; + } + + @Override + protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { + return null; + } + + @Override + public List listTables(Namespace namespace) { + return null; + } + + @Override + public boolean dropTable(TableIdentifier identifier, boolean purge) { + return false; + } + + @Override + public void renameTable(TableIdentifier from, TableIdentifier to) {} + } + + Catalog catalog1 = new TestCatalog(); + Catalog catalog2 = new TestCatalog(); + + CatalogMigratorParams params = + 
ImmutableCatalogMigratorParams.builder() + .sourceCatalog(catalog1) + .targetCatalog(catalog2) + .deleteEntriesFromSourceCatalog(true) + .build(); + + CatalogMigrator catalogMigrator = new CatalogMigrator(params); + + Assertions.assertThatThrownBy(() -> catalogMigrator.getMatchingTableIdentifiers(null)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining( + "source catalog TestCatalog{} doesn't implement SupportsNamespaces to list all namespaces."); + } +} diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index e023e9c..b8b7b28 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -31,6 +31,7 @@ applyShadowJar() dependencies { implementation(project(":iceberg-catalog-migrator-api")) implementation(libs.slf4j) + runtimeOnly(libs.logback.classic) implementation(libs.picocli) implementation(libs.iceberg.spark.runtime) implementation(libs.iceberg.dell) diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java new file mode 100644 index 0000000..4035d69 --- /dev/null +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -0,0 +1,275 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration.cli; + +import com.google.common.collect.Lists; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; +import org.projectnessie.tools.catalog.migration.api.CatalogMigratorParams; +import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; +import picocli.CommandLine; + +public abstract class BaseRegisterCommand implements Callable { + @CommandLine.Spec CommandLine.Model.CommandSpec commandSpec; + + @CommandLine.ArgGroup( + exclusive = false, + multiplicity = "1", + heading = "source catalog options: %n") + private SourceCatalogOptions sourceCatalogOptions; + + @CommandLine.ArgGroup( + exclusive = false, + multiplicity = "1", + heading = "target catalog options: %n") + private TargetCatalogOptions targetCatalogOptions; + + @CommandLine.ArgGroup(heading = "identifier options: %n") + private IdentifierOptions identifierOptions; + + @CommandLine.Option( + names = {"--output-dir"}, + required = true, + description = + "local output directory path to write CLI output files like `failed_identifiers.txt`, " + + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. ") + private Path outputDirPath; + + @CommandLine.Option( + names = {"--dry-run"}, + description = + "optional configuration to simulate the registration without actually registering. 
Can learn about a list " + + "of the tables that will be registered by running this.") + private boolean isDryRun; + + @CommandLine.Option( + names = {"--disable-prompts"}, + description = "optional configuration to disable warning prompts which needs console input.") + private boolean disablePrompts; + + private boolean deleteSourceCatalogTables; + + private static final int BATCH_SIZE = 100; + public static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; + public static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; + public static final String DRY_RUN_FILE = "dry_run_identifiers.txt"; + + public BaseRegisterCommand() {} + + protected abstract boolean isDeleteSourceCatalogTables(); + + @Override + public Integer call() { + List identifiers; + if (identifierOptions != null) { + identifiers = identifierOptions.processIdentifiersInput(); + } else { + identifiers = Collections.emptyList(); + } + + PrintWriter printWriter = commandSpec.commandLine().getOut(); + + Catalog sourceCatalog = sourceCatalogOptions.build(); + printWriter.printf("%nConfigured source catalog: %s%n", sourceCatalog.name()); + + Catalog targetCatalog = targetCatalogOptions.build(); + printWriter.printf("%nConfigured target catalog: %s%n", targetCatalog.name()); + + if (!canProceed(sourceCatalog)) { + return 0; + } + + deleteSourceCatalogTables = isDeleteSourceCatalogTables(); + CatalogMigratorParams params = + ImmutableCatalogMigratorParams.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(deleteSourceCatalogTables) + .build(); + CatalogMigrator catalogMigrator = new CatalogMigrator(params); + + String identifierRegEx = identifierOptions != null ? identifierOptions.identifiersRegEx : null; + if (identifiers.isEmpty()) { + if (identifierRegEx != null) { + printWriter.printf( + "%nUser has not specified the table identifiers." 
+ + " Selecting all the tables from all the namespaces from the source catalog " + + "which matches the regex pattern:%s%n", + identifierRegEx); + } else { + printWriter.printf( + "%nUser has not specified the table identifiers." + + " Selecting all the tables from all the namespaces from the source catalog.%n"); + } + identifiers = catalogMigrator.getMatchingTableIdentifiers(identifierRegEx); + } + + String operation = deleteSourceCatalogTables ? "migration" : "registration"; + printWriter.printf("%nIdentified %d tables for %s.%n", identifiers.size(), operation); + + if (isDryRun) { + writeToFile(outputDirPath.resolve(DRY_RUN_FILE), identifiers); + printWriter.println("Dry run is completed."); + printDryRunResults(identifiers); + return 0; + } + + printWriter.printf("%nStarted %s ...%n", operation); + + List> identifierBatches = Lists.partition(identifiers, BATCH_SIZE); + int totalIdentifiers = identifiers.size(); + AtomicInteger counter = new AtomicInteger(); + identifierBatches.forEach( + identifierBatch -> { + catalogMigrator.registerTables(identifierBatch); + printWriter.printf( + "%nAttempted %s for %d tables out of %d tables.%n", + operation, counter.addAndGet(identifierBatch.size()), totalIdentifiers); + }); + + CatalogMigrationResult result = catalogMigrator.result(); + writeToFile( + outputDirPath.resolve(FAILED_IDENTIFIERS_FILE), result.failedToRegisterTableIdentifiers()); + writeToFile( + outputDirPath.resolve(FAILED_TO_DELETE_AT_SOURCE_FILE), + result.failedToDeleteTableIdentifiers()); + + printWriter.printf("%nFinished %s ...%n", operation); + printSummary(result, sourceCatalog.name(), targetCatalog.name()); + printDetails(result); + return 0; + } + + private boolean canProceed(Catalog sourceCatalog) { + if (isDryRun || disablePrompts) { + return true; + } + PrintWriter printWriter = commandSpec.commandLine().getOut(); + if (deleteSourceCatalogTables) { + if (sourceCatalog instanceof HadoopCatalog) { + printWriter.printf( + "[WARNING]: Source catalog 
type is HADOOP and it doesn't support dropping tables just from " + + "catalog. %nAvoid operating the migrated tables from the source catalog after migration. " + + "Use the tables from target catalog.%n"); + } + return PromptUtil.proceedForMigration(printWriter); + } else { + return PromptUtil.proceedForRegistration(printWriter); + } + } + + private void printSummary( + CatalogMigrationResult result, String sourceCatalogName, String targetCatalogName) { + PrintWriter printWriter = commandSpec.commandLine().getOut(); + printWriter.printf("%nSummary: %n"); + if (!result.registeredTableIdentifiers().isEmpty()) { + printWriter.printf( + "- Successfully %s %d tables from %s catalog to %s catalog.%n", + deleteSourceCatalogTables ? "migrated" : "registered", + result.registeredTableIdentifiers().size(), + sourceCatalogName, + targetCatalogName); + } + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + printWriter.printf( + "- Failed to %s %d tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "%nFailed identifiers are written into `%s`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.%n", + deleteSourceCatalogTables ? "migrate" : "register", + result.failedToRegisterTableIdentifiers().size(), + sourceCatalogName, + targetCatalogName, + FAILED_IDENTIFIERS_FILE); + } + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + printWriter.printf( + "- Failed to delete %d tables from %s catalog. " + + "Please check the `catalog_migration.log` file for the reason. " + + "%nFailed to delete identifiers are written into `%s`. 
%n", + result.failedToDeleteTableIdentifiers().size(), + sourceCatalogName, + FAILED_TO_DELETE_AT_SOURCE_FILE); + } + } + + private void printDetails(CatalogMigrationResult result) { + PrintWriter printWriter = commandSpec.commandLine().getOut(); + printWriter.printf("%nDetails: %n"); + if (!result.registeredTableIdentifiers().isEmpty()) { + printWriter.printf( + "- Successfully %s these tables:%n", + deleteSourceCatalogTables ? "migrated" : "registered"); + printWriter.println(result.registeredTableIdentifiers()); + } + + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + printWriter.printf( + "- Failed to %s these tables:%n", deleteSourceCatalogTables ? "migrate" : "register"); + printWriter.println(result.failedToRegisterTableIdentifiers()); + } + + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); + printWriter.println(result.failedToDeleteTableIdentifiers()); + } + } + + private void printDryRunResults(List result) { + PrintWriter printWriter = commandSpec.commandLine().getOut(); + printWriter.printf("%nSummary: %n"); + if (result.isEmpty()) { + printWriter.printf( + "- No tables are identified for %s. Please check logs for more info.%n", + deleteSourceCatalogTables ? "migration" : "registration"); + return; + } + printWriter.printf( + "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. " + + "You can use this file with `--identifiers-from-file` option.%n", + result.size(), deleteSourceCatalogTables ? "migration" : "registration", DRY_RUN_FILE); + + printWriter.printf("%nDetails: %n"); + printWriter.printf( + "- Identified these tables for %s by dry-run:%n", + deleteSourceCatalogTables ? 
"migration" : "registration"); + printWriter.println(result); + } + + private static void writeToFile(Path filePath, List identifiers) { + List identifiersString = + identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); + try { + Files.write(filePath, identifiersString); + } catch (IOException e) { + throw new UncheckedIOException("Failed to write the file:" + filePath, e); + } + } +} diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java index ee028fa..9ea8add 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java @@ -15,164 +15,14 @@ */ package org.projectnessie.tools.catalog.migration.cli; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_TO_DELETE_AT_SOURCE_FILE; - -import com.google.common.collect.Lists; -import java.io.IOException; -import java.io.PrintWriter; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.Callable; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import org.apache.hadoop.conf.Configuration; -import org.apache.iceberg.CatalogUtil; -import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; -import org.apache.iceberg.aws.glue.GlueCatalog; -import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.TableIdentifier; -import 
org.apache.iceberg.dell.ecs.EcsCatalog; -import org.apache.iceberg.hadoop.HadoopCatalog; -import org.apache.iceberg.hive.HiveCatalog; -import org.apache.iceberg.jdbc.JdbcCatalog; -import org.apache.iceberg.nessie.NessieCatalog; -import org.apache.iceberg.rest.RESTCatalog; -import org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; -import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; -import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrationResult; -import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; import picocli.CommandLine; @CommandLine.Command( - name = "register", + name = "iceberg-catalog-migrator", mixinStandardHelpOptions = true, versionProvider = CLIVersionProvider.class, - // As both source and target catalog has similar configurations, - // documentation is easy to read if the target and source property is one after another instead - // of sorted order. - sortOptions = false, - description = - "Bulk register the iceberg tables from source catalog to target catalog without data copy.") -public class CatalogMigrationCLI implements Callable { - @CommandLine.Spec CommandLine.Model.CommandSpec commandSpec; - - @CommandLine.Option( - names = "--source-catalog-type", - required = true, - description = - "source catalog type. 
" - + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") - private CatalogType sourceCatalogType; - - @CommandLine.Option( - names = "--source-catalog-properties", - required = true, - split = ",", - description = "source catalog properties (like uri, warehouse, etc)") - private Map sourceCatalogProperties; - - @CommandLine.Option( - names = "--source-catalog-hadoop-conf", - split = ",", - description = - "optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " - + "using an Iceberg FileIO.") - private Map sourceHadoopConf = new HashMap<>(); - - @CommandLine.Option( - names = {"--source-custom-catalog-impl"}, - description = - "optional fully qualified class name of the custom catalog implementation of the source catalog. Required " - + "when the catalog type is CUSTOM.") - private String sourceCustomCatalogImpl; - - @CommandLine.Option( - names = "--target-catalog-type", - required = true, - description = - "target catalog type. " - + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") - private CatalogType targetCatalogType; - - @CommandLine.Option( - names = "--target-catalog-properties", - required = true, - split = ",", - description = "target catalog properties (like uri, warehouse, etc)") - private Map targetCatalogProperties; - - @CommandLine.Option( - names = "--target-catalog-hadoop-conf", - split = ",", - description = - "optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " - + "using an Iceberg FileIO.") - private Map targetHadoopConf = new HashMap<>(); - - @CommandLine.Option( - names = {"--target-custom-catalog-impl"}, - description = - "optional fully qualified class name of the custom catalog implementation of the target catalog. 
Required " - + "when the catalog type is CUSTOM.") - private String targetCustomCatalogImpl; - - @CommandLine.Option( - names = {"--identifiers"}, - split = ",", - description = - "optional selective list of identifiers to register. If not specified, all the tables will be registered. " - + "Use this when there are few identifiers that need to be registered. For a large number of identifiers, " - + "use the `--identifiers-from-file` or `--identifiers-regex` option.") - private List identifiers = new ArrayList<>(); - - @CommandLine.Option( - names = {"--identifiers-from-file"}, - description = - "optional text file path that contains a list of table identifiers (one per line) to register. Should not be " - + "used with `--identifiers` or `--identifiers-regex` option.") - private String identifiersFromFile; - - @CommandLine.Option( - names = {"--identifiers-regex"}, - description = - "optional regular expression pattern used to register only the tables whose identifiers match this pattern. " - + "Should not be used with `--identifiers` or '--identifiers-from-file' option.") - private String identifiersRegEx; - - @CommandLine.Option( - names = {"--dry-run"}, - description = - "optional configuration to simulate the registration without actually registering. Can learn about a list " - + "of the tables that will be registered by running this.") - private boolean isDryRun; - - @CommandLine.Option( - names = {"--delete-source-tables"}, - description = - "optional configuration to delete the table entry from source catalog after successfully registering it " - + "to target catalog.") - private boolean deleteSourceCatalogTables; - - @CommandLine.Option( - names = {"--output-dir"}, - description = - "optional local output directory path to write CLI output files like `failed_identifiers.txt`, " - + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. 
" - + "Uses the present working directory if not specified.") - private Path outputDirPath; - - private boolean disablePrompts; - - private static final int BATCH_SIZE = 100; + subcommands = {MigrateCommand.class, RegisterCommand.class}) +public class CatalogMigrationCLI { public CatalogMigrationCLI() {} @@ -182,326 +32,4 @@ public static void main(String... args) { int exitCode = commandLine.execute(args); System.exit(exitCode); } - - public void disablePrompts() { - this.disablePrompts = true; - } - - @Override - public Integer call() { - validateIdentifierOptions(); - - PrintWriter printWriter = commandSpec.commandLine().getOut(); - Configuration sourceCatalogConf = new Configuration(); - if (sourceHadoopConf != null && !sourceHadoopConf.isEmpty()) { - sourceHadoopConf.forEach(sourceCatalogConf::set); - } - Catalog sourceCatalog = - CatalogUtil.loadCatalog( - Objects.requireNonNull(catalogImpl(sourceCatalogType, sourceCustomCatalogImpl)), - sourceCatalogType.name(), - sourceCatalogProperties, - sourceCatalogConf); - printWriter.println(String.format("%nConfigured source catalog: %s", sourceCatalogType.name())); - - Configuration targetCatalogConf = new Configuration(); - if (targetHadoopConf != null && !targetHadoopConf.isEmpty()) { - targetHadoopConf.forEach(targetCatalogConf::set); - } - Catalog targetCatalog = - CatalogUtil.loadCatalog( - Objects.requireNonNull(catalogImpl(targetCatalogType, targetCustomCatalogImpl)), - targetCatalogType.name(), - targetCatalogProperties, - targetCatalogConf); - printWriter.println(String.format("%nConfigured target catalog: %s", targetCatalogType.name())); - - List tableIdentifiers = processIdentifiersInput(printWriter); - - if (!canProceed(printWriter, sourceCatalog)) { - return 0; - } - - ImmutableCatalogMigratorParams params = - ImmutableCatalogMigratorParams.builder() - .sourceCatalog(sourceCatalog) - .targetCatalog(targetCatalog) - .deleteEntriesFromSourceCatalog(deleteSourceCatalogTables) - .build(); - CatalogMigrator 
catalogMigrator = new CatalogMigrator(params); - - List identifiers; - if (tableIdentifiers.isEmpty()) { - if (identifiersRegEx == null) { - printWriter.println( - String.format( - "%nUser has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog.")); - } else { - printWriter.println( - String.format( - "%nUser has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog " - + "which matches the regex pattern:" - + identifiersRegEx)); - } - identifiers = catalogMigrator.getMatchingTableIdentifiers(identifiersRegEx); - } else { - identifiers = tableIdentifiers; - } - - String operation = deleteSourceCatalogTables ? "migration" : "registration"; - printWriter.println( - String.format("%nIdentified %d tables for %s.", identifiers.size(), operation)); - - ImmutableCatalogMigrationResult.Builder resultBuilder = - ImmutableCatalogMigrationResult.builder(); - if (isDryRun) { - CatalogMigrationResult result = - resultBuilder.addAllRegisteredTableIdentifiers(identifiers).build(); - writeToFile(pathWithOutputDir(DRY_RUN_FILE), result.registeredTableIdentifiers()); - printWriter.println("Dry run is completed."); - printDryRunResults(result, printWriter); - return 0; - } - - printWriter.println(String.format("%nStarted %s ...", operation)); - - List> IdentifierBatches = Lists.partition(identifiers, BATCH_SIZE); - AtomicInteger counter = new AtomicInteger(); - IdentifierBatches.forEach( - identifierBatch -> { - catalogMigrator.registerTables(identifierBatch, resultBuilder); - printWriter.println( - String.format( - "%nAttempted %s for %d tables out of %d tables.", - operation, counter.incrementAndGet() * BATCH_SIZE, identifiers.size())); - }); - - CatalogMigrationResult result = resultBuilder.build(); - writeToFile( - pathWithOutputDir(FAILED_IDENTIFIERS_FILE), result.failedToRegisterTableIdentifiers()); - writeToFile( - 
pathWithOutputDir(FAILED_TO_DELETE_AT_SOURCE_FILE), - result.failedToDeleteTableIdentifiers()); - - printWriter.println(String.format("%nFinished %s ...", operation)); - printSummary(result, printWriter, sourceCatalog.name(), targetCatalog.name()); - printDetails(result, printWriter); - return 0; - } - - private boolean canProceed(PrintWriter printWriter, Catalog sourceCatalog) { - if (isDryRun || disablePrompts) { - return true; - } - if (deleteSourceCatalogTables) { - if (sourceCatalog instanceof HadoopCatalog) { - printWriter.println( - String.format( - "[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " - + "catalog. %nAvoid operating the migrated tables from the source catalog after migration. " - + "Use the tables from target catalog.")); - } - return PromptUtil.proceedForMigration(printWriter); - } else { - return PromptUtil.proceedForRegistration(printWriter); - } - } - - private List processIdentifiersInput(PrintWriter printWriter) { - List tableIdentifiers; - if (identifiersFromFile != null) { - try { - printWriter.println( - String.format("Collecting identifiers from the file %s...", identifiersFromFile)); - printWriter.println(); - tableIdentifiers = - Files.readAllLines(Paths.get(identifiersFromFile)).stream() - .map(TableIdentifier::parse) - .collect(Collectors.toList()); - } catch (IOException e) { - throw new RuntimeException("Failed to read the file:", e); - } - } else if (!identifiers.isEmpty()) { - tableIdentifiers = - identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toList()); - } else { - tableIdentifiers = Collections.emptyList(); - } - return tableIdentifiers; - } - - private void validateIdentifierOptions() { - if (identifiersFromFile != null && !identifiers.isEmpty() && identifiersRegEx != null) { - throw new IllegalArgumentException( - "All the three identifier options (`--identifiers`, `--identifiers-from-file`, " - + "`--identifiers-regex`) are configured. 
Please use only one of them."); - } else if (identifiersFromFile != null) { - if (!identifiers.isEmpty()) { - throw new IllegalArgumentException( - "Both `--identifiers` and `--identifiers-from-file` options are configured. Please use only one of them."); - } else if (identifiersRegEx != null) { - throw new IllegalArgumentException( - "Both `--identifiers-regex` and `--identifiers-from-file` options are configured. Please use only one of them."); - } else { - if (!Files.exists(Paths.get(identifiersFromFile))) { - throw new IllegalArgumentException( - "File specified in `--identifiers-from-file` option does not exist."); - } - } - } else if (!identifiers.isEmpty()) { - if (identifiersRegEx != null) { - throw new IllegalArgumentException( - "Both `--identifiers-regex` and `--identifiers` options are configured. Please use only one of them."); - } - } - } - - private void printSummary( - CatalogMigrationResult result, - PrintWriter printWriter, - String sourceCatalogType, - String targetCatalogType) { - printWriter.println(String.format("%nSummary: ")); - if (!result.registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Successfully %s %d tables from %s catalog to %s catalog.", - deleteSourceCatalogTables ? "migrated" : "registered", - result.registeredTableIdentifiers().size(), - sourceCatalogType, - targetCatalogType)); - } - if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to %s %d tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. " - + "%nFailed identifiers are written into `%s`. " - + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts.", - deleteSourceCatalogTables ? 
"migrate" : "register", - result.failedToRegisterTableIdentifiers().size(), - sourceCatalogType, - targetCatalogType, - FAILED_IDENTIFIERS_FILE)); - } - if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to delete %d tables from %s catalog. " - + "Please check the `catalog_migration.log` file for the reason. " - + "%nFailed to delete identifiers are written into `%s`. ", - result.failedToDeleteTableIdentifiers().size(), - sourceCatalogType, - FAILED_TO_DELETE_AT_SOURCE_FILE)); - } - } - - private void printDetails(CatalogMigrationResult result, PrintWriter printWriter) { - printWriter.println(String.format("%nDetails: ")); - if (!result.registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Successfully %s these tables:", - deleteSourceCatalogTables ? "migrated" : "registered")); - printWriter.println(result.registeredTableIdentifiers()); - } - - if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- Failed to %s these tables:", deleteSourceCatalogTables ? "migrate" : "register")); - printWriter.println(result.failedToRegisterTableIdentifiers()); - } - - if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); - printWriter.println(result.failedToDeleteTableIdentifiers()); - } - } - - private void printDryRunResults(CatalogMigrationResult result, PrintWriter printWriter) { - printWriter.println(String.format("%nSummary: ")); - if (result.registeredTableIdentifiers().isEmpty()) { - printWriter.println( - String.format( - "- No tables are identified for %s. Please check logs for more info.", - deleteSourceCatalogTables ? "migration" : "registration")); - return; - } - printWriter.println( - String.format( - "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. 
" - + "You can use this file with `--identifiers-from-file` option.", - result.registeredTableIdentifiers().size(), - deleteSourceCatalogTables ? "migration" : "registration", - DRY_RUN_FILE)); - - printWriter.println(String.format("%nDetails: ")); - printWriter.println( - String.format( - "- Identified these tables for %s by dry-run:", - deleteSourceCatalogTables ? "migration" : "registration")); - printWriter.println(result.registeredTableIdentifiers()); - } - - private static void writeToFile(Path filePath, List identifiers) { - List identifiersString = - identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); - try { - Files.write(filePath, identifiersString); - } catch (IOException e) { - throw new RuntimeException("Failed to write the file:" + filePath, e); - } - } - - private Path pathWithOutputDir(String fileName) { - if (outputDirPath == null) { - return Paths.get(fileName); - } - return outputDirPath.resolve(fileName); - } - - private static String catalogImpl(CatalogType type, String customCatalogImpl) { - switch (type) { - case CUSTOM: - if (customCatalogImpl == null || customCatalogImpl.isEmpty()) { - throw new IllegalArgumentException( - "Need to specify the fully qualified class name of the custom catalog " + "impl"); - } - return customCatalogImpl; - case DYNAMODB: - return DynamoDbCatalog.class.getName(); - case ECS: - return EcsCatalog.class.getName(); - case GLUE: - return GlueCatalog.class.getName(); - case HADOOP: - return HadoopCatalog.class.getName(); - case HIVE: - return HiveCatalog.class.getName(); - case JDBC: - return JdbcCatalog.class.getName(); - case NESSIE: - return NessieCatalog.class.getName(); - case REST: - return RESTCatalog.class.getName(); - default: - throw new IllegalArgumentException("Unsupported type: " + type.name()); - } - } - - public enum CatalogType { - CUSTOM, - DYNAMODB, - ECS, - GLUE, - HADOOP, - HIVE, - JDBC, - NESSIE, - REST - } } diff --git 
a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java new file mode 100644 index 0000000..f50ca5a --- /dev/null +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration.cli; + +import java.util.Map; +import java.util.Objects; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; +import org.apache.iceberg.aws.glue.GlueCatalog; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.dell.ecs.EcsCatalog; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.hive.HiveCatalog; +import org.apache.iceberg.jdbc.JdbcCatalog; +import org.apache.iceberg.nessie.NessieCatalog; +import org.apache.iceberg.rest.RESTCatalog; + +public final class CatalogUtil { + + private CatalogUtil() {} + + public enum CatalogType { + CUSTOM, + DYNAMODB, + ECS, + GLUE, + HADOOP, + HIVE, + JDBC, + NESSIE, + REST + } + + static Catalog buildCatalog( + Map catalogProperties, + CatalogType catalogType, + String customCatalogImpl, + Map hadoopConf) { + Configuration sourceCatalogConf = new Configuration(); + hadoopConf.forEach(sourceCatalogConf::set); + return org.apache.iceberg.CatalogUtil.loadCatalog( + 
Objects.requireNonNull(catalogImpl(catalogType, customCatalogImpl)), + catalogType.name(), + catalogProperties, + sourceCatalogConf); + } + + private static String catalogImpl(CatalogType type, String customCatalogImpl) { + switch (type) { + case CUSTOM: + if (customCatalogImpl == null || customCatalogImpl.isEmpty()) { + throw new IllegalArgumentException( + "Need to specify the fully qualified class name of the custom catalog " + "impl"); + } + return customCatalogImpl; + case DYNAMODB: + return DynamoDbCatalog.class.getName(); + case ECS: + return EcsCatalog.class.getName(); + case GLUE: + return GlueCatalog.class.getName(); + case HADOOP: + return HadoopCatalog.class.getName(); + case HIVE: + return HiveCatalog.class.getName(); + case JDBC: + return JdbcCatalog.class.getName(); + case NESSIE: + return NessieCatalog.class.getName(); + case REST: + return RESTCatalog.class.getName(); + default: + throw new IllegalArgumentException("Unsupported type: " + type.name()); + } + } +} diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java new file mode 100644 index 0000000..3813a68 --- /dev/null +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration.cli; + +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.iceberg.catalog.TableIdentifier; +import picocli.CommandLine; + +public class IdentifierOptions { + + @CommandLine.Spec CommandLine.Model.CommandSpec commandSpec; + + @CommandLine.Option( + names = {"--identifiers"}, + split = ",", + description = + "optional selective list of identifiers to register. If not specified, all the tables will be registered. " + + "Use this when there are few identifiers that need to be registered. For a large number of identifiers, " + + "use the `--identifiers-from-file` or `--identifiers-regex` option.") + private List identifiers = new ArrayList<>(); + + @CommandLine.Option( + names = {"--identifiers-from-file"}, + description = + "optional text file path that contains a list of table identifiers (one per line) to register. Should not be " + + "used with `--identifiers` or `--identifiers-regex` option.") + private String identifiersFromFile; + + @CommandLine.Option( + names = {"--identifiers-regex"}, + description = + "optional regular expression pattern used to register only the tables whose identifiers match this pattern. 
" + + "Should not be used with `--identifiers` or '--identifiers-from-file' option.") + protected String identifiersRegEx; + + protected List processIdentifiersInput() { + if (identifiersFromFile != null && !Files.exists(Paths.get(identifiersFromFile))) { + throw new IllegalArgumentException( + "File specified in `--identifiers-from-file` option does not exist."); + } + PrintWriter printWriter = commandSpec.commandLine().getOut(); + List tableIdentifiers; + if (identifiersFromFile != null) { + try { + printWriter.printf("Collecting identifiers from the file %s...%n", identifiersFromFile); + tableIdentifiers = + Files.readAllLines(Paths.get(identifiersFromFile)).stream() + .map(TableIdentifier::parse) + .collect(Collectors.toList()); + } catch (IOException e) { + throw new RuntimeException("Failed to read the file:", e); + } + } else if (!identifiers.isEmpty()) { + tableIdentifiers = + identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toList()); + } else { + tableIdentifiers = Collections.emptyList(); + } + return tableIdentifiers; + } +} diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java new file mode 100644 index 0000000..fcebf33 --- /dev/null +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration.cli; + +import picocli.CommandLine; + +@CommandLine.Command( + name = "migrate", + mixinStandardHelpOptions = true, + versionProvider = CLIVersionProvider.class, + // As both source and target catalog has similar configurations, + // documentation is easy to read if the target and source property is one after another instead + // of sorted order. + sortOptions = false, + description = + "Bulk migrate the iceberg tables from source catalog to target catalog without data copy." + + " Table entries from source catalog will be deleted after the successful migration to the target " + + "catalog.") +public class MigrateCommand extends BaseRegisterCommand { + + @Override + protected boolean isDeleteSourceCatalogTables() { + return true; + } +} diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java index 0ceccea..9d0e947 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java @@ -22,7 +22,7 @@ public final class PromptUtil { private PromptUtil() {} - public static final String WARNING_FOR_REGISTRATION = + private static final String WARNING_FOR_REGISTRATION = String.format( "%n[WARNING]%n" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " @@ -34,10 +34,10 @@ private PromptUtil() {} + "catalog. " + "%n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " + "loss of data, and table corruption. 
" - + "%n\tUse `--delete-source-tables` option to automatically delete the table from source catalog after " + + "%n\tUse `migrate` command to automatically delete the table from source catalog after " + "migration."); - public static final String WARNING_FOR_MIGRATION = + private static final String WARNING_FOR_MIGRATION = String.format( "%n[WARNING]%n" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " @@ -48,11 +48,11 @@ private PromptUtil() {} + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + "%n\tand can only be accessed from the target catalog."); - public static boolean proceedForRegistration(PrintWriter printWriter) { + static boolean proceedForRegistration(PrintWriter printWriter) { return proceed(WARNING_FOR_REGISTRATION, printWriter); } - public static boolean proceedForMigration(PrintWriter printWriter) { + static boolean proceedForMigration(PrintWriter printWriter) { return proceed(WARNING_FOR_MIGRATION, printWriter); } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java new file mode 100644 index 0000000..97d0abe --- /dev/null +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration.cli; + +import picocli.CommandLine; + +@CommandLine.Command( + name = "register", + mixinStandardHelpOptions = true, + versionProvider = CLIVersionProvider.class, + // As both source and target catalog has similar configurations, + // documentation is easy to read if the target and source property is one after another instead + // of sorted order. + sortOptions = false, + description = + "Bulk register the iceberg tables from source catalog to target catalog without data copy.") +public class RegisterCommand extends BaseRegisterCommand { + + @Override + protected boolean isDeleteSourceCatalogTables() { + return false; + } +} diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java new file mode 100644 index 0000000..2f2dd11 --- /dev/null +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration.cli; + +import java.util.HashMap; +import java.util.Map; +import org.apache.iceberg.catalog.Catalog; +import picocli.CommandLine; + +public class SourceCatalogOptions { + + @CommandLine.Option( + names = "--source-catalog-type", + required = true, + description = + "source catalog type. 
" + + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") + private org.projectnessie.tools.catalog.migration.cli.CatalogUtil.CatalogType type; + + @CommandLine.Option( + names = "--source-catalog-properties", + required = true, + split = ",", + description = "source catalog properties (like uri, warehouse, etc)") + protected Map properties; + + @CommandLine.Option( + names = "--source-catalog-hadoop-conf", + split = ",", + description = + "optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " + + "using an Iceberg FileIO.") + private Map hadoopConf = new HashMap<>(); + + @CommandLine.Option( + names = {"--source-custom-catalog-impl"}, + description = + "optional fully qualified class name of the custom catalog implementation of the source catalog. Required " + + "when the catalog type is CUSTOM.") + private String customCatalogImpl; + + Catalog build() { + return CatalogUtil.buildCatalog(properties, type, customCatalogImpl, hadoopConf); + } +} diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java new file mode 100644 index 0000000..5c5fdb9 --- /dev/null +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration.cli; + +import java.util.HashMap; +import java.util.Map; +import org.apache.iceberg.catalog.Catalog; +import picocli.CommandLine; + +public class TargetCatalogOptions { + + @CommandLine.Option( + names = "--target-catalog-type", + required = true, + description = + "target catalog type. " + + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") + private CatalogUtil.CatalogType type; + + @CommandLine.Option( + names = "--target-catalog-properties", + required = true, + split = ",", + description = "target catalog properties (like uri, warehouse, etc)") + private Map properties; + + @CommandLine.Option( + names = "--target-catalog-hadoop-conf", + split = ",", + description = + "optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " + + "using an Iceberg FileIO.") + private Map hadoopConf = new HashMap<>(); + + @CommandLine.Option( + names = {"--target-custom-catalog-impl"}, + description = + "optional fully qualified class name of the custom catalog implementation of the target catalog. 
Required " + + "when the catalog type is CUSTOM.") + private String customCatalogImpl; + + protected Catalog build() { + return CatalogUtil.buildCatalog(properties, type, customCatalogImpl, hadoopConf); + } +} diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java index 13829d4..82d4901 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java @@ -42,7 +42,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; import org.projectnessie.tools.catalog.migration.api.test.AbstractTest; -import org.projectnessie.tools.catalog.migration.cli.CatalogMigrationCLI; +import org.projectnessie.tools.catalog.migration.cli.CatalogUtil; public abstract class AbstractCLIMigrationTest extends AbstractTest { @@ -82,7 +82,7 @@ protected void afterEach() throws IOException { @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegister(boolean deleteSourceTables) throws Exception { - RunCLI run = registerTablesCLI(deleteSourceTables, registerAllTablesArgs()); + RunCLI run = runCLI(deleteSourceTables, registerAllTablesArgs()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) @@ -119,7 +119,7 @@ public void testRegister(boolean deleteSourceTables) throws Exception { public void testRegisterSelectedTables(boolean deleteSourceTables) throws Exception { // using `--identifiers` option RunCLI run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -132,7 +132,8 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except "--identifiers", "bar.tbl3", "--output-dir", - outputDir.toAbsolutePath().toString()); + 
outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Assertions.assertThat(run.getOut()) .doesNotContain( @@ -165,7 +166,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except // using `--identifiers-from-file` option Files.write(identifierFile, Collections.singletonList("bar.tbl4")); run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -178,7 +179,8 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except "--identifiers-from-file", identifierFile.toAbsolutePath().toString(), "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Files.delete(identifierFile); Assertions.assertThat(run.getExitCode()).isEqualTo(0); @@ -209,7 +211,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except // using --identifiers-regex option which matches all the tables starts with "foo." run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -222,7 +224,8 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except "--identifiers-regex", "^foo\\..*", "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) .contains( @@ -258,7 +261,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except public void testRegisterError(boolean deleteSourceTables) throws Exception { // use invalid namespace which leads to NoSuchTableException RunCLI run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -271,7 +274,8 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "--identifiers", "dummy.tbl3", "--output-dir", - outputDir.toAbsolutePath().toString()); + 
outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -288,7 +292,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { String.format("Details: %n- Failed to %s these tables:%n[dummy.tbl3]", operation)); // try to register same table twice which leads to AlreadyExistsException - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -301,9 +305,10 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "--identifiers", "foo.tbl2", "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -316,7 +321,8 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "--identifiers", "foo.tbl2", "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -338,7 +344,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Exception { // register only foo.tbl2 RunCLI run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -351,7 +357,8 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep "--identifiers", "foo.tbl2", "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? 
"migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -374,7 +381,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep // register all the tables from source catalog again run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -385,7 +392,8 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep "--target-catalog-properties", targetCatalogProperties, "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -426,7 +434,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep // retry the failed tables using --identifiers-from-file run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -439,7 +447,8 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep "--identifiers-from-file", failedIdentifiersFile.toAbsolutePath().toString(), "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Assertions.assertThat(run.getOut()) .contains( String.format( @@ -463,7 +472,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { // source catalog is catalog2 which has no tables. 
RunCLI run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", targetCatalogType, @@ -474,7 +483,8 @@ public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { "--target-catalog-properties", sourceCatalogProperties, "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? "migration" : "registration"; @@ -487,7 +497,7 @@ public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { @ValueSource(booleans = {true, false}) public void testDryRun(boolean deleteSourceTables) throws Exception { RunCLI run = - registerTablesCLI( + runCLI( deleteSourceTables, "--source-catalog-type", sourceCatalogType, @@ -499,7 +509,8 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { targetCatalogProperties, "--dry-run", "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); // should not prompt for dry run @@ -536,7 +547,7 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E catalog1.createTable( TableIdentifier.of(Namespace.of("foo"), "tblx" + val), schema)); - RunCLI run = registerTablesCLI(deleteSourceTables, registerAllTablesArgs()); + RunCLI run = runCLI(deleteSourceTables, registerAllTablesArgs()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? "migration" : "registration"; @@ -576,39 +587,40 @@ private static String[] registerAllTablesArgs() { "--target-catalog-properties", targetCatalogProperties, "--output-dir", - outputDir.toAbsolutePath().toString()); + outputDir.toAbsolutePath().toString(), + "--disable-prompts"); return args.toArray(new String[0]); } - private static RunCLI registerTablesCLI(boolean deleteSourceTables, String... 
args) - throws Exception { + private static RunCLI runCLI(boolean deleteSourceTables, String... args) throws Exception { + List argsList = Lists.newArrayList(args); if (!deleteSourceTables) { - return RunCLI.run(args); + argsList.add(0, "register"); + } else { + argsList.add(0, "migrate"); } - List argsList = Lists.newArrayList(args); - argsList.add("--delete-source-tables"); return RunCLI.run(argsList.toArray(new String[0])); } protected static String catalogType(Catalog catalog) { if (catalog instanceof DynamoDbCatalog) { - return CatalogMigrationCLI.CatalogType.DYNAMODB.name(); + return CatalogUtil.CatalogType.DYNAMODB.name(); } else if (catalog instanceof EcsCatalog) { - return CatalogMigrationCLI.CatalogType.ECS.name(); + return CatalogUtil.CatalogType.ECS.name(); } else if (catalog instanceof GlueCatalog) { - return CatalogMigrationCLI.CatalogType.GLUE.name(); + return CatalogUtil.CatalogType.GLUE.name(); } else if (catalog instanceof HadoopCatalog) { - return CatalogMigrationCLI.CatalogType.HADOOP.name(); + return CatalogUtil.CatalogType.HADOOP.name(); } else if (catalog instanceof HiveCatalog) { - return CatalogMigrationCLI.CatalogType.HIVE.name(); + return CatalogUtil.CatalogType.HIVE.name(); } else if (catalog instanceof JdbcCatalog) { - return CatalogMigrationCLI.CatalogType.JDBC.name(); + return CatalogUtil.CatalogType.JDBC.name(); } else if (catalog instanceof NessieCatalog) { - return CatalogMigrationCLI.CatalogType.NESSIE.name(); + return CatalogUtil.CatalogType.NESSIE.name(); } else if (catalog instanceof RESTCatalog) { - return CatalogMigrationCLI.CatalogType.REST.name(); + return CatalogUtil.CatalogType.REST.name(); } else { - return CatalogMigrationCLI.CatalogType.CUSTOM.name(); + return CatalogUtil.CatalogType.CUSTOM.name(); } } } diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java index 91ff956..d8b91fb 100644 --- 
a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java @@ -15,12 +15,10 @@ */ package org.projectnessie.tools.catlog.migration.cli; -import static java.util.Collections.singletonList; import static org.junit.jupiter.params.provider.Arguments.arguments; +import com.google.common.collect.Lists; import java.nio.file.Path; -import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.stream.Stream; import org.assertj.core.api.Assertions; @@ -44,58 +42,52 @@ protected static void initLogDir() { private static Stream optionErrors() { return Stream.of( // no arguments - arguments( - Collections.emptyList(), - "Missing required options: '--source-catalog-type=', " - + "'--source-catalog-properties=', '--target-catalog-type=', " - + "'--target-catalog-properties='"), + arguments(Lists.newArrayList(), "Missing required option: '--output-dir='"), // missing required arguments arguments( - singletonList(""), - "Missing required options: '--source-catalog-type=', " - + "'--source-catalog-properties=', '--target-catalog-type=', " - + "'--target-catalog-properties='"), + Lists.newArrayList(""), "Missing required option: '--output-dir='"), // missing required arguments arguments( - Arrays.asList("--source-catalog-type", "GLUE"), - "Missing required options: '--source-catalog-properties=', " - + "'--target-catalog-type=', '--target-catalog-properties='"), + Lists.newArrayList("--source-catalog-type", "GLUE"), + "Missing required option: '--output-dir='"), // missing required arguments arguments( - Arrays.asList( + Lists.newArrayList( "--source-catalog-type", "HIVE", "--source-catalog-properties", "properties1=ab", "--target-catalog-type", - "NESSIE"), - "Missing required option: '--target-catalog-properties='"), + "NESSIE", + "--output-dir", + "path"), + "Error: Missing required argument(s): --target-catalog-properties="), + // missing required 
arguments + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "properties1=ab", + "--target-catalog-properties", + "properties2=cd", + "--output-dir", + "path"), + "Error: Missing required argument(s): --target-catalog-type="), // missing required arguments arguments( - Arrays.asList( + Lists.newArrayList( "--source-catalog-type", "HIVE", "--source-catalog-properties", "properties1=ab", + "--target-catalog-type", + "NESSIE", "--target-catalog-properties", "properties2=cd"), - "Missing required option: '--target-catalog-type='")); - } - - @ParameterizedTest - @MethodSource("optionErrors") - @Order(0) - public void testOptionErrors(List args, String expectedMessage) throws Exception { - RunCLI run = RunCLI.run(args); - - Assertions.assertThat(run.getExitCode()).isEqualTo(2); - Assertions.assertThat(run.getErr()).contains(expectedMessage); - } - - private static Stream invalidArgs() { - return Stream.of( + "Missing required option: '--output-dir='"), arguments( - Arrays.asList( + Lists.newArrayList( "--source-catalog-type", "HADOOP", "--source-catalog-properties", @@ -103,11 +95,18 @@ private static Stream invalidArgs() { "--target-catalog-type", "HIVE", "--target-catalog-properties", - "k3=v3, k4=v4"), - "java.lang.IllegalArgumentException: Cannot initialize HadoopCatalog " - + "because warehousePath must not be null or empty"), + "k3=v3, k4=v4", + "--identifiers", + "foo.tbl", + "--identifiers-from-file", + "file.txt", + "--identifiers-regex", + "^foo\\.", + "--output-dir", + "path"), + "Error: --identifiers=, --identifiers-from-file=, --identifiers-regex= are mutually exclusive (specify only one)"), arguments( - Arrays.asList( + Lists.newArrayList( "--source-catalog-type", "HADOOP", "--source-catalog-properties", @@ -120,12 +119,11 @@ private static Stream invalidArgs() { "foo.tbl", "--identifiers-from-file", "file.txt", - "--identifiers-regex", - "^foo\\."), - "java.lang.IllegalArgumentException: All the three 
identifier options (`--identifiers`, " - + "`--identifiers-from-file`, `--identifiers-regex`) are configured. Please use only one of them."), + "--output-dir", + "path"), + "Error: --identifiers=, --identifiers-from-file= are mutually exclusive (specify only one)"), arguments( - Arrays.asList( + Lists.newArrayList( "--source-catalog-type", "HADOOP", "--source-catalog-properties", @@ -134,12 +132,15 @@ private static Stream invalidArgs() { "HIVE", "--target-catalog-properties", "k3=v3, k4=v4", + "--identifiers-regex", + "^foo\\.", "--identifiers-from-file", - "file.txt"), - "java.lang.IllegalArgumentException: " - + "File specified in `--identifiers-from-file` option does not exist."), + "file.txt", + "--output-dir", + "path"), + "Error: --identifiers-from-file=, --identifiers-regex= are mutually exclusive (specify only one)"), arguments( - Arrays.asList( + Lists.newArrayList( "--source-catalog-type", "HADOOP", "--source-catalog-properties", @@ -150,12 +151,34 @@ private static Stream invalidArgs() { "k3=v3, k4=v4", "--identifiers", "foo.tbl", - "--identifiers-from-file", - "file.txt"), - "java.lang.IllegalArgumentException: Both `--identifiers` and `--identifiers-from-file` " - + "options are configured. 
Please use only one of them."), + "--identifiers-regex", + "^foo\\.", + "--output-dir", + "path"), + "Error: --identifiers=, --identifiers-regex= are mutually exclusive " + + "(specify only one)")); + } + + @ParameterizedTest + @MethodSource("optionErrors") + @Order(0) + public void testOptionErrorsForRegister(List args, String expectedMessage) + throws Exception { + executeAndValidateResults("register", args, expectedMessage, 2); + } + + @ParameterizedTest + @MethodSource("optionErrors") + @Order(1) + public void testOptionErrorsForMigrate(List args, String expectedMessage) + throws Exception { + executeAndValidateResults("migrate", args, expectedMessage, 2); + } + + private static Stream invalidArgs() { + return Stream.of( arguments( - Arrays.asList( + Lists.newArrayList( "--source-catalog-type", "HADOOP", "--source-catalog-properties", @@ -164,14 +187,12 @@ private static Stream invalidArgs() { "HIVE", "--target-catalog-properties", "k3=v3, k4=v4", - "--identifiers-regex", - "^foo\\.", - "--identifiers-from-file", - "file.txt"), - "java.lang.IllegalArgumentException: Both `--identifiers-regex` " - + "and `--identifiers-from-file` options are configured. Please use only one of them."), + "--output-dir", + "path"), + "java.lang.IllegalArgumentException: Cannot initialize HadoopCatalog " + + "because warehousePath must not be null or empty"), arguments( - Arrays.asList( + Lists.newArrayList( "--source-catalog-type", "HADOOP", "--source-catalog-properties", @@ -180,29 +201,45 @@ private static Stream invalidArgs() { "HIVE", "--target-catalog-properties", "k3=v3, k4=v4", - "--identifiers", - "foo.tbl", - "--identifiers-regex", - "^foo\\."), - "java.lang.IllegalArgumentException: Both `--identifiers-regex` and " - + "`--identifiers` options are configured. 
Please use only one of them.")); + "--identifiers-from-file", + "file.txt", + "--output-dir", + "path"), + "java.lang.IllegalArgumentException: " + + "File specified in `--identifiers-from-file` option does not exist.")); } @ParameterizedTest - @Order(1) + @Order(2) @MethodSource("invalidArgs") - public void testInvalidArgs(List args, String expectedMessage) throws Exception { - RunCLI run = RunCLI.run(args); + public void testInvalidArgsForRegister(List args, String expectedMessage) + throws Exception { + executeAndValidateResults("register", args, expectedMessage, 1); + } - Assertions.assertThat(run.getExitCode()).isEqualTo(1); - Assertions.assertThat(run.getErr()).contains(expectedMessage); + @ParameterizedTest + @Order(2) + @MethodSource("invalidArgs") + public void testInvalidArgsForMigrate(List args, String expectedMessage) + throws Exception { + executeAndValidateResults("migrate", args, expectedMessage, 1); } @Test - @Order(2) + @Order(4) public void version() throws Exception { RunCLI run = RunCLI.run("--version"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()).startsWith(System.getProperty("expectedCLIVersion")); } + + private static void executeAndValidateResults( + String command, List args, String expectedMessage, int expectedErrorCode) + throws Exception { + args.add(0, command); + RunCLI run = RunCLI.run(args); + + Assertions.assertThat(run.getExitCode()).isEqualTo(expectedErrorCode); + Assertions.assertThat(run.getErr()).contains(expectedMessage); + } } diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java index 366b399..58273ee 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java @@ -15,8 +15,8 @@ */ package 
org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java index fdffe5d..d983ed2 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java @@ -15,8 +15,8 @@ */ package org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java index 7b70f01..e546d80 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java @@ -15,8 +15,8 @@ */ 
package org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java index 44525b1..77fb899 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java @@ -15,8 +15,8 @@ */ package org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java index 4ff203f..69c2145 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java @@ -15,8 +15,8 @@ 
*/ package org.projectnessie.tools.catlog.migration.cli; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.api.CatalogMigrator.FAILED_IDENTIFIERS_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java index b97d31a..2a4afcf 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java @@ -52,12 +52,8 @@ public static RunCLI run(String... args) throws Exception { } private static int runMain(PrintWriter out, PrintWriter err, String... 
arguments) { - CatalogMigrationCLI cli = new CatalogMigrationCLI(); - // disable prompts for tests - cli.disablePrompts(); - CommandLine commandLine = - new CommandLine(cli) + new CommandLine(new CatalogMigrationCLI()) .setExecutionExceptionHandler( (ex, cmd, parseResult) -> { cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 943f0cbfa754578e88a3dae77fce6e3dea56edbf..ccebba7710deaf9f98673a68957ea02138b60d0a 100644 GIT binary patch delta 5094 zcmZu#c|6qH|DG9RA4`noBZNWrC2N)tSqjO%%aX0^O4dPAB*iC6_9R<`apl^#h-_oY z)(k_0v8Fxp{fyi9-uwN%e)GpU&v~BrS>~KG^PF=MNmQjIDr&QHR7f-kM{%U_u*1=5 zGC}ae5(^Rrg9QY8$x^}oiJ0d2O9YW{J~$dD1ovlvh&0B4L)!4S=z;Hac>K{#9q9cKq;>>BtKo1!+gw`yqE zSK8x^jC|B!qmSW#uyb@T^CkB9qRd{N3V-rEi}AEgoU_J27lw_0X`}c0&m9JhxM;RK z54_gdZ(u?R5`B3}NeVal2NTHqlktM`2eTF28%6BZCWW$-shf0l-BOVSm)hU58MTPy zDcY-5777j;ccU!Yba8wH=X6OdPJ8O5Kp^3gUNo>!b=xb6T2F&LiC2eBJj8KuLPW!4 zw3V^NnAKZm^D?tmliCvzi>UtoDH%V#%SM0d*NS+m%4}qO<)M1E{OpQ(v&ZNc`vdi| zEGlVi$Dgxy1p6+k0qGLQt(JwxZxLCZ4>wJ=sb0v%Ki?*+!ic_2exumn{%Co|| z-axdK#RUC;P|vqbe?L`K!j;sUo=uuR_#ZkRvBf%Txo6{OL&I(?dz?47Z(DcX3KTw> zGY%A=kX;fBkq$F^sX|-)1Qkg##+n-Ci{qJVPj@P?l_1Y`nD^v>fZ3HMX%(4p-TlD(>yWwJij!6Jw}l7h>CIm@Ou5B@$Wy`Ky*814%Mdi1GfG1zDG9NogaoVHHr4gannv4?w6g&10!j=lKM zFW;@=Z0}vAPAxA=R4)|`J??*$|Fh`5=ks*V7TapX`+=4n*{aXxRhh-EGX_Xrzjb4r zn0vO7Cc~wtyeM_8{**~9y7>+}1JV8Buhg%*hy|PUc#!vw#W(HFTL|BpM)U0>JxG6S zLnqn1!0++RyyJ>5VU<4mDv8>Q#{EtgS3mj7Hx}Zkr0tz1}h8Kn6q`MiwC z{Y#;D!-ndlImST(C@(*i5f0U(jD29G7g#nkiPX zki6M$QYX_fNH=E4_eg9*FFZ3wF9YAKC}CP89Kl(GNS(Ag994)0$OL4-fj_1EdR}ARB#-vP_$bWF`Qk58+ z4Jq*-YkcmCuo9U%oxGeYe7Be=?n}pX+x>ob(8oPLDUPiIryT8v*N4@0{s_VYALi;lzj19ivLJKaXt7~UfU|mu9zjbhPnIhG2`uI34urWWA9IO{ z_1zJ)lwSs{qt3*UnD}3qB^kcRZ?``>IDn>qp8L96bRaZH)Zl`!neewt(wjSk1i#zf zb8_{x_{WRBm9+0CF4+nE)NRe6K8d|wOWN)&-3jCDiK5mj>77=s+TonlH5j`nb@rB5 z5NX?Z1dk`E#$BF{`(D>zISrMo4&}^wmUIyYL-$PWmEEfEn-U0tx_vy$H6|+ 
zi{ytv2@JXBsot|%I5s74>W1K{-cvj0BYdNiRJz*&jrV9>ZXYZhEMULcM=fCmxkN&l zEoi=)b)Vazc5TQC&Q$oEZETy@!`Gnj`qoXl7mcwdY@3a-!SpS2Mau|uK#++@>H8QC zr2ld8;<_8We%@E?S=E?=e9c$BL^9X?bj*4W;<+B&OOe+3{<`6~*fC(=`TO>o^A(Y! zA`Qc1ky?*6xjVfR?ugE~oY`Gtzhw^{Z@E6vZ`mMRAp>Odpa!m zzWmtjT|Lj^qiZMfj%%un-o$Eu>*v12qF{$kCKai^?DF=$^tfyV%m9;W@pm-BZn_6b z{jsXY3!U`%9hzk6n7YyHY%48NhjI6jjuUn?Xfxe0`ARD_Q+T_QBZ{ zUK@!63_Wr`%9q_rh`N4=J=m;v>T{Y=ZLKN^m?(KZQ2J%|3`hV0iogMHJ} zY6&-nXirq$Yhh*CHY&Qf*b@@>LPTMf z(cMorwW?M11RN{H#~ApKT)F!;R#fBHahZGhmy>Sox`rk>>q&Y)RG$-QwH$_TWk^hS zTq2TC+D-cB21|$g4D=@T`-ATtJ?C=aXS4Q}^`~XjiIRszCB^cvW0OHe5;e~9D%D10 zl4yP4O=s-~HbL7*4>#W52eiG7*^Hi)?@-#*7C^X5@kGwK+paI>_a2qxtW zU=xV7>QQROWQqVfPcJ$4GSx`Y23Z&qnS?N;%mjHL*EVg3pBT{V7bQUI60jtBTS?i~ zycZ4xqJ<*3FSC6_^*6f)N|sgB5Bep(^%)$=0cczl>j&n~KR!7WC|3;Zoh_^GuOzRP zo2Hxf50w9?_4Qe368fZ0=J|fR*jO_EwFB1I^g~i)roB|KWKf49-)!N%Ggb%w=kB8)(+_%kE~G!(73aF=yCmM3Cfb9lV$G!b zoDIxqY{dH>`SILGHEJwq%rwh46_i`wkZS-NY95qdNE)O*y^+k#JlTEij8NT(Y_J!W zFd+YFoZB|auOz~A@A{V*c)o7E(a=wHvb@8g5PnVJ&7D+Fp8ABV z5`&LD-<$jPy{-y*V^SqM)9!#_Pj2-x{m$z+9Z*o|JTBGgXYYVM;g|VbitDUfnVn$o zO)6?CZcDklDoODzj+ti@i#WcqPoZ!|IPB98LW!$-p+a4xBVM@%GEGZKmNjQMhh)zv z7D){Gpe-Dv=~>c9f|1vANF&boD=Nb1Dv>4~eD636Lldh?#zD5{6JlcR_b*C_Enw&~ z5l2(w(`{+01xb1FCRfD2ap$u(h1U1B6e&8tQrnC}Cy0GR=i^Uue26Rc6Dx}!4#K*0 zaxt`a+px7-Z!^(U1WN2#kdN#OeR|2z+C@b@w+L67VEi&ZpAdg+8`HJT=wIMJqibhT ztb3PFzsq&7jzQuod3xp7uL?h-7rYao&0MiT_Bux;U*N#ebGv92o(jM2?`1!N2W_M* zeo9$%hEtIy;=`8z1c|kL&ZPn0y`N)i$Y1R9>K!el{moiy)014448YC#9=K zwO3weN|8!`5bU_#f(+ZrVd*9`7Uw?!q?yo&7sk&DJ;#-^tcCtqt5*A(V;&LdHq7Hg zI6sC@!ly9p$^@v&XDsgIuv;9#w^!C1n5+10-tEw~ZdO1kqMDYyDl!5__o}f3hYe2M zCeO)~m&&=JZn%cVH3HzPlcE`9^@``2u+!Y}Remn)DLMHc-h5A9ATgs;7F7=u2=vBlDRbjeYvyNby=TvpI{5nb2@J_YTEEEj4q<@zaGSC_i&xxD!6)d zG{1??({Ma<=Wd4JL%bnEXoBOU_0bbNy3p%mFrMW>#c zzPEvryBevZVUvT^2P&Zobk#9j>vSIW_t?AHy>(^x-Bx~(mvNYb_%$ZFg(s5~oka+Kp(GU68I$h(Vq|fZ zC_u1FM|S)=ldt#5q>&p4r%%p)*7|Rf0}B#-FwHDTo*|P6HB_rz%R;{==hpl#xTt@VLdSrrf~g^ z`IA8ZV1b`UazYpnkn28h&U)$(gdZ*f{n`&kH%Oy54&Z;ebjlh4x?JmnjFAALu}EG} 
zfGmQ$5vEMJMH`a=+*src#dWK&N1^LFxK9Sa#q_rja$JWra09we<2oL9Q9Sx)?kZFW z$jhOFGE~VcihYlkaZv8?uA7v$*}?2h6i%Qmgc4n~3E(O_`YCRGy~}`NFaj@(?Wz;GS_?T+RqU{S)eD1j$1Gr;C^m z7zDK=xaJ^6``=#Y-2ssNfdRqh0ntJrutGV5Nv&WI%3k1wmD5n+0aRe{0k^!>LFReN zx1g*E>nbyx03KU~UT6->+rG%(owLF=beJxK&a0F;ie1GZ^eKg-VEZb&=s&ajKS#6w zjvC6J#?b|U_(%@uq$c#Q@V_me0S1%)pKz9--{EKwyM}_gOj*Og-NEWLDF_oFtPjG; zXCZ7%#=s}RKr&_5RFN@=H(015AGl4XRN9Bc51`;WWt%vzQvzexDI2BZ@xP~^2$I&7 zA(ndsgLsmA*su8p-~IS q+ZJUZM}`4#Zi@l2F-#HCw*??ha2ta#9s8?H3%YId(*zJG6aF78h1yF1 delta 5107 zcmY*d1zc0@|J{HQlai7V5+f#EN-H%&UP4MFm6QgFfuJK4DG4u#ARsbQL4i>MB1q|w zmWd#pqd~BR-yN@ieE-|$^W1aKIZtf&-p_fyw{(Uwc7_sWYDh^12cY!qXvcPQ!qF;q@b0nYU7 zP&ht}K7j%}P%%|ffm;4F0^i3P0R`a!2wm89L5P3Kfu;tTZJre<{N5}AzsH+E3DS`Q zJLIl`LRMf`JOTBLf(;IV(9(h{(}dXK!cPoSLm(o@fz8vRz}6fOw%3}3VYOsCczLF` za2RTsCWa2sS-uw(6|HLJg)Xf@S8#|+(Z5Y)ER+v+8;btfB3&9sWH6<=U}0)o-jIts zsi?Nko;No&JyZI%@1G&zsG5kKo^Zd7rk_9VIUao9;fC~nv(T0F&Af0&Rp`?x94EIS zUBPyBe5R5#okNiB1Xe--q4|hPyGzhJ?Lurt#Ci09BQ+}rlHpBhm;EmfLw{EbCz)sg zgseAE#f$met1jo;`Z6ihk?O1be3aa$IGV69{nzagziA!M*~E5lMc(Sp+NGm2IUjmn zql((DU9QP~Tn1pt6L`}|$Na-v(P+Zg&?6bAN@2u%KiB*Gmf}Z)R zMENRJgjKMqVbMpzPO{`!J~2Jyu7&xXnTDW?V?IJgy+-35q1)-J8T**?@_-2H`%X+6f5 zIRv`uLp&*?g7L~6+3O*saXT~gWsmhF*FNKw4X$29ePKi02G*)ysenhHv{u9-y?_do ztT(Cu04pk>51n}zu~=wgToY5Cx|MTlNw}GR>+`|6CAhQn=bh@S<7N)`w};;KTywDU z=QWO@RBj$WKOXSgCWg{BD`xl&DS!G}`Mm3$)=%3jzO_C+s+mfTFH5JL>}*(JKs@MqX|o2b#ZBX5P;p7;c)$F1y4HwvJ?KA938$rd)gn_U^CcUtmdaBW57 zlPph>Fz&L`cSScFjcj+7Jif3vxb20Ag~FPstm?9#OrD$e?Y~#1osDB0CFZ9Mu&%iE zSj~wZpFqu6!k%BT)}$F@Z%(d-Pqy07`N8ch2F7z^=S-!r-@j{#&{SM@a8O$P#SySx zZLD_z=I300OCA1YmKV0^lo@>^)THfZvW}s<$^w^#^Ce=kO5ymAnk>H7pK!+NJ-+F7 z1Bb6Y=r)0nZ+hRXUyD+BKAyecZxb+$JTHK5k(nWv*5%2a+u*GDt|rpReYQ}vft zXrIt#!kGO85o^~|9Oc-M5A!S@9Q)O$$&g8u>1=ew?T35h8B{-Z_S78oe=E(-YZhBPe@Y1sUt63A-Cdv>D1nIT~=Rub6$?8g>meFb7Ic@w^%@RN2z72oPZ#Ta%b(P1|&6I z61iO<8hT*)p19Bgd0JgXP{^c{P2~K@^DIXv=dF(u|DFfqD^dMIl8-x)xKIpJRZru@ zDxicyYJG}mh}=1Dfg%B$#H`CiAxPTj^;f4KRMZHUz-_x6)lEq!^mu%72*PI=t$6{Uql#dqm4 
zClgaN63!&?v*enz4k1sbaM+yCqUf+i9rw$(YrY%ir1+%cWRB<;r}$8si!6QcNAk~J zk3?dejBaC`>=T<=y=>QVt*4kL>SwYwn$(4ES793qaH)>n(axyV3R5jdXDh#e-N0K- zuUgk|N^|3*D1!Wlz-!M*b}Zc5=;K6I+>1N$&Q%)&8LWUiTYi&aQIj(luA< zN5R<8Y8L#*i0xBio$jWcaiZ4S2w3#R@CGemesy~akKP)2GojQF6!$}!_RdUJPBevX zG#~uz%Yirb0@1wgQ;ayb=qD}6{=QXxjuZQ@@kxbN!QWhtEvuhS2yAZe8fZy6*4Inr zdSyR9Dec4HrE|I=z-U;IlH;_h#7e^Hq}gaJ<-z^}{*s!m^66wu2=(*EM0UaV*&u1q zJrq!K23TO8a(ecSQFdD$y+`xu)Xk36Z*;1i{hS=H2E<8<5yHuHG~22-S+Jq|3HMAw z%qBz3auT=M!=5F|Wqke|I^E8pmJ-}>_DwX5w%d3MSdC>xW%$ocm8w8HRdZ|^#cEt1 zM*I7S6sLQq;;Mecet(Q()+?s+&MeVLOvx}(MkvytkvLHl7h*N0AT1#AqC&(he(^%przH`KqA$z_dAvJJb409@F)fYwD$JW_{_Oie8!@VdJE zU>D$@B?LawAf5$;`AZ1E!krn=aAC%4+YQrzL!59yl1;|T2)u=RBYA8lk0Ek&gS!Rb zt0&hVuyhSa0}rpZGjTA>Gz}>Uv*4)F zf7S%D2nfA7x?gPEXZWk8DZimQs#xi0?So_k`2zb!UVQEAcbvjPLK9v>J~!awnxGpq zEh$EPOc4q&jywmglnC&D)1-P0DH!@)x;uJwMHdhPh>ZLWDw+p1pf52{X2dk{_|UOmakJa4MHu?CY`6Hhv!!d7=aNwiB5z zb*Wlq1zf^3iDlPf)b_SzI*{JCx2jN;*s~ra8NeB!PghqP!0po-ZL?0Jk;2~*~sCQ<%wU`mRImd)~!23RS?XJu|{u( ztFPy3*F=ZhJmBugTv48WX)4U*pNmm~4oD4}$*-92&<)n=R)5lT z-VpbEDk>(C1hoo#-H_u0`#%L6L$ zln(}h2*Cl(5(JtVM{YZ26@Fwmp;?Qt}9$_F%`?+-JHbC;bPZj8PLq9 zWo-KFw!i&r8WuA-!3F_m9!24Z(RhalAUR~_H#Ln=$%b5GY z)oB)zO%J5TY}&BXq^7#M>euVL%01Tzj4$6^ZOjT*7@zr~q@6GEjGi)nbwzSL`TiLN z{DVG~I$w@%^#tD{>1Ap@%=XogG_^Hvy_xiRn4yy?LKsC+ zU!S79X8orh&D%>1S`x2iyi&(iG&r#YT{}~iy(FIOo8?MZU#eo*c*(RjAGj@uDi zARJur)-*{n0PgW~&mFeg`MJ?(Kr;NUom)jh?ozZtyywN9bea6ikQlh}953Oul~N%4 z@Sx!@>?l1e7V*@HZMJx!gMo0TeXdU~#W6^n?YVQJ$)nuFRkvKbfwv_s*2g(!wPO|@ zvuXF=2MiPIX)A7x!|BthSa$GB%ECnuZe_Scx&AlnC z!~6C_SF24#@^VMIw)a-7{00}}Cr5NImPbW8OTIHoo6@NcxLVTna8<<;uy~YaaeMnd z;k_ynYc_8jQn9vW_W8QLkgaHtmwGC}wRcgZ^I^GPbz{lW)p#YYoinez1MjkY%6LBd z+Vr>j&^!?b-*Vk>8I!28o`r3w&^Lal8@=50zV4&9V9oXI{^r8;JmVeos&wf?O!;_o zk))^k*1fvYw9?WrS!sG2TcX`hH@Y3mF&@{i05;_AV{>Umi8{uZP_0W5_1V2yHU<)E z+qviK*7SJtnL;76{WK!?Pv$-!w$08<%8Qy|sB|P%GiV1<+dHw*sj!C~SjsB6+1L@so+Q~n# z+Uc5+Uz+mGmkR@>H7D*c?mm8WQz;3VOpktU_DeBi>3#@z zmLe;3gP<7KPy>~k47nEeT?G?7e2g6316Xdb_y+ja5C9Ayg6QTNr~&Kbs(1>7zp|f@le;9B 
z1e(+Ga%jPWR7oc}=XcB4$z?YD)l;%#U;}~gZzGViI=fwu9OAPCCK!0w>Ay^#$b49k zT&|M?JaIyRT<;@*t_jp1ifWPvL;{maf6o0T#X!#9YX;0Q;LTQ0}0tg^_Ru4pkSr4#P zmnW|D0`A#Ie6pEfBDv39=jN2;kiUoT6I&kChsbI!jMuY6zuZql5!&i%5!c zjsHlXtjT;NV?jAb`%vy)JOK_j1rponLqc>(2qgYlLPEs>|0QV<=Pw~C`fLFKJJitt zyC6003{rxCsmtGKjhB%W2W~*%vKH8l$pZoOFT*K@uL9%CD^3rh=ZtuTU1 zJpf4|%n^yjh#dKSSCJI8;YU*CD!8Wv20*e5`-fya^75@ADLU^RdHDg3Bk3k6)dGi7 z!!z;|O1h$8q!vO*w6 I6Xdi10eY*&F8}}l diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index f396aaa..19acfb4 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=1b6b558be93f29438d3df94b7dfee02e794b94d9aca4611a92cdb79b6b88e909 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.1-bin.zip +distributionSha256Sum=ff7bf6a86f09b9b2c40bb8f48b25fc19cf2b2664fd1d220cd7ab833ec758d0d7 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip networkTimeout=10000 zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew index 65dcd68..79a61d4 100755 --- a/gradlew +++ b/gradlew @@ -144,7 +144,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then case $MAX_FD in #( max*) # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC3045 + # shellcheck disable=SC3045 MAX_FD=$( ulimit -H -n ) || warn "Could not query maximum file descriptor limit" esac @@ -152,7 +152,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then '' | soft) :;; #( *) # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 
- # shellcheck disable=SC3045 + # shellcheck disable=SC3045 ulimit -n "$MAX_FD" || warn "Could not set maximum file descriptor limit to $MAX_FD" esac From 6c507f65355efb253adcee2fcc2be5b1e546e8ef Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Tue, 7 Mar 2023 11:58:20 +0530 Subject: [PATCH 15/31] Part 2: Address review comments from 2nd March --- cli/build.gradle.kts | 10 +- .../migration/cli/BaseRegisterCommand.java | 131 +++++++++--------- .../migration/cli/IdentifierOptions.java | 10 +- .../catalog/migration/cli/PromptUtil.java | 29 ++-- cli/src/main/resources/logback.xml | 15 +- .../cli/AbstractCLIMigrationTest.java | 59 ++++---- .../tools/catlog/migration/cli/CLITest.java | 2 +- .../tools/catlog/migration/cli/RunCLI.java | 12 +- gradle/libs.versions.toml | 10 +- 9 files changed, 153 insertions(+), 125 deletions(-) diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index b8b7b28..13fad86 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -35,14 +35,20 @@ dependencies { implementation(libs.picocli) implementation(libs.iceberg.spark.runtime) implementation(libs.iceberg.dell) - implementation(libs.hadoop.aws) + implementation(libs.hadoop.aws) { exclude("com.amazonaws", "aws-java-sdk-bundle") } implementation(libs.hadoop.common) - implementation(libs.aws.sdk) + // AWS depdencies based on https://iceberg.apache.org/docs/latest/aws/#enabling-aws-integration + implementation(libs.aws.sdk.glue) + implementation(libs.aws.sdk.s3) + implementation(libs.aws.sdk.dynamo) + implementation(libs.aws.sdk.kms) + implementation(libs.aws.sdk.sts) testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) testImplementation(libs.assertj) + testImplementation(libs.logcaptor) testImplementation(project(":iceberg-catalog-migrator-api-test")) diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java 
b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index 4035d69..7cea2c9 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -17,7 +17,6 @@ import com.google.common.collect.Lists; import java.io.IOException; -import java.io.PrintWriter; import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; @@ -33,10 +32,11 @@ import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; import org.projectnessie.tools.catalog.migration.api.CatalogMigratorParams; import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import picocli.CommandLine; public abstract class BaseRegisterCommand implements Callable { - @CommandLine.Spec CommandLine.Model.CommandSpec commandSpec; @CommandLine.ArgGroup( exclusive = false, @@ -80,6 +80,8 @@ public abstract class BaseRegisterCommand implements Callable { public static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; public static final String DRY_RUN_FILE = "dry_run_identifiers.txt"; + private final Logger consoleLog = LoggerFactory.getLogger("console-log"); + public BaseRegisterCommand() {} protected abstract boolean isDeleteSourceCatalogTables(); @@ -93,13 +95,11 @@ public Integer call() { identifiers = Collections.emptyList(); } - PrintWriter printWriter = commandSpec.commandLine().getOut(); - Catalog sourceCatalog = sourceCatalogOptions.build(); - printWriter.printf("%nConfigured source catalog: %s%n", sourceCatalog.name()); + consoleLog.info("Configured source catalog: {}", sourceCatalog.name()); Catalog targetCatalog = targetCatalogOptions.build(); - printWriter.printf("%nConfigured target catalog: %s%n", targetCatalog.name()); + consoleLog.info("Configured target catalog: {}", 
targetCatalog.name()); if (!canProceed(sourceCatalog)) { return 0; @@ -117,30 +117,30 @@ public Integer call() { String identifierRegEx = identifierOptions != null ? identifierOptions.identifiersRegEx : null; if (identifiers.isEmpty()) { if (identifierRegEx != null) { - printWriter.printf( - "%nUser has not specified the table identifiers." + consoleLog.info( + "User has not specified the table identifiers." + " Selecting all the tables from all the namespaces from the source catalog " - + "which matches the regex pattern:%s%n", + + "which matches the regex pattern:{}", identifierRegEx); } else { - printWriter.printf( - "%nUser has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog.%n"); + consoleLog.info( + "User has not specified the table identifiers." + + " Selecting all the tables from all the namespaces from the source catalog."); } identifiers = catalogMigrator.getMatchingTableIdentifiers(identifierRegEx); } String operation = deleteSourceCatalogTables ? 
"migration" : "registration"; - printWriter.printf("%nIdentified %d tables for %s.%n", identifiers.size(), operation); + consoleLog.info("Identified {} tables for {}.", identifiers.size(), operation); if (isDryRun) { writeToFile(outputDirPath.resolve(DRY_RUN_FILE), identifiers); - printWriter.println("Dry run is completed."); + consoleLog.info("Dry run is completed."); printDryRunResults(identifiers); return 0; } - printWriter.printf("%nStarted %s ...%n", operation); + consoleLog.info("Started {} ...", operation); List> identifierBatches = Lists.partition(identifiers, BATCH_SIZE); int totalIdentifiers = identifiers.size(); @@ -148,9 +148,11 @@ public Integer call() { identifierBatches.forEach( identifierBatch -> { catalogMigrator.registerTables(identifierBatch); - printWriter.printf( - "%nAttempted %s for %d tables out of %d tables.%n", - operation, counter.addAndGet(identifierBatch.size()), totalIdentifiers); + consoleLog.info( + "Attempted {} for {} tables out of {} tables.", + operation, + counter.addAndGet(identifierBatch.size()), + totalIdentifiers); }); CatalogMigrationResult result = catalogMigrator.result(); @@ -160,7 +162,7 @@ public Integer call() { outputDirPath.resolve(FAILED_TO_DELETE_AT_SOURCE_FILE), result.failedToDeleteTableIdentifiers()); - printWriter.printf("%nFinished %s ...%n", operation); + consoleLog.info("Finished {} ...", operation); printSummary(result, sourceCatalog.name(), targetCatalog.name()); printDetails(result); return 0; @@ -170,39 +172,38 @@ private boolean canProceed(Catalog sourceCatalog) { if (isDryRun || disablePrompts) { return true; } - PrintWriter printWriter = commandSpec.commandLine().getOut(); if (deleteSourceCatalogTables) { if (sourceCatalog instanceof HadoopCatalog) { - printWriter.printf( - "[WARNING]: Source catalog type is HADOOP and it doesn't support dropping tables just from " - + "catalog. %nAvoid operating the migrated tables from the source catalog after migration. 
" - + "Use the tables from target catalog.%n"); + consoleLog.warn( + String.format( + "Source catalog type is HADOOP and it doesn't support dropping tables just from " + + "catalog. %nAvoid operating the migrated tables from the source catalog after migration. " + + "Use the tables from target catalog.")); } - return PromptUtil.proceedForMigration(printWriter); + return PromptUtil.proceedForMigration(); } else { - return PromptUtil.proceedForRegistration(printWriter); + return PromptUtil.proceedForRegistration(); } } private void printSummary( CatalogMigrationResult result, String sourceCatalogName, String targetCatalogName) { - PrintWriter printWriter = commandSpec.commandLine().getOut(); - printWriter.printf("%nSummary: %n"); + consoleLog.info("Summary: "); if (!result.registeredTableIdentifiers().isEmpty()) { - printWriter.printf( - "- Successfully %s %d tables from %s catalog to %s catalog.%n", + consoleLog.info( + "Successfully {} {} tables from {} catalog to {} catalog.", deleteSourceCatalogTables ? "migrated" : "registered", result.registeredTableIdentifiers().size(), sourceCatalogName, targetCatalogName); } if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - printWriter.printf( - "- Failed to %s %d tables from %s catalog to %s catalog. " + consoleLog.info( + "Failed to {} {} tables from {} catalog to {} catalog. " + "Please check the `catalog_migration.log` file for the failure reason. " - + "%nFailed identifiers are written into `%s`. " + + "Failed identifiers are written into `{}`. " + "Retry with that file using `--identifiers-from-file` option " - + "if the failure is because of network/connection timeouts.%n", + + "if the failure is because of network/connection timeouts.", deleteSourceCatalogTables ? 
"migrate" : "register", result.failedToRegisterTableIdentifiers().size(), sourceCatalogName, @@ -210,57 +211,57 @@ private void printSummary( FAILED_IDENTIFIERS_FILE); } if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - printWriter.printf( - "- Failed to delete %d tables from %s catalog. " - + "Please check the `catalog_migration.log` file for the reason. " - + "%nFailed to delete identifiers are written into `%s`. %n", - result.failedToDeleteTableIdentifiers().size(), - sourceCatalogName, - FAILED_TO_DELETE_AT_SOURCE_FILE); + consoleLog.info( + String.format( + "Failed to delete %d tables from %s catalog. " + + "Please check the `catalog_migration.log` file for the reason. " + + "%nFailed to delete identifiers are written into `%s`.", + result.failedToDeleteTableIdentifiers().size(), + sourceCatalogName, + FAILED_TO_DELETE_AT_SOURCE_FILE)); } } private void printDetails(CatalogMigrationResult result) { - PrintWriter printWriter = commandSpec.commandLine().getOut(); - printWriter.printf("%nDetails: %n"); + consoleLog.info("Details: "); if (!result.registeredTableIdentifiers().isEmpty()) { - printWriter.printf( - "- Successfully %s these tables:%n", - deleteSourceCatalogTables ? "migrated" : "registered"); - printWriter.println(result.registeredTableIdentifiers()); + consoleLog.info( + "Successfully {} these tables:", deleteSourceCatalogTables ? "migrated" : "registered"); + consoleLog.info("{}", result.registeredTableIdentifiers()); } if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - printWriter.printf( - "- Failed to %s these tables:%n", deleteSourceCatalogTables ? "migrate" : "register"); - printWriter.println(result.failedToRegisterTableIdentifiers()); + consoleLog.info( + "Failed to {} these tables:", deleteSourceCatalogTables ? 
"migrate" : "register"); + consoleLog.info("{}", result.failedToRegisterTableIdentifiers()); } if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - printWriter.println("- [WARNING] Failed to delete these tables from source catalog:"); - printWriter.println(result.failedToDeleteTableIdentifiers()); + consoleLog.warn("Failed to delete these tables from source catalog:"); + consoleLog.info("{}", result.failedToDeleteTableIdentifiers()); } } private void printDryRunResults(List result) { - PrintWriter printWriter = commandSpec.commandLine().getOut(); - printWriter.printf("%nSummary: %n"); + consoleLog.info("Summary: "); if (result.isEmpty()) { - printWriter.printf( - "- No tables are identified for %s. Please check logs for more info.%n", + consoleLog.info( + "No tables are identified for {}. Please check logs for more info.", deleteSourceCatalogTables ? "migration" : "registration"); return; } - printWriter.printf( - "- Identified %d tables for %s by dry-run. These identifiers are also written into %s. " - + "You can use this file with `--identifiers-from-file` option.%n", - result.size(), deleteSourceCatalogTables ? "migration" : "registration", DRY_RUN_FILE); - - printWriter.printf("%nDetails: %n"); - printWriter.printf( - "- Identified these tables for %s by dry-run:%n", + consoleLog.info( + "Identified {} tables for {} by dry-run. These identifiers are also written into {}. " + + "You can use this file with `--identifiers-from-file` option.", + result.size(), + deleteSourceCatalogTables ? "migration" : "registration", + DRY_RUN_FILE); + + consoleLog.info("Details: "); + consoleLog.info( + "Identified these tables for {} by dry-run:", deleteSourceCatalogTables ? 
"migration" : "registration"); - printWriter.println(result); + consoleLog.info("{}", result); } private static void writeToFile(Path filePath, List identifiers) { diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java index 3813a68..51edfa1 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java @@ -16,7 +16,6 @@ package org.projectnessie.tools.catalog.migration.cli; import java.io.IOException; -import java.io.PrintWriter; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; @@ -24,12 +23,12 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.iceberg.catalog.TableIdentifier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import picocli.CommandLine; public class IdentifierOptions { - @CommandLine.Spec CommandLine.Model.CommandSpec commandSpec; - @CommandLine.Option( names = {"--identifiers"}, split = ",", @@ -53,16 +52,17 @@ public class IdentifierOptions { + "Should not be used with `--identifiers` or '--identifiers-from-file' option.") protected String identifiersRegEx; + private final Logger consoleLog = LoggerFactory.getLogger("console-log"); + protected List processIdentifiersInput() { if (identifiersFromFile != null && !Files.exists(Paths.get(identifiersFromFile))) { throw new IllegalArgumentException( "File specified in `--identifiers-from-file` option does not exist."); } - PrintWriter printWriter = commandSpec.commandLine().getOut(); List tableIdentifiers; if (identifiersFromFile != null) { try { - printWriter.printf("Collecting identifiers from the file %s...%n", identifiersFromFile); + consoleLog.info("Collecting identifiers from the file {}...", identifiersFromFile); tableIdentifiers = 
Files.readAllLines(Paths.get(identifiersFromFile)).stream() .map(TableIdentifier::parse) diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java index 9d0e947..2e5c23b 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java @@ -16,15 +16,18 @@ package org.projectnessie.tools.catalog.migration.cli; import java.io.Console; -import java.io.PrintWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public final class PromptUtil { private PromptUtil() {} + private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); + private static final String WARNING_FOR_REGISTRATION = String.format( - "%n[WARNING]%n" + "%n" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " @@ -39,7 +42,7 @@ private PromptUtil() {} private static final String WARNING_FOR_MIGRATION = String.format( - "%n[WARNING]%n" + "%n" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. 
" + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " @@ -48,31 +51,31 @@ private PromptUtil() {} + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + "%n\tand can only be accessed from the target catalog."); - static boolean proceedForRegistration(PrintWriter printWriter) { - return proceed(WARNING_FOR_REGISTRATION, printWriter); + static boolean proceedForRegistration() { + return proceed(WARNING_FOR_REGISTRATION); } - static boolean proceedForMigration(PrintWriter printWriter) { - return proceed(WARNING_FOR_MIGRATION, printWriter); + static boolean proceedForMigration() { + return proceed(WARNING_FOR_MIGRATION); } - private static boolean proceed(String warning, PrintWriter printWriter) { - printWriter.println(warning); + private static boolean proceed(String warning) { + consoleLog.warn(warning); Console console = System.console(); while (true) { - printWriter.println( + consoleLog.info( "Have you read the above warnings and are you sure you want to continue? (yes/no):"); String input = console.readLine(); if (input.equalsIgnoreCase("yes")) { - printWriter.println("Continuing..."); + consoleLog.info("Continuing..."); return true; } else if (input.equalsIgnoreCase("no")) { - printWriter.println("Aborting..."); + consoleLog.info("Aborting..."); return false; } else { - printWriter.println("Invalid input. Please enter 'yes' or 'no'."); + consoleLog.info("Invalid input. 
Please enter 'yes' or 'no'."); } } } diff --git a/cli/src/main/resources/logback.xml b/cli/src/main/resources/logback.xml index cb1446a..6acd040 100644 --- a/cli/src/main/resources/logback.xml +++ b/cli/src/main/resources/logback.xml @@ -18,7 +18,8 @@ --> - + + ${catalog.migration.log.dir}/catalog_migration.log true @@ -26,9 +27,17 @@ - + + + %highlight(%-5level) - %msg%n + + + + + + - + diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java index 82d4901..2ef0f2c 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java @@ -96,10 +96,10 @@ public void testRegister(boolean deleteSourceTables) throws Exception { Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: %n- Successfully %s 4 tables from %s catalog to %s catalog.", + "Summary: %nSuccessfully %s 4 tables from %s catalog to %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -146,12 +146,10 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: %n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", + "Summary: %nSuccessfully %s 1 tables from %s catalog to %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains( - String.format( - "Details: %n- Successfully %s these tables:%n" + 
"[bar.tbl3]", operation)); + .contains(String.format("Details: %nSuccessfully %s these tables:%n[bar.tbl3]", operation)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -195,10 +193,10 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: %n- Successfully %s 1 tables from %s catalog to" + " %s catalog.", + "Summary: %nSuccessfully %s 1 tables from %s catalog to %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -238,10 +236,10 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: %n- Successfully %s 2 tables from %s catalog to" + " %s catalog.", + "Summary: %nSuccessfully %s 2 tables from %s catalog to %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -284,12 +282,11 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: %n- Failed to %s 1 tables from %s catalog to %s catalog." + "Summary: %nFailed to %s 1 tables from %s catalog to %s catalog." 
+ " Please check the `catalog_migration.log`", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains( - String.format("Details: %n- Failed to %s these tables:%n[dummy.tbl3]", operation)); + .contains(String.format("Details: %nFailed to %s these tables:%n[dummy.tbl3]", operation)); // try to register same table twice which leads to AlreadyExistsException runCLI( @@ -331,11 +328,11 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: %n- Failed to %s 1 tables from %s catalog to %s catalog." + "Summary: %nFailed to %s 1 tables from %s catalog to %s catalog." + " Please check the `catalog_migration.log`", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %n- Failed to %s these tables:%n[foo.tbl2]", operation)); + .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", operation)); } @Order(3) @@ -367,12 +364,10 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: %n- Successfully %s 1 tables from %s catalog to %s catalog.", + "Summary: %nSuccessfully %s 1 tables from %s catalog to %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains( - String.format( - "Details: %n" + "- Successfully %s these tables:%n" + "[foo.tbl2]", operation)); + .contains(String.format("Details: %nSuccessfully %s these tables:%n[foo.tbl2]", operation)); if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { // create a table with the same name in source catalog which got deleted. 
@@ -404,9 +399,9 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep .contains( String.format( "Summary: %n" - + "- Successfully %s 3 tables from %s catalog to %s catalog.%n" - + "- Failed to %s 1 tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. %n" + + "Successfully %s 3 tables from %s catalog to %s catalog.%n" + + "Failed to %s 1 tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + "Failed identifiers are written into `failed_identifiers.txt`. " + "Retry with that file using `--identifiers-from-file` option " + "if the failure is because of network/connection timeouts.", @@ -417,9 +412,9 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); Assertions.assertThat(run.getOut()) - .contains(String.format("- Failed to %s these tables:%n[foo.tbl2]", ops)); + .contains(String.format("Failed to %s these tables:%n[foo.tbl2]", ops)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 @@ -453,15 +448,14 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep .contains( String.format( "Summary: %n" - + "- Failed to %s 1 tables from %s catalog to %s catalog. " - + "Please check the `catalog_migration.log` file for the failure reason. %n" + + "Failed to %s 1 tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + "Failed identifiers are written into `failed_identifiers.txt`. 
" + "Retry with that file using `--identifiers-from-file` option " + "if the failure is because of network/connection timeouts.", ops, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains( - String.format("Details: %n" + "- Failed to %s these tables:%n" + "[foo.tbl2]", ops)); + .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", ops)); Assertions.assertThat(Files.exists(failedIdentifiersFile)).isTrue(); Assertions.assertThat(Files.readAllLines(failedIdentifiersFile)).containsExactly("foo.tbl2"); } @@ -523,14 +517,13 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { .contains( String.format( "Summary: %n" - + "- Identified 4 tables for %s by dry-run. " + + "Identified 4 tables for %s by dry-run. " + "These identifiers are also written into dry_run_identifiers.txt. " + "You can use this file with `--identifiers-from-file` option.", operation)); Assertions.assertThat(run.getOut()) .contains( - String.format( - "Details: %n" + "- Identified these tables for %s by dry-run:%n", operation)); + String.format("Details: %nIdentified these tables for %s by dry-run:%n", operation)); Assertions.assertThat(Files.exists(dryRunFile)).isTrue(); Assertions.assertThat(Files.readAllLines(dryRunFile)) .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); @@ -557,10 +550,10 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E Assertions.assertThat(run.getOut()) .contains( String.format( - "Summary: %n- Successfully %s 244 tables from %s catalog to" + " %s catalog.", + "Summary: %nSuccessfully %s 244 tables from %s catalog to" + " %s catalog.", operation, sourceCatalogType, targetCatalogType)); Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %n" + "- Successfully %s these tables:%n", operation)); + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); operation = deleteSourceTables ? 
"migration" : "registration"; // validate intermediate output diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java index d8b91fb..1884080 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java @@ -228,7 +228,7 @@ public void testInvalidArgsForMigrate(List args, String expectedMessage) @Test @Order(4) public void version() throws Exception { - RunCLI run = RunCLI.run("--version"); + RunCLI run = RunCLI.runWithPrintWriter("--version"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()).startsWith(System.getProperty("expectedCLIVersion")); } diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java index 2a4afcf..781b45c 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java +++ b/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java @@ -19,6 +19,7 @@ import java.io.StringWriter; import java.util.Arrays; import java.util.List; +import nl.altindag.log.LogCaptor; import org.projectnessie.tools.catalog.migration.cli.CatalogMigrationCLI; import picocli.CommandLine; @@ -42,6 +43,16 @@ public static RunCLI run(List args) throws Exception { } public static RunCLI run(String... args) throws Exception { + try (LogCaptor logCaptor = LogCaptor.forName("console-log"); + StringWriter err = new StringWriter(); + PrintWriter errWriter = new PrintWriter(err)) { + int exitCode = runMain(null, errWriter, args); + String out = String.join(System.lineSeparator(), logCaptor.getLogs()); + return new RunCLI(exitCode, out, err.toString(), args); + } + } + + public static RunCLI runWithPrintWriter(String... 
args) throws Exception { try (StringWriter out = new StringWriter(); PrintWriter outWriter = new PrintWriter(out); StringWriter err = new StringWriter(); @@ -70,7 +81,6 @@ private static int runMain(PrintWriter out, PrintWriter err, String... arguments try { return commandLine.execute(arguments); } finally { - commandLine.getOut().flush(); commandLine.getErr().flush(); } } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index ae7265c..bc349d0 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -8,7 +8,8 @@ hive = "2.3.8" # this is in mapping with iceberg repo. Later versions have junit iceberg = "1.1.0" immutables = "2.9.3" junit = "5.9.2" -logback = "1.2.11" +logback = "1.4.5" +logcaptor = "2.8.0" nessie = "0.50.0" nessieBuildPlugins = "0.2.19" nessieRunner = "0.29.0" @@ -19,7 +20,11 @@ spotlessPlugin = "6.16.0" [libraries] assertj = { module = "org.assertj:assertj-core", version.ref = "assertj" } -aws-sdk = { module = "com.amazonaws:aws-java-sdk", version.ref = "aws" } +aws-sdk-dynamo = { module = "com.amazonaws:aws-java-sdk-dynamodb", version.ref = "aws" } +aws-sdk-glue = { module = "com.amazonaws:aws-java-sdk-glue", version.ref = "aws" } +aws-sdk-kms = { module = "com.amazonaws:aws-java-sdk-kms", version.ref = "aws" } +aws-sdk-sts = { module = "com.amazonaws:aws-java-sdk-sts", version.ref = "aws" } +aws-sdk-s3 = { module = "com.amazonaws:aws-java-sdk-s3", version.ref = "aws" } google-java-format = { module = "com.google.googlejavaformat:google-java-format", version.ref = "googleJavaFormat" } guava = { module = "com.google.guava:guava", version.ref = "guava" } hadoop-aws = { module = "org.apache.hadoop:hadoop-aws", version.ref = "hadoop" } @@ -32,6 +37,7 @@ junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.re junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } logback-classic = { module = 
"ch.qos.logback:logback-classic", version.ref = "logback" } +logcaptor = { module = "io.github.hakky54:logcaptor", version.ref = "logcaptor" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } From d783e86e4b23924f56e39e63efeee86a9189b61f Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Wed, 8 Mar 2023 10:24:17 +0530 Subject: [PATCH 16/31] Part3: Unify build stuffs --- README.md | 8 +- api-test/build.gradle.kts | 6 +- api/build.gradle.kts | 2 +- .../api/AbstractCatalogMigrator.java | 4 +- buildSrc/build.gradle.kts | 21 +- buildSrc/settings.gradle.kts | 2 +- .../src/main/kotlin/BuildSupport.gradle.kts | 107 -- buildSrc/src/main/kotlin/Checkstyle.kt | 42 + buildSrc/src/main/kotlin/Errorprone.kt | 80 + buildSrc/src/main/kotlin/Ide.kt | 92 + buildSrc/src/main/kotlin/Jandex.kt | 30 + buildSrc/src/main/kotlin/Java.kt | 72 + .../main/kotlin/PublishingHelperExtension.kt | 22 + .../src/main/kotlin/PublishingHelperPlugin.kt | 257 +++ .../src/main/kotlin/ReleaseSupportPlugin.kt | 110 ++ buildSrc/src/main/kotlin/Spotless.kt | 82 + buildSrc/src/main/kotlin/Testing.kt | 67 + buildSrc/src/main/kotlin/Utilities.kt | 75 + buildSrc/src/main/kotlin/VersionTuple.kt | 101 + .../main/kotlin/build-conventions.gradle.kts | 43 + cli/build.gradle.kts | 28 +- .../migration/cli/BaseRegisterCommand.java | 47 +- .../catalog/migration/cli/PromptUtil.java | 70 +- codestyle/errorprone-rules.properties | 1652 +++++++++++++++++ gradle.properties | 24 + gradle/baselibs.versions.toml | 26 + gradle/contributors.csv | 0 gradle/developers.csv | 1 + gradle/libs.versions.toml | 22 +- ide-name.txt | 1 + 30 files changed, 2896 insertions(+), 198 deletions(-) delete mode 100644 buildSrc/src/main/kotlin/BuildSupport.gradle.kts create mode 100644 buildSrc/src/main/kotlin/Checkstyle.kt create mode 100644 buildSrc/src/main/kotlin/Errorprone.kt create mode 100644 buildSrc/src/main/kotlin/Ide.kt create mode 100644 
buildSrc/src/main/kotlin/Jandex.kt create mode 100644 buildSrc/src/main/kotlin/Java.kt create mode 100644 buildSrc/src/main/kotlin/PublishingHelperExtension.kt create mode 100644 buildSrc/src/main/kotlin/PublishingHelperPlugin.kt create mode 100644 buildSrc/src/main/kotlin/ReleaseSupportPlugin.kt create mode 100644 buildSrc/src/main/kotlin/Spotless.kt create mode 100644 buildSrc/src/main/kotlin/Testing.kt create mode 100644 buildSrc/src/main/kotlin/Utilities.kt create mode 100644 buildSrc/src/main/kotlin/VersionTuple.kt create mode 100644 buildSrc/src/main/kotlin/build-conventions.gradle.kts create mode 100644 codestyle/errorprone-rules.properties create mode 100644 gradle.properties create mode 100644 gradle/baselibs.versions.toml create mode 100644 gradle/contributors.csv create mode 100644 gradle/developers.csv create mode 100644 ide-name.txt diff --git a/README.md b/README.md index c64a4d3..90917bc 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,12 @@ Below is the CLI syntax: ``` $ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar -h Usage: iceberg-catalog-migrator [-hV] [COMMAND] --h, --help Show this help message and exit. --V, --version Print version information and exit. + -h, --help Show this help message and exit. + -V, --version Print version information and exit. +Commands: + migrate Bulk migrate the iceberg tables from source catalog to target catalog without data copy. Table entries from source catalog will be + deleted after the successful migration to the target catalog. + register Bulk register the iceberg tables from source catalog to target catalog without data copy. 
``` ``` diff --git a/api-test/build.gradle.kts b/api-test/build.gradle.kts index d86287f..296ffa7 100644 --- a/api-test/build.gradle.kts +++ b/api-test/build.gradle.kts @@ -18,7 +18,7 @@ plugins { `java-library` `maven-publish` alias(libs.plugins.nessie.run) - BuildSupport + `build-conventions` } dependencies { @@ -67,7 +67,9 @@ dependencies { } testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") - nessieQuarkusServer("org.projectnessie:nessie-quarkus:${libs.versions.nessie.get()}:runner") + nessieQuarkusServer( + "org.projectnessie.nessie:nessie-quarkus:${libs.versions.nessie.get()}:runner" + ) } nessieQuarkusApp { includeTask(tasks.named("intTest")) } diff --git a/api/build.gradle.kts b/api/build.gradle.kts index 0b26d71..c529a2f 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts @@ -17,7 +17,7 @@ plugins { `java-library` `maven-publish` - BuildSupport + `build-conventions` } dependencies { diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java index 7934e20..db54d59 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java @@ -63,8 +63,8 @@ public List getMatchingTableIdentifiers(String identifierRegex) } else { LOG.info( "Collecting all the tables from all the namespaces of source catalog" - + " which matches the regex pattern:" - + identifierRegex); + + " which matches the regex pattern:{}", + identifierRegex); Pattern pattern = Pattern.compile(identifierRegex); matchedIdentifiersPredicate = tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts index 4543001..500f2f0 100644 --- a/buildSrc/build.gradle.kts +++ 
b/buildSrc/build.gradle.kts @@ -26,11 +26,20 @@ repositories { dependencies { implementation(gradleKotlinDsl()) - val ver = libs.versions - implementation("com.diffplug.spotless:spotless-plugin-gradle:${ver.spotlessPlugin.get()}") - implementation("gradle.plugin.com.github.johnrengelman:shadow:${ver.shadowPlugin.get()}") - val nessieVer = ver.nessieBuildPlugins.get() - implementation("org.projectnessie.buildsupport:spotless:$nessieVer") + implementation(baselibs.spotless) + implementation(baselibs.jandex) + implementation(baselibs.idea.ext) + implementation(baselibs.shadow) + implementation(baselibs.protobuf) + implementation(baselibs.errorprone) + implementation(baselibs.nessie.buildsupport.jacoco) + implementation(baselibs.nessie.buildsupport.reflectionconfig) + + testImplementation(platform(baselibs.junit.bom)) + testImplementation(baselibs.assertj.core) + testImplementation(baselibs.junit.jupiter.api) + testImplementation(baselibs.junit.jupiter.params) + testRuntimeOnly(baselibs.junit.jupiter.engine) } java { @@ -39,3 +48,5 @@ java { } kotlinDslPluginOptions { jvmTarget.set(JavaVersion.VERSION_11.toString()) } + +tasks.withType().configureEach { useJUnitPlatform() } diff --git a/buildSrc/settings.gradle.kts b/buildSrc/settings.gradle.kts index 7bbbf0d..e3faf50 100644 --- a/buildSrc/settings.gradle.kts +++ b/buildSrc/settings.gradle.kts @@ -15,5 +15,5 @@ */ dependencyResolutionManagement { - versionCatalogs { create("libs") { from(files("../gradle/libs.versions.toml")) } } + versionCatalogs { create("baselibs") { from(files("../gradle/baselibs.versions.toml")) } } } diff --git a/buildSrc/src/main/kotlin/BuildSupport.gradle.kts b/buildSrc/src/main/kotlin/BuildSupport.gradle.kts deleted file mode 100644 index 10c9a90..0000000 --- a/buildSrc/src/main/kotlin/BuildSupport.gradle.kts +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.gradle.api.JavaVersion -import org.gradle.api.plugins.JavaPlugin -import org.gradle.api.plugins.JavaPluginExtension -import org.gradle.api.tasks.bundling.Jar -import org.gradle.api.tasks.compile.JavaCompile -import org.gradle.api.tasks.javadoc.Javadoc -import org.gradle.external.javadoc.CoreJavadocOptions -import org.gradle.kotlin.dsl.configure -import org.gradle.kotlin.dsl.repositories -import org.gradle.kotlin.dsl.withType - -plugins { id("org.projectnessie.buildsupport.spotless") } - -repositories { - mavenCentral() - if (System.getProperty("withMavenLocal").toBoolean()) { - mavenLocal() - } -} - -testTasks() - -tasks.withType().configureEach { - manifest { - attributes["Implementation-Title"] = "iceberg-catalog-migrator" - attributes["Implementation-Version"] = project.version - } -} - -tasks.withType().configureEach { - options.encoding = "UTF-8" - options.release.set(8) -} - -tasks.withType().configureEach { - val opt = options as CoreJavadocOptions - // don't spam log w/ "warning: no @param/@return" - opt.addStringOption("Xdoclint:-reference", "-quiet") -} - -plugins.withType().configureEach { - configure { - withJavadocJar() - withSourcesJar() - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 - modularity.inferModulePath.set(true) - } -} - -fun Project.testTasks() { - if (projectDir.resolve("src/test").exists()) { - tasks.withType().configureEach { - useJUnitPlatform {} - val testJvmArgs: String? 
by project - if (testJvmArgs != null) { - jvmArgs((testJvmArgs as String).split(" ")) - } - - systemProperty("file.encoding", "UTF-8") - systemProperty("user.language", "en") - systemProperty("user.country", "US") - systemProperty("user.variant", "") - systemProperty("test.log.level", testLogLevel()) - environment("TESTCONTAINERS_REUSE_ENABLE", "true") - filter { - isFailOnNoMatchingTests = false - when (name) { - "test" -> { - includeTestsMatching("*Test") - includeTestsMatching("Test*") - excludeTestsMatching("Abstract*") - excludeTestsMatching("IT*") - } - "intTest" -> includeTestsMatching("IT*") - } - } - if (name != "test") { - mustRunAfter(tasks.named("test")) - } - } - val intTest = - tasks.register("intTest") { - group = "verification" - description = "Runs the integration tests." - } - tasks.named("check") { dependsOn(intTest) } - } -} - -fun testLogLevel() = System.getProperty("test.log.level", "WARN") diff --git a/buildSrc/src/main/kotlin/Checkstyle.kt b/buildSrc/src/main/kotlin/Checkstyle.kt new file mode 100644 index 0000000..8db5fae --- /dev/null +++ b/buildSrc/src/main/kotlin/Checkstyle.kt @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.gradle.api.Project +import org.gradle.api.plugins.quality.Checkstyle +import org.gradle.api.plugins.quality.CheckstyleExtension +import org.gradle.api.plugins.quality.CheckstylePlugin +import org.gradle.kotlin.dsl.apply +import org.gradle.kotlin.dsl.configure +import org.gradle.kotlin.dsl.withType + +fun Project.configureCheckstyle() { + apply() + configure { + toolVersion = libsRequiredVersion("checkstyle") + config = resources.text.fromFile(rootProject.file("codestyle/checkstyle-config.xml")) + isShowViolations = true + isIgnoreFailures = false + } + + tasks.withType().configureEach { + when (name) { + "checkstyleMain" -> dependsOn(tasks.named("processJandexIndex")) + "checkstyleTest" -> dependsOn(tasks.named("processTestJandexIndex")) + else -> {} + } + maxWarnings = 0 // treats warnings as errors + } +} diff --git a/buildSrc/src/main/kotlin/Errorprone.kt b/buildSrc/src/main/kotlin/Errorprone.kt new file mode 100644 index 0000000..13dd3a8 --- /dev/null +++ b/buildSrc/src/main/kotlin/Errorprone.kt @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Properties +import kotlin.collections.HashMap +import net.ltgt.gradle.errorprone.CheckSeverity +import net.ltgt.gradle.errorprone.ErrorPronePlugin +import net.ltgt.gradle.errorprone.errorprone +import org.gradle.api.Project +import org.gradle.api.plugins.JavaPlugin +import org.gradle.api.plugins.JavaPluginExtension +import org.gradle.api.tasks.PathSensitivity +import org.gradle.api.tasks.compile.JavaCompile +import org.gradle.kotlin.dsl.apply +import org.gradle.kotlin.dsl.configure +import org.gradle.kotlin.dsl.dependencies +import org.gradle.kotlin.dsl.withType + +fun Project.configureErrorprone() { + apply() + tasks.withType().configureEach { + options.errorprone.disableWarningsInGeneratedCode.set(true) + + val errorproneRules = rootProject.projectDir.resolve("codestyle/errorprone-rules.properties") + inputs.file(errorproneRules).withPathSensitivity(PathSensitivity.RELATIVE) + + val checksMapProperty = + objects + .mapProperty(String::class.java, CheckSeverity::class.java) + .convention( + provider { + val checksMap = HashMap() + errorproneRules.reader().use { + val rules = Properties() + rules.load(it) + rules.forEach { k, v -> + val key = k as String + val value = v as String + if (key.isNotEmpty() && value.isNotEmpty()) { + checksMap[key.trim()] = CheckSeverity.valueOf(value.trim()) + } + } + } + checksMap + } + ) + + options.errorprone.checks.putAll(checksMapProperty) + options.errorprone.excludedPaths.set(".*/build/generated.*") + } + plugins.withType().configureEach { + configure { + sourceSets.configureEach { + dependencies { + add( + "errorprone", + "com.google.errorprone:error_prone_core:${libsRequiredVersion("errorprone")}" + ) + add( + "errorprone", + "jp.skypencil.errorprone.slf4j:errorprone-slf4j:${libsRequiredVersion("errorproneSlf4j")}" + ) + } + } + } + } +} diff --git a/buildSrc/src/main/kotlin/Ide.kt b/buildSrc/src/main/kotlin/Ide.kt new file mode 100644 index 0000000..6e324ba --- /dev/null +++ 
b/buildSrc/src/main/kotlin/Ide.kt @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.gradle.api.Project +import org.gradle.kotlin.dsl.apply +import org.gradle.kotlin.dsl.configure +import org.gradle.plugins.ide.eclipse.EclipsePlugin +import org.gradle.plugins.ide.eclipse.model.EclipseModel +import org.gradle.plugins.ide.idea.model.IdeaModel +import org.jetbrains.gradle.ext.ActionDelegationConfig +import org.jetbrains.gradle.ext.IdeaExtPlugin +import org.jetbrains.gradle.ext.copyright +import org.jetbrains.gradle.ext.delegateActions +import org.jetbrains.gradle.ext.encodings +import org.jetbrains.gradle.ext.runConfigurations +import org.jetbrains.gradle.ext.settings + +fun Project.configureIde() { + apply() + + if (this == rootProject) { + + val projectName = rootProject.file("ide-name.txt").readText().trim() + val ideName = + "$projectName ${rootProject.version.toString().replace(Regex("^([0-9.]+).*"), "$1")}" + + apply() + configure { + module { + name = ideName + isDownloadSources = true // this is the default BTW + inheritOutputDirs = true + } + + project.settings { + copyright { + useDefault = "Nessie-ASF" + profiles.create("Nessie-ASF") { + // strip trailing LF + val copyrightText = + rootProject.file("codestyle/copyright-header.txt").readLines().joinToString("\n") + notice = copyrightText + } + } + + encodings.encoding = "UTF-8" + encodings.properties.encoding = "UTF-8" + + 
runConfigurations.register("Gradle", org.jetbrains.gradle.ext.Gradle::class.java) { + defaults = true + + jvmArgs = + rootProject.projectDir + .resolve("gradle.properties") + .reader() + .use { + val rules = java.util.Properties() + rules.load(it) + rules + } + .map { e -> "-D${e.key}=${e.value}" } + .joinToString(" ") + } + + delegateActions.testRunner = ActionDelegationConfig.TestRunner.CHOOSE_PER_TEST + } + } + + // There's no proper way to set the name of the IDEA project (when "just importing" or syncing + // the Gradle project) + val ideaDir = projectDir.resolve(".idea") + + if (ideaDir.isDirectory) { + ideaDir.resolve(".name").writeText(ideName) + } + + configure { project { name = ideName } } + } +} diff --git a/buildSrc/src/main/kotlin/Jandex.kt b/buildSrc/src/main/kotlin/Jandex.kt new file mode 100644 index 0000000..f840897 --- /dev/null +++ b/buildSrc/src/main/kotlin/Jandex.kt @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.github.vlsi.jandex.JandexExtension +import com.github.vlsi.jandex.JandexPlugin +import org.gradle.api.Project +import org.gradle.api.tasks.testing.Test +import org.gradle.kotlin.dsl.apply +import org.gradle.kotlin.dsl.configure +import org.gradle.kotlin.dsl.withType + +fun Project.configureJandex() { + apply() + configure { toolVersion.set(libsRequiredVersion("jandex")) } + + tasks.withType().configureEach { dependsOn(tasks.named("processTestJandexIndex")) } +} diff --git a/buildSrc/src/main/kotlin/Java.kt b/buildSrc/src/main/kotlin/Java.kt new file mode 100644 index 0000000..2a88991 --- /dev/null +++ b/buildSrc/src/main/kotlin/Java.kt @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.gradle.api.JavaVersion +import org.gradle.api.Project +import org.gradle.api.file.DuplicatesStrategy +import org.gradle.api.plugins.JavaPlugin +import org.gradle.api.plugins.JavaPluginExtension +import org.gradle.api.tasks.bundling.Jar +import org.gradle.api.tasks.compile.JavaCompile +import org.gradle.api.tasks.javadoc.Javadoc +import org.gradle.external.javadoc.CoreJavadocOptions +import org.gradle.kotlin.dsl.configure +import org.gradle.kotlin.dsl.maven +import org.gradle.kotlin.dsl.repositories +import org.gradle.kotlin.dsl.withType + +fun Project.configureJava() { + tasks.withType().configureEach { + manifest { + attributes["Implementation-Title"] = "iceberg-catalog-migrator" + attributes["Implementation-Version"] = project.version + attributes["Implementation-Vendor"] = "Dremio" + } + duplicatesStrategy = DuplicatesStrategy.WARN + } + + repositories { + mavenCentral() + if (System.getProperty("withMavenLocal").toBoolean()) { + mavenLocal() + } + } + + tasks.withType().configureEach { + options.encoding = "UTF-8" + options.compilerArgs.add("-parameters") + + // Required to enable incremental compilation w/ immutables, see + // https://github.com/immutables/immutables/pull/858 and + // https://github.com/immutables/immutables/issues/804#issuecomment-487366544 + options.compilerArgs.add("-Aimmutables.gradle.incremental") + } + + tasks.withType().configureEach { + val opt = options as CoreJavadocOptions + // don't spam log w/ "warning: no @param/@return" + opt.addStringOption("Xdoclint:-reference", "-quiet") + } + + plugins.withType().configureEach { + configure { + withJavadocJar() + withSourcesJar() + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + } + } +} diff --git a/buildSrc/src/main/kotlin/PublishingHelperExtension.kt b/buildSrc/src/main/kotlin/PublishingHelperExtension.kt new file mode 100644 index 0000000..3f25185 --- /dev/null +++ b/buildSrc/src/main/kotlin/PublishingHelperExtension.kt 
@@ -0,0 +1,22 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.gradle.api.Project + +open class PublishingHelperExtension(project: Project) { + val nessieRepoName = project.objects.property(String::class.java) + val inceptionYear = project.objects.property(String::class.java) +} diff --git a/buildSrc/src/main/kotlin/PublishingHelperPlugin.kt b/buildSrc/src/main/kotlin/PublishingHelperPlugin.kt new file mode 100644 index 0000000..eaefddd --- /dev/null +++ b/buildSrc/src/main/kotlin/PublishingHelperPlugin.kt @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.github.jengelman.gradle.plugins.shadow.ShadowExtension +import groovy.util.Node +import groovy.util.NodeList +import org.gradle.api.GradleException +import org.gradle.api.Plugin +import org.gradle.api.Project +import org.gradle.api.artifacts.Configuration +import org.gradle.api.artifacts.component.ModuleComponentSelector +import org.gradle.api.artifacts.result.DependencyResult +import org.gradle.api.publish.PublishingExtension +import org.gradle.api.publish.maven.MavenPublication +import org.gradle.api.publish.maven.plugins.MavenPublishPlugin +import org.gradle.api.publish.tasks.GenerateModuleMetadata +import org.gradle.api.tasks.PathSensitivity +import org.gradle.kotlin.dsl.configure +import org.gradle.kotlin.dsl.extra +import org.gradle.kotlin.dsl.provideDelegate +import org.gradle.kotlin.dsl.register +import org.gradle.kotlin.dsl.withType +import org.gradle.plugins.signing.SigningExtension +import org.gradle.plugins.signing.SigningPlugin + +/** Applies common configurations to all Nessie projects. */ +@Suppress("unused") +class PublishingHelperPlugin : Plugin { + override fun apply(project: Project): Unit = + project.run { + extensions.create("publishingHelper", PublishingHelperExtension::class.java, this) + + plugins.withType().configureEach { + configure { + publications { + register("maven") { + val shadowExtension = project.extensions.findByType(ShadowExtension::class.java) + if (shadowExtension != null) { + shadowExtension.component(this) + project.afterEvaluate { + // Sonatype requires the javadoc and sources jar to be present, but the + // Shadow extension does not publish those. 
+ artifact(tasks.named("javadocJar")) + artifact(tasks.named("sourcesJar")) + } + } else { + from(components.firstOrNull { c -> c.name == "javaPlatform" || c.name == "java" }) + } + suppressPomMetadataWarningsFor("testApiElements") + suppressPomMetadataWarningsFor("testJavadocElements") + suppressPomMetadataWarningsFor("testRuntimeElements") + suppressPomMetadataWarningsFor("testSourcesElements") + + groupId = "$group" + version = project.version.toString() + + tasks.named("generatePomFileForMavenPublication") { + val e = project.extensions.getByType(PublishingHelperExtension::class.java) + + pom { + name.set( + project.provider { + if (project.extra.has("maven.name")) { + project.extra["maven.name"].toString() + } else { + project.name + } + } + ) + description.set(project.description) + if (project != rootProject) { + withXml { + val projectNode = asNode() + + val parentNode = projectNode.appendNode("parent") + parentNode.appendNode("groupId", parent!!.group) + parentNode.appendNode("artifactId", parent!!.name) + parentNode.appendNode("version", parent!!.version) + + addMissingMandatoryDependencyVersions(projectNode) + } + } else { + val nessieRepoName = e.nessieRepoName.get() + + inputs + .file(rootProject.file("gradle/developers.csv")) + .withPathSensitivity(PathSensitivity.RELATIVE) + inputs + .file(rootProject.file("gradle/contributors.csv")) + .withPathSensitivity(PathSensitivity.RELATIVE) + doFirst { + inceptionYear.set(e.inceptionYear.get()) + url.set("https://github.com/projectnessie/$nessieRepoName") + organization { + name.set("Project Nessie") + url.set("https://projectnessie.org") + } + licenses { + license { + name.set("The Apache License, Version 2.0") + url.set("https://www.apache.org/licenses/LICENSE-2.0.txt") + } + } + mailingLists { + mailingList { + name.set("Project Nessie List") + subscribe.set("projectnessie-subscribe@googlegroups.com") + unsubscribe.set("projectnessie-unsubscribe@googlegroups.com") + 
post.set("projectnessie@googlegroups.com") + archive.set("https://groups.google.com/g/projectnessie") + } + } + scm { + connection.set("scm:git:https://github.com/projectnessie/$nessieRepoName") + developerConnection.set( + "scm:git:https://github.com/projectnessie/$nessieRepoName" + ) + url.set("https://github.com/projectnessie/$nessieRepoName/tree/main") + tag.set("main") + } + issueManagement { + system.set("Github") + url.set("https://github.com/projectnessie/$nessieRepoName/issues") + } + developers { + file(rootProject.file("gradle/developers.csv")) + .readLines() + .map { line -> line.trim() } + .filter { line -> line.isNotEmpty() && !line.startsWith("#") } + .forEach { line -> + val args = line.split(",") + if (args.size < 3) { + throw GradleException( + "gradle/developers.csv contains invalid line '${line}'" + ) + } + developer { + id.set(args[0]) + name.set(args[1]) + url.set(args[2]) + } + } + } + contributors { + file(rootProject.file("gradle/contributors.csv")) + .readLines() + .map { line -> line.trim() } + .filter { line -> line.isNotEmpty() && !line.startsWith("#") } + .forEach { line -> + val args = line.split(",") + if (args.size > 2) { + throw GradleException( + "gradle/contributors.csv contains invalid line '${line}'" + ) + } + contributor { + name.set(args[0]) + url.set(args[1]) + } + } + } + } + } + } + } + } + } + } + } + + // Gradle complains when a Gradle module metadata ("pom on steroids") is generated with an + // enforcedPlatform() dependency - but Quarkus requires enforcedPlatform(), so we have to + // allow it. + tasks.withType().configureEach { + suppressedValidationErrors.add("enforced-platform") + } + + if (project.hasProperty("release")) { + plugins.withType().configureEach { + configure { + val signingKey: String? by project + val signingPassword: String? 
by project + useInMemoryPgpKeys(signingKey, signingPassword) + val publishing = project.extensions.getByType(PublishingExtension::class.java) + afterEvaluate { sign(publishing.publications.getByName("maven")) } + } + } + } + } + + /** + * Scans the generated pom.xml for `` in `` that do not have a + * `` and adds one, if possible. Maven kinda requires `` tags there, even if the + * `` without a `` is a bom and that bom's version is available transitively. + */ + private fun Project.addMissingMandatoryDependencyVersions(projectNode: Node) { + xmlNode(xmlNode(projectNode, "dependencyManagement"), "dependencies")?.children()?.forEach { + val dependency = it as Node + if (xmlNode(dependency, "version") == null) { + val depGroup = xmlNode(dependency, "groupId")!!.text() + val depName = xmlNode(dependency, "artifactId")!!.text() + + var depResult = + findDependency(configurations.findByName("runtimeClasspath"), depGroup, depName) + if (depResult == null) { + depResult = + findDependency(configurations.findByName("testRuntimeClasspath"), depGroup, depName) + } + + if (depResult != null) { + val req = depResult.requested as ModuleComponentSelector + dependency.appendNode("version", req.version) + } + } + } + } + + private fun findDependency( + config: Configuration?, + depGroup: String, + depName: String + ): DependencyResult? { + if (config != null) { + val depResult = + config.incoming.resolutionResult.allDependencies.find { depResult -> + val req = depResult.requested + if (req is ModuleComponentSelector) req.group == depGroup && req.module == depName + else false + } + return depResult + } + return null + } + + private fun xmlNode(node: Node?, child: String): Node? 
{ + val found = node?.get(child) + if (found is NodeList) { + if (found.isNotEmpty()) { + return found[0] as Node + } + } + return null + } +} diff --git a/buildSrc/src/main/kotlin/ReleaseSupportPlugin.kt b/buildSrc/src/main/kotlin/ReleaseSupportPlugin.kt new file mode 100644 index 0000000..a3b09a5 --- /dev/null +++ b/buildSrc/src/main/kotlin/ReleaseSupportPlugin.kt @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.nio.file.Path +import org.gradle.api.DefaultTask +import org.gradle.api.GradleException +import org.gradle.api.Plugin +import org.gradle.api.Project +import org.gradle.api.tasks.Internal +import org.gradle.api.tasks.TaskAction +import org.gradle.api.tasks.options.Option +import org.gradle.kotlin.dsl.register +import org.gradle.work.DisableCachingByDefault + +/** Registers some tasks to manage the `version.txt` file. */ +class ReleaseSupportPlugin : Plugin { + override fun apply(project: Project) { + + project.tasks.register("showVersion") { + group = "Release Support" + description = "Show current version" + + doFirst { + logger.lifecycle( + "Current version is ${VersionTuple.fromFile(versionTxtFile(this.project))}." + ) + } + } + + project.tasks.register("bumpVersion") { + group = "Release Support" + description = + "Bumps the version to the next patch/minor/major version as a snapshot, see ' ./gradlew help --task :bumpVersion '." 
+ } + } + + companion object { + private fun versionTxtFile(project: Project): Path = + project.rootDir.toPath().resolve("./version.txt") + } + + @DisableCachingByDefault(because = "Version bumps cannot be cached") + open class BumpVersionTask : DefaultTask() { + @Option( + option = "bumpToRelease", + description = "Define whether to bump to a release version, defaults to snapshot release." + ) + @Internal + var bumpToRelease: Boolean = false + + @Option( + option = "bumpType", + description = "Defines which part of the version should be bumped, defaults to 'none'." + ) + @Internal + var bumpType: BumpType = BumpType.none + + @TaskAction + fun bumpVersion() { + val versionFile = versionTxtFile(project) + val currentVersion = VersionTuple.fromFile(versionFile) + + logger.lifecycle("Current version is $currentVersion.") + + val nextVersion = + when (bumpType) { + BumpType.none -> currentVersion + BumpType.patch -> currentVersion.bumpPatch() + BumpType.minor -> currentVersion.bumpMinor() + BumpType.major -> currentVersion.bumpMajor() + } + + val finalVersion = if (bumpToRelease) nextVersion.asRelease() else nextVersion.asSnapshot() + + if (finalVersion < currentVersion) { + throw GradleException( + "New version $finalVersion would be lower than current version $currentVersion" + ) + } + + if (finalVersion != currentVersion) { + finalVersion.writeToFile(versionFile) + logger.lifecycle("New version is $finalVersion.") + } else { + throw GradleException("Bump version tasks results in no change.") + } + } + } + + enum class BumpType { + // lower-case, used as command line option values + none, + patch, + minor, + major + } +} diff --git a/buildSrc/src/main/kotlin/Spotless.kt b/buildSrc/src/main/kotlin/Spotless.kt new file mode 100644 index 0000000..9d6cf7d --- /dev/null +++ b/buildSrc/src/main/kotlin/Spotless.kt @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.diffplug.gradle.spotless.SpotlessExtension +import com.diffplug.gradle.spotless.SpotlessPlugin +import org.gradle.api.Project +import org.gradle.kotlin.dsl.apply +import org.gradle.kotlin.dsl.configure +import org.gradle.kotlin.dsl.withType + +fun Project.configureSpotless() { + + apply() + if (!java.lang.Boolean.getBoolean("idea.sync.active")) { + plugins.withType().configureEach { + configure { + format("xml") { + target("src/**/*.xml", "src/**/*.xsd") + eclipseWtp(com.diffplug.spotless.extra.wtp.EclipseWtpFormatterStep.XML) + .configFile(rootProject.projectDir.resolve("codestyle/org.eclipse.wst.xml.core.prefs")) + } + kotlinGradle { + ktfmt().googleStyle() + licenseHeaderFile(rootProject.file("codestyle/copyright-header-java.txt"), "$") + if (project == rootProject) { + target("*.gradle.kts", "buildSrc/*.gradle.kts") + } + } + if (project == rootProject) { + kotlin { + ktfmt().googleStyle() + licenseHeaderFile(rootProject.file("codestyle/copyright-header-java.txt"), "$") + target("buildSrc/src/**/kotlin/**") + targetExclude("buildSrc/build/**") + } + } + + val dirsInSrc = projectDir.resolve("src").listFiles() + val sourceLangs = + if (dirsInSrc != null) + dirsInSrc + .filter { f -> f.isDirectory } + .map { f -> f.listFiles() } + .filterNotNull() + .flatMap { l -> l.filter { f -> f.isDirectory } } + .map { f -> f.name } + .distinct() + else listOf() + + if (sourceLangs.contains("java")) { + java { + googleJavaFormat(libsRequiredVersion("googleJavaFormat")) + 
licenseHeaderFile(rootProject.file("codestyle/copyright-header-java.txt")) + target("src/**/java/**") + targetExclude("build/**") + } + } + if (sourceLangs.contains("kotlin")) { + kotlin { + ktfmt().googleStyle() + licenseHeaderFile(rootProject.file("codestyle/copyright-header-java.txt"), "$") + target("src/**/kotlin/**") + targetExclude("build/**") + } + } + } + } + } +} diff --git a/buildSrc/src/main/kotlin/Testing.kt b/buildSrc/src/main/kotlin/Testing.kt new file mode 100644 index 0000000..379a1b7 --- /dev/null +++ b/buildSrc/src/main/kotlin/Testing.kt @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.gradle.api.Project +import org.gradle.api.services.BuildService +import org.gradle.api.services.BuildServiceParameters +import org.gradle.api.tasks.testing.Test +import org.gradle.kotlin.dsl.named +import org.gradle.kotlin.dsl.provideDelegate +import org.gradle.kotlin.dsl.register +import org.gradle.kotlin.dsl.withType + +fun Project.configureTestTasks() { + tasks.withType().configureEach { + useJUnitPlatform {} + val testJvmArgs: String? by project + val testHeapSize: String? 
by project + if (testJvmArgs != null) { + jvmArgs((testJvmArgs as String).split(" ")) + } + + systemProperty("file.encoding", "UTF-8") + systemProperty("user.language", "en") + systemProperty("user.country", "US") + systemProperty("user.variant", "") + systemProperty("test.log.level", testLogLevel()) + filter { + isFailOnNoMatchingTests = false + when (name) { + "test" -> { + includeTestsMatching("*Test") + includeTestsMatching("Test*") + excludeTestsMatching("Abstract*") + excludeTestsMatching("IT*") + } + "intTest" -> includeTestsMatching("IT*") + } + } + if (name != "test") { + mustRunAfter(tasks.named("test")) + } + + if (testHeapSize != null) { + setMinHeapSize(testHeapSize) + setMaxHeapSize(testHeapSize) + } + } + val intTest = + tasks.register("intTest") { + group = "verification" + description = "Runs the integration tests." + } + tasks.named("check") { dependsOn(intTest) } +} diff --git a/buildSrc/src/main/kotlin/Utilities.kt b/buildSrc/src/main/kotlin/Utilities.kt new file mode 100644 index 0000000..c1c35b4 --- /dev/null +++ b/buildSrc/src/main/kotlin/Utilities.kt @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar +import com.github.vlsi.jandex.JandexProcessResources +import java.io.File +import java.io.FileInputStream +import java.lang.IllegalStateException +import java.util.Properties +import org.gradle.api.JavaVersion +import org.gradle.api.Project +import org.gradle.api.artifacts.Dependency +import org.gradle.api.artifacts.ExternalModuleDependency +import org.gradle.api.artifacts.ModuleDependency +import org.gradle.api.artifacts.VersionCatalogsExtension +import org.gradle.api.plugins.JavaPluginExtension +import org.gradle.api.tasks.SourceSetContainer +import org.gradle.api.tasks.bundling.Jar +import org.gradle.api.tasks.testing.Test +import org.gradle.jvm.toolchain.JavaLanguageVersion +import org.gradle.jvm.toolchain.JavaToolchainService +import org.gradle.kotlin.dsl.DependencyHandlerScope +import org.gradle.kotlin.dsl.exclude +import org.gradle.kotlin.dsl.extra +import org.gradle.kotlin.dsl.findByType +import org.gradle.kotlin.dsl.getByType +import org.gradle.kotlin.dsl.module +import org.gradle.kotlin.dsl.named +import org.gradle.kotlin.dsl.project +import org.gradle.kotlin.dsl.provideDelegate +import org.gradle.kotlin.dsl.withType + +fun Project.libsRequiredVersion(name: String): String { + val libVer = + extensions.getByType().named("libs").findVersion(name).get() + val reqVer = libVer.requiredVersion + check(reqVer.isNotEmpty()) { + "libs-version for '$name' is empty, but must not be empty, version. 
strict: ${libVer.strictVersion}, required: ${libVer.requiredVersion}, preferred: ${libVer.preferredVersion}" + } + return reqVer +} + +fun Project.testLogLevel() = System.getProperty("test.log.level", "WARN") + +fun Project.applyShadowJar() { + plugins.apply(ShadowPlugin::class.java) + + plugins.withType().configureEach { + val shadowJar = + tasks.named("shadowJar") { + outputs.cacheIf { false } // do not cache uber/shaded jars + archiveClassifier.set("") + mergeServiceFiles() + } + + tasks.named("jar") { + dependsOn(shadowJar) + archiveClassifier.set("raw") + } + } +} diff --git a/buildSrc/src/main/kotlin/VersionTuple.kt b/buildSrc/src/main/kotlin/VersionTuple.kt new file mode 100644 index 0000000..a790247 --- /dev/null +++ b/buildSrc/src/main/kotlin/VersionTuple.kt @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.nio.file.Files +import java.nio.file.Path +import java.util.regex.Pattern + +/** Represents a version tuple with mandatory major, minor and patch numbers and snapshot-flag. 
*/ +data class VersionTuple(val major: Int, val minor: Int, val patch: Int, val snapshot: Boolean) : + Comparable { + + companion object Factory { + val pattern = + Pattern.compile( + "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?\$" + ) + + fun fromFile(file: Path): VersionTuple = create(Files.readString(file).trim()) + + @JvmStatic + fun create(string: String): VersionTuple { + val matcher = pattern.matcher(string) + if (!matcher.matches()) { + throw IllegalArgumentException("'$string' is not a valid version string") + } + + val major = matcher.group(1) + val minor = matcher.group(2) + val patch = matcher.group(3) + val prerelease = matcher.group(4) + val buildmetadata = matcher.group(5) + + if (buildmetadata != null) { + throw IllegalArgumentException("Build metadata not supported") + } + + val snapshot = "SNAPSHOT" == prerelease + + if (prerelease != null && !snapshot) { + throw IllegalArgumentException( + "Only SNAPSHOT prerelease supported, but $prerelease != SNAPSHOT" + ) + } + + return VersionTuple(major.toInt(), minor.toInt(), patch.toInt(), snapshot) + } + } + + fun bumpMajor(): VersionTuple = VersionTuple(major + 1, 0, 0, false) + + fun bumpMinor(): VersionTuple = VersionTuple(major, minor + 1, 0, false) + + fun bumpPatch(): VersionTuple = VersionTuple(major, minor, patch + 1, false) + + fun asSnapshot(): VersionTuple = VersionTuple(major, minor, patch, true) + + fun asRelease(): VersionTuple = VersionTuple(major, minor, patch, false) + + fun writeToFile(file: Path) = Files.writeString(file, toString()) + + override fun compareTo(other: VersionTuple): Int { + var cmp: Int + + cmp = major.compareTo(other.major) + if (cmp != 0) { + return cmp + } + + cmp = minor.compareTo(other.minor) + if (cmp != 0) { + return cmp + } + + cmp = patch.compareTo(other.patch) + if (cmp != 0) { + return cmp + } + + if (snapshot == 
other.snapshot) { + return 0 + } + return if (snapshot) -1 else 1 + } + + override fun toString(): String { + return "$major.$minor.$patch${if (snapshot) "-SNAPSHOT" else ""}" + } +} diff --git a/buildSrc/src/main/kotlin/build-conventions.gradle.kts b/buildSrc/src/main/kotlin/build-conventions.gradle.kts new file mode 100644 index 0000000..32e0d22 --- /dev/null +++ b/buildSrc/src/main/kotlin/build-conventions.gradle.kts @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +plugins { + id("org.projectnessie.buildsupport.jacoco") + `eclipse` +} + +val hasSrcMain = projectDir.resolve("src/main").exists() +val hasSrcTest = projectDir.resolve("src/test").exists() + +apply() + +configureIde() + +configureSpotless() + +configureJandex() + +configureJava() + +if (hasSrcMain || hasSrcTest) { + configureCheckstyle() + + configureErrorprone() + + if (hasSrcTest) { + configureTestTasks() + } +} diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index 13fad86..60bee5b 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -14,14 +14,13 @@ * limitations under the License. 
*/ -import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar plugins { `java-library` `maven-publish` alias(libs.plugins.nessie.run) - BuildSupport + `build-conventions` } java.sourceCompatibility = JavaVersion.VERSION_1_8 @@ -37,7 +36,7 @@ dependencies { implementation(libs.iceberg.dell) implementation(libs.hadoop.aws) { exclude("com.amazonaws", "aws-java-sdk-bundle") } implementation(libs.hadoop.common) - // AWS depdencies based on https://iceberg.apache.org/docs/latest/aws/#enabling-aws-integration + // AWS dependencies based on https://iceberg.apache.org/docs/latest/aws/#enabling-aws-integration implementation(libs.aws.sdk.glue) implementation(libs.aws.sdk.s3) implementation(libs.aws.sdk.dynamo) @@ -86,7 +85,9 @@ dependencies { } testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") - nessieQuarkusServer("org.projectnessie:nessie-quarkus:${libs.versions.nessie.get()}:runner") + nessieQuarkusServer( + "org.projectnessie.nessie:nessie-quarkus:${libs.versions.nessie.get()}:runner" + ) } nessieQuarkusApp { includeTask(tasks.named("intTest")) } @@ -131,22 +132,3 @@ shadowJar { manifest { attributes["Main-Class"] = mainClassName } finalizedBy(unixExecutable) } - -fun Project.applyShadowJar() { - plugins.apply(ShadowPlugin::class.java) - - plugins.withType().configureEach { - val shadowJar = - tasks.named("shadowJar") { - isZip64 = true // as the package has more than 65535 files - outputs.cacheIf { false } // do not cache uber/shaded jars - archiveClassifier.set("") - mergeServiceFiles() - } - - tasks.named("jar") { - dependsOn(shadowJar) - archiveClassifier.set("raw") - } - } -} diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index 7cea2c9..03f3a47 100644 --- 
a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -175,10 +175,10 @@ private boolean canProceed(Catalog sourceCatalog) { if (deleteSourceCatalogTables) { if (sourceCatalog instanceof HadoopCatalog) { consoleLog.warn( - String.format( - "Source catalog type is HADOOP and it doesn't support dropping tables just from " - + "catalog. %nAvoid operating the migrated tables from the source catalog after migration. " - + "Use the tables from target catalog.")); + "Source catalog type is HADOOP and it doesn't support dropping tables just from " + + "catalog. {}Avoid operating the migrated tables from the source catalog after migration. " + + "Use the tables from target catalog.", + System.lineSeparator()); } return PromptUtil.proceedForMigration(); } else { @@ -212,13 +212,13 @@ private void printSummary( } if (!result.failedToDeleteTableIdentifiers().isEmpty()) { consoleLog.info( - String.format( - "Failed to delete %d tables from %s catalog. " - + "Please check the `catalog_migration.log` file for the reason. " - + "%nFailed to delete identifiers are written into `%s`.", - result.failedToDeleteTableIdentifiers().size(), - sourceCatalogName, - FAILED_TO_DELETE_AT_SOURCE_FILE)); + "Failed to delete {} tables from {} catalog. " + + "Please check the `catalog_migration.log` file for the reason. " + + "{}Failed to delete identifiers are written into `{}`.", + result.failedToDeleteTableIdentifiers().size(), + sourceCatalogName, + System.lineSeparator(), + FAILED_TO_DELETE_AT_SOURCE_FILE); } } @@ -226,19 +226,25 @@ private void printDetails(CatalogMigrationResult result) { consoleLog.info("Details: "); if (!result.registeredTableIdentifiers().isEmpty()) { consoleLog.info( - "Successfully {} these tables:", deleteSourceCatalogTables ? 
"migrated" : "registered"); - consoleLog.info("{}", result.registeredTableIdentifiers()); + "Successfully {} these tables:{}{}", + deleteSourceCatalogTables ? "migrated" : "registered", + System.lineSeparator(), + result.registeredTableIdentifiers()); } if (!result.failedToRegisterTableIdentifiers().isEmpty()) { consoleLog.info( - "Failed to {} these tables:", deleteSourceCatalogTables ? "migrate" : "register"); - consoleLog.info("{}", result.failedToRegisterTableIdentifiers()); + "Failed to {} these tables:{}{}", + deleteSourceCatalogTables ? "migrate" : "register", + System.lineSeparator(), + result.failedToRegisterTableIdentifiers()); } if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - consoleLog.warn("Failed to delete these tables from source catalog:"); - consoleLog.info("{}", result.failedToDeleteTableIdentifiers()); + consoleLog.warn( + "Failed to delete these tables from source catalog:{}{}", + System.lineSeparator(), + result.failedToDeleteTableIdentifiers()); } } @@ -259,9 +265,10 @@ private void printDryRunResults(List result) { consoleLog.info("Details: "); consoleLog.info( - "Identified these tables for {} by dry-run:", - deleteSourceCatalogTables ? "migration" : "registration"); - consoleLog.info("{}", result); + "Identified these tables for {} by dry-run:{}{}", + deleteSourceCatalogTables ? 
"migration" : "registration", + System.lineSeparator(), + result); } private static void writeToFile(Path filePath, List identifiers) { diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java index 2e5c23b..949cf68 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java @@ -24,44 +24,52 @@ public final class PromptUtil { private PromptUtil() {} private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); - - private static final String WARNING_FOR_REGISTRATION = - String.format( - "%n" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. " - + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog%n" - + "%n" - + "\tb) After the registration, successfully registered tables will be present in both source and target " - + "catalog. " - + "%n\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " - + "loss of data, and table corruption. " - + "%n\tUse `migrate` command to automatically delete the table from source catalog after " - + "migration."); - - private static final String WARNING_FOR_MIGRATION = - String.format( - "%n" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "%n\tcan lead to a data loss as the in-progress commit will not be considered for migration. 
" - + "%n\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog%n" - + "%n" - + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " - + "%n\tand can only be accessed from the target catalog."); + private static final String newLine = System.lineSeparator(); static boolean proceedForRegistration() { - return proceed(WARNING_FOR_REGISTRATION); + consoleLog.warn( + "{}" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "{}\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog{}" + + "{}" + + "\tb) After the registration, successfully registered tables will be present in both source and target " + + "catalog. " + + "{}\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " + + "loss of data, and table corruption. " + + "{}\tUse `migrate` command to automatically delete the table from source catalog after " + + "migration.", + newLine, + newLine, + newLine, + newLine, + newLine, + newLine, + newLine); + return proceed(); } static boolean proceedForMigration() { - return proceed(WARNING_FOR_MIGRATION); + consoleLog.warn( + "{}" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "{}\tcan lead to a data loss as the in-progress commit will not be considered for migration. 
" + + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog{}" + + "{}" + + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + + "{}\tand can only be accessed from the target catalog.", + newLine, + newLine, + newLine, + newLine, + newLine, + newLine); + return proceed(); } - private static boolean proceed(String warning) { - consoleLog.warn(warning); - + private static boolean proceed() { Console console = System.console(); while (true) { consoleLog.info( diff --git a/codestyle/errorprone-rules.properties b/codestyle/errorprone-rules.properties new file mode 100644 index 0000000..665676d --- /dev/null +++ b/codestyle/errorprone-rules.properties @@ -0,0 +1,1652 @@ +# +# Copyright (C) 2022 Dremio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Contains bug patterns up to Error Prone 2.15.0 + +#################################################################################################### +# On by default : ERROR +# See https://errorprone.info/bugpatterns +#################################################################################################### + +#AlwaysThrows=ERROR +# Detects calls that will fail at runtime + +#AndroidInjectionBeforeSuper=ERROR +# AndroidInjection.inject() should always be invoked before calling super.lifecycleMethod() + +#ArrayEquals=ERROR +# Reference equality used to compare arrays + +#ArrayFillIncompatibleType=ERROR +# Arrays.fill(Object[], Object) called with incompatible types. + +#ArrayHashCode=ERROR +# hashcode method on array does not hash array contents + +#ArrayToString=ERROR +# Calling toString on an array does not provide useful information + +#ArraysAsListPrimitiveArray=ERROR +# Arrays.asList does not autobox primitive arrays, as one might expect. + +#AsyncCallableReturnsNull=ERROR +# AsyncCallable should not return a null Future, only a Future whose result is null. + +#AsyncFunctionReturnsNull=ERROR +# AsyncFunction should not return a null Future, only a Future whose result is null. + +#AutoValueBuilderDefaultsInConstructor=ERROR +# Defaults for AutoValue Builders should be set in the factory method returning Builder instances, not the constructor + +#AutoValueConstructorOrderChecker=ERROR +# Arguments to AutoValue constructor are in the wrong order + +#BadAnnotationImplementation=ERROR +# Classes that implement Annotation must override equals and hashCode. Consider using AutoAnnotation instead of implementing Annotation by hand. + +#BadShiftAmount=ERROR +# Shift by an amount that is out of range + +#BanJNDI=ERROR +# Using JNDI may deserialize user input via the `Serializable` API which is extremely dangerous + +#BoxedPrimitiveEquality=ERROR +# Comparison using reference equality instead of value equality. 
Reference equality of boxed primitive types is usually not useful, as they are value objects, and it is bug-prone, as instances are cached for some values but not others. + +#BundleDeserializationCast=ERROR +# Object serialized in Bundle may have been flattened to base type. + +#ChainingConstructorIgnoresParameter=ERROR +# The called constructor accepts a parameter with the same name and type as one of its caller's parameters, but its caller doesn't pass that parameter to it. It's likely that it was intended to. + +#CheckNotNullMultipleTimes=ERROR +# A variable was checkNotNulled multiple times. Did you mean to check something else? + +#CheckReturnValue=ERROR +# Ignored return value of method that is annotated with @CheckReturnValue + +#CollectionIncompatibleType=ERROR +# Incompatible type as argument to Object-accepting Java collections method + +#CollectionToArraySafeParameter=ERROR +# The type of the array parameter of Collection.toArray needs to be compatible with the array type + +#ComparableType=ERROR +# Implementing 'Comparable<T>' where T is not the same as the implementing class is incorrect, since it violates the symmetry contract of compareTo. + +#ComparingThisWithNull=ERROR +# this == null is always false, this != null is always true + +#ComparisonOutOfRange=ERROR +# Comparison to value that is out of range for the compared type + +#CompatibleWithAnnotationMisuse=ERROR +# @CompatibleWith's value is not a type argument. + +#CompileTimeConstant=ERROR +# Non-compile-time constant expression passed to parameter with @CompileTimeConstant type annotation. + +#ComputeIfAbsentAmbiguousReference=ERROR +# computeIfAbsent passes the map key to the provided class's constructor + +#ConditionalExpressionNumericPromotion=ERROR +# A conditional expression with numeric operands of differing types will perform binary numeric promotion of the operands; when these operands are of reference types, the expression's result may not be of the expected type.
+ +#ConstantOverflow=ERROR +# Compile-time constant expression overflows + +#DaggerProvidesNull=ERROR +# Dagger @Provides methods may not return null unless annotated with @Nullable + +#DangerousLiteralNull=ERROR +# This method is null-hostile: passing a null literal to it is always wrong + +#DeadException=ERROR +# Exception created but not thrown + +#DeadThread=ERROR +# Thread created but not started + +#DiscardedPostfixExpression=ERROR +# The result of this unary operation on a lambda parameter is discarded + +#DoNotCall=ERROR +# This method should not be called. + +#DoNotMock=ERROR +# Identifies undesirable mocks. + +#DoubleBraceInitialization=ERROR +# Prefer collection factory methods or builders to the double-brace initialization pattern. + +#DuplicateMapKeys=ERROR +# Map#ofEntries will throw an IllegalArgumentException if there are any duplicate keys + +#DurationFrom=ERROR +# Duration.from(Duration) returns itself; from(Period) throws a runtime exception. + +#DurationGetTemporalUnit=ERROR +# Duration.get() only works with SECONDS or NANOS. + +#DurationTemporalUnit=ERROR +# Duration APIs only work for DAYS or exact durations. + +#DurationToLongTimeUnit=ERROR +# Unit mismatch when decomposing a Duration or Instant to call a <long, TimeUnit> API + +#EmptyTopLevelDeclaration=ERROR +# Empty top-level type declarations should be omitted + +#EqualsHashCode=ERROR +# Classes that override equals should also override hashCode. + +#EqualsNaN=ERROR +# == NaN always returns false; use the isNaN methods instead + +#EqualsNull=ERROR +# The contract of Object.equals() states that for any non-null reference value x, x.equals(null) should return false. If x is null, a NullPointerException is thrown. Consider replacing equals() with the == operator. + +#EqualsReference=ERROR +# == must be used in equals method to check equality to itself or an infinite loop will occur. + +#EqualsWrongThing=ERROR +# Comparing different pairs of fields/getters in an equals implementation is probably a mistake.
+ +#FloggerFormatString=ERROR +# Invalid printf-style format string + +#FloggerLogString=ERROR +# Arguments to log(String) must be compile-time constants or parameters annotated with @CompileTimeConstant. If possible, use Flogger's formatting log methods instead. + +#FloggerLogVarargs=ERROR +# logVarargs should be used to pass through format strings and arguments. + +#FloggerSplitLogStatement=ERROR +# Splitting log statements and using Api instances directly breaks logging. + +#ForOverride=ERROR +# Method annotated @ForOverride must be protected or package-private and only invoked from declaring class, or from an override of the method + +#FormatString=ERROR +# Invalid printf-style format string + +#FormatStringAnnotation=ERROR +# Invalid format string passed to formatting method. + +#FromTemporalAccessor=ERROR +# Certain combinations of javaTimeType.from(TemporalAccessor) will always throw a DateTimeException or return the parameter directly. + +#FunctionalInterfaceMethodChanged=ERROR +# Casting a lambda to this @FunctionalInterface can cause a behavior change from casting to a functional superinterface, which is surprising to users. Prefer decorator methods to this surprising behavior. + +#FuturesGetCheckedIllegalExceptionType=ERROR +# Futures.getChecked requires a checked exception type with a standard constructor. 
+ +#FuzzyEqualsShouldNotBeUsedInEqualsMethod=ERROR +# DoubleMath.fuzzyEquals should never be used in an Object.equals() method + +#GetClassOnAnnotation=ERROR +# Calling getClass() on an annotation may return a proxy class + +#GetClassOnClass=ERROR +# Calling getClass() on an object of type Class returns the Class object for java.lang.Class; you probably meant to operate on the object directly + +#GuardedBy=ERROR +# Checks for unguarded accesses to fields and methods with @GuardedBy annotations + +#GuiceAssistedInjectScoping=ERROR +# Scope annotation on implementation class of AssistedInject factory is not allowed + +#GuiceAssistedParameters=ERROR +# A constructor cannot have two @Assisted parameters of the same type unless they are disambiguated with named @Assisted annotations. + +#GuiceInjectOnFinalField=ERROR +# Although Guice allows injecting final fields, doing so is disallowed because the injected value may not be visible to other threads. + +#HashtableContains=ERROR +# contains() is a legacy method that is equivalent to containsValue() + +#IdentityBinaryExpression=ERROR +# A binary expression where both operands are the same is usually incorrect. + +#IdentityHashMapBoxing=ERROR +# Using IdentityHashMap with a boxed type as the key is risky since boxing may produce distinct instances + +#IgnoredPureGetter=ERROR +# Getters on AutoValues, AutoBuilders, and Protobuf Messages are side-effect free, so there is no point in calling them if the return value is ignored. While there are no side effects from the getter, the receiver may have side effects. + +#Immutable=ERROR +# Type declaration annotated with @Immutable is not immutable + +#Incomparable=ERROR +# Types contained in sorted collections must implement Comparable. + +#IncompatibleArgumentType=ERROR +# Passing argument to a generic method with an incompatible type. 
+ +#IncompatibleModifiers=ERROR +# This annotation has incompatible modifiers as specified by its @IncompatibleModifiers annotation + +#IndexOfChar=ERROR +# The first argument to indexOf is a Unicode code point, and the second is the index to start the search from + +#InexactVarargsConditional=ERROR +# Conditional expression in varargs call contains array and non-array arguments + +#InfiniteRecursion=ERROR +# This method always recurses, and will cause a StackOverflowError + +#InjectMoreThanOneScopeAnnotationOnClass=ERROR +# A class can be annotated with at most one scope annotation. + +#InjectOnMemberAndConstructor=ERROR +# Members shouldn't be annotated with @Inject if constructor is already annotated @Inject + +#InlineMeValidator=ERROR +# Ensures that the @InlineMe annotation is used correctly. + +#InstantTemporalUnit=ERROR +# Instant APIs only work for NANOS, MICROS, MILLIS, SECONDS, MINUTES, HOURS, HALF_DAYS and DAYS. + +#InvalidJavaTimeConstant=ERROR +# This checker errors on calls to java.time methods using values that are guaranteed to throw a DateTimeException. + +#InvalidPatternSyntax=ERROR +# Invalid syntax used for a regular expression + +#InvalidTimeZoneID=ERROR +# Invalid time zone identifier. TimeZone.getTimeZone(String) will silently return GMT instead of the time zone you intended. + +#InvalidZoneId=ERROR +# Invalid zone identifier. ZoneId.of(String) will throw exception at runtime. + +#IsInstanceIncompatibleType=ERROR +# This use of isInstance will always evaluate to false. + +#IsInstanceOfClass=ERROR +# The argument to Class#isInstance(Object) should not be a Class + +#IsLoggableTagLength=ERROR +# Log tag too long, cannot exceed 23 characters. + +#JUnit3TestNotRun=ERROR +# Test method will not be run; please correct method signature (Should be public, non-static, and method name should begin with "test"). 
+ +#JUnit4ClassAnnotationNonStatic=ERROR +# This method should be static + +#JUnit4SetUpNotRun=ERROR +# setUp() method will not be run; please add JUnit's @Before annotation + +#JUnit4TearDownNotRun=ERROR +# tearDown() method will not be run; please add JUnit's @After annotation + +#JUnit4TestNotRun=ERROR +# This looks like a test method but is not run; please add @Test and @Ignore, or, if this is a helper method, reduce its visibility. + +#JUnit4TestsNotRunWithinEnclosed=ERROR +# This test is annotated @Test, but given it's within a class using the Enclosed runner, will not run. + +#JUnitAssertSameCheck=ERROR +# An object is tested for reference equality to itself using JUnit library. + +#JUnitParameterMethodNotFound=ERROR +# The method for providing parameters was not found. + +#JavaxInjectOnAbstractMethod=ERROR +# Abstract and default methods are not injectable with javax.inject.Inject + +#JodaToSelf=ERROR +# Use of Joda-Time's DateTime.toDateTime(), Duration.toDuration(), Instant.toInstant(), Interval.toInterval(), and Period.toPeriod() is not allowed. + +#LiteByteStringUtf8=ERROR +# This pattern will silently corrupt certain byte sequences from the serialized protocol message. Use ByteString or byte[] directly + +#LocalDateTemporalAmount=ERROR +# LocalDate.plus() and minus() do not work with Durations. LocalDate represents civil time (years/months/days), so java.time.Period is the appropriate thing to add or subtract instead. + +#LockOnBoxedPrimitive=ERROR +# It is dangerous to use a boxed primitive as a lock as it can unintentionally lead to sharing a lock with another piece of code. + +#LoopConditionChecker=ERROR +# Loop condition is never modified in loop body.
+ +#LossyPrimitiveCompare=ERROR +# Using an unnecessarily-wide comparison method can lead to lossy comparison + +#MathRoundIntLong=ERROR +# Math.round(Integer) results in truncation + +#MislabeledAndroidString=ERROR +# Certain resources in `android.R.string` have names that do not match their content + +#MisplacedScopeAnnotations=ERROR +# Scope annotations used as qualifier annotations don't have any effect. Move the scope annotation to the binding location or delete it. + +#MissingSuperCall=ERROR +# Overriding method is missing a call to overridden super method + +#MissingTestCall=ERROR +# A terminating method call is required for a test helper to have any effect. + +#MisusedDayOfYear=ERROR +# Use of 'DD' (day of year) in a date pattern with 'MM' (month of year) is not likely to be intentional, as it would lead to dates like 'March 73rd'. + +#MisusedWeekYear=ERROR +# Use of "YYYY" (week year) in a date pattern without "ww" (week in year). You probably meant to use "yyyy" (year) instead. + +#MixedDescriptors=ERROR +# The field number passed into #getFieldByNumber belongs to a different proto to the Descriptor. + +#MockitoUsage=ERROR +# Missing method call for verify(mock) here + +#ModifyingCollectionWithItself=ERROR +# Using a collection function with itself as the argument. + +#MoreThanOneInjectableConstructor=ERROR +# This class has more than one @Inject-annotated constructor. Please remove the @Inject annotation from all but one of them. + +MustBeClosedChecker=ERROR +# This method returns a resource which must be managed carefully, not just left for garbage collection. If it is a constant that will persist for the lifetime of your program, move it to a private static final field. Otherwise, you should use it in a try-with-resources. 
+ +#NCopiesOfChar=ERROR +# The first argument to nCopies is the number of copies, and the second is the item to copy + +#NoCanIgnoreReturnValueOnClasses=ERROR +# @CanIgnoreReturnValue should not be applied to classes as it almost always overmatches (as it +# applies to constructors and all methods), and the CIRVness isn't conferred to its subclasses. + +#NonCanonicalStaticImport=ERROR +# Static import of type uses non-canonical name + +#NonFinalCompileTimeConstant=ERROR +# @CompileTimeConstant parameters should be final or effectively final + +#NonRuntimeAnnotation=ERROR +# Calling getAnnotation on an annotation that is not retained at runtime. + +#NullArgumentForNonNullParameter=ERROR +# Null is not permitted for this parameter. + +#NullTernary=ERROR +# This conditional expression may evaluate to null, which will result in an NPE when the result is unboxed. + +#OptionalEquality=ERROR +# Comparison using reference equality instead of value equality + +#OptionalMapUnusedValue=ERROR +# Optional.ifPresent is preferred over Optional.map when the return value is unused + +#OptionalOfRedundantMethod=ERROR +# Optional.of() always returns a non-empty optional. Using ifPresent/isPresent/orElse/orElseGet/orElseThrow/or/orNull method on it is unnecessary and most probably a bug. + +#OverlappingQualifierAndScopeAnnotation=ERROR +# Annotations cannot be both Scope annotations and Qualifier annotations: this causes confusion when trying to use them. + +#OverridesJavaxInjectableMethod=ERROR +# This method is not annotated with @Inject, but it overrides a method that is annotated with @javax.inject.Inject. The method will not be Injected. + +#PackageInfo=ERROR +# Declaring types inside package-info.java files is very bad form + +#ParametersButNotParameterized=ERROR +# This test has @Parameters but is using the default JUnit4 runner. The parameters will have no effect.
+ +#ParcelableCreator=ERROR +# Detects classes which implement Parcelable but don't have CREATOR + +#PeriodFrom=ERROR +# Period.from(Period) returns itself; from(Duration) throws a runtime exception. + +#PeriodGetTemporalUnit=ERROR +# Period.get() only works with YEARS, MONTHS, or DAYS. + +#PeriodTimeMath=ERROR +# When adding or subtracting from a Period, Duration is incompatible. + +#PreconditionsInvalidPlaceholder=ERROR +# Preconditions only accepts the %s placeholder in error message strings + +#PrivateSecurityContractProtoAccess=ERROR +# Access to a private protocol buffer field is forbidden. This protocol buffer carries a security contract, and can only be created using an approved library. Direct access to the fields is forbidden. + +#ProtoBuilderReturnValueIgnored=ERROR +# Unnecessary call to proto's #build() method. If you don't consume the return value of #build(), the result is discarded and the only effect is to verify that all required fields are set, which can be expressed more directly with #isInitialized(). + +#ProtoFieldNullComparison=ERROR +# Protobuf fields cannot be null. + +#ProtoStringFieldReferenceEquality=ERROR +# Comparing protobuf fields of type String using reference equality + +#ProtoTruthMixedDescriptors=ERROR +# The arguments passed to `ignoringFields` are inconsistent with the proto which is the subject of the assertion. + +#ProtocolBufferOrdinal=ERROR +# To get the tag number of a protocol buffer enum, use getNumber() instead. + +#ProvidesMethodOutsideOfModule=ERROR +# @Provides methods need to be declared in a Module to have any effect. + +#RandomCast=ERROR +# Casting a random number in the range [0.0, 1.0) to an integer or long always results in 0. + +#RandomModInteger=ERROR +# Use Random.nextInt(int). 
Random.nextInt() % n can have negative results + +#RectIntersectReturnValueIgnored=ERROR +# Return value of android.graphics.Rect.intersect() must be checked + +#RequiredModifiers=ERROR +# This annotation is missing required modifiers as specified by its @RequiredModifiers annotation + +#RestrictedApiChecker=ERROR +# Check for non-allowlisted callers to RestrictedApiChecker. + +#ReturnValueIgnored=ERROR +# Return value of this method must be used + +#SelfAssignment=ERROR +# Variable assigned to itself + +#SelfComparison=ERROR +# An object is compared to itself + +#SelfEquals=ERROR +# Testing an object for equality with itself will always be true. + +#ShouldHaveEvenArgs=ERROR +# This method must be called with an even number of arguments. + +#SizeGreaterThanOrEqualsZero=ERROR +# Comparison of a size >= 0 is always true, did you intend to check for non-emptiness? + +#StreamToString=ERROR +# Calling toString on a Stream does not provide useful information + +#StringBuilderInitWithChar=ERROR +# StringBuilder does not have a char constructor; this invokes the int constructor. + +#SubstringOfZero=ERROR +# String.substring(0) returns the original String + +#SuppressWarningsDeprecated=ERROR +# Suppressing "deprecated" is probably a typo for "deprecation" + +#TemporalAccessorGetChronoField=ERROR +# TemporalAccessor.get() only works for certain values of ChronoField. + +#TestParametersNotInitialized=ERROR +# This test has @TestParameter fields but is using the default JUnit4 runner. The parameters will not be initialised beyond their default value. + +#TheoryButNoTheories=ERROR +# This test has members annotated with @Theory, @DataPoint, or @DataPoints but is using the default JUnit4 runner. + +#ThrowIfUncheckedKnownChecked=ERROR +# throwIfUnchecked(knownCheckedException) is a no-op. + +#ThrowNull=ERROR +# Throwing 'null' always results in a NullPointerException being thrown. 
+ +#TreeToString=ERROR +# Tree#toString shouldn't be used for Trees deriving from the code being compiled, as it discards whitespace and comments. + +#TruthSelfEquals=ERROR +# isEqualTo should not be used to test an object for equality with itself; the assertion will never fail. + +#TryFailThrowable=ERROR +# Catching Throwable/Error masks failures from fail() or assert*() in the try block + +#TypeParameterQualifier=ERROR +# Type parameter used as type qualifier + +#UnicodeDirectionalityCharacters=ERROR +# Unicode directionality modifiers can be used to conceal code in many editors. + +UnicodeInCode=OFF +# Avoid using non-ASCII Unicode characters outside of comments and literals, as they can be confusing. + +#UnnecessaryCheckNotNull=ERROR +# This null check is unnecessary; the expression can never be null + +#UnnecessaryTypeArgument=ERROR +# Non-generic methods should not be invoked with type arguments + +#UnsafeWildcard=ERROR +# Certain wildcard types can confuse the compiler. + +#UnusedAnonymousClass=ERROR +# Instance created but never used + +#UnusedCollectionModifiedInPlace=ERROR +# Collection is modified in place, but the result is not used + +#VarTypeName=ERROR +# `var` should not be used as a type name. + +#WrongOneof=ERROR +# This field is guaranteed not to be set given it's within a switch over a one_of. + +#XorPower=ERROR +# The `^` operator is binary XOR, not a power operator. + +#ZoneIdOfZ=ERROR +# Use ZoneOffset.UTC instead of ZoneId.of("Z"). + +#################################################################################################### +# On by default : WARNING +# See https://errorprone.info/bugpatterns +#################################################################################################### + +#AlmostJavadoc=WARN +# This comment contains Javadoc or HTML tags, but isn't started with a double asterisk (/**); is it meant to be Javadoc? + +#AlreadyChecked=WARN +# This condition has already been checked. 
+ +#AmbiguousMethodReference=WARN +# Method reference is ambiguous + +AnnotateFormatMethod=ERROR +# This method passes a pair of parameters through to String.format, but the enclosing method wasn't annotated @FormatMethod. Doing so gives compile-time rather than run-time protection against malformed format strings. + +#ArgumentSelectionDefectChecker=WARN +# Arguments are in the wrong order or could be commented for clarity. + +ArrayAsKeyOfSetOrMap=ERROR +# Arrays do not override equals() or hashCode, so comparisons will be done on reference equality only. If neither deduplication nor lookup are needed, consider using a List instead. Otherwise, use IdentityHashMap/Set, a Map from a library that handles object arrays, or an Iterable/List of pairs. + +AssertEqualsArgumentOrderChecker=ERROR +# Arguments are swapped in assertEquals-like call + +AssertThrowsMultipleStatements=ERROR +# The lambda passed to assertThrows should contain exactly one statement + +AssertionFailureIgnored=ERROR +# This assertion throws an AssertionError if it fails, which will be caught by an enclosing try block. + +#AssistedInjectAndInjectOnSameConstructor=WARN +# @AssistedInject and @Inject cannot be used on the same constructor. + +#AutoValueFinalMethods=WARN +# Make toString(), hashCode() and equals() final in AutoValue classes, so it is clear to readers that AutoValue is not overriding them + +#AutoValueImmutableFields=WARN +# AutoValue recommends using immutable collections + +#AutoValueSubclassLeaked=WARN +# Do not refer to the autogenerated AutoValue_ class outside the file containing the corresponding @AutoValue base class. + +#BadComparable=WARN +# Possible sign flip from narrowing conversion + +BadImport=ERROR +# Importing nested classes/static methods/static fields with commonly-used names can make code harder to read, because it may not be clear from the context exactly which type is being referred to. Qualifying the name with that of the containing class can make the code clearer. 
+ +BadInstanceof=ERROR +# instanceof used in a way that is equivalent to a null check. + +BareDotMetacharacter=ERROR +# "." is rarely useful as a regex, as it matches any character. To match a literal '.' character, instead write "\.". + +BigDecimalEquals=ERROR +# BigDecimal#equals has surprising behavior: it also compares scale. + +BigDecimalLiteralDouble=ERROR +# new BigDecimal(double) loses precision in this case. + +BoxedPrimitiveConstructor=ERROR +# valueOf or autoboxing provides better time and space performance + +#BugPatternNaming=WARN +# Giving BugPatterns a name different to the enclosing class can be confusing + +#BuilderReturnThis=WARN +# Builder instance method does not return 'this' + +ByteBufferBackingArray=ERROR +# ByteBuffer.array() shouldn't be called unless ByteBuffer.arrayOffset() is used or if the ByteBuffer was initialized using ByteBuffer.wrap() or ByteBuffer.allocate(). + +#CacheLoaderNull=WARN +# The result of CacheLoader#load must be non-null. + +CanIgnoreReturnValueSuggester=OFF +# Methods that always 'return this' should be annotated with @CanIgnoreReturnValue + +#CannotMockFinalClass=WARN +# Mockito cannot mock final classes + +#CanonicalDuration=WARN +# Duration can be expressed more clearly with different units + +CatchAndPrintStackTrace=ERROR +# Logging or rethrowing exceptions should usually be preferred to catching and calling printStackTrace + +#CatchFail=WARN +# Ignoring exceptions and calling fail() is unnecessary, and makes test output less useful + +#ChainedAssertionLosesContext=WARN +# Inside a Subject, use check(…) instead of assert*() to preserve user-supplied messages and other settings. 
+ +#CharacterGetNumericValue=WARN +# getNumericValue has unexpected behaviour: it interprets A-Z as base-36 digits with values 10-35, but also supports non-arabic numerals and miscellaneous numeric unicode characters like ㊷; consider using Character.digit or UCharacter.getUnicodeNumericValue instead + +#ClassCanBeStatic=WARN +# Inner class is non-static but does not reference enclosing class + +ClassNewInstance=ERROR +# Class.newInstance() bypasses exception checking; prefer getDeclaredConstructor().newInstance() + +#CloseableProvides=WARN +# Providing Closeable resources makes their lifecycle unclear + +#CollectionUndefinedEquality=WARN +# This type does not have well-defined equals behavior. + +#CollectorShouldNotUseState=WARN +# Collector.of() should not use state + +#ComparableAndComparator=WARN +# Class should not implement both `Comparable` and `Comparator` + +#CompareToZero=WARN +# The result of #compareTo or #compare should only be compared to 0. It is an implementation detail whether a given type returns strictly the values {-1, 0, +1} or others. + +#ComplexBooleanConstant=WARN +# Non-trivial compile time constant boolean expressions shouldn't be used. + +#DateChecker=WARN +# Warns against suspect looking calls to java.util.Date APIs + +DateFormatConstant=ERROR +# DateFormat is not thread-safe, and should not be used as a constant field. + +DefaultCharset=ERROR +# Implicit use of the platform default charset, which can result in differing behaviour between JVM executions or incorrect behavior if the encoding of the data source doesn't match expectations. + +#DefaultPackage=WARN +# Java classes shouldn't use default package + +#DeprecatedVariable=WARN +# Applying the @Deprecated annotation to local variables or parameters has no effect + +#DirectInvocationOnMock=WARN +# Methods should not be directly invoked on mocks. Should this be part of a verify(..) call? 
+ +DistinctVarargsChecker=ERROR +# Method expects distinct arguments at some/all positions + +#DoNotCallSuggester=WARN +# Consider annotating methods that always throw with @DoNotCall. Read more at https://errorprone.info/bugpattern/DoNotCall + +#DoNotClaimAnnotations=WARN +# Don't 'claim' annotations in annotation processors; Processor#process should unconditionally return `false` + +#DoNotMockAutoValue=WARN +# AutoValue classes represent pure data classes, so mocking them should not be necessary. Construct a real instance of the class instead. + +DoubleCheckedLocking=ERROR +# Double-checked locking on non-volatile fields is unsafe + +#EmptyBlockTag=WARN +# A block tag (@param, @return, @throws, @deprecated) has an empty description. Block tags without descriptions don't add much value for future readers of the code; consider removing the tag entirely or adding a description. + +#EmptyCatch=WARN +# Caught exceptions should not be ignored + +#EmptySetMultibindingContributions=WARN +# @Multibinds is a more efficient and declarative mechanism for ensuring that a set multibinding is present in the graph. + +EqualsGetClass=ERROR +# Prefer instanceof to getClass when implementing Object#equals. + +EqualsIncompatibleType=ERROR +# An equality test between objects with incompatible types always returns false + +EqualsUnsafeCast=ERROR +# The contract of #equals states that it should return false for incompatible types, while this implementation may throw ClassCastException. + +EqualsUsingHashCode=ERROR +# Implementing #equals by just comparing hashCodes is fragile. Hashes collide frequently, and this will lead to false positives in #equals. + +ErroneousBitwiseExpression=ERROR +# This expression evaluates to 0. If this isn't an error, consider expressing it as a literal 0. 
+ +ErroneousThreadPoolConstructorChecker=ERROR +# Thread pool size will never go beyond corePoolSize if an unbounded queue is used + +EscapedEntity=ERROR +# HTML entities in @code/@literal tags will appear literally in the rendered javadoc. + +#ExtendingJUnitAssert=WARN +# When only using JUnit Assert's static methods, you should import statically instead of extending. + +#ExtendsObject=WARN +# `T extends Object` is redundant (unless you are using the Checker Framework). + +#FallThrough=WARN +# Switch case may fall through + +#Finally=WARN +# If you return or throw from a finally, then values returned or thrown from the try-catch block will be ignored. Consider using try-with-resources instead. + +FloatCast=ERROR +# Use parentheses to make the precedence explicit + +FloatingPointAssertionWithinEpsilon=ERROR +# This fuzzy equality check is using a tolerance less than the gap to the next number. You may want a less restrictive tolerance, or to assert equality. + +FloatingPointLiteralPrecision=ERROR +# Floating point literal loses precision + +#FloggerArgumentToString=WARN +# Use Flogger's printf-style formatting instead of explicitly converting arguments to strings + +#FloggerStringConcatenation=WARN +# Prefer string formatting using printf placeholders (e.g. %s) instead of string concatenation + +#FragmentInjection=WARN +# Classes extending PreferenceActivity must implement isValidFragment such that it does not unconditionally return true to prevent vulnerability to fragment injection attacks. + +#FragmentNotInstantiable=WARN +# Subclasses of Fragment must be instantiable via Class#newInstance(): the class must be public, static and have a public nullary constructor + +FutureReturnValueIgnored=ERROR +# Return value of methods returning Future must be checked. Ignoring returned Futures suppresses exceptions thrown from the code that completes the Future. 
+ +GetClassOnEnum=ERROR +# Calling getClass() on an enum may return a subclass of the enum type + +#HidingField=WARN +# Hiding fields of superclasses may cause confusion and errors + +#IdentityHashMapUsage=WARN +# IdentityHashMap usage shouldn't be intermingled with Map + +#ImmutableAnnotationChecker=WARN +# Annotations should always be immutable + +#ImmutableEnumChecker=WARN +# Enums should always be immutable + +#InconsistentCapitalization=WARN +# It is confusing to have a field and a parameter under the same scope that differ only in capitalization. + +InconsistentHashCode=ERROR +# Including fields in hashCode which are not compared in equals violates the contract of hashCode. + +#IncorrectMainMethod=WARN +# 'main' methods must be public, static, and void + +#IncrementInForLoopAndHeader=WARN +# This for loop increments the same variable in the header and in the body + +#InheritDoc=WARN +# Invalid use of @inheritDoc. + +#InjectInvalidTargetingOnScopingAnnotation=WARN +# A scoping annotation's Target should include TYPE and METHOD. + +#InjectOnConstructorOfAbstractClass=WARN +# Constructors on abstract classes are never directly @Inject'ed, only the constructors of their subclasses can be @Inject'ed. + +#InjectScopeAnnotationOnInterfaceOrAbstractClass=WARN +# Scope annotation on an interface or abstract class is not allowed + +#InjectedConstructorAnnotations=WARN +# Injected constructors cannot be optional nor have binding annotations + +#InlineFormatString=WARN +# Prefer to create format strings inline, instead of extracting them to a single-use constant + +#InlineMeInliner=WARN +# Callers of this API should be inlined. + +#InlineMeSuggester=WARN +# This deprecated API looks inlineable. If you'd like the body of the API to be inlined to its callers, please annotate it with @InlineMe. + +#InputStreamSlowMultibyteRead=WARN +# Please also override int read(byte[], int, int), otherwise multi-byte reads from this input stream are likely to be slow. 
+ +#InstanceOfAndCastMatchWrongType=WARN +# Casting inside an if block should be plausibly consistent with the instanceof type + +IntLongMath=ERROR +# Expression of type int may overflow before being assigned to a long + +#InvalidBlockTag=WARN +# This tag is invalid. + +#InvalidInlineTag=WARN +# This tag is invalid. + +#InvalidLink=WARN +# This @link tag looks wrong. + +#InvalidParam=WARN +# This @param tag doesn't refer to a parameter of the method. + +#InvalidThrows=WARN +# The documented method doesn't actually throw this checked exception. + +#InvalidThrowsLink=WARN +# Javadoc links to exceptions in @throws without a @link tag (@throws Exception, not @throws {@link Exception}). + +#IterableAndIterator=WARN +# Class should not implement both `Iterable` and `Iterator` + +#JUnit3FloatingPointComparisonWithoutDelta=WARN +# Floating-point comparison without error tolerance + +#JUnit4ClassUsedInJUnit3=WARN +# Some JUnit4 constructs cannot be used in a JUnit3 context. Convert your class to JUnit4 style to use them. + +#JUnitAmbiguousTestClass=WARN +# Test class inherits from JUnit 3's TestCase but has JUnit 4 @Test or @RunWith annotations. + +#JavaDurationGetSecondsGetNano=WARN +# duration.getNano() only accesses the underlying nanosecond adjustment from the whole second. + +#JavaDurationWithNanos=WARN +# Use of java.time.Duration.withNanos(int) is not allowed. + +#JavaDurationWithSeconds=WARN +# Use of java.time.Duration.withSeconds(long) is not allowed. + +#JavaInstantGetSecondsGetNano=WARN +# instant.getNano() only accesses the underlying nanosecond adjustment from the whole second. + +JavaLangClash=ERROR +# Never reuse class names from java.lang + +#JavaLocalDateTimeGetNano=WARN +# localDateTime.getNano() only accesses the nanos-of-second field. It's rare to only use getNano() without a nearby getSecond() call. + +#JavaLocalTimeGetNano=WARN +# localTime.getNano() only accesses the nanos-of-second field.
It's rare to only use getNano() without a nearby getSecond() call. + +#JavaPeriodGetDays=WARN +# period.getDays() only accesses the "days" portion of the Period, and doesn't represent the total span of time of the period. Consider using org.threeten.extra.Days to extract the difference between two civil dates if you want the whole time. + +#JavaTimeDefaultTimeZone=WARN +# java.time APIs that silently use the default system time-zone are not allowed. + +#JavaUtilDate=WARN +# Date has a bad API that leads to bugs; prefer java.time.Instant or LocalDate. + +#JavaxInjectOnFinalField=WARN +# @javax.inject.Inject cannot be put on a final field. + +JdkObsolete=ERROR +# Suggests alternatives to obsolete JDK classes. + +#JodaConstructors=WARN +# Use of certain JodaTime constructors is not allowed. + +#JodaDateTimeConstants=WARN +# Using the `PER` constants in `DateTimeConstants` is problematic because they encourage manual date/time math. + +#JodaDurationWithMillis=WARN +# Use of duration.withMillis(long) is not allowed. Please use Duration.millis(long) instead. + +#JodaInstantWithMillis=WARN +# Use of instant.withMillis(long) is not allowed. Please use new Instant(long) instead. + +#JodaNewPeriod=WARN +# This may have surprising semantics, e.g. new Period(LocalDate.parse("1970-01-01"), LocalDate.parse("1970-02-02")).getDays() == 1, not 32. + +#JodaPlusMinusLong=WARN +# Use of JodaTime's type.plus(long) or type.minus(long) is not allowed (where type = {Duration,Instant,DateTime,DateMidnight}). Please use type.plus(Duration.millis(long)) or type.minus(Duration.millis(long)) instead. + +#JodaTimeConverterManager=WARN +# Joda-Time's ConverterManager makes the semantics of DateTime/Instant/etc construction subject to global static state. If you need to define your own converters, use a helper. + +#JodaWithDurationAddedLong=WARN +# Use of JodaTime's type.withDurationAdded(long, int) (where type = {Duration,Instant,DateTime}).
Please use type.withDurationAdded(Duration.millis(long), int) instead. + +#LiteEnumValueOf=WARN +# Instead of converting enums to string and back, its numeric value should be used instead as it is the stable part of the protocol defined by the enum. + +#LiteProtoToString=WARN +# toString() on lite protos will not generate a useful representation of the proto from optimized builds. Consider whether using some subset of fields instead would provide useful information. + +LockNotBeforeTry=ERROR +# Calls to Lock#lock should be immediately followed by a try block which releases the lock. + +#LogicalAssignment=WARN +# Assignment where a boolean expression was expected; use == if this assignment wasn't expected or add parentheses for clarity. + +LongDoubleConversion=ERROR +# Conversion from long to double may lose precision; use an explicit cast to double if this was intentional + +LongFloatConversion=ERROR +# Conversion from long to float may lose precision; use an explicit cast to float if this was intentional + +#LoopOverCharArray=WARN +# toCharArray allocates a new array, using charAt is more efficient + +#MalformedInlineTag=WARN +# This Javadoc tag is malformed. The correct syntax is {@tag and not @{tag. + +#MathAbsoluteRandom=WARN +# Math.abs does not always give a positive result. Please consider other methods for positive random numbers. + +#MemoizeConstantVisitorStateLookups=WARN +# Anytime you need to look up a constant value from VisitorState, improve performance by creating a cache for it with VisitorState.memoize + +#MissingCasesInEnumSwitch=WARN +# Switches on enum types should either handle all values, or have a default case. 
+ +#MissingFail=WARN +# Not calling fail() when expecting an exception masks bugs + +#MissingImplementsComparable=WARN +# Classes implementing valid compareTo function should implement Comparable interface + +MissingOverride=ERROR +# method overrides method in supertype; expected @Override + +#MissingSummary=WARN +# A summary line is required on public/protected Javadocs. + +#MixedMutabilityReturnType=WARN +# This method returns both mutable and immutable collections or maps from different paths. This may be confusing for users of the method. + +#ModifiedButNotUsed=WARN +# A collection or proto builder was created, but its values were never accessed. + +#MockNotUsedInProduction=WARN +# This mock is instantiated and configured, but is never passed to production code. It should be +# either removed or used. + +#ModifyCollectionInEnhancedForLoop=WARN +# Modifying a collection while iterating over it in a loop may cause a ConcurrentModificationException to be thrown or lead to undefined behavior. + +#ModifySourceCollectionInStream=WARN +# Modifying the backing source during stream operations may cause unintended results. + +#MultipleParallelOrSequentialCalls=WARN +# Multiple calls to either parallel or sequential are unnecessary and cause confusion. + +#MultipleUnaryOperatorsInMethodCall=WARN +# Avoid having multiple unary operators acting on the same variable in a method call + +#MutablePublicArray=WARN +# Non-empty arrays are mutable, so this `public static final` array is not a constant and can be modified by clients of this class. Prefer an ImmutableList, or provide an accessor method that returns a defensive copy. + +#NarrowCalculation=WARN +# This calculation may lose precision compared to its target type. + +#NarrowingCompoundAssignment=WARN +# Compound assignments may hide dangerous casts + +#NegativeCharLiteral=WARN +# Casting a negative signed literal to an (unsigned) char might be misleading. 
+ +#NestedInstanceOfConditions=WARN +# Nested instanceOf conditions of disjoint types create blocks of code that never execute + +#NonAtomicVolatileUpdate=WARN +# This update of a volatile variable is non-atomic + +#NonCanonicalType=WARN +# This type is referred to by a non-canonical name, which may be misleading. + +#NonOverridingEquals=WARN +# equals method doesn't override Object.equals + +#NullOptional=WARN +# Passing a literal null to an Optional parameter is almost certainly a mistake. Did you mean to provide an empty Optional? + +#NullableConstructor=WARN +# Constructors should not be annotated with @Nullable since they cannot return null + +#NullablePrimitive=WARN +# @Nullable should not be used for primitive types since they cannot be null + +#NullablePrimitiveArray=WARN +# @Nullable type annotations should not be used for primitive types since they cannot be null + +#NullableVoid=WARN +# void-returning methods should not be annotated with @Nullable, since they cannot return null + +ObjectEqualsForPrimitives=ERROR +# Avoid unnecessary boxing by using plain == for primitive types. + +#ObjectToString=WARN +# Calling toString on Objects that don't override toString() doesn't provide useful information + +#ObjectsHashCodePrimitive=WARN +# Objects.hashCode(Object o) should not be passed a primitive value + +OperatorPrecedence=ERROR +# Use grouping parenthesis to make the operator precedence explicit + +#OptionalMapToOptional=WARN +# Mapping to another Optional will yield a nested Optional. Did you mean flatMap? + +#OptionalNotPresent=WARN +# This Optional has been confirmed to be empty at this point, so the call to `get` will throw. + +OrphanedFormatString=ERROR +# String literal contains format specifiers, but is not passed to a format method + +#OutlineNone=WARN +# Setting CSS outline style to none or 0 (while not otherwise providing visual focus indicators) is inaccessible for users navigating a web page without a mouse. 
+ +#OverrideThrowableToString=WARN +# To return a custom message with a Throwable class, one should override getMessage() instead of toString(). + +Overrides=ERROR +# Varargs doesn't agree for overridden method + +#OverridesGuiceInjectableMethod=WARN +# This method is not annotated with @Inject, but it overrides a method that is annotated with @com.google.inject.Inject. Guice will inject this method, and it is recommended to annotate it explicitly. + +#ParameterName=WARN +# Detects `/* name= */`-style comments on actual parameters where the name doesn't match the formal parameter + +#PreconditionsCheckNotNullRepeated=WARN +# Including the first argument of checkNotNull in the failure message is not useful, as it will always be `null`. + +#PrimitiveAtomicReference=WARN +# Using compareAndSet with boxed primitives is dangerous, as reference rather than value equality is used. Consider using AtomicInteger, AtomicLong, AtomicBoolean from JDK or AtomicDouble from Guava instead. + +#ProtectedMembersInFinalClass=WARN +# Protected members in final classes can be package-private + +#ProtoDurationGetSecondsGetNano=WARN +# getNanos() only accesses the underlying nanosecond-adjustment of the duration. + +#ProtoRedundantSet=WARN +# A field on a protocol buffer was set twice in the same chained expression. + +#ProtoTimestampGetSecondsGetNano=WARN +# getNanos() only accesses the underlying nanosecond-adjustment of the instant. + +#QualifierOrScopeOnInjectMethod=WARN +# Qualifiers/Scope annotations on @Inject methods don't have any effect. Move the qualifier annotation to the binding location. 
+ +#ReachabilityFenceUsage=WARN +# reachabilityFence should always be called inside a finally block + +#ReferenceEquality=WARN +# Comparison using reference equality instead of value equality + +#RethrowReflectiveOperationExceptionAsLinkageError=WARN +# Prefer LinkageError for rethrowing ReflectiveOperationException as unchecked + +#ReturnFromVoid=WARN +# Void methods should not have a @return tag. + +#RobolectricShadowDirectlyOn=WARN +# Migrate off a deprecated overload of org.robolectric.shadow.api.Shadow#directlyOn + +#RxReturnValueIgnored=WARN +# Returned Rx objects must be checked. Ignoring a returned Rx value means it is never scheduled for execution + +#SameNameButDifferent=WARN +# This type name shadows another in a way that may be confusing. + +#SelfAlwaysReturnsThis=WARN +# Non-abstract instance methods named 'self()' that return the enclosing class must always 'return this'. + +#ShortCircuitBoolean=WARN +# Prefer the short-circuiting boolean operators && and || to & and |. + +StaticAssignmentInConstructor=ERROR +# This assignment is to a static field. Mutating static state from a constructor is highly error-prone. + +#StaticAssignmentOfThrowable=WARN +# Saving instances of Throwable in static fields is discouraged, prefer to create them on-demand when an exception is thrown + +#StaticGuardedByInstance=WARN +# Writes to static fields should not be guarded by instance locks + +#StaticMockMember=WARN +# @Mock members of test classes shouldn't share state between tests and preferably be non-static + +#StreamResourceLeak=WARN +# Streams that encapsulate a closeable resource should be closed using try-with-resources + +StreamToIterable=ERROR +# Using stream::iterator creates a one-shot Iterable, which may cause surprising failures. 
+ +#StringSplitter=WARN +# String.split(String) has surprising behavior + +#SwigMemoryLeak=WARN +# SWIG generated code that can't call a C++ destructor will leak memory + +SynchronizeOnNonFinalField=ERROR +# Synchronizing on non-final fields is not safe: if the field is ever updated, different threads may end up locking on different objects. + +#ThreadJoinLoop=WARN +# Thread.join needs to be immediately surrounded by a loop until it succeeds. Consider using Uninterruptibles.joinUninterruptibly. + +ThreadLocalUsage=ERROR +# ThreadLocals should be stored in static fields + +#ThreadPriorityCheck=WARN +# Relying on the thread scheduler is discouraged. + +#ThreeLetterTimeZoneID=WARN +# Three-letter time zone identifiers are deprecated, may be ambiguous, and might not do what you intend; the full IANA time zone ID should be used instead. + +#TimeUnitConversionChecker=WARN +# This TimeUnit conversion looks buggy: converting from a smaller unit to a larger unit (and passing a constant), converting to/from the same TimeUnit, or converting TimeUnits where the result is statically known to be 0 or 1 are all buggy patterns. + +#ToStringReturnsNull=WARN +# An implementation of Object.toString() should never return null. + +#TruthAssertExpected=WARN +# The actual and expected values appear to be swapped, which results in poor assertion failure messages. The actual value should come first. + +#TruthConstantAsserts=WARN +# Truth Library assert is called on a constant. + +#TruthGetOrDefault=WARN +# Asserting on getOrDefault is unclear; prefer containsEntry or doesNotContainKey + +#TruthIncompatibleType=WARN +# Argument is not compatible with the subject's type. 
+ +#TypeEquals=WARN +# com.sun.tools.javac.code.Type doesn't override Object.equals and instances are not interned by javac, so testing types for equality should be done with Types#isSameType instead + +#TypeNameShadowing=WARN +# Type parameter declaration shadows another named type + +#TypeParameterShadowing=WARN +# Type parameter declaration overrides another type parameter already declared + +#TypeParameterUnusedInFormals=WARN +# Declaring a type parameter that is only used in the return type is a misuse of generics: operations on the type parameter are unchecked, it hides unsafe casts at invocations of the method, and it interacts badly with method overload resolution. + +URLEqualsHashCode=ERROR +# Avoid hash-based containers of java.net.URL–the containers rely on equals() and hashCode(), which cause java.net.URL to make blocking internet connections. + +#UndefinedEquals=WARN +# This type is not guaranteed to implement a useful #equals method. + +#UnescapedEntity=WARN +# Javadoc is interpreted as HTML, so HTML entities such as &, <, > must be escaped. If this finding seems wrong (e.g. is within a @code or @literal tag), check whether the tag could be malformed and not recognised by the compiler. + +#UnicodeEscape=WARN +# Using unicode escape sequences for printable ASCII characters is obfuscated, and potentially dangerous. + +#UnnecessaryAssignment=WARN +# Fields annotated with @Inject/@Mock should not be manually assigned to, as they should be initialized by a framework. Remove the assignment if a framework is being used, or the annotation if one isn't. + +UnnecessaryLambda=ERROR +# Returning a lambda from a helper method or saving it in a constant is unnecessary; prefer to implement the functional interface method directly and use a method reference instead. + +#UnnecessaryLongToIntConversion=WARN +# Converting a long or Long to an int to pass as a long parameter is usually not necessary. 
If this conversion is intentional, consider `Longs.constrainToRange()` instead. + +#UnnecessaryMethodInvocationMatcher=WARN +# It is not necessary to wrap a MethodMatcher with methodInvocation(). + +#UnnecessaryMethodReference=WARN +# This method reference is unnecessary, and can be replaced with the variable itself. + +#UnnecessaryParentheses=WARN +# These grouping parentheses are unnecessary; it is unlikely the code will be misinterpreted without them + +#UnrecognisedJavadocTag=WARN +# This Javadoc tag wasn't recognised by the parser. Is it malformed somehow, perhaps with mismatched braces? + +#UnsafeFinalization=WARN +# Finalizer may run before native code finishes execution + +#UnsafeReflectiveConstructionCast=WARN +# Prefer `asSubclass` instead of casting the result of `newInstance`, to detect classes of incorrect type before invoking their constructors. This way, if the class is of the incorrect type, it will throw an exception before invoking its constructor. + +#UnsynchronizedOverridesSynchronized=WARN +# Unsynchronized method overrides a synchronized method. + +UnusedMethod=ERROR +# Unused. + +#UnusedNestedClass=WARN +# This nested class is unused, and can be removed. + +#UnusedTypeParameter=WARN +# This type parameter is unused and can be removed. + +#UnusedVariable=WARN +# Unused. + +#UseBinds=WARN +# @Binds is a more efficient and declarative mechanism for delegating a binding. + +UseCorrectAssertInTests=ERROR +# Java assert is used in test. For testing purposes Assert.* matchers should be used. + +#VariableNameSameAsType=WARN +# variableName and type with the same name would refer to the static field instead of the class + +#WaitNotInLoop=WARN +# Because of spurious wakeups, Object.wait() and Condition.await() must always be called in a loop + +#WakelockReleasedDangerously=WARN +# A wakelock acquired with a timeout may be released by the system before calling `release`, even after checking `isHeld()`. If so, it will throw a RuntimeException.
Please wrap in a try/catch block. + +#WithSignatureDiscouraged=WARN +# withSignature is discouraged. Prefer .named and/or .withParameters where possible. + +#################################################################################################### +# Experimental : ERROR +# See https://errorprone.info/bugpatterns +#################################################################################################### + +#AndroidJdkLibsChecker=ERROR +# Use of class, field, or method that is not compatible with legacy Android devices + +#AutoFactoryAtInject=ERROR +# @AutoFactory and @Inject should not be used in the same type. + +#BanSerializableRead=ERROR +# Deserializing user input via the `Serializable` API is extremely dangerous + +#ClassName=ERROR +# The source file name should match the name of the top-level class it contains + +#ComparisonContractViolated=ERROR +# This comparison method violates the contract + +#DeduplicateConstants=ERROR +# This expression was previously declared as a constant; consider replacing this occurrence. + +#DepAnn=ERROR +# Item documented with a @deprecated javadoc note is not annotated with @Deprecated + +#EmptyIf=ERROR +# Empty statement after if + +#ExtendsAutoValue=ERROR +# Do not extend an @AutoValue/@AutoOneOf class in non-generated code. + +#InjectMoreThanOneQualifier=ERROR +# Using more than one qualifier annotation on the same element is not allowed. + +#InjectScopeOrQualifierAnnotationRetention=ERROR +# Scoping and qualifier annotations must have runtime retention. 
+ +#InsecureCryptoUsage=ERROR +# A standard cryptographic operation is used in a mode that is prone to vulnerabilities + +#IterablePathParameter=ERROR +# Path implements Iterable; prefer Collection for clarity + +#Java7ApiChecker=ERROR +# Use of class, field, or method that is not compatible with JDK 7 + +#Java8ApiChecker=ERROR +# Use of class, field, or method that is not compatible with JDK 8 + +#LockMethodChecker=ERROR +# This method does not acquire the locks specified by its @LockMethod annotation + +#LongLiteralLowerCaseSuffix=ERROR +# Prefer 'L' to 'l' for the suffix to long literals + +#NoAllocation=ERROR +# @NoAllocation was specified on this method, but something was found that would trigger an allocation + +#RefersToDaggerCodegen=ERROR +# Don't refer to Dagger's internal or generated code + +#StaticOrDefaultInterfaceMethod=ERROR +# Static and default interface methods are not natively supported on older Android devices. + +#StaticQualifiedUsingExpression=ERROR +# A static variable or method should be qualified with a class name, not expression + +#UnlockMethod=ERROR +# This method does not acquire the locks specified by its @UnlockMethod annotation + +#################################################################################################### +# Experimental : WARNING +# See https://errorprone.info/bugpatterns +#################################################################################################### + +#AnnotationPosition=WARN +# Annotations should be positioned after Javadocs, but before modifiers. + +#AssertFalse=WARN +# Assertions may be disabled at runtime and do not guarantee that execution will halt here; consider throwing an exception instead + +#AssistedInjectAndInjectOnConstructors=WARN +# @AssistedInject and @Inject should not be used on different constructors in the same class. 
+ +#BinderIdentityRestoredDangerously=WARN +# A call to Binder.clearCallingIdentity() should be followed by Binder.restoreCallingIdentity() in a finally block. Otherwise the wrong Binder identity may be used by subsequent code. + +#BindingToUnqualifiedCommonType=WARN +# This code declares a binding for a common value type without a Qualifier annotation. + +#CannotMockFinalClass=WARN +# Mockito cannot mock final classes + +#CatchingUnchecked=WARN +# This catch block catches `Exception`, but can only catch unchecked exceptions. Consider catching RuntimeException (or something more specific) instead so it is more apparent that no checked exceptions are being handled. + +#CheckedExceptionNotThrown=WARN +# This method cannot throw a checked exception that it claims to. This may cause consumers of the API to incorrectly attempt to handle, or propagate, this exception. + +ConstantPatternCompile=WARN +# Variables initialized with Pattern#compile calls on constants can be constants + +#DifferentNameButSame=WARN +# This type is referred to in different ways within this file, which may be confusing. + +#EqualsBrokenForNull=WARN +# equals() implementation may throw NullPointerException when given null + +#ExpectedExceptionChecker=WARN +# Prefer assertThrows to ExpectedException + +#FloggerLogWithCause=WARN +# Setting the caught exception as the cause of the log message may provide more context for anyone debugging errors. + +#FloggerMessageFormat=WARN +# Invalid message format-style format specifier ({0}), expected printf-style (%s) + +#FloggerPassedAround=WARN +# There is no advantage to passing around a logger rather than declaring one in the class that needs it. + +#FloggerRedundantIsEnabled=WARN +# Logger level check is already implied in the log() call. An explicit atLevel.isEnabled() check is redundant. + +#FloggerRequiredModifiers=WARN +# FluentLogger.forEnclosingClass should always be saved to a private static final field. 
+ +#FloggerWithCause=WARN +# Calling withCause(Throwable) with an inline allocated Throwable is discouraged. Consider using withStackTrace(StackSize) instead, and specifying a reduced stack size (e.g. SMALL, MEDIUM or LARGE) instead of FULL, to improve performance. + +#FloggerWithoutCause=WARN +# Use withCause to associate Exceptions with log statements + +#FunctionalInterfaceClash=WARN +# Overloads will be ambiguous when passing lambda arguments. + +#HardCodedSdCardPath=WARN +# Hardcoded reference to /sdcard + +#InconsistentOverloads=WARN +# The ordering of parameters in overloaded methods should be as consistent as possible (when viewed from left to right) + +#InitializeInline=WARN +# Initializing variables in their declaring statement is clearer, where possible. + +#InterfaceWithOnlyStatics=WARN +# This interface only contains static fields and methods; consider making it a final class instead to prevent subclassing. + +#InterruptedExceptionSwallowed=WARN +# This catch block appears to be catching an explicitly declared InterruptedException as an Exception/Throwable and not handling the interruption separately. + +#MemberName=WARN +# Methods and non-static variables should be named in lowerCamelCase. + +#MissingDefault=WARN +# The Google Java Style Guide requires that each switch statement includes a default statement group, even if it contains no code. (This requirement is lifted for any switch statement that covers all values of an enum.) + +#NonCanonicalStaticMemberImport=WARN +# Static import of member uses non-canonical name + +#PreferJavaTimeOverload=WARN +# Prefer using java.time-based APIs when available. Note that this checker does not and cannot guarantee that the overloads have equivalent semantics, but that is generally the case with overloaded methods. + +#PreferredInterfaceType=WARN +# This type can be more specific. 
+ +PrimitiveArrayPassedToVarargsMethod=WARN +# Passing a primitive array to a varargs method is usually wrong + +#QualifierWithTypeUse=WARN +# Injection frameworks currently don't understand Qualifiers in TYPE_PARAMETER or TYPE_USE contexts. + +RedundantOverride=ERROR +# This overriding method is redundant, and can be removed. + +RedundantThrows=ERROR +# Thrown exception is a subtype of another + +StronglyTypeByteString=WARN +# This primitive byte array is only used to construct ByteStrings. It would be clearer to strongly type the field instead. + +StronglyTypeTime=WARN +# This primitive integral type is only used to construct time types. It would be clearer to strongly type the field instead. + +#SuppressWarningsWithoutExplanation=WARN +# Use of @SuppressWarnings should be accompanied by a comment describing why the warning is safe to ignore. + +#SystemExitOutsideMain=WARN +# Code that contains System.exit() is untestable. + +#SystemOut=WARN +# Printing to standard output should only be used for debugging, not in production code + +#TestExceptionChecker=WARN +# Using @Test(expected=…) is discouraged, since the test will pass if any statement in the test method throws the expected exception + +#ThrowSpecificExceptions=WARN +# Base exception classes should be treated as abstract. If the exception is intended to be caught, throw a domain-specific exception. Otherwise, prefer a more specific exception for clarity. Common alternatives include: AssertionError, IllegalArgumentException, IllegalStateException, and (Guava's) VerifyException. + +#TimeUnitMismatch=WARN +# A value that appears to be represented in one unit is used where another appears to be required (e.g., seconds where nanos are needed) + +#TooManyParameters=WARN +# A large number of parameters on public APIs should be avoided.
+ +#TransientMisuse=WARN +# Static fields are implicitly transient, so the explicit modifier is unnecessary + +#TryWithResourcesVariable=WARN +# This variable is unnecessary, the try-with-resources resource can be a reference to a final or effectively final variable + +#UnnecessarilyFullyQualified=WARN +# This fully qualified name is unambiguous to the compiler if imported. + +#UnnecessarilyVisible=WARN +# Some methods (such as those annotated with @Inject or @Provides) are only intended to be called by a framework, and so should have default visibility. + +#UnnecessaryAnonymousClass=WARN +# Implementing a functional interface is unnecessary; prefer to implement the functional interface method directly and use a method reference instead. + +#UnnecessaryDefaultInEnumSwitch=WARN +# Switch handles all enum values: an explicit default case is unnecessary and defeats error checking for non-exhaustive switches. + +#UnnecessaryFinal=WARN +# Since Java 8, it's been unnecessary to make local variables and parameters `final` for use in lambdas or anonymous classes. Marking them as `final` is weakly discouraged, as it adds a fair amount of noise for minimal benefit. + +#UnnecessaryOptionalGet=WARN +# This code can be simplified by directly using the lambda parameters instead of calling get..() on optional. + +#UnsafeLocaleUsage=WARN +# Possible unsafe operation related to the java.util.Locale library. + +#UnusedException=WARN +# This catch block catches an exception and re-throws another, but swallows the caught exception rather than setting it as a cause. This can make debugging harder. + +#UrlInSee=WARN +# URLs should not be used in @see tags; they are designed for Java elements which could be used with @link. + +#UsingJsr305CheckReturnValue=WARN +# Prefer ErrorProne's @CheckReturnValue over JSR305's version. + +#Var=WARN +# Non-constant variable missing @Var annotation + +#Varifier=WARN +# Consider using `var` here to avoid boilerplate. 
+ +#################################################################################################### +# Experimental : SUGGESTION +# See https://errorprone.info/bugpatterns +#################################################################################################### + +#AnnotationMirrorToString=WARN +# AnnotationMirror#toString doesn't use fully qualified type names, prefer auto-common's AnnotationMirrors#toString + +#AnnotationValueToString=WARN +# AnnotationValue#toString doesn't use fully qualified type names, prefer auto-common's AnnotationValues#toString + +#BooleanParameter=WARN +# Use parameter comments to document ambiguous literals + +#ClassNamedLikeTypeParameter=WARN +# This class's name looks like a Type Parameter. + +#ConstantField=WARN +# Fields with CONSTANT_CASE names should be both static and final + +#EqualsMissingNullable=WARN +# Method overrides Object.equals but does not have @Nullable on its parameter + +#FieldCanBeFinal=WARN +# This field is only assigned during initialization; consider making it final + +#FieldCanBeLocal=WARN +# This field can be replaced with a local variable in the methods that use it. + +#FieldCanBeStatic=WARN +# A final field initialized at compile-time with an instance of an immutable type can be static. + +#FieldMissingNullable=WARN +# Field is assigned (or compared against) a definitely null value but is not annotated @Nullable + +#ForEachIterable=WARN +# This loop can be replaced with an enhanced for loop. + +#ImmutableMemberCollection=WARN +# If you don't intend to mutate a member collection prefer using Immutable types. + +#ImmutableRefactoring=WARN +# Refactors uses of the JSR 305 @Immutable to Error Prone's annotation + +#ImmutableSetForContains=WARN +# This private static ImmutableList is only used for contains, containsAll or isEmpty checks; prefer ImmutableSet. + +#ImplementAssertionWithChaining=WARN +# Prefer check(…), which usually generates more readable failure messages. 
+ +#LambdaFunctionalInterface=WARN +# Use Java's utility functional interfaces instead of Function for primitive types. + +#MethodCanBeStatic=WARN +# A private method that does not reference the enclosing instance can be static + +#MissingBraces=WARN +# The Google Java Style Guide requires braces to be used with if, else, for, do and while statements, even when the body is empty or contains only a single statement. + +#MixedArrayDimensions=WARN +# C-style array declarations should not be used + +#MultiVariableDeclaration=WARN +# Variable declarations should declare only one variable + +#MultipleTopLevelClasses=WARN +# Source files should not contain multiple top-level class declarations + +#PackageLocation=WARN +# Package names should match the directory they are declared in + +#ParameterComment=WARN +# Non-standard parameter comment; prefer `/* paramName= */ arg` + +#ParameterMissingNullable=WARN +# Parameter has handling for null but is not annotated @Nullable + +#PrivateConstructorForNoninstantiableModule=WARN +# Add a private constructor to modules that will not be instantiated by Dagger. + +#PrivateConstructorForUtilityClass=WARN +# Classes which are not intended to be instantiated should be made non-instantiable with a private constructor. This includes utility classes (classes with only static members), and the main class. + +#PublicApiNamedStreamShouldReturnStream=WARN +# Public methods named stream() are generally expected to return a type whose name ends with Stream. Consider choosing a different method name instead. + +#RemoveUnusedImports=WARN +# Unused imports + +#ReturnMissingNullable=WARN +# Method returns a definitely null value but is not annotated @Nullable + +#ReturnsNullCollection=WARN +# Method has a collection return type and returns {@code null} in some cases but does not annotate the method as @Nullable. See Effective Java 3rd Edition Item 54. + +#ScopeOnModule=WARN +# Scopes on modules have no function and will soon be an error. 
+ +#SwitchDefault=WARN +# The default case of a switch should appear at the end of the last statement group + +#SymbolToString=WARN +# Symbol#toString shouldn't be used for comparison as it is expensive and fragile. + +#ThrowsUncheckedException=WARN +# Unchecked exceptions do not need to be declared in the method signature. + +TryFailRefactoring=WARN +# Prefer assertThrows to try/fail + +#TypeParameterNaming=WARN +# Type parameters must be a single letter with an optional numeric suffix, or an UpperCamelCase name followed by the letter 'T'. + +#TypeToString=WARN +# Type#toString shouldn't be used for comparison as it is expensive and fragile. + +#UngroupedOverloads=WARN +# Constructors and methods with the same name should appear sequentially with no other code in between, even when modifiers such as static or private differ between the methods. Please re-order or re-name methods. + +UnnecessaryBoxedAssignment=WARN +# This expression can be implicitly boxed. + +UnnecessaryBoxedVariable=ERROR +# It is unnecessary for this variable to be boxed. Use the primitive instead. 
+ +#UnnecessarySetDefault=WARN +# Unnecessary call to NullPointerTester#setDefault + +#UnnecessaryStaticImport=WARN +# Using static imports for types is unnecessary + +UseEnumSwitch=WARN +# Prefer using a switch instead of a chained if-else for enums + +#VoidMissingNullable=WARN +# The type Void is not annotated @Nullable + +#WildcardImport=WARN +# Wildcard imports, static or otherwise, should not be used + +#################################################################################################### +# SLF4j +# See https://github.com/KengoTODA/errorprone-slf4j +#################################################################################################### + +Slf4jPlaceholderMismatch=ERROR +Slf4jFormatShouldBeConst=ERROR +Slf4jLoggerShouldBePrivate=ERROR +Slf4jLoggerShouldBeFinal=ERROR +Slf4jLoggerShouldBeNonStatic=OFF +Slf4jIllegalPassedClass=ERROR +#Slf4jSignOnlyFormat=OFF +#Slf4jDoNotLogMessageOfExceptionExplicitly=ERROR + diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 0000000..1e8305a --- /dev/null +++ b/gradle.properties @@ -0,0 +1,24 @@ +# enable the Gradle build cache +org.gradle.caching=true +# enable Gradle parallel builds +org.gradle.parallel=true +# configure only necessary Gradle tasks +org.gradle.configureondemand=true +# also enable the configuration cache +#org.gradle.unsafe.configuration-cache=true +#org.gradle.unsafe.configuration-cache-problems=warn +# bump the Gradle daemon heap size (you can set bigger heap sizes as well) +org.gradle.jvmargs=\ + -Xms2g -Xmx2g -XX:MaxMetaspaceSize=768m \ + -Dfile.encoding=UTF-8 \ + -Duser.language=en -Duser.country=US -Duser.variant= \ + --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ + --add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \ + --add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED \ + --add-exports=jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED \ + --add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \ 
+ --add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED \ + --add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \ + --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED \ + --add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED \ + --add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED diff --git a/gradle/baselibs.versions.toml b/gradle/baselibs.versions.toml new file mode 100644 index 0000000..3147706 --- /dev/null +++ b/gradle/baselibs.versions.toml @@ -0,0 +1,26 @@ +# Dependencies needed by buildSrc/ + +[versions] +errorpronePlugin = "3.0.1" +jandexPlugin = "1.86" +junit = "5.9.2" +nessieBuildPlugins = "0.2.20" +protobufPlugin = "0.9.2" +shadowPlugin = "8.1.0" +spotlessPlugin = "6.16.0" + +[libraries] +assertj-core = { module = "org.assertj:assertj-core", version = "3.24.2" } +errorprone = { module = "net.ltgt.gradle:gradle-errorprone-plugin", version.ref = "errorpronePlugin" } +idea-ext = { module = "gradle.plugin.org.jetbrains.gradle.plugin.idea-ext:gradle-idea-ext", version = "1.1.7" } +jandex = { module = "com.github.vlsi.gradle:jandex-plugin", version.ref = "jandexPlugin" } +junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } +junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api" } +junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } +junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } +nessie-buildsupport-jacoco = { module = "org.projectnessie.buildsupport:jacoco", version.ref = "nessieBuildPlugins" } +nessie-buildsupport-jacoco-aggregator = { module = "org.projectnessie.buildsupport:jacoco-aggregator", version.ref = "nessieBuildPlugins" } +nessie-buildsupport-reflectionconfig = { module = "org.projectnessie.buildsupport:reflection-config", version.ref = "nessieBuildPlugins" } +shadow = { module = "com.github.johnrengelman:shadow", version.ref = "shadowPlugin" } +spotless = { module = 
"com.diffplug.spotless:spotless-plugin-gradle", version.ref = "spotlessPlugin" } +protobuf = { module = "com.google.protobuf:protobuf-gradle-plugin", version.ref = "protobufPlugin" } diff --git a/gradle/contributors.csv b/gradle/contributors.csv new file mode 100644 index 0000000..e69de29 diff --git a/gradle/developers.csv b/gradle/developers.csv new file mode 100644 index 0000000..58012fc --- /dev/null +++ b/gradle/developers.csv @@ -0,0 +1 @@ +ajantha-bhat,Ajantha Bhat,https://github.com/ajantha-bhat \ No newline at end of file diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index bc349d0..0c442ec 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,22 +1,28 @@ [versions] assertj = "3.24.2" aws = "1.12.415" +checkstyle = "10.8.0" +errorprone = "2.18.0" +errorproneSlf4j = "0.1.18" googleJavaFormat = "1.16.0" guava = "31.1-jre" hadoop = "3.2.4" hive = "2.3.8" # this is in mapping with iceberg repo. Later versions have junit depedency problem iceberg = "1.1.0" immutables = "2.9.3" +jacoco = "0.8.8" +jandex = "3.0.5" +jmh = "1.36" junit = "5.9.2" logback = "1.4.5" logcaptor = "2.8.0" -nessie = "0.50.0" +nessie = "0.51.1" nessieBuildPlugins = "0.2.19" nessieRunner = "0.29.0" picocli = "4.7.1" +protobuf = "3.21.12" shadowPlugin = "7.1.2" slf4j = "1.7.36" -spotlessPlugin = "6.16.0" [libraries] assertj = { module = "org.assertj:assertj-core", version.ref = "assertj" } @@ -25,6 +31,11 @@ aws-sdk-glue = { module = "com.amazonaws:aws-java-sdk-glue", version.ref = "aws" aws-sdk-kms = { module = "com.amazonaws:aws-java-sdk-kms", version.ref = "aws" } aws-sdk-sts = { module = "com.amazonaws:aws-java-sdk-sts", version.ref = "aws" } aws-sdk-s3 = { module = "com.amazonaws:aws-java-sdk-s3", version.ref = "aws" } +checkstyle = { module = "com.puppycrawl.tools:checkstyle", version.ref = "checkstyle" } +errorprone-annotations = { module = "com.google.errorprone:error_prone_annotations", version.ref = "errorprone" } +errorprone-core = { 
module = "com.google.errorprone:error_prone_core", version.ref = "errorprone" } +errorprone-slf4j = { module = "jp.skypencil.errorprone.slf4j:errorprone-slf4j", version.ref = "errorproneSlf4j" } +findbugs-jsr305 = { module = "com.google.code.findbugs:jsr305", version = "3.0.2" } google-java-format = { module = "com.google.googlejavaformat:google-java-format", version.ref = "googleJavaFormat" } guava = { module = "com.google.guava:guava", version.ref = "guava" } hadoop-aws = { module = "org.apache.hadoop:hadoop-aws", version.ref = "hadoop" } @@ -32,6 +43,10 @@ hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "had iceberg-dell = { module = "org.apache.iceberg:iceberg-dell", version.ref = "iceberg" } iceberg-spark-runtime = { module = "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12", version.ref = "iceberg" } immutables = { module = "org.immutables:value", version.ref = "immutables" } +jacoco-ant = { module = "org.jacoco:org.jacoco.ant", version.ref = "jacoco" } +jacoco-report = { module = "org.jacoco:org.jacoco.report", version.ref = "jacoco" } +jacoco-maven-plugin = { module = "org.jacoco:jacoco-maven-plugin", version.ref = "jacoco" } +jandex = { module = "org.jboss:jandex", version.ref = "jandex" } junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } @@ -42,7 +57,6 @@ picocli = { module = "info.picocli:picocli", version.ref = "picocli" } slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } [plugins] -nessie-build-spotless = { id = "org.projectnessie.buildsupport.spotless", version.ref = "nessieBuildPlugins" } +errorprone = { id = "net.ltgt.errorprone", version = "3.0.1" } nessie-run = { id = "org.projectnessie", version.ref = "nessieRunner" } shadow = { id = "com.github.johnrengelman.shadow", version.ref = "shadowPlugin" } 
-spotless = { id = "com.diffplug.spotless", version.ref = "spotlessPlugin" } diff --git a/ide-name.txt b/ide-name.txt new file mode 100644 index 0000000..486369b --- /dev/null +++ b/ide-name.txt @@ -0,0 +1 @@ +Iceberg-Catalog-Migrator From f7f45517015dc1a02988327ec01e9ac9716c487f Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Wed, 8 Mar 2023 14:54:26 +0530 Subject: [PATCH 17/31] Self review --- buildSrc/build.gradle.kts | 1 - gradle/baselibs.versions.toml | 2 -- gradle/libs.versions.toml | 2 -- 3 files changed, 5 deletions(-) diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts index 500f2f0..aedfd90 100644 --- a/buildSrc/build.gradle.kts +++ b/buildSrc/build.gradle.kts @@ -30,7 +30,6 @@ dependencies { implementation(baselibs.jandex) implementation(baselibs.idea.ext) implementation(baselibs.shadow) - implementation(baselibs.protobuf) implementation(baselibs.errorprone) implementation(baselibs.nessie.buildsupport.jacoco) implementation(baselibs.nessie.buildsupport.reflectionconfig) diff --git a/gradle/baselibs.versions.toml b/gradle/baselibs.versions.toml index 3147706..028df8b 100644 --- a/gradle/baselibs.versions.toml +++ b/gradle/baselibs.versions.toml @@ -5,7 +5,6 @@ errorpronePlugin = "3.0.1" jandexPlugin = "1.86" junit = "5.9.2" nessieBuildPlugins = "0.2.20" -protobufPlugin = "0.9.2" shadowPlugin = "8.1.0" spotlessPlugin = "6.16.0" @@ -23,4 +22,3 @@ nessie-buildsupport-jacoco-aggregator = { module = "org.projectnessie.buildsuppo nessie-buildsupport-reflectionconfig = { module = "org.projectnessie.buildsupport:reflection-config", version.ref = "nessieBuildPlugins" } shadow = { module = "com.github.johnrengelman:shadow", version.ref = "shadowPlugin" } spotless = { module = "com.diffplug.spotless:spotless-plugin-gradle", version.ref = "spotlessPlugin" } -protobuf = { module = "com.google.protobuf:protobuf-gradle-plugin", version.ref = "protobufPlugin" } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 0c442ec..a1954f0 
100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -12,7 +12,6 @@ iceberg = "1.1.0" immutables = "2.9.3" jacoco = "0.8.8" jandex = "3.0.5" -jmh = "1.36" junit = "5.9.2" logback = "1.4.5" logcaptor = "2.8.0" @@ -20,7 +19,6 @@ nessie = "0.51.1" nessieBuildPlugins = "0.2.19" nessieRunner = "0.29.0" picocli = "4.7.1" -protobuf = "3.21.12" shadowPlugin = "7.1.2" slf4j = "1.7.36" From 3db80bbf331c093c959282a9490e44852652ecde Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Thu, 9 Mar 2023 00:10:19 +0530 Subject: [PATCH 18/31] Address review comments from March 9 --- .../migration/api/test/AbstractTest.java | 55 ++++--- .../api/test/AbstractTestCatalogMigrator.java | 63 +++++--- .../api/test/CustomCatalogMigratorTest.java | 9 +- .../api/AbstractCatalogMigrator.java | 144 ------------------ .../migration/api/CatalogMigrator.java | 140 ++++++++++++++++- .../migration/api/CatalogMigratorParams.java | 39 ----- .../api/CatalogMigratorParamsTest.java | 11 +- .../api/UnsupportedNamespaceTest.java | 6 +- .../migration/cli/BaseRegisterCommand.java | 106 ++++++------- .../catalog/migration/cli/MigrateCommand.java | 43 +++++- .../migration/cli/RegisterCommand.java | 31 +++- .../migration/cli/SourceCatalogOptions.java | 2 +- .../migration/cli/TargetCatalogOptions.java | 2 +- 13 files changed, 329 insertions(+), 322 deletions(-) delete mode 100644 api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java delete mode 100644 api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java diff --git a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java index d12d20c..3c2d4f1 100644 --- a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java +++ 
b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java @@ -17,9 +17,10 @@ import java.nio.file.Path; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.stream.Stream; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.Schema; @@ -37,6 +38,9 @@ public abstract class AbstractTest { protected static @TempDir Path logDir; + private static final List namespaceList = + Arrays.asList(Namespace.of("foo"), Namespace.of("bar"), Namespace.of("db1")); + @BeforeAll protected static void initLogDir() { System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); @@ -49,6 +53,24 @@ protected static void initLogDir() { new Schema( Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())).fields()); + protected static void createNamespaces() { + namespaceList.forEach(namespace -> ((SupportsNamespaces) catalog1).createNamespace(namespace)); + // don't create "db1" namespace in catalog2 + namespaceList + .subList(0, 2) + .forEach(namespace -> ((SupportsNamespaces) catalog2).createNamespace(namespace)); + } + + protected static void dropNamespaces() { + Stream.of(catalog1, catalog2) + .map(catalog -> (SupportsNamespaces) catalog) + .forEach( + catalog -> + namespaceList.stream() + .filter(catalog::namespaceExists) + .forEach(catalog::dropNamespace)); + } + protected static void createTables() { // two tables in 'foo' namespace catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl1"), schema); @@ -58,20 +80,14 @@ protected static void createTables() { catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); } - protected static void createNamespaces() { - ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("foo"), Collections.emptyMap()); - ((SupportsNamespaces) catalog1).createNamespace(Namespace.of("bar"), 
Collections.emptyMap()); - - ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("foo"), Collections.emptyMap()); - ((SupportsNamespaces) catalog2).createNamespace(Namespace.of("bar"), Collections.emptyMap()); - } - - protected static void dropNamespaces() { - ((SupportsNamespaces) catalog1).dropNamespace(Namespace.of("foo")); - ((SupportsNamespaces) catalog1).dropNamespace(Namespace.of("bar")); - - ((SupportsNamespaces) catalog2).dropNamespace(Namespace.of("foo")); - ((SupportsNamespaces) catalog2).dropNamespace(Namespace.of("bar")); + protected static void dropTables() { + Stream.of(catalog1, catalog2) + .forEach( + catalog -> + namespaceList.stream() + .filter(namespace -> ((SupportsNamespaces) catalog).namespaceExists(namespace)) + .forEach( + namespace -> catalog.listTables(namespace).forEach(catalog::dropTable))); } protected static Catalog createHadoopCatalog(String warehousePath, String name) { @@ -90,13 +106,4 @@ protected static Catalog createNessieCatalog(String warehousePath, String uri) { return CatalogUtil.loadCatalog( NessieCatalog.class.getName(), "nessie", properties, new Configuration()); } - - protected static void dropTables() { - Arrays.asList(Namespace.of("foo"), Namespace.of("bar")) - .forEach( - namespace -> { - catalog1.listTables(namespace).forEach(catalog1::dropTable); - catalog2.listTables(namespace).forEach(catalog2::dropTable); - }); - } } diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java index f15d84c..8efc0f8 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java @@ -31,7 +31,7 @@ import org.junit.jupiter.params.provider.ValueSource; import 
org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; -import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; +import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrator; public abstract class AbstractTestCatalogMigrator extends AbstractTest { @@ -91,9 +91,8 @@ public void testRegister(boolean deleteSourceTables) { @ValueSource(booleans = {true, false}) public void testRegisterSelectedTables(boolean deleteSourceTables) { // using `--identifiers` option - ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); CatalogMigrationResult result = - new CatalogMigrator(builder.build()) + catalogMigratorWithDefaultArgs(deleteSourceTables) .registerTables(Collections.singletonList(TableIdentifier.parse("bar.tbl3"))) .result(); Assertions.assertThat(result.registeredTableIdentifiers()) @@ -106,8 +105,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { .containsExactly(TableIdentifier.parse("bar.tbl3")); // using --identifiers-regex option which matches all the tables starts with "foo." 
- builder = builderWithDefaultArgs(deleteSourceTables); - CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); + CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); result = catalogMigrator .registerTables(catalogMigrator.getMatchingTableIdentifiers("^foo\\..*")) @@ -130,9 +128,8 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { @ValueSource(booleans = {true, false}) public void testRegisterError(boolean deleteSourceTables) { // use invalid namespace which leads to NoSuchTableException - ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); CatalogMigrationResult result = - new CatalogMigrator(builder.build()) + catalogMigratorWithDefaultArgs(deleteSourceTables) .registerTables(Collections.singletonList(TableIdentifier.parse("dummy.tbl3"))) .result(); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); @@ -141,9 +138,8 @@ public void testRegisterError(boolean deleteSourceTables) { Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); // try to register same table twice which leads to AlreadyExistsException - builder = builderWithDefaultArgs(deleteSourceTables); result = - new CatalogMigrator(builder.build()) + catalogMigratorWithDefaultArgs(deleteSourceTables) .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))) .result(); Assertions.assertThat(result.registeredTableIdentifiers()) @@ -151,9 +147,8 @@ public void testRegisterError(boolean deleteSourceTables) { Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - builder = builderWithDefaultArgs(deleteSourceTables); result = - new CatalogMigrator(builder.build()) + catalogMigratorWithDefaultArgs(deleteSourceTables) .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))) .result(); 
Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); @@ -167,9 +162,8 @@ public void testRegisterError(boolean deleteSourceTables) { @ValueSource(booleans = {true, false}) public void testRegisterWithFewFailures(boolean deleteSourceTables) { // register only foo.tbl2 - ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); CatalogMigrationResult result = - new CatalogMigrator(builder.build()) + catalogMigratorWithDefaultArgs(deleteSourceTables) .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))) .result(); Assertions.assertThat(result.registeredTableIdentifiers()) @@ -206,12 +200,12 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { @ValueSource(booleans = {true, false}) public void testRegisterNoTables(boolean deleteSourceTables) { // source catalog is catalog2 which has no tables. - ImmutableCatalogMigratorParams.Builder builder = - ImmutableCatalogMigratorParams.builder() + CatalogMigrator catalogMigrator = + ImmutableCatalogMigrator.builder() .sourceCatalog(catalog2) .targetCatalog(catalog1) - .deleteEntriesFromSourceCatalog(deleteSourceTables); - CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); + .deleteEntriesFromSourceCatalog(deleteSourceTables) + .build(); List matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); Assertions.assertThat(matchingTableIdentifiers).isEmpty(); @@ -250,8 +244,7 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E @ParameterizedTest @ValueSource(booleans = {true, false}) public void testListingTableIdentifiers(boolean deleteSourceTables) { - ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); - CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); + CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); List matchingTableIdentifiers = 
catalogMigrator.getMatchingTableIdentifiers(null); @@ -268,19 +261,39 @@ public void testListingTableIdentifiers(boolean deleteSourceTables) { TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); } + @Order(7) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterWithNewNamespace(boolean deleteSourceTables) { + // catalog2 doesn't have a namespace "db1" + catalog1.createTable(TableIdentifier.of(Namespace.of("db1"), "tbl5"), schema); + + CatalogMigrationResult result = + catalogMigratorWithDefaultArgs(deleteSourceTables) + .registerTables(Collections.singletonList(TableIdentifier.parse("db1.tbl5"))) + .result(); + + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactly(TableIdentifier.parse("db1.tbl5")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + + Assertions.assertThat(catalog2.listTables(Namespace.of("db1"))) + .containsExactly(TableIdentifier.parse("db1.tbl5")); + } + private CatalogMigrationResult registerAllTables(boolean deleteSourceTables) { - ImmutableCatalogMigratorParams.Builder builder = builderWithDefaultArgs(deleteSourceTables); - CatalogMigrator catalogMigrator = new CatalogMigrator(builder.build()); + CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); return catalogMigrator .registerTables(catalogMigrator.getMatchingTableIdentifiers(null)) .result(); } - private ImmutableCatalogMigratorParams.Builder builderWithDefaultArgs( - boolean deleteSourceTables) { - return ImmutableCatalogMigratorParams.builder() + private CatalogMigrator catalogMigratorWithDefaultArgs(boolean deleteSourceTables) { + return ImmutableCatalogMigrator.builder() .sourceCatalog(catalog1) .targetCatalog(catalog2) - .deleteEntriesFromSourceCatalog(deleteSourceTables); + .deleteEntriesFromSourceCatalog(deleteSourceTables) + .build(); } } diff --git 
a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java index a407518..6aaf5ca 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java +++ b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java @@ -32,8 +32,7 @@ import org.junit.jupiter.api.io.TempDir; import org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; -import org.projectnessie.tools.catalog.migration.api.CatalogMigratorParams; -import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; +import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrator; public class CustomCatalogMigratorTest extends AbstractTest { @@ -65,14 +64,12 @@ protected static void tearDown() { @Test public void testRegister() { - CatalogMigratorParams params = - ImmutableCatalogMigratorParams.builder() + CatalogMigrator catalogMigrator = + ImmutableCatalogMigrator.builder() .sourceCatalog(catalog1) .targetCatalog(catalog2) .deleteEntriesFromSourceCatalog(true) .build(); - - CatalogMigrator catalogMigrator = new CatalogMigrator(params); // should fail to register as catalog doesn't support register table operations. 
CatalogMigrationResult result = catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers(null)).result(); diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java deleted file mode 100644 index db54d59..0000000 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/AbstractCatalogMigrator.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.projectnessie.tools.catalog.migration.api; - -import com.google.common.base.Preconditions; -import java.util.List; -import java.util.Objects; -import java.util.function.Predicate; -import java.util.regex.Pattern; -import java.util.stream.Collectors; -import org.apache.iceberg.BaseTable; -import org.apache.iceberg.TableOperations; -import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.SupportsNamespaces; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.hadoop.HadoopCatalog; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public abstract class AbstractCatalogMigrator { - private static final Logger LOG = LoggerFactory.getLogger(AbstractCatalogMigrator.class); - private final ImmutableCatalogMigrationResult.Builder resultBuilder = - ImmutableCatalogMigrationResult.builder(); - - public abstract CatalogMigratorParams getParams(); - - /** - * Get the table identifiers which matches the regular expression pattern input from all the - * namespaces. - * - * @param identifierRegex regular expression pattern. If null, fetches all the table identifiers - * from all the namespaces. - * @return List of table identifiers. 
- */ - public List getMatchingTableIdentifiers(String identifierRegex) { - Catalog sourceCatalog = getParams().sourceCatalog(); - if (!(sourceCatalog instanceof SupportsNamespaces)) { - throw new UnsupportedOperationException( - String.format( - "source catalog %s doesn't implement SupportsNamespaces to list all namespaces.", - sourceCatalog.name())); - } - LOG.info("Collecting all the namespaces from source catalog..."); - List namespaces = ((SupportsNamespaces) sourceCatalog).listNamespaces(); - Predicate matchedIdentifiersPredicate; - if (identifierRegex == null) { - LOG.info("Collecting all the tables from all the namespaces of source catalog..."); - matchedIdentifiersPredicate = tableIdentifier -> true; - } else { - LOG.info( - "Collecting all the tables from all the namespaces of source catalog" - + " which matches the regex pattern:{}", - identifierRegex); - Pattern pattern = Pattern.compile(identifierRegex); - matchedIdentifiersPredicate = - tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); - } - return namespaces.stream() - .filter(Objects::nonNull) - .flatMap( - namespace -> - sourceCatalog.listTables(namespace).stream().filter(matchedIdentifiersPredicate)) - .collect(Collectors.toList()); - } - - /** - * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). - * - *

Users must make sure that no in-progress commits on the tables of source catalog during - * registration. - * - * @param identifiers List of table identifiers to register or migrate - * @return {@code this} for use in a chained invocation - */ - public AbstractCatalogMigrator registerTables(List identifiers) { - Preconditions.checkArgument(identifiers != null, "Identifiers list is null"); - - if (identifiers.isEmpty()) { - LOG.info("Identifiers list is empty"); - return this; - } - - identifiers.forEach( - tableIdentifier -> { - boolean isRegistered = registerTable(tableIdentifier); - if (isRegistered) { - resultBuilder.addRegisteredTableIdentifiers(tableIdentifier); - } else { - resultBuilder.addFailedToRegisterTableIdentifiers(tableIdentifier); - } - - // HadoopCatalog dropTable will delete the table files completely even when purge is - // false. So, skip dropTable for HadoopCatalog. - boolean deleteTableFromSourceCatalog = - !(getParams().sourceCatalog() instanceof HadoopCatalog) - && isRegistered - && getParams().deleteEntriesFromSourceCatalog(); - try { - if (deleteTableFromSourceCatalog - && !getParams().sourceCatalog().dropTable(tableIdentifier, false)) { - resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); - } - } catch (Exception exception) { - resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); - LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); - } - }); - return this; - } - - public CatalogMigrationResult result() { - return resultBuilder.build(); - } - - private boolean registerTable(TableIdentifier tableIdentifier) { - try { - // register the table to the target catalog - TableOperations ops = - ((BaseTable) getParams().sourceCatalog().loadTable(tableIdentifier)).operations(); - getParams() - .targetCatalog() - .registerTable(tableIdentifier, ops.current().metadataFileLocation()); - LOG.info("Successfully migrated the table {}", tableIdentifier); - return true; - } catch (Exception ex) 
{ - LOG.warn("Unable to register the table {}", tableIdentifier, ex); - return false; - } - } -} diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index aa1df19..d5704ef 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -15,16 +15,142 @@ */ package org.projectnessie.tools.catalog.migration.api; -public class CatalogMigrator extends AbstractCatalogMigrator { +import com.google.common.base.Preconditions; +import java.util.List; +import java.util.Objects; +import java.util.function.Predicate; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.apache.iceberg.BaseTable; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.immutables.value.Value; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; - private final CatalogMigratorParams params; +@Value.Immutable +public abstract class CatalogMigrator { - public CatalogMigrator(CatalogMigratorParams params) { - this.params = params; + /** Source {@link Catalog} from which the tables are chosen. */ + public abstract Catalog sourceCatalog(); + + /** Target {@link Catalog} to which the tables need to be migrated. */ + public abstract Catalog targetCatalog(); + + /** Delete the table entries from source catalog after successful migration. 
*/ + public abstract boolean deleteEntriesFromSourceCatalog(); + + private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrator.class); + private final ImmutableCatalogMigrationResult.Builder resultBuilder = + ImmutableCatalogMigrationResult.builder(); + + /** + * Get the table identifiers which matches the regular expression pattern input from all the + * namespaces. + * + * @param identifierRegex regular expression pattern. If null, fetches all the table identifiers + * from all the namespaces. + * @return List of table identifiers. + */ + public List getMatchingTableIdentifiers(String identifierRegex) { + Catalog sourceCatalog = sourceCatalog(); + if (!(sourceCatalog instanceof SupportsNamespaces)) { + throw new UnsupportedOperationException( + String.format( + "source catalog %s doesn't implement SupportsNamespaces to list all namespaces.", + sourceCatalog.name())); + } + LOG.info("Collecting all the namespaces from source catalog..."); + List namespaces = ((SupportsNamespaces) sourceCatalog).listNamespaces(); + Predicate matchedIdentifiersPredicate; + if (identifierRegex == null) { + LOG.info("Collecting all the tables from all the namespaces of source catalog..."); + matchedIdentifiersPredicate = tableIdentifier -> true; + } else { + LOG.info( + "Collecting all the tables from all the namespaces of source catalog" + + " which matches the regex pattern:{}", + identifierRegex); + Pattern pattern = Pattern.compile(identifierRegex); + matchedIdentifiersPredicate = + tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); + } + return namespaces.stream() + .filter(Objects::nonNull) + .flatMap( + namespace -> + sourceCatalog.listTables(namespace).stream().filter(matchedIdentifiersPredicate)) + .collect(Collectors.toList()); + } + + /** + * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). + * + *

Users must make sure that no in-progress commits on the tables of source catalog during + * registration. + * + * @param identifiers List of table identifiers to register or migrate + * @return {@code this} for use in a chained invocation + */ + public CatalogMigrator registerTables(List identifiers) { + Preconditions.checkArgument(identifiers != null, "Identifiers list is null"); + Preconditions.checkArgument( + !targetCatalog().equals(sourceCatalog()), "target catalog is same as source catalog"); + + if (identifiers.isEmpty()) { + LOG.info("Identifiers list is empty"); + return this; + } + + identifiers.forEach( + tableIdentifier -> { + boolean isRegistered = registerTable(tableIdentifier); + if (isRegistered) { + resultBuilder.addRegisteredTableIdentifiers(tableIdentifier); + } else { + resultBuilder.addFailedToRegisterTableIdentifiers(tableIdentifier); + } + + // HadoopCatalog dropTable will delete the table files completely even when purge is + // false. So, skip dropTable for HadoopCatalog. 
+ boolean deleteTableFromSourceCatalog = + !(sourceCatalog() instanceof HadoopCatalog) + && isRegistered + && deleteEntriesFromSourceCatalog(); + try { + if (deleteTableFromSourceCatalog + && !sourceCatalog().dropTable(tableIdentifier, false)) { + resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); + } + } catch (Exception exception) { + resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); + LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); + } + }); + return this; + } + + public CatalogMigrationResult result() { + return resultBuilder.build(); } - @Override - public CatalogMigratorParams getParams() { - return params; + private boolean registerTable(TableIdentifier tableIdentifier) { + try { + if (!((SupportsNamespaces) targetCatalog()).namespaceExists(tableIdentifier.namespace())) { + ((SupportsNamespaces) targetCatalog()).createNamespace(tableIdentifier.namespace()); + } + // register the table to the target catalog + TableOperations ops = ((BaseTable) sourceCatalog().loadTable(tableIdentifier)).operations(); + targetCatalog().registerTable(tableIdentifier, ops.current().metadataFileLocation()); + LOG.info("Successfully migrated the table {}", tableIdentifier); + return true; + } catch (Exception ex) { + LOG.warn("Unable to register the table {}", tableIdentifier, ex); + return false; + } } } diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java deleted file mode 100644 index 2dfdc33..0000000 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParams.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.projectnessie.tools.catalog.migration.api; - -import com.google.common.base.Preconditions; -import org.apache.iceberg.catalog.Catalog; -import org.immutables.value.Value; - -@Value.Immutable -public interface CatalogMigratorParams { - - /** Source {@link Catalog} from which the tables are chosen. */ - Catalog sourceCatalog(); - - /** Target {@link Catalog} to which the tables need to be migrated. */ - Catalog targetCatalog(); - - /** Delete the table entries from source catalog after successful migration. */ - boolean deleteEntriesFromSourceCatalog(); - - @Value.Check - default void validate() { - Preconditions.checkArgument( - !targetCatalog().equals(sourceCatalog()), "target catalog is same as source catalog"); - } -} diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java index 3d4c995..4d7b62f 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java @@ -16,7 +16,9 @@ package org.projectnessie.tools.catalog.migration.api; import java.nio.file.Path; +import java.util.Collections; import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -39,17 +41,18 @@ public void 
testInvalidArgs() { Assertions.assertThatThrownBy( () -> - ImmutableCatalogMigratorParams.builder() + ImmutableCatalogMigrator.builder() .sourceCatalog(catalog2) // source-catalog is same as target catalog .targetCatalog(catalog2) .deleteEntriesFromSourceCatalog(true) - .build()) + .build() + .registerTables(Collections.singletonList(TableIdentifier.parse("foo.abc")))) .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("target catalog is same as source catalog"); Assertions.assertThatThrownBy( () -> - ImmutableCatalogMigratorParams.builder() + ImmutableCatalogMigrator.builder() .sourceCatalog(catalog1) .targetCatalog(null) // target-catalog is null .deleteEntriesFromSourceCatalog(true) @@ -59,7 +62,7 @@ public void testInvalidArgs() { Assertions.assertThatThrownBy( () -> - ImmutableCatalogMigratorParams.builder() + ImmutableCatalogMigrator.builder() .sourceCatalog(null) // source-catalog is null .targetCatalog(catalog2) .deleteEntriesFromSourceCatalog(true) diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java index 3af857d..722d7e9 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java @@ -68,15 +68,13 @@ public void renameTable(TableIdentifier from, TableIdentifier to) {} Catalog catalog1 = new TestCatalog(); Catalog catalog2 = new TestCatalog(); - CatalogMigratorParams params = - ImmutableCatalogMigratorParams.builder() + CatalogMigrator catalogMigrator = + ImmutableCatalogMigrator.builder() .sourceCatalog(catalog1) .targetCatalog(catalog2) .deleteEntriesFromSourceCatalog(true) .build(); - CatalogMigrator catalogMigrator = new CatalogMigrator(params); - Assertions.assertThatThrownBy(() -> catalogMigrator.getMatchingTableIdentifiers(null)) 
.isInstanceOf(UnsupportedOperationException.class) .hasMessageContaining( diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index 03f3a47..9445d44 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -27,11 +27,8 @@ import java.util.stream.Collectors; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.hadoop.HadoopCatalog; import org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; -import org.projectnessie.tools.catalog.migration.api.CatalogMigratorParams; -import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigratorParams; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import picocli.CommandLine; @@ -73,8 +70,6 @@ public abstract class BaseRegisterCommand implements Callable { description = "optional configuration to disable warning prompts which needs console input.") private boolean disablePrompts; - private boolean deleteSourceCatalogTables; - private static final int BATCH_SIZE = 100; public static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; public static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; @@ -84,15 +79,21 @@ public abstract class BaseRegisterCommand implements Callable { public BaseRegisterCommand() {} - protected abstract boolean isDeleteSourceCatalogTables(); + protected abstract CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetCatalog); + + protected abstract boolean canProceed(Catalog sourceCatalog); + + protected abstract String operation(); + + protected abstract String operated(); + + protected 
abstract String operate(); @Override public Integer call() { - List identifiers; + List identifiers = Collections.emptyList(); if (identifierOptions != null) { identifiers = identifierOptions.processIdentifiersInput(); - } else { - identifiers = Collections.emptyList(); } Catalog sourceCatalog = sourceCatalogOptions.build(); @@ -101,18 +102,11 @@ public Integer call() { Catalog targetCatalog = targetCatalogOptions.build(); consoleLog.info("Configured target catalog: {}", targetCatalog.name()); - if (!canProceed(sourceCatalog)) { + if (!isDryRun && !disablePrompts && !canProceed(sourceCatalog)) { return 0; } - deleteSourceCatalogTables = isDeleteSourceCatalogTables(); - CatalogMigratorParams params = - ImmutableCatalogMigratorParams.builder() - .sourceCatalog(sourceCatalog) - .targetCatalog(targetCatalog) - .deleteEntriesFromSourceCatalog(deleteSourceCatalogTables) - .build(); - CatalogMigrator catalogMigrator = new CatalogMigrator(params); + CatalogMigrator catalogMigrator = catalogMigrator(sourceCatalog, targetCatalog); String identifierRegEx = identifierOptions != null ? identifierOptions.identifiersRegEx : null; if (identifiers.isEmpty()) { @@ -130,17 +124,14 @@ public Integer call() { identifiers = catalogMigrator.getMatchingTableIdentifiers(identifierRegEx); } - String operation = deleteSourceCatalogTables ? 
"migration" : "registration"; - consoleLog.info("Identified {} tables for {}.", identifiers.size(), operation); + consoleLog.info("Identified {} tables for {}.", identifiers.size(), operation()); if (isDryRun) { - writeToFile(outputDirPath.resolve(DRY_RUN_FILE), identifiers); - consoleLog.info("Dry run is completed."); - printDryRunResults(identifiers); + handleDryRunResult(identifiers); return 0; } - consoleLog.info("Started {} ...", operation); + consoleLog.info("Started {} ...", operation()); List> identifierBatches = Lists.partition(identifiers, BATCH_SIZE); int totalIdentifiers = identifiers.size(); @@ -150,52 +141,42 @@ public Integer call() { catalogMigrator.registerTables(identifierBatch); consoleLog.info( "Attempted {} for {} tables out of {} tables.", - operation, + operation(), counter.addAndGet(identifierBatch.size()), totalIdentifiers); }); - CatalogMigrationResult result = catalogMigrator.result(); + handleResults(catalogMigrator.result()); + return 0; + } + + private void handleResults(CatalogMigrationResult result) { writeToFile( outputDirPath.resolve(FAILED_IDENTIFIERS_FILE), result.failedToRegisterTableIdentifiers()); writeToFile( outputDirPath.resolve(FAILED_TO_DELETE_AT_SOURCE_FILE), result.failedToDeleteTableIdentifiers()); - consoleLog.info("Finished {} ...", operation); - printSummary(result, sourceCatalog.name(), targetCatalog.name()); + consoleLog.info("Finished {} ...", operation()); + printSummary(result); printDetails(result); - return 0; } - private boolean canProceed(Catalog sourceCatalog) { - if (isDryRun || disablePrompts) { - return true; - } - if (deleteSourceCatalogTables) { - if (sourceCatalog instanceof HadoopCatalog) { - consoleLog.warn( - "Source catalog type is HADOOP and it doesn't support dropping tables just from " - + "catalog. {}Avoid operating the migrated tables from the source catalog after migration. 
" - + "Use the tables from target catalog.", - System.lineSeparator()); - } - return PromptUtil.proceedForMigration(); - } else { - return PromptUtil.proceedForRegistration(); - } + private void handleDryRunResult(List identifiers) { + writeToFile(outputDirPath.resolve(DRY_RUN_FILE), identifiers); + consoleLog.info("Dry run is completed."); + printDryRunResult(identifiers); } - private void printSummary( - CatalogMigrationResult result, String sourceCatalogName, String targetCatalogName) { + private void printSummary(CatalogMigrationResult result) { consoleLog.info("Summary: "); if (!result.registeredTableIdentifiers().isEmpty()) { consoleLog.info( "Successfully {} {} tables from {} catalog to {} catalog.", - deleteSourceCatalogTables ? "migrated" : "registered", + operated(), result.registeredTableIdentifiers().size(), - sourceCatalogName, - targetCatalogName); + sourceCatalogOptions.type.name(), + targetCatalogOptions.type.name()); } if (!result.failedToRegisterTableIdentifiers().isEmpty()) { consoleLog.info( @@ -204,10 +185,10 @@ private void printSummary( + "Failed identifiers are written into `{}`. " + "Retry with that file using `--identifiers-from-file` option " + "if the failure is because of network/connection timeouts.", - deleteSourceCatalogTables ? "migrate" : "register", + operate(), result.failedToRegisterTableIdentifiers().size(), - sourceCatalogName, - targetCatalogName, + sourceCatalogOptions.type.name(), + targetCatalogOptions.type.name(), FAILED_IDENTIFIERS_FILE); } if (!result.failedToDeleteTableIdentifiers().isEmpty()) { @@ -216,7 +197,7 @@ private void printSummary( + "Please check the `catalog_migration.log` file for the reason. 
" + "{}Failed to delete identifiers are written into `{}`.", result.failedToDeleteTableIdentifiers().size(), - sourceCatalogName, + sourceCatalogOptions.type.name(), System.lineSeparator(), FAILED_TO_DELETE_AT_SOURCE_FILE); } @@ -227,7 +208,7 @@ private void printDetails(CatalogMigrationResult result) { if (!result.registeredTableIdentifiers().isEmpty()) { consoleLog.info( "Successfully {} these tables:{}{}", - deleteSourceCatalogTables ? "migrated" : "registered", + operated(), System.lineSeparator(), result.registeredTableIdentifiers()); } @@ -235,7 +216,7 @@ private void printDetails(CatalogMigrationResult result) { if (!result.failedToRegisterTableIdentifiers().isEmpty()) { consoleLog.info( "Failed to {} these tables:{}{}", - deleteSourceCatalogTables ? "migrate" : "register", + operate(), System.lineSeparator(), result.failedToRegisterTableIdentifiers()); } @@ -248,25 +229,24 @@ private void printDetails(CatalogMigrationResult result) { } } - private void printDryRunResults(List result) { + private void printDryRunResult(List result) { consoleLog.info("Summary: "); if (result.isEmpty()) { consoleLog.info( - "No tables are identified for {}. Please check logs for more info.", - deleteSourceCatalogTables ? "migration" : "registration"); + "No tables are identified for {}. Please check logs for more info.", operation()); return; } consoleLog.info( "Identified {} tables for {} by dry-run. These identifiers are also written into {}. " + "You can use this file with `--identifiers-from-file` option.", result.size(), - deleteSourceCatalogTables ? "migration" : "registration", + operation(), DRY_RUN_FILE); - consoleLog.info("Details: "); consoleLog.info( - "Identified these tables for {} by dry-run:{}{}", - deleteSourceCatalogTables ? 
"migration" : "registration", + "Details: {}Identified these tables for {} by dry-run:{}{}", + System.lineSeparator(), + operation(), System.lineSeparator(), result); } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java index fcebf33..29e4634 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java @@ -15,6 +15,12 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; +import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import picocli.CommandLine; @CommandLine.Command( @@ -31,8 +37,41 @@ + "catalog.") public class MigrateCommand extends BaseRegisterCommand { + private final Logger consoleLog = LoggerFactory.getLogger("console-log"); + + @Override + protected CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetCatalog) { + return ImmutableCatalogMigrator.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(true) + .build(); + } + + @Override + protected boolean canProceed(Catalog sourceCatalog) { + if (sourceCatalog instanceof HadoopCatalog) { + consoleLog.warn( + "Source catalog type is HADOOP and it doesn't support dropping tables just from " + + "catalog. {}Avoid operating the migrated tables from the source catalog after migration. 
" + + "Use the tables from target catalog.", + System.lineSeparator()); + } + return PromptUtil.proceedForMigration(); + } + + @Override + protected String operation() { + return "migration"; + } + + @Override + protected String operated() { + return "migrated"; + } + @Override - protected boolean isDeleteSourceCatalogTables() { - return true; + protected String operate() { + return "migrate"; } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java index 97d0abe..f180bae 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java @@ -15,6 +15,9 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import org.apache.iceberg.catalog.Catalog; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; +import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrator; import picocli.CommandLine; @CommandLine.Command( @@ -30,7 +33,31 @@ public class RegisterCommand extends BaseRegisterCommand { @Override - protected boolean isDeleteSourceCatalogTables() { - return false; + protected CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetCatalog) { + return ImmutableCatalogMigrator.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(false) + .build(); + } + + @Override + protected boolean canProceed(Catalog sourceCatalog) { + return PromptUtil.proceedForRegistration(); + } + + @Override + protected String operation() { + return "registration"; + } + + @Override + protected String operated() { + return "registered"; + } + + @Override + protected String operate() { + return "register"; } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java 
b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java index 2f2dd11..191f23a 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java @@ -28,7 +28,7 @@ public class SourceCatalogOptions { description = "source catalog type. " + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") - private org.projectnessie.tools.catalog.migration.cli.CatalogUtil.CatalogType type; + protected org.projectnessie.tools.catalog.migration.cli.CatalogUtil.CatalogType type; @CommandLine.Option( names = "--source-catalog-properties", diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java index 5c5fdb9..748df7f 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java @@ -28,7 +28,7 @@ public class TargetCatalogOptions { description = "target catalog type. 
" + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") - private CatalogUtil.CatalogType type; + protected CatalogUtil.CatalogType type; @CommandLine.Option( names = "--target-catalog-properties", From 8e4c879e4347076041a1a0f6de6a5531e6ea43cd Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Wed, 15 Mar 2023 20:43:03 +0530 Subject: [PATCH 19/31] Address review comments from March 15 --- api-test/build.gradle.kts | 48 +--------- .../api/test/HadoopCatalogMigratorTest.java | 35 -------- .../test/ITHadoopToHiveCatalogMigrator.java | 46 ---------- api/build.gradle.kts | 43 +++++++++ .../migration/api/CatalogMigrator.java | 9 +- .../api}/AbstractTestCatalogMigrator.java | 22 +++-- .../api}/CustomCatalogMigratorTest.java | 6 +- .../api/HadoopCatalogMigratorTest.java | 70 +++++++++++++++ .../api/ITHadoopToHiveCatalogMigrator.java | 81 +++++++++++++++++ .../api/ITHadoopToNessieCatalogMigrator.java | 89 +++++++++++++++++++ .../api}/ITHiveToHadoopCatalogMigrator.java | 3 +- .../api}/ITHiveToNessieCatalogMigrator.java | 3 +- .../api}/ITNessieToHiveCatalogMigrator.java | 3 +- .../src/test/resources/logback.xml | 3 +- .../migration/cli/BaseRegisterCommand.java | 5 +- .../catalog/migration/cli/MigrateCommand.java | 5 +- .../catalog/migration/cli/PromptUtil.java | 7 +- cli/src/main/resources/logback.xml | 4 +- 18 files changed, 328 insertions(+), 154 deletions(-) delete mode 100644 api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java delete mode 100644 api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java rename {api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test => api/src/test/java/org/projectnessie/tools/catalog/migration/api}/AbstractTestCatalogMigrator.java (97%) rename {api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test => 
api/src/test/java/org/projectnessie/tools/catalog/migration/api}/CustomCatalogMigratorTest.java (92%) create mode 100644 api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java create mode 100644 api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java create mode 100644 api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java rename {api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test => api/src/test/java/org/projectnessie/tools/catalog/migration/api}/ITHiveToHadoopCatalogMigrator.java (91%) rename {api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test => api/src/test/java/org/projectnessie/tools/catalog/migration/api}/ITHiveToNessieCatalogMigrator.java (92%) rename {api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test => api/src/test/java/org/projectnessie/tools/catalog/migration/api}/ITNessieToHiveCatalogMigrator.java (92%) rename {api-test => api}/src/test/resources/logback.xml (92%) diff --git a/api-test/build.gradle.kts b/api-test/build.gradle.kts index 296ffa7..6223883 100644 --- a/api-test/build.gradle.kts +++ b/api-test/build.gradle.kts @@ -17,59 +17,13 @@ plugins { `java-library` `maven-publish` - alias(libs.plugins.nessie.run) `build-conventions` } dependencies { - implementation(libs.slf4j) - implementation(libs.picocli) + implementation(libs.guava) implementation(libs.hadoop.common) implementation(libs.iceberg.spark.runtime) implementation(libs.junit.jupiter.api) implementation("org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests") - - testImplementation(project(":iceberg-catalog-migrator-api")) - - testRuntimeOnly(libs.logback.classic) - testImplementation(libs.assertj) - testImplementation(libs.junit.jupiter.params) - implementation(libs.junit.jupiter.engine) - - // for integration tests - 
testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { - // these are taken from iceberg repo configurations - exclude("org.apache.avro", "avro") - exclude("org.slf4j", "slf4j-log4j12") - exclude("org.pentaho") // missing dependency - exclude("org.apache.hbase") - exclude("org.apache.logging.log4j") - exclude("co.cask.tephra") - exclude("com.google.code.findbugs", "jsr305") - exclude("org.eclipse.jetty.aggregate", "jetty-all") - exclude("org.eclipse.jetty.orbit", "javax.servlet") - exclude("org.apache.parquet", "parquet-hadoop-bundle") - exclude("com.tdunning", "json") - exclude("javax.transaction", "transaction-api") - exclude("com.zaxxer", "HikariCP") - } - testImplementation("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") { - // these are taken from iceberg repo configurations - exclude("org.apache.avro", "avro") - exclude("org.slf4j", "slf4j-log4j12") - exclude("org.pentaho") // missing dependency - exclude("org.apache.hive", "hive-llap-tez") - exclude("org.apache.logging.log4j") - exclude("com.google.protobuf", "protobuf-java") - exclude("org.apache.calcite") - exclude("org.apache.calcite.avatica") - exclude("com.google.code.findbugs", "jsr305") - } - testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") - - nessieQuarkusServer( - "org.projectnessie.nessie:nessie-quarkus:${libs.versions.nessie.get()}:runner" - ) } - -nessieQuarkusApp { includeTask(tasks.named("intTest")) } diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java deleted file mode 100644 index c27fb92..0000000 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/HadoopCatalogMigratorTest.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the 
"License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.projectnessie.tools.catalog.migration.api.test; - -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; - -public class HadoopCatalogMigratorTest extends AbstractTestCatalogMigrator { - - @BeforeAll - protected static void setup() { - catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "catalog1"); - catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "catalog2"); - - createNamespaces(); - } - - @AfterAll - protected static void tearDown() { - dropNamespaces(); - } -} diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java b/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java deleted file mode 100644 index 7f441ef..0000000 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHadoopToHiveCatalogMigrator.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.projectnessie.tools.catalog.migration.api.test; - -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; - -public class ITHadoopToHiveCatalogMigrator extends AbstractTestCatalogMigrator { - - @BeforeAll - protected static void setup() throws Exception { - HiveMetaStoreRunner.startMetastore(); - - catalog1 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); - catalog2 = HiveMetaStoreRunner.hiveCatalog(); - - createNamespaces(); - } - - @AfterAll - protected static void tearDown() throws Exception { - dropNamespaces(); - HiveMetaStoreRunner.stopMetastore(); - } - - // disable large table test for IT to save CI time. It will be executed only for UT. - @Override - @Disabled - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); - } -} diff --git a/api/build.gradle.kts b/api/build.gradle.kts index c529a2f..27d7f23 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts @@ -17,6 +17,7 @@ plugins { `java-library` `maven-publish` + alias(libs.plugins.nessie.run) `build-conventions` } @@ -34,4 +35,46 @@ dependencies { testImplementation(libs.junit.jupiter.engine) testImplementation(libs.assertj) testImplementation(libs.hadoop.common) + + testImplementation(project(":iceberg-catalog-migrator-api-test")) + + // for integration tests + testImplementation( + "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" + ) + testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hbase") + exclude("org.apache.logging.log4j") 
+ exclude("co.cask.tephra") + exclude("com.google.code.findbugs", "jsr305") + exclude("org.eclipse.jetty.aggregate", "jetty-all") + exclude("org.eclipse.jetty.orbit", "javax.servlet") + exclude("org.apache.parquet", "parquet-hadoop-bundle") + exclude("com.tdunning", "json") + exclude("javax.transaction", "transaction-api") + exclude("com.zaxxer", "HikariCP") + } + testImplementation("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hive", "hive-llap-tez") + exclude("org.apache.logging.log4j") + exclude("com.google.protobuf", "protobuf-java") + exclude("org.apache.calcite") + exclude("org.apache.calcite.avatica") + exclude("com.google.code.findbugs", "jsr305") + } + testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") + + nessieQuarkusServer( + "org.projectnessie.nessie:nessie-quarkus:${libs.versions.nessie.get()}:runner" + ) } + +nessieQuarkusApp { includeTask(tasks.named("intTest")) } diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index d5704ef..24f0e39 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -16,6 +16,7 @@ package org.projectnessie.tools.catalog.migration.api; import com.google.common.base.Preconditions; +import java.util.Arrays; import java.util.List; import java.util.Objects; import java.util.function.Predicate; @@ -141,7 +142,13 @@ public CatalogMigrationResult result() { private boolean registerTable(TableIdentifier tableIdentifier) { try { if (!((SupportsNamespaces) 
targetCatalog()).namespaceExists(tableIdentifier.namespace())) { - ((SupportsNamespaces) targetCatalog()).createNamespace(tableIdentifier.namespace()); + String[] levels = tableIdentifier.namespace().levels(); + for (int index = 0; index < levels.length; index++) { + Namespace namespace = Namespace.of(Arrays.copyOfRange(levels, 0, index + 1)); + if (!((SupportsNamespaces) targetCatalog()).namespaceExists(namespace)) { + ((SupportsNamespaces) targetCatalog()).createNamespace(namespace); + } + } } // register the table to the target catalog TableOperations ops = ((BaseTable) sourceCatalog().loadTable(tableIdentifier)).operations(); diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java similarity index 97% rename from api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java rename to api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java index 8efc0f8..90e81cf 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTestCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration.api.test; +package org.projectnessie.tools.catalog.migration.api; import java.nio.file.Path; import java.util.Collections; @@ -29,9 +29,7 @@ import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; -import org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; -import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; -import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrator; +import org.projectnessie.tools.catalog.migration.api.test.AbstractTest; public abstract class AbstractTestCatalogMigrator extends AbstractTest { @@ -282,18 +280,18 @@ public void testRegisterWithNewNamespace(boolean deleteSourceTables) { .containsExactly(TableIdentifier.parse("db1.tbl5")); } - private CatalogMigrationResult registerAllTables(boolean deleteSourceTables) { - CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); - return catalogMigrator - .registerTables(catalogMigrator.getMatchingTableIdentifiers(null)) - .result(); - } - - private CatalogMigrator catalogMigratorWithDefaultArgs(boolean deleteSourceTables) { + protected CatalogMigrator catalogMigratorWithDefaultArgs(boolean deleteSourceTables) { return ImmutableCatalogMigrator.builder() .sourceCatalog(catalog1) .targetCatalog(catalog2) .deleteEntriesFromSourceCatalog(deleteSourceTables) .build(); } + + private CatalogMigrationResult registerAllTables(boolean deleteSourceTables) { + CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); + return catalogMigrator + .registerTables(catalogMigrator.getMatchingTableIdentifiers(null)) + .result(); + } } diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java similarity index 92% 
rename from api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java rename to api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java index 6aaf5ca..6488872 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/CustomCatalogMigratorTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catalog.migration.api.test; +package org.projectnessie.tools.catalog.migration.api; import java.nio.file.Path; import java.util.HashMap; @@ -30,9 +30,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import org.projectnessie.tools.catalog.migration.api.CatalogMigrationResult; -import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; -import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrator; +import org.projectnessie.tools.catalog.migration.api.test.AbstractTest; public class CustomCatalogMigratorTest extends AbstractTest { diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java new file mode 100644 index 0000000..6c658b6 --- /dev/null +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration.api; + +import java.util.Collections; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class HadoopCatalogMigratorTest extends AbstractTestCatalogMigrator { + + @BeforeAll + protected static void setup() { + catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "catalog1"); + catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "catalog2"); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() { + dropNamespaces(); + } + + @Order(8) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterWithNewNestedNamespace(boolean deleteSourceTables) { + // catalog2 doesn't have a namespace "a.b.c" + Namespace namespace = Namespace.of("a.b.c"); + String tableName = "tbl5_" + deleteSourceTables; + TableIdentifier tableIdentifier = TableIdentifier.parse("a.b.c." 
+ tableName); + ((SupportsNamespaces) catalog1).createNamespace(namespace); + catalog1.createTable(tableIdentifier, schema); + + CatalogMigrationResult result = + catalogMigratorWithDefaultArgs(deleteSourceTables) + .registerTables(Collections.singletonList(tableIdentifier)) + .result(); + + Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(tableIdentifier); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + + Assertions.assertThat(catalog2.loadTable(tableIdentifier)).isNotNull(); + catalog2.dropTable(tableIdentifier); + ((SupportsNamespaces) catalog2).dropNamespace(namespace); + catalog1.dropTable(tableIdentifier); + ((SupportsNamespaces) catalog1).dropNamespace(namespace); + } +} diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java new file mode 100644 index 0000000..72e3354 --- /dev/null +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration.api; + +import java.util.Collections; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; + +public class ITHadoopToHiveCatalogMigrator extends AbstractTestCatalogMigrator { + + @BeforeAll + protected static void setup() throws Exception { + HiveMetaStoreRunner.startMetastore(); + + catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "hadoop"); + catalog2 = HiveMetaStoreRunner.hiveCatalog(); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } + + // disable large table test for IT to save CI time. It will be executed only for UT. + @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } + + @Order(8) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterWithNewNestedNamespace(boolean deleteSourceTables) { + // catalog2 doesn't have a namespace "a.b.c" + Namespace namespace = Namespace.of("a.b.c"); + String tableName = "tbl5_" + deleteSourceTables; + TableIdentifier tableIdentifier = TableIdentifier.parse("a.b.c." 
+ tableName); + ((SupportsNamespaces) catalog1).createNamespace(namespace); + catalog1.createTable(tableIdentifier, schema); + + CatalogMigrationResult result = + catalogMigratorWithDefaultArgs(deleteSourceTables) + .registerTables(Collections.singletonList(tableIdentifier)) + .result(); + + // hive catalog doesn't support multipart namespace. Hence, table should fail to register. + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .containsExactly(tableIdentifier); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + + catalog1.dropTable(tableIdentifier); + ((SupportsNamespaces) catalog1).dropNamespace(namespace); + } +} diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java new file mode 100644 index 0000000..8a00484 --- /dev/null +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration.api; + +import java.util.Collections; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; + +public class ITHadoopToNessieCatalogMigrator extends AbstractTestCatalogMigrator { + + protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); + + protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); + + @BeforeAll + protected static void setup() throws Exception { + HiveMetaStoreRunner.startMetastore(); + + catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "hadoop"); + catalog2 = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); + + createNamespaces(); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + HiveMetaStoreRunner.stopMetastore(); + } + + // disable large table test for IT to save CI time. It will be executed only for UT. + @Override + @Disabled + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + super.testRegisterLargeNumberOfTables(deleteSourceTables); + } + + @Order(8) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterWithNewNestedNamespace(boolean deleteSourceTables) { + // catalog2 doesn't have a namespace "a.b.c" + Namespace namespace = Namespace.of("a.b.c"); + String tableName = "tbl5_" + deleteSourceTables; + TableIdentifier tableIdentifier = TableIdentifier.parse("a.b.c." 
+ tableName); + ((SupportsNamespaces) catalog1).createNamespace(namespace); + catalog1.createTable(tableIdentifier, schema); + + CatalogMigrationResult result = + catalogMigratorWithDefaultArgs(deleteSourceTables) + .registerTables(Collections.singletonList(tableIdentifier)) + .result(); + + Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(tableIdentifier); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + + Assertions.assertThat(catalog2.loadTable(tableIdentifier)).isNotNull(); + Assertions.assertThat(((SupportsNamespaces) catalog2).listNamespaces()) + .contains(Namespace.of("a"), Namespace.of("a", "b"), Namespace.of("a", "b", "c")); + + catalog2.dropTable(tableIdentifier); + ((SupportsNamespaces) catalog2).dropNamespace(namespace); + catalog1.dropTable(tableIdentifier); + ((SupportsNamespaces) catalog1).dropNamespace(namespace); + } +} diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java similarity index 91% rename from api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java rename to api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java index 39d8d9f..236bce0 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToHadoopCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java @@ -13,11 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration.api.test; +package org.projectnessie.tools.catalog.migration.api; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; +import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToHadoopCatalogMigrator extends AbstractTestCatalogMigrator { diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java similarity index 92% rename from api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java rename to api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java index 56a68f9..1e31214 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITHiveToNessieCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java @@ -13,11 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration.api.test; +package org.projectnessie.tools.catalog.migration.api; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; +import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToNessieCatalogMigrator extends AbstractTestCatalogMigrator { diff --git a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java similarity index 92% rename from api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java rename to api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java index da249af..3e86556 100644 --- a/api-test/src/test/java/org/projectnessie/tools/catalog/migration/api/test/ITNessieToHiveCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java @@ -13,11 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration.api.test; +package org.projectnessie.tools.catalog.migration.api; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; +import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITNessieToHiveCatalogMigrator extends AbstractTestCatalogMigrator { diff --git a/api-test/src/test/resources/logback.xml b/api/src/test/resources/logback.xml similarity index 92% rename from api-test/src/test/resources/logback.xml rename to api/src/test/resources/logback.xml index cb1446a..247cf81 100644 --- a/api-test/src/test/resources/logback.xml +++ b/api/src/test/resources/logback.xml @@ -16,7 +16,8 @@ limitations under the License. --> - + + ${catalog.migration.log.dir}/catalog_migration.log diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index 9445d44..7cb49f4 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -15,6 +15,8 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import static org.projectnessie.tools.catalog.migration.cli.PromptUtil.ANSI_YELLOW; + import com.google.common.collect.Lists; import java.io.IOException; import java.io.UncheckedIOException; @@ -223,7 +225,8 @@ private void printDetails(CatalogMigrationResult result) { if (!result.failedToDeleteTableIdentifiers().isEmpty()) { consoleLog.warn( - "Failed to delete these tables from source catalog:{}{}", + "{}Failed to delete these tables from source catalog:{}{}", + ANSI_YELLOW, System.lineSeparator(), result.failedToDeleteTableIdentifiers()); } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java 
b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java index 29e4634..8599dfa 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java @@ -15,6 +15,8 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import static org.projectnessie.tools.catalog.migration.cli.PromptUtil.ANSI_YELLOW; + import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.hadoop.HadoopCatalog; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; @@ -52,9 +54,10 @@ protected CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetC protected boolean canProceed(Catalog sourceCatalog) { if (sourceCatalog instanceof HadoopCatalog) { consoleLog.warn( - "Source catalog type is HADOOP and it doesn't support dropping tables just from " + "{}Source catalog type is HADOOP and it doesn't support dropping tables just from " + "catalog. {}Avoid operating the migrated tables from the source catalog after migration. 
" + "Use the tables from target catalog.", + ANSI_YELLOW, System.lineSeparator()); } return PromptUtil.proceedForMigration(); diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java index 949cf68..ae06d6d 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java @@ -25,10 +25,11 @@ private PromptUtil() {} private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); private static final String newLine = System.lineSeparator(); + static final String ANSI_YELLOW = "\u001B[33m"; static boolean proceedForRegistration() { consoleLog.warn( - "{}" + "{}{}" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + "{}\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " @@ -40,6 +41,7 @@ static boolean proceedForRegistration() { + "loss of data, and table corruption. " + "{}\tUse `migrate` command to automatically delete the table from source catalog after " + "migration.", + ANSI_YELLOW, newLine, newLine, newLine, @@ -52,7 +54,7 @@ static boolean proceedForRegistration() { static boolean proceedForMigration() { consoleLog.warn( - "{}" + "{}{}" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + "{}\tcan lead to a data loss as the in-progress commit will not be considered for migration. 
" + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " @@ -60,6 +62,7 @@ static boolean proceedForMigration() { + "{}" + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + "{}\tand can only be accessed from the target catalog.", + ANSI_YELLOW, newLine, newLine, newLine, diff --git a/cli/src/main/resources/logback.xml b/cli/src/main/resources/logback.xml index 6acd040..5f2a53a 100644 --- a/cli/src/main/resources/logback.xml +++ b/cli/src/main/resources/logback.xml @@ -16,7 +16,8 @@ limitations under the License. --> - + + @@ -28,6 +29,7 @@ + true %highlight(%-5level) - %msg%n From aecd282bd2efc6909121465fe5aab00504c845ff Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Thu, 16 Mar 2023 22:42:22 +0530 Subject: [PATCH 20/31] Address review comments from March 16 --- .../migration/api/CatalogMigrator.java | 40 +++-- .../api/CatalogMigratorParamsTest.java | 11 ++ .../api/UnsupportedNamespaceTest.java | 6 + .../catalog/migration/cli/CatalogUtil.java | 21 +-- .../cli/AbstractCLIMigrationTest.java | 3 +- .../migration/cli/CLIOptionsTest.java} | 4 +- .../migration/cli/CatalogUtilTest.java | 143 ++++++++++++++++++ .../migration/cli/HadoopCLIMigrationTest.java | 2 +- .../cli/ITHadoopToHiveCLIMigrationTest.java | 2 +- .../cli/ITHiveToHadoopCLIMigrationTest.java | 2 +- .../cli/ITHiveToNessieCLIMigrationTest.java | 2 +- .../cli/ITNessieToHiveCLIMigrationTest.java | 2 +- .../migration/cli/RunCLI.java | 3 +- 13 files changed, 212 insertions(+), 29 deletions(-) rename cli/src/test/java/org/projectnessie/tools/{catlog => catalog}/migration/cli/AbstractCLIMigrationTest.java (99%) rename cli/src/test/java/org/projectnessie/tools/{catlog/migration/cli/CLITest.java => catalog/migration/cli/CLIOptionsTest.java} (99%) create mode 100644 cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtilTest.java rename cli/src/test/java/org/projectnessie/tools/{catlog => 
catalog}/migration/cli/HadoopCLIMigrationTest.java (96%) rename cli/src/test/java/org/projectnessie/tools/{catlog => catalog}/migration/cli/ITHadoopToHiveCLIMigrationTest.java (97%) rename cli/src/test/java/org/projectnessie/tools/{catlog => catalog}/migration/cli/ITHiveToHadoopCLIMigrationTest.java (97%) rename cli/src/test/java/org/projectnessie/tools/{catlog => catalog}/migration/cli/ITHiveToNessieCLIMigrationTest.java (97%) rename cli/src/test/java/org/projectnessie/tools/{catlog => catalog}/migration/cli/ITNessieToHiveCLIMigrationTest.java (97%) rename cli/src/test/java/org/projectnessie/tools/{catlog => catalog}/migration/cli/RunCLI.java (96%) diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index 24f0e39..83bca7f 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -17,8 +17,10 @@ import com.google.common.base.Preconditions; import java.util.Arrays; +import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import java.util.function.Predicate; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -28,6 +30,7 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.hadoop.HadoopCatalog; import org.immutables.value.Value; import org.slf4j.Logger; @@ -49,6 +52,8 @@ public abstract class CatalogMigrator { private final ImmutableCatalogMigrationResult.Builder resultBuilder = ImmutableCatalogMigrationResult.builder(); + private static final Set processedNamespaces = new HashSet<>(); + /** * Get the table identifiers which matches the regular expression 
pattern input from all the * namespaces. @@ -101,6 +106,12 @@ public CatalogMigrator registerTables(List identifiers) { Preconditions.checkArgument(identifiers != null, "Identifiers list is null"); Preconditions.checkArgument( !targetCatalog().equals(sourceCatalog()), "target catalog is same as source catalog"); + if (!(targetCatalog() instanceof SupportsNamespaces)) { + throw new UnsupportedOperationException( + String.format( + "target catalog %s doesn't implement SupportsNamespaces to create missing namespaces.", + targetCatalog().name())); + } if (identifiers.isEmpty()) { LOG.info("Identifiers list is empty"); @@ -136,20 +147,13 @@ public CatalogMigrator registerTables(List identifiers) { } public CatalogMigrationResult result() { + processedNamespaces.clear(); return resultBuilder.build(); } private boolean registerTable(TableIdentifier tableIdentifier) { try { - if (!((SupportsNamespaces) targetCatalog()).namespaceExists(tableIdentifier.namespace())) { - String[] levels = tableIdentifier.namespace().levels(); - for (int index = 0; index < levels.length; index++) { - Namespace namespace = Namespace.of(Arrays.copyOfRange(levels, 0, index + 1)); - if (!((SupportsNamespaces) targetCatalog()).namespaceExists(namespace)) { - ((SupportsNamespaces) targetCatalog()).createNamespace(namespace); - } - } - } + createNamespacesIfNotExist(tableIdentifier.namespace()); // register the table to the target catalog TableOperations ops = ((BaseTable) sourceCatalog().loadTable(tableIdentifier)).operations(); targetCatalog().registerTable(tableIdentifier, ops.current().metadataFileLocation()); @@ -160,4 +164,22 @@ private boolean registerTable(TableIdentifier tableIdentifier) { return false; } } + + private void createNamespacesIfNotExist(Namespace identifierNamespace) { + if (!processedNamespaces.contains(identifierNamespace.toString())) { + String[] levels = identifierNamespace.levels(); + for (int index = 0; index < levels.length; index++) { + Namespace namespace = 
Namespace.of(Arrays.copyOfRange(levels, 0, index + 1)); + if (!processedNamespaces.contains(namespace.toString())) { + try { + ((SupportsNamespaces) targetCatalog()).createNamespace(namespace); + } catch (AlreadyExistsException ex) { + // ignore the error as forcefully creating the namespace even if it exists to avoid + // namespaceExists() check. + } + processedNamespaces.add(namespace.toString()); + } + } + } + } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java index 4d7b62f..69d8e16 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java @@ -50,6 +50,17 @@ public void testInvalidArgs() { .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("target catalog is same as source catalog"); + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigrator.builder() + .sourceCatalog(catalog1) + .targetCatalog(catalog2) + .deleteEntriesFromSourceCatalog(true) + .build() + .registerTables(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Identifiers list is null"); + Assertions.assertThatThrownBy( () -> ImmutableCatalogMigrator.builder() diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java index 722d7e9..ee487e1 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java @@ -16,6 +16,7 @@ package org.projectnessie.tools.catalog.migration.api; import java.nio.file.Path; +import java.util.Collections; import java.util.List; import 
org.apache.iceberg.BaseMetastoreCatalog; import org.apache.iceberg.TableOperations; @@ -79,5 +80,10 @@ public void renameTable(TableIdentifier from, TableIdentifier to) {} .isInstanceOf(UnsupportedOperationException.class) .hasMessageContaining( "source catalog TestCatalog{} doesn't implement SupportsNamespaces to list all namespaces."); + + Assertions.assertThatThrownBy(() -> catalogMigrator.registerTables(Collections.emptyList())) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining( + "target catalog TestCatalog{} doesn't implement SupportsNamespaces to create missing namespaces."); } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java index f50ca5a..4e976ed 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java @@ -15,8 +15,8 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import com.google.common.base.Preconditions; import java.util.Map; -import java.util.Objects; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; import org.apache.iceberg.aws.glue.GlueCatalog; @@ -49,22 +49,25 @@ static Catalog buildCatalog( CatalogType catalogType, String customCatalogImpl, Map hadoopConf) { - Configuration sourceCatalogConf = new Configuration(); - hadoopConf.forEach(sourceCatalogConf::set); + Preconditions.checkArgument(catalogProperties != null, "catalog properties is null"); + Preconditions.checkArgument(catalogType != null, "catalog type is null"); + Configuration catalogConf = new Configuration(); + if (hadoopConf != null) { + hadoopConf.forEach(catalogConf::set); + } return org.apache.iceberg.CatalogUtil.loadCatalog( - Objects.requireNonNull(catalogImpl(catalogType, customCatalogImpl)), + catalogImpl(catalogType, customCatalogImpl), 
catalogType.name(), catalogProperties, - sourceCatalogConf); + catalogConf); } private static String catalogImpl(CatalogType type, String customCatalogImpl) { switch (type) { case CUSTOM: - if (customCatalogImpl == null || customCatalogImpl.isEmpty()) { - throw new IllegalArgumentException( - "Need to specify the fully qualified class name of the custom catalog " + "impl"); - } + Preconditions.checkArgument( + customCatalogImpl != null && !customCatalogImpl.trim().isEmpty(), + "Need to specify the fully qualified class name of the custom catalog impl"); return customCatalogImpl; case DYNAMODB: return DynamoDbCatalog.class.getName(); diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java similarity index 99% rename from cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java index 2ef0f2c..b0bf8ec 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catlog.migration.cli; +package org.projectnessie.tools.catalog.migration.cli; import com.google.common.collect.Lists; import java.io.IOException; @@ -42,7 +42,6 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; import org.projectnessie.tools.catalog.migration.api.test.AbstractTest; -import org.projectnessie.tools.catalog.migration.cli.CatalogUtil; public abstract class AbstractCLIMigrationTest extends AbstractTest { diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java similarity index 99% rename from cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java rename to cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java index 1884080..8f54188 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/CLITest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catlog.migration.cli; +package org.projectnessie.tools.catalog.migration.cli; import static org.junit.jupiter.params.provider.Arguments.arguments; @@ -30,7 +30,7 @@ import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; -public class CLITest { +public class CLIOptionsTest { protected static @TempDir Path logDir; diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtilTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtilTest.java new file mode 100644 index 0000000..956255c --- /dev/null +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtilTest.java @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.projectnessie.tools.catalog.migration.cli; + +import java.nio.file.Path; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Stream; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.hive.HiveCatalog; +import org.apache.iceberg.nessie.NessieCatalog; +import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +public class CatalogUtilTest { + + protected static @TempDir Path logDir; + + @BeforeAll + protected static void initLogDir() { + System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); + } + + static Stream blankOrNullStrings() { + return Stream.of("", " ", null); + } + + @Order(0) + @ParameterizedTest() + @MethodSource("blankOrNullStrings") + public void testCustomCatalogWithoutImpl(String impl) { + Assertions.assertThatThrownBy( + () -> + CatalogUtil.buildCatalog( + Collections.emptyMap(), + CatalogUtil.CatalogType.CUSTOM, + impl, + Collections.emptyMap())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining( + "Need to specify the fully qualified class name of the custom catalog impl"); + } + + @Order(1) + @Test + public void testInvalidArgs() { + Assertions.assertThatThrownBy(() -> CatalogUtil.buildCatalog(null, null, null, null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("catalog properties is null"); + + Assertions.assertThatThrownBy( + () -> CatalogUtil.buildCatalog(Collections.emptyMap(), null, null, null)) + 
.isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("catalog type is null"); + + Assertions.assertThatThrownBy( + () -> + CatalogUtil.buildCatalog( + Collections.emptyMap(), CatalogUtil.CatalogType.CUSTOM, "abc", null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining( + "Cannot initialize Catalog implementation abc: Cannot find constructor for interface"); + } + + @Test + @Order(2) + public void testBuildHadoopCatalog() { + Map properties = new HashMap<>(); + properties.put("warehouse", logDir.toAbsolutePath().toString()); + properties.put("type", "hadoop"); + + Map conf = new HashMap<>(); + conf.put("k1", "v1"); + + Catalog catalog = + CatalogUtil.buildCatalog(properties, CatalogUtil.CatalogType.HADOOP, null, conf); + + Assertions.assertThat(catalog).isInstanceOf(HadoopCatalog.class); + Assertions.assertThat(catalog.name()).isEqualTo("HADOOP"); + Assertions.assertThat(((HadoopCatalog) catalog).getConf().get("k1")).isEqualTo("v1"); + Schema schema = + new Schema( + Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())) + .fields()); + Table table = catalog.createTable(TableIdentifier.parse("foo.tbl1"), schema); + Assertions.assertThat(table.location()).contains(logDir.toAbsolutePath().toString()); + catalog.dropTable(TableIdentifier.parse("foo.tbl1")); + } + + @Test + @Order(3) + public void testBuildNessieCatalog() { + Map properties = new HashMap<>(); + properties.put("warehouse", logDir.toAbsolutePath().toString()); + properties.put("ref", "main"); + properties.put("uri", "http://localhost:19120/api/v1"); + + Catalog catalog = + CatalogUtil.buildCatalog(properties, CatalogUtil.CatalogType.NESSIE, null, null); + + Assertions.assertThat(catalog).isInstanceOf(NessieCatalog.class); + Assertions.assertThat(catalog.name()).isEqualTo("NESSIE"); + } + + @Test + @Order(4) + public void testBuildHiveCatalog() { + Map properties = new HashMap<>(); + properties.put("warehouse", 
logDir.toAbsolutePath().toString()); + properties.put("type", "hive"); + properties.put("uri", "thrift://localhost:9083"); + + Catalog catalog = + CatalogUtil.buildCatalog(properties, CatalogUtil.CatalogType.HIVE, null, null); + + Assertions.assertThat(catalog).isInstanceOf(HiveCatalog.class); + Assertions.assertThat(catalog.name()).isEqualTo("HIVE"); + } +} diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java similarity index 96% rename from cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java index 58273ee..b15101c 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/HadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catlog.migration.cli; +package org.projectnessie.tools.catalog.migration.cli; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java similarity index 97% rename from cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java index d983ed2..4a9ec34 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHadoopToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catlog.migration.cli; +package org.projectnessie.tools.catalog.migration.cli; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java similarity index 97% rename from cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java index e546d80..223ad9c 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToHadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catlog.migration.cli; +package org.projectnessie.tools.catalog.migration.cli; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java similarity index 97% rename from cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java index 77fb899..ae38082 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITHiveToNessieCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catlog.migration.cli; +package org.projectnessie.tools.catalog.migration.cli; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java similarity index 97% rename from cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java rename to cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java index 69c2145..dbcb4cd 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/ITNessieToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catlog.migration.cli; +package org.projectnessie.tools.catalog.migration.cli; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; diff --git a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/RunCLI.java similarity index 96% rename from cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java rename to cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/RunCLI.java index 781b45c..562cbfc 100644 --- a/cli/src/test/java/org/projectnessie/tools/catlog/migration/cli/RunCLI.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/RunCLI.java @@ -13,14 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.projectnessie.tools.catlog.migration.cli; +package org.projectnessie.tools.catalog.migration.cli; import java.io.PrintWriter; import java.io.StringWriter; import java.util.Arrays; import java.util.List; import nl.altindag.log.LogCaptor; -import org.projectnessie.tools.catalog.migration.cli.CatalogMigrationCLI; import picocli.CommandLine; /** Helper class for tests. 
*/ From d2cf26274609f3cfdccf3ecd2005f8c197ed6da7 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Mon, 20 Mar 2023 17:58:52 +0530 Subject: [PATCH 21/31] Update colorscheme --- .../migration/cli/BaseRegisterCommand.java | 5 +---- .../migration/cli/CatalogMigrationCLI.java | 16 +++++++++++++++- .../catalog/migration/cli/MigrateCommand.java | 5 +---- .../tools/catalog/migration/cli/PromptUtil.java | 7 ++----- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index 7cb49f4..9445d44 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -15,8 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.cli; -import static org.projectnessie.tools.catalog.migration.cli.PromptUtil.ANSI_YELLOW; - import com.google.common.collect.Lists; import java.io.IOException; import java.io.UncheckedIOException; @@ -225,8 +223,7 @@ private void printDetails(CatalogMigrationResult result) { if (!result.failedToDeleteTableIdentifiers().isEmpty()) { consoleLog.warn( - "{}Failed to delete these tables from source catalog:{}{}", - ANSI_YELLOW, + "Failed to delete these tables from source catalog:{}{}", System.lineSeparator(), result.failedToDeleteTableIdentifiers()); } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java index 9ea8add..76b94af 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java @@ -27,9 +27,23 @@ public class CatalogMigrationCLI { public CatalogMigrationCLI() 
{} public static void main(String... args) { - CommandLine commandLine = new CommandLine(new CatalogMigrationCLI()); + CommandLine commandLine = + new CommandLine(new CatalogMigrationCLI()).setColorScheme(createColorScheme()); commandLine.setUsageHelpWidth(150); int exitCode = commandLine.execute(args); System.exit(exitCode); } + + private static CommandLine.Help.ColorScheme createColorScheme() { + return new CommandLine.Help.ColorScheme.Builder() + .commands( + CommandLine.Help.Ansi.Style.bold, + CommandLine.Help.Ansi.Style.underline) // combine multiple styles + .options(CommandLine.Help.Ansi.Style.fg_yellow) // yellow foreground color + .parameters(CommandLine.Help.Ansi.Style.fg_yellow) + .optionParams(CommandLine.Help.Ansi.Style.italic) + .errors(CommandLine.Help.Ansi.Style.fg_red, CommandLine.Help.Ansi.Style.bold) + .stackTraces(CommandLine.Help.Ansi.Style.italic) + .build(); + } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java index 8599dfa..29e4634 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java @@ -15,8 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.cli; -import static org.projectnessie.tools.catalog.migration.cli.PromptUtil.ANSI_YELLOW; - import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.hadoop.HadoopCatalog; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; @@ -54,10 +52,9 @@ protected CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetC protected boolean canProceed(Catalog sourceCatalog) { if (sourceCatalog instanceof HadoopCatalog) { consoleLog.warn( - "{}Source catalog type is HADOOP and it doesn't support dropping tables just from " + "Source catalog type is HADOOP and it doesn't support dropping tables just 
from " + "catalog. {}Avoid operating the migrated tables from the source catalog after migration. " + "Use the tables from target catalog.", - ANSI_YELLOW, System.lineSeparator()); } return PromptUtil.proceedForMigration(); diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java index ae06d6d..949cf68 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java @@ -25,11 +25,10 @@ private PromptUtil() {} private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); private static final String newLine = System.lineSeparator(); - static final String ANSI_YELLOW = "\u001B[33m"; static boolean proceedForRegistration() { consoleLog.warn( - "{}{}" + "{}" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + "{}\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " @@ -41,7 +40,6 @@ static boolean proceedForRegistration() { + "loss of data, and table corruption. " + "{}\tUse `migrate` command to automatically delete the table from source catalog after " + "migration.", - ANSI_YELLOW, newLine, newLine, newLine, @@ -54,7 +52,7 @@ static boolean proceedForRegistration() { static boolean proceedForMigration() { consoleLog.warn( - "{}{}" + "{}" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + "{}\tcan lead to a data loss as the in-progress commit will not be considered for migration. 
" + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " @@ -62,7 +60,6 @@ static boolean proceedForMigration() { + "{}" + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + "{}\tand can only be accessed from the target catalog.", - ANSI_YELLOW, newLine, newLine, newLine, From d61a0e112d2acf04d349da80470b4a8c455cb2a4 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Tue, 21 Mar 2023 11:53:03 +0530 Subject: [PATCH 22/31] Addressing comments from March 20 --- README.md | 144 +++++++++--------- .../migration/api/CatalogMigrator.java | 28 +++- .../migration/cli/BaseRegisterCommand.java | 38 +++-- .../migration/cli/CatalogMigrationCLI.java | 16 +- ...logUtil.java => CatalogMigrationUtil.java} | 12 +- .../migration/cli/IdentifierOptions.java | 26 ++-- .../catalog/migration/cli/MigrateCommand.java | 1 + .../migration/cli/RegisterCommand.java | 1 + .../migration/cli/SourceCatalogOptions.java | 36 +++-- .../migration/cli/TargetCatalogOptions.java | 40 +++-- .../cli/AbstractCLIMigrationTest.java | 42 ++--- .../catalog/migration/cli/CLIOptionsTest.java | 99 +++++++----- ...est.java => CatalogMigrationUtilTest.java} | 53 +++++-- codestyle/errorprone-rules.properties | 2 +- gradle/libs.versions.toml | 1 + 15 files changed, 323 insertions(+), 216 deletions(-) rename cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/{CatalogUtil.java => CatalogMigrationUtil.java} (88%) rename cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/{CatalogUtilTest.java => CatalogMigrationUtilTest.java} (68%) diff --git a/README.md b/README.md index 90917bc..d1fe3bf 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,5 @@ # Objective -Iceberg supports managing the iceberg tables using the following Iceberg Catalogs: -* CUSTOM (By plugging in the jar and providing implementation class name) -* DYNAMODB -* ECS -* GLUE -* HADOOP -* HIVE -* JDBC -* NESSIE (Arctic) -* REST +Introduce a CLI 
tool to bulk migrate Iceberg tables from one catalog to another without a data copy. Users may want to move away from one catalog and use the other catalog with their existing Iceberg tables for the following reasons: * They were using hadoop catalog and later realized that it is not production recommended. So, they want to move tables to other production ready catalogs. @@ -18,11 +9,9 @@ Users may want to move away from one catalog and use the other catalog with thei Before the `1.1.0` Iceberg release, the only way to achieve this was **by copying the data** using `insert into catalog1.db.tableName as select * from catalog2.db.tableName`. After the iceberg `1.1.0` release, all Iceberg Catalogs supports register table with the `catalog#registerTable()` API. However, custom code is needed to migrate all the tables in bulk. -**Here we introduce a CLI tool to migrate Iceberg tables in bulk from one Iceberg Catalog to another without a data copy.** +**Hence, we introduce a CLI tool to migrate Iceberg tables in bulk from one Iceberg Catalog to another without a data copy.** # Iceberg-catalog-migrator -A CLI tool to bulk migrate Iceberg tables from one catalog to another without a data copy. - Need to have java installed in your machine(JDK11 or later version) to use this CLI tool. Below is the CLI syntax: @@ -39,9 +28,10 @@ Commands: ``` $ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register -h -Usage: iceberg-catalog-migrator register [-hV] [--disable-prompts] [--dry-run] --output-dir= (--source-catalog-type= - --source-catalog-properties=[,...] [--source-catalog-properties= - [,...]]... [--source-catalog-hadoop-conf=[,...]]... +Usage: iceberg-catalog-migrator register [-hV] [--disable-safety-prompts] [--dry-run] [--stacktrace] [--output-dir=] + (--source-catalog-type= --source-catalog-properties=[,...] + [--source-catalog-properties=[,...]]... + [--source-catalog-hadoop-conf=[,...]]... 
[--source-custom-catalog-impl=]) (--target-catalog-type= --target-catalog-properties=[,...] [--target-catalog-properties= [,...]]... [--target-catalog-hadoop-conf=[,...]]... @@ -50,49 +40,66 @@ Usage: iceberg-catalog-migrator register [-hV] [--disable-prompts] [--dry-run] - --identifiers-regex=] Bulk register the iceberg tables from source catalog to target catalog without data copy. --output-dir= - local output directory path to write CLI output files like `failed_identifiers.txt`, `failed_to_delete_at_source.txt`, - `dry_run_identifiers.txt`. - --dry-run optional configuration to simulate the registration without actually registering. Can learn about a list of the tables that - will be registered by running this. - --disable-prompts optional configuration to disable warning prompts which needs console input. - -h, --help Show this help message and exit. - -V, --version Print version information and exit. -source catalog options: + Optional local output directory path to write CLI output files like `failed_identifiers.txt`, `failed_to_delete_at_source.txt`, + `dry_run_identifiers.txt`. If not specified, uses present working directory. + Example: --output-dir /tmp/output/ + --output-dir $PWD/output_folder + --dry-run Optional configuration to simulate the registration without actually registering. Can learn about a list of tables that will be + registered by running this. + --disable-safety-prompts + Optional configuration to disable safety prompts which needs console input. + --stacktrace Optional configuration to enable capturing stacktrace in logs in case of failures. + -h, --help Show this help message and exit. + -V, --version Print version information and exit. +Source catalog options: --source-catalog-type= - source catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] + Source catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]. 
+ Example: --source-catalog-type GLUE + --source-catalog-type NESSIE --source-catalog-properties=[,...] - source catalog properties (like uri, warehouse, etc) + Catalog properties for source catalog (like uri, warehouse, etc). + Example: --source-catalog-properties warehouse=/tmp/warehouseHadoop,type=hadoop + --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie --source-catalog-hadoop-conf=[,...] - optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg - FileIO. + Optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. + Example: --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY --source-custom-catalog-impl= - optional fully qualified class name of the custom catalog implementation of the source catalog. Required when the catalog - type is CUSTOM. -target catalog options: + Optional fully qualified class name of the custom catalog implementation of the source catalog. Required when the catalog type + is CUSTOM. + Example: --source-custom-catalog-impl org.apache.iceberg.AwesomeCatalog +Target catalog options: --target-catalog-type= - target catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST] + Target catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]. + Example: --target-catalog-type GLUE + --target-catalog-type NESSIE --target-catalog-properties=[,...] - target catalog properties (like uri, warehouse, etc) + Catalog properties for target catalog (like uri, warehouse, etc). + Example: --target-catalog-properties warehouse=/tmp/warehouseHadoop,type=hadoop + --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie --target-catalog-hadoop-conf=[,...] 
- optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg - FileIO. + Optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. + Example: --target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY --target-custom-catalog-impl= - optional fully qualified class name of the custom catalog implementation of the target catalog. Required when the catalog - type is CUSTOM. -identifier options: + Optional fully qualified class name of the custom catalog implementation of the target catalog. Required when the catalog type + is CUSTOM. + Example: --target-custom-catalog-impl org.apache.iceberg.AwesomeCatalog +Identifier options: --identifiers=[,...] - optional selective list of identifiers to register. If not specified, all the tables will be registered. Use this when - there are few identifiers that need to be registered. For a large number of identifiers, use the - `--identifiers-from-file` or `--identifiers-regex` option. + Optional selective list of identifiers to register. If not specified, all the tables will be registered. Use this when there are + few identifiers that need to be registered. For a large number of identifiers, use the `--identifiers-from-file` or + `--identifiers-regex` option. + Example: --identifiers foo.t1,bar.t2 --identifiers-from-file= - optional text file path that contains a list of table identifiers (one per line) to register. Should not be used with - `--identifiers` or `--identifiers-regex` option. + Optional text file path that contains a list of table identifiers (one per line) to register. Should not be used with + `--identifiers` or `--identifiers-regex` option. + Example: --identifiers-from-file /tmp/files/ids.txt --identifiers-regex= - optional regular expression pattern used to register only the tables whose identifiers match this pattern. 
Should not be - used with `--identifiers` or '--identifiers-from-file' option. + Optional regular expression pattern used to register only the tables whose identifiers match this pattern. Should not be used + with `--identifiers` or '--identifiers-from-file' option. + Example: --identifiers-regex ^foo\..* ``` -Note: options for migrate command is exactly same as register command. +Note: Options for migrate command is exactly same as register command. > :warning: **It is recommended to use this CLI tool when there is no in-progress commits for the tables in the source catalog.** In-progress commits may not make it into the target catalog if used. @@ -110,8 +117,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---output-dir $PWD/output +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse ``` ## Register all the tables from Hadoop catalog to Arctic catalog (main branch) @@ -128,8 +134,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ ---target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY \ ---output-dir $PWD/output +--target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY ``` ## Migrate selected tables (t1,t2 in namespace foo) from Arctic catalog (main branch) to Hadoop catalog. 
@@ -146,8 +151,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ --target-catalog-type HADOOP \ --target-catalog-properties warehouse=/tmp/warehouse,type=hadoop --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY \ ---identifiers foo.t1,foo.t2 \ ---output-dir $PWD/output +--identifiers foo.t1,foo.t2 ``` # Scenarios @@ -166,8 +170,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---dry-run \ ---output-dir $PWD/output +--dry-run ``` All the inputs will be validated and a list of identified table identifiers for migration will be printed on the console @@ -182,8 +185,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---output-dir $PWD/output +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse ``` Once the input validations are done, users will be prompted with this message. 
@@ -245,8 +247,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---output-dir $PWD/output +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse ``` Console output will be same as B.2) till summary because even in case of failure, @@ -280,8 +281,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---output-dir $PWD/output +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse ``` Console output will be same as B.2) till summary because even in case of failure, @@ -314,8 +314,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---output-dir $PWD/output +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse ``` Console output will be same as B.2) till summary because even in case of failure, @@ -357,8 +356,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---identifiers-regex ^foo\..* \ ---output-dir $PWD/output +--identifiers-regex ^foo\..* ``` @@ -369,8 +367,7 @@ java -jar 
iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---identifiers-from-file ids.txt \ ---output-dir $PWD/output +--identifiers-from-file ids.txt ``` Sample input: (migrate only two tables foo.tbl1, foo.tbl2) @@ -380,9 +377,20 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---identifiers foo.tbl1,foo.tbl2 \ ---output-dir $PWD/output +--identifiers foo.tbl1,foo.tbl2 ``` Console will clearly print that only these identifiers are used for table migration. Rest of the behavior will be the same as mentioned in the previous sections. + +# Appendix A: Iceberg catalogs +Iceberg supports managing the iceberg tables using the following Iceberg Catalogs: +* CUSTOM (By plugging in the jar and providing implementation class name) +* DYNAMODB +* ECS +* GLUE +* HADOOP +* HIVE +* JDBC +* NESSIE (Arctic) +* REST \ No newline at end of file diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index 83bca7f..aba6c01 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -48,11 +48,17 @@ public abstract class CatalogMigrator { /** Delete the table entries from source catalog after successful migration. */ public abstract boolean deleteEntriesFromSourceCatalog(); + /** Enable the stacktrace in logs in case of failures. 
*/ + @Value.Default + public boolean enableStacktrace() { + return false; + } + private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrator.class); private final ImmutableCatalogMigrationResult.Builder resultBuilder = ImmutableCatalogMigrationResult.builder(); - private static final Set processedNamespaces = new HashSet<>(); + private final Set processedNamespaces = new HashSet<>(); /** * Get the table identifiers which matches the regular expression pattern input from all the @@ -140,7 +146,14 @@ public CatalogMigrator registerTables(List identifiers) { } } catch (Exception exception) { resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); - LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); + if (enableStacktrace()) { + LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); + } else { + LOG.warn( + "Failed to delete the table after migration {} : {}", + tableIdentifier, + exception.getMessage()); + } } }); return this; @@ -160,24 +173,27 @@ private boolean registerTable(TableIdentifier tableIdentifier) { LOG.info("Successfully migrated the table {}", tableIdentifier); return true; } catch (Exception ex) { - LOG.warn("Unable to register the table {}", tableIdentifier, ex); + if (enableStacktrace()) { + LOG.warn("Unable to register the table {}", tableIdentifier, ex); + } else { + LOG.warn("Unable to register the table {} : {}", tableIdentifier, ex.getMessage()); + } return false; } } private void createNamespacesIfNotExist(Namespace identifierNamespace) { - if (!processedNamespaces.contains(identifierNamespace.toString())) { + if (!processedNamespaces.contains(identifierNamespace)) { String[] levels = identifierNamespace.levels(); for (int index = 0; index < levels.length; index++) { Namespace namespace = Namespace.of(Arrays.copyOfRange(levels, 0, index + 1)); - if (!processedNamespaces.contains(namespace.toString())) { + if (processedNamespaces.add(namespace)) { try { 
((SupportsNamespaces) targetCatalog()).createNamespace(namespace); } catch (AlreadyExistsException ex) { // ignore the error as forcefully creating the namespace even if it exists to avoid // namespaceExists() check. } - processedNamespaces.add(namespace.toString()); } } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index 9445d44..0974e0f 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -15,6 +15,7 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import java.io.IOException; import java.io.UncheckedIOException; @@ -38,38 +39,48 @@ public abstract class BaseRegisterCommand implements Callable { @CommandLine.ArgGroup( exclusive = false, multiplicity = "1", - heading = "source catalog options: %n") + heading = "Source catalog options: %n") private SourceCatalogOptions sourceCatalogOptions; @CommandLine.ArgGroup( exclusive = false, multiplicity = "1", - heading = "target catalog options: %n") + heading = "Target catalog options: %n") private TargetCatalogOptions targetCatalogOptions; - @CommandLine.ArgGroup(heading = "identifier options: %n") + @CommandLine.ArgGroup(heading = "Identifier options: %n") private IdentifierOptions identifierOptions; @CommandLine.Option( names = {"--output-dir"}, - required = true, - description = - "local output directory path to write CLI output files like `failed_identifiers.txt`, " - + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. 
") + defaultValue = "", + description = { + "Optional local output directory path to write CLI output files like `failed_identifiers.txt`, " + + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. If not specified, uses present working " + + "directory.", + "Example: --output-dir /tmp/output/", + " --output-dir $PWD/output_folder" + }) private Path outputDirPath; @CommandLine.Option( names = {"--dry-run"}, description = - "optional configuration to simulate the registration without actually registering. Can learn about a list " - + "of the tables that will be registered by running this.") + "Optional configuration to simulate the registration without actually registering. Can learn about a list " + + "of tables that will be registered by running this.") private boolean isDryRun; @CommandLine.Option( - names = {"--disable-prompts"}, - description = "optional configuration to disable warning prompts which needs console input.") + names = {"--disable-safety-prompts"}, + description = "Optional configuration to disable safety prompts which needs console input.") private boolean disablePrompts; + @CommandLine.Option( + names = {"--stacktrace"}, + description = + "Optional configuration to enable capturing stacktrace in logs in case of failures.") + boolean enableStackTrace; + private static final int BATCH_SIZE = 100; public static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; public static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; @@ -96,6 +107,11 @@ public Integer call() { identifiers = identifierOptions.processIdentifiersInput(); } + Preconditions.checkArgument( + Files.exists(outputDirPath), "path specified in `--output-dir` does not exist"); + Preconditions.checkArgument( + Files.isWritable(outputDirPath), "path specified in `--output-dir` is not writable"); + Catalog sourceCatalog = sourceCatalogOptions.build(); consoleLog.info("Configured source catalog: {}", sourceCatalog.name()); diff --git 
a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java index 76b94af..9ea8add 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java @@ -27,23 +27,9 @@ public class CatalogMigrationCLI { public CatalogMigrationCLI() {} public static void main(String... args) { - CommandLine commandLine = - new CommandLine(new CatalogMigrationCLI()).setColorScheme(createColorScheme()); + CommandLine commandLine = new CommandLine(new CatalogMigrationCLI()); commandLine.setUsageHelpWidth(150); int exitCode = commandLine.execute(args); System.exit(exitCode); } - - private static CommandLine.Help.ColorScheme createColorScheme() { - return new CommandLine.Help.ColorScheme.Builder() - .commands( - CommandLine.Help.Ansi.Style.bold, - CommandLine.Help.Ansi.Style.underline) // combine multiple styles - .options(CommandLine.Help.Ansi.Style.fg_yellow) // yellow foreground color - .parameters(CommandLine.Help.Ansi.Style.fg_yellow) - .optionParams(CommandLine.Help.Ansi.Style.italic) - .errors(CommandLine.Help.Ansi.Style.fg_red, CommandLine.Help.Ansi.Style.bold) - .stackTraces(CommandLine.Help.Ansi.Style.italic) - .build(); - } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java similarity index 88% rename from cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java rename to cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java index 4e976ed..54f6380 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java 
@@ -28,9 +28,9 @@ import org.apache.iceberg.nessie.NessieCatalog; import org.apache.iceberg.rest.RESTCatalog; -public final class CatalogUtil { +public final class CatalogMigrationUtil { - private CatalogUtil() {} + private CatalogMigrationUtil() {} public enum CatalogType { CUSTOM, @@ -47,19 +47,19 @@ public enum CatalogType { static Catalog buildCatalog( Map catalogProperties, CatalogType catalogType, + String catalogName, String customCatalogImpl, Map hadoopConf) { Preconditions.checkArgument(catalogProperties != null, "catalog properties is null"); Preconditions.checkArgument(catalogType != null, "catalog type is null"); + Preconditions.checkArgument(catalogName != null, "catalog name is null"); + Preconditions.checkArgument(!catalogName.trim().isEmpty(), "catalog name is empty"); Configuration catalogConf = new Configuration(); if (hadoopConf != null) { hadoopConf.forEach(catalogConf::set); } return org.apache.iceberg.CatalogUtil.loadCatalog( - catalogImpl(catalogType, customCatalogImpl), - catalogType.name(), - catalogProperties, - catalogConf); + catalogImpl(catalogType, customCatalogImpl), catalogName, catalogProperties, catalogConf); } private static String catalogImpl(CatalogType type, String customCatalogImpl) { diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java index 51edfa1..b39f1ea 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java @@ -32,24 +32,30 @@ public class IdentifierOptions { @CommandLine.Option( names = {"--identifiers"}, split = ",", - description = - "optional selective list of identifiers to register. If not specified, all the tables will be registered. " - + "Use this when there are few identifiers that need to be registered. 
For a large number of identifiers, " - + "use the `--identifiers-from-file` or `--identifiers-regex` option.") + description = { + "Optional selective list of identifiers to register. If not specified, all the tables will be registered. " + + "Use this when there are few identifiers that need to be registered. For a large number of identifiers, " + + "use the `--identifiers-from-file` or `--identifiers-regex` option.", + "Example: --identifiers foo.t1,bar.t2" + }) private List identifiers = new ArrayList<>(); @CommandLine.Option( names = {"--identifiers-from-file"}, - description = - "optional text file path that contains a list of table identifiers (one per line) to register. Should not be " - + "used with `--identifiers` or `--identifiers-regex` option.") + description = { + "Optional text file path that contains a list of table identifiers (one per line) to register. Should not be " + + "used with `--identifiers` or `--identifiers-regex` option.", + "Example: --identifiers-from-file /tmp/files/ids.txt" + }) private String identifiersFromFile; @CommandLine.Option( names = {"--identifiers-regex"}, - description = - "optional regular expression pattern used to register only the tables whose identifiers match this pattern. " - + "Should not be used with `--identifiers` or '--identifiers-from-file' option.") + description = { + "Optional regular expression pattern used to register only the tables whose identifiers match this pattern. 
" + + "Should not be used with `--identifiers` or '--identifiers-from-file' option.", + "Example: --identifiers-regex ^foo\\..*" + }) protected String identifiersRegEx; private final Logger consoleLog = LoggerFactory.getLogger("console-log"); diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java index 29e4634..af38dbf 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java @@ -45,6 +45,7 @@ protected CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetC .sourceCatalog(sourceCatalog) .targetCatalog(targetCatalog) .deleteEntriesFromSourceCatalog(true) + .enableStacktrace(enableStackTrace) .build(); } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java index f180bae..db9fac0 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java @@ -38,6 +38,7 @@ protected CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetC .sourceCatalog(sourceCatalog) .targetCatalog(targetCatalog) .deleteEntriesFromSourceCatalog(false) + .enableStacktrace(enableStackTrace) .build(); } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java index 191f23a..3b93f10 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java @@ -25,34 +25,46 @@ public class SourceCatalogOptions 
{ @CommandLine.Option( names = "--source-catalog-type", required = true, - description = - "source catalog type. " - + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") - protected org.projectnessie.tools.catalog.migration.cli.CatalogUtil.CatalogType type; + description = { + "Source catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, " + + "NESSIE, REST].", + "Example: --source-catalog-type GLUE", + " --source-catalog-type NESSIE" + }) + protected CatalogMigrationUtil.CatalogType type; @CommandLine.Option( names = "--source-catalog-properties", required = true, split = ",", - description = "source catalog properties (like uri, warehouse, etc)") + description = { + "Catalog properties for source catalog (like uri, warehouse, etc).", + "Example: --source-catalog-properties warehouse=/tmp/warehouseHadoop,type=hadoop", + " --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie" + }) protected Map properties; @CommandLine.Option( names = "--source-catalog-hadoop-conf", split = ",", - description = - "optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " - + "using an Iceberg FileIO.") + description = { + "Optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " + + "using an Iceberg FileIO.", + "Example: --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY" + }) private Map hadoopConf = new HashMap<>(); @CommandLine.Option( names = {"--source-custom-catalog-impl"}, - description = - "optional fully qualified class name of the custom catalog implementation of the source catalog. Required " - + "when the catalog type is CUSTOM.") + description = { + "Optional fully qualified class name of the custom catalog implementation of the source catalog. 
Required " + + "when the catalog type is CUSTOM.", + "Example: --source-custom-catalog-impl org.apache.iceberg.AwesomeCatalog" + }) private String customCatalogImpl; Catalog build() { - return CatalogUtil.buildCatalog(properties, type, customCatalogImpl, hadoopConf); + return CatalogMigrationUtil.buildCatalog( + properties, type, "SOURCE_CATALOG_" + type.name(), customCatalogImpl, hadoopConf); } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java index 748df7f..02e66ff 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java @@ -25,34 +25,46 @@ public class TargetCatalogOptions { @CommandLine.Option( names = "--target-catalog-type", required = true, - description = - "target catalog type. " - + "Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST]") - protected CatalogUtil.CatalogType type; + description = { + "Target catalog type. 
Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, " + + "NESSIE, REST].", + "Example: --target-catalog-type GLUE", + " --target-catalog-type NESSIE" + }) + protected CatalogMigrationUtil.CatalogType type; @CommandLine.Option( names = "--target-catalog-properties", required = true, split = ",", - description = "target catalog properties (like uri, warehouse, etc)") - private Map properties; + description = { + "Catalog properties for target catalog (like uri, warehouse, etc).", + "Example: --target-catalog-properties warehouse=/tmp/warehouseHadoop,type=hadoop", + " --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie" + }) + protected Map properties; @CommandLine.Option( names = "--target-catalog-hadoop-conf", split = ",", - description = - "optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " - + "using an Iceberg FileIO.") + description = { + "Optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " + + "using an Iceberg FileIO.", + "Example: --target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY" + }) private Map hadoopConf = new HashMap<>(); @CommandLine.Option( names = {"--target-custom-catalog-impl"}, - description = - "optional fully qualified class name of the custom catalog implementation of the target catalog. Required " - + "when the catalog type is CUSTOM.") + description = { + "Optional fully qualified class name of the custom catalog implementation of the target catalog. 
Required " + + "when the catalog type is CUSTOM.", + "Example: --target-custom-catalog-impl org.apache.iceberg.AwesomeCatalog" + }) private String customCatalogImpl; - protected Catalog build() { - return CatalogUtil.buildCatalog(properties, type, customCatalogImpl, hadoopConf); + Catalog build() { + return CatalogMigrationUtil.buildCatalog( + properties, type, "TARGET_CATALOG_" + type.name(), customCatalogImpl, hadoopConf); } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java index b0bf8ec..670d061 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java @@ -132,7 +132,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except "bar.tbl3", "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); Assertions.assertThat(run.getOut()) .doesNotContain( @@ -177,7 +177,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except identifierFile.toAbsolutePath().toString(), "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); Files.delete(identifierFile); Assertions.assertThat(run.getExitCode()).isEqualTo(0); @@ -222,7 +222,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except "^foo\\..*", "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) .contains( @@ -272,7 +272,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "dummy.tbl3", "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + 
"--disable-safety-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -302,7 +302,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "foo.tbl2", "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); run = runCLI( deleteSourceTables, @@ -318,7 +318,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "foo.tbl2", "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -354,7 +354,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep "foo.tbl2", "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -387,7 +387,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep targetCatalogProperties, "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); operation = deleteSourceTables ? 
"migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -442,7 +442,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep failedIdentifiersFile.toAbsolutePath().toString(), "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); Assertions.assertThat(run.getOut()) .contains( String.format( @@ -477,7 +477,7 @@ public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { sourceCatalogProperties, "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? "migration" : "registration"; @@ -503,7 +503,7 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { "--dry-run", "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); // should not prompt for dry run @@ -580,7 +580,7 @@ private static String[] registerAllTablesArgs() { targetCatalogProperties, "--output-dir", outputDir.toAbsolutePath().toString(), - "--disable-prompts"); + "--disable-safety-prompts"); return args.toArray(new String[0]); } @@ -596,23 +596,23 @@ private static RunCLI runCLI(boolean deleteSourceTables, String... 
args) throws protected static String catalogType(Catalog catalog) { if (catalog instanceof DynamoDbCatalog) { - return CatalogUtil.CatalogType.DYNAMODB.name(); + return CatalogMigrationUtil.CatalogType.DYNAMODB.name(); } else if (catalog instanceof EcsCatalog) { - return CatalogUtil.CatalogType.ECS.name(); + return CatalogMigrationUtil.CatalogType.ECS.name(); } else if (catalog instanceof GlueCatalog) { - return CatalogUtil.CatalogType.GLUE.name(); + return CatalogMigrationUtil.CatalogType.GLUE.name(); } else if (catalog instanceof HadoopCatalog) { - return CatalogUtil.CatalogType.HADOOP.name(); + return CatalogMigrationUtil.CatalogType.HADOOP.name(); } else if (catalog instanceof HiveCatalog) { - return CatalogUtil.CatalogType.HIVE.name(); + return CatalogMigrationUtil.CatalogType.HIVE.name(); } else if (catalog instanceof JdbcCatalog) { - return CatalogUtil.CatalogType.JDBC.name(); + return CatalogMigrationUtil.CatalogType.JDBC.name(); } else if (catalog instanceof NessieCatalog) { - return CatalogUtil.CatalogType.NESSIE.name(); + return CatalogMigrationUtil.CatalogType.NESSIE.name(); } else if (catalog instanceof RESTCatalog) { - return CatalogUtil.CatalogType.REST.name(); + return CatalogMigrationUtil.CatalogType.REST.name(); } else { - return CatalogUtil.CatalogType.CUSTOM.name(); + return CatalogMigrationUtil.CatalogType.CUSTOM.name(); } } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java index 8f54188..05637e5 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java @@ -18,8 +18,11 @@ import static org.junit.jupiter.params.provider.Arguments.arguments; import com.google.common.collect.Lists; +import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.util.List; 
+import java.util.UUID; import java.util.stream.Stream; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -42,14 +45,20 @@ protected static void initLogDir() { private static Stream optionErrors() { return Stream.of( // no arguments - arguments(Lists.newArrayList(), "Missing required option: '--output-dir='"), + arguments( + Lists.newArrayList(), + "Error: Missing required argument(s): (--target-catalog-type= --target-catalog-properties=[,...] [--target-catalog-properties=[,...]]... [--target-catalog-hadoop-conf=[,...]]... [--target-custom-catalog-impl=])"), + // missing required arguments + arguments(Lists.newArrayList(""), "Unmatched argument at index 1: ''"), // missing required arguments arguments( - Lists.newArrayList(""), "Missing required option: '--output-dir='"), + Lists.newArrayList( + "--source-catalog-properties", "properties1=ab", "--target-catalog-type", "NESSIE"), + "Error: Missing required argument(s): --source-catalog-type="), // missing required arguments arguments( Lists.newArrayList("--source-catalog-type", "GLUE"), - "Missing required option: '--output-dir='"), + "Error: Missing required argument(s): --source-catalog-properties="), // missing required arguments arguments( Lists.newArrayList( @@ -58,9 +67,7 @@ private static Stream optionErrors() { "--source-catalog-properties", "properties1=ab", "--target-catalog-type", - "NESSIE", - "--output-dir", - "path"), + "NESSIE"), "Error: Missing required argument(s): --target-catalog-properties="), // missing required arguments arguments( @@ -70,22 +77,8 @@ private static Stream optionErrors() { "--source-catalog-properties", "properties1=ab", "--target-catalog-properties", - "properties2=cd", - "--output-dir", - "path"), - "Error: Missing required argument(s): --target-catalog-type="), - // missing required arguments - arguments( - Lists.newArrayList( - "--source-catalog-type", - "HIVE", - "--source-catalog-properties", - "properties1=ab", - "--target-catalog-type", - 
"NESSIE", - "--target-catalog-properties", "properties2=cd"), - "Missing required option: '--output-dir='"), + "Error: Missing required argument(s): --target-catalog-type="), arguments( Lists.newArrayList( "--source-catalog-type", @@ -101,9 +94,7 @@ private static Stream optionErrors() { "--identifiers-from-file", "file.txt", "--identifiers-regex", - "^foo\\.", - "--output-dir", - "path"), + "^foo\\."), "Error: --identifiers=, --identifiers-from-file=, --identifiers-regex= are mutually exclusive (specify only one)"), arguments( Lists.newArrayList( @@ -118,9 +109,7 @@ private static Stream optionErrors() { "--identifiers", "foo.tbl", "--identifiers-from-file", - "file.txt", - "--output-dir", - "path"), + "file.txt"), "Error: --identifiers=, --identifiers-from-file= are mutually exclusive (specify only one)"), arguments( Lists.newArrayList( @@ -135,9 +124,7 @@ private static Stream optionErrors() { "--identifiers-regex", "^foo\\.", "--identifiers-from-file", - "file.txt", - "--output-dir", - "path"), + "file.txt"), "Error: --identifiers-from-file=, --identifiers-regex= are mutually exclusive (specify only one)"), arguments( Lists.newArrayList( @@ -152,9 +139,7 @@ private static Stream optionErrors() { "--identifiers", "foo.tbl", "--identifiers-regex", - "^foo\\.", - "--output-dir", - "path"), + "^foo\\."), "Error: --identifiers=, --identifiers-regex= are mutually exclusive " + "(specify only one)")); } @@ -186,9 +171,7 @@ private static Stream invalidArgs() { "--target-catalog-type", "HIVE", "--target-catalog-properties", - "k3=v3, k4=v4", - "--output-dir", - "path"), + "k3=v3, k4=v4"), "java.lang.IllegalArgumentException: Cannot initialize HadoopCatalog " + "because warehousePath must not be null or empty"), arguments( @@ -202,11 +185,37 @@ private static Stream invalidArgs() { "--target-catalog-properties", "k3=v3, k4=v4", "--identifiers-from-file", - "file.txt", + "file.txt"), + "java.lang.IllegalArgumentException: " + + "File specified in 
`--identifiers-from-file` option does not exist."), + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--output-dir", + "/path/to/file"), + "java.lang.IllegalArgumentException: " + + "path specified in `--output-dir` does not exist"), + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", "--output-dir", - "path"), + readOnlyDirLocation()), "java.lang.IllegalArgumentException: " - + "File specified in `--identifiers-from-file` option does not exist.")); + + "path specified in `--output-dir` is not writable")); } @ParameterizedTest @@ -242,4 +251,16 @@ private static void executeAndValidateResults( Assertions.assertThat(run.getExitCode()).isEqualTo(expectedErrorCode); Assertions.assertThat(run.getErr()).contains(expectedMessage); } + + private static String readOnlyDirLocation() { + Path readOnly = logDir.resolve(UUID.randomUUID().toString()); + try { + Files.createDirectory(readOnly); + } catch (IOException e) { + throw new RuntimeException(e); + } + readOnly.toFile().setWritable(false); + + return readOnly.toAbsolutePath().toString(); + } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtilTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java similarity index 68% rename from cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtilTest.java rename to cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java index 956255c..1bd3039 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogUtilTest.java +++ 
b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java @@ -36,7 +36,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; -public class CatalogUtilTest { +public class CatalogMigrationUtilTest { protected static @TempDir Path logDir; @@ -55,9 +55,10 @@ static Stream blankOrNullStrings() { public void testCustomCatalogWithoutImpl(String impl) { Assertions.assertThatThrownBy( () -> - CatalogUtil.buildCatalog( + CatalogMigrationUtil.buildCatalog( Collections.emptyMap(), - CatalogUtil.CatalogType.CUSTOM, + CatalogMigrationUtil.CatalogType.CUSTOM, + "catalogName", impl, Collections.emptyMap())) .isInstanceOf(IllegalArgumentException.class) @@ -68,19 +69,42 @@ public void testCustomCatalogWithoutImpl(String impl) { @Order(1) @Test public void testInvalidArgs() { - Assertions.assertThatThrownBy(() -> CatalogUtil.buildCatalog(null, null, null, null)) + Assertions.assertThatThrownBy( + () -> CatalogMigrationUtil.buildCatalog(null, null, null, null, null)) .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("catalog properties is null"); Assertions.assertThatThrownBy( - () -> CatalogUtil.buildCatalog(Collections.emptyMap(), null, null, null)) + () -> CatalogMigrationUtil.buildCatalog(Collections.emptyMap(), null, null, null, null)) .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("catalog type is null"); Assertions.assertThatThrownBy( () -> - CatalogUtil.buildCatalog( - Collections.emptyMap(), CatalogUtil.CatalogType.CUSTOM, "abc", null)) + CatalogMigrationUtil.buildCatalog( + Collections.emptyMap(), + CatalogMigrationUtil.CatalogType.HIVE, + null, + null, + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("catalog name is null"); + + Assertions.assertThatThrownBy( + () -> + CatalogMigrationUtil.buildCatalog( + Collections.emptyMap(), CatalogMigrationUtil.CatalogType.HIVE, " ", null, null)) + 
.isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("catalog name is empty"); + + Assertions.assertThatThrownBy( + () -> + CatalogMigrationUtil.buildCatalog( + Collections.emptyMap(), + CatalogMigrationUtil.CatalogType.CUSTOM, + "catalogName", + "abc", + null)) .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining( "Cannot initialize Catalog implementation abc: Cannot find constructor for interface"); @@ -97,10 +121,11 @@ public void testBuildHadoopCatalog() { conf.put("k1", "v1"); Catalog catalog = - CatalogUtil.buildCatalog(properties, CatalogUtil.CatalogType.HADOOP, null, conf); + CatalogMigrationUtil.buildCatalog( + properties, CatalogMigrationUtil.CatalogType.HADOOP, "catalogName", null, conf); Assertions.assertThat(catalog).isInstanceOf(HadoopCatalog.class); - Assertions.assertThat(catalog.name()).isEqualTo("HADOOP"); + Assertions.assertThat(catalog.name()).isEqualTo("catalogName"); Assertions.assertThat(((HadoopCatalog) catalog).getConf().get("k1")).isEqualTo("v1"); Schema schema = new Schema( @@ -120,10 +145,11 @@ public void testBuildNessieCatalog() { properties.put("uri", "http://localhost:19120/api/v1"); Catalog catalog = - CatalogUtil.buildCatalog(properties, CatalogUtil.CatalogType.NESSIE, null, null); + CatalogMigrationUtil.buildCatalog( + properties, CatalogMigrationUtil.CatalogType.NESSIE, "catalogName", null, null); Assertions.assertThat(catalog).isInstanceOf(NessieCatalog.class); - Assertions.assertThat(catalog.name()).isEqualTo("NESSIE"); + Assertions.assertThat(catalog.name()).isEqualTo("catalogName"); } @Test @@ -135,9 +161,10 @@ public void testBuildHiveCatalog() { properties.put("uri", "thrift://localhost:9083"); Catalog catalog = - CatalogUtil.buildCatalog(properties, CatalogUtil.CatalogType.HIVE, null, null); + CatalogMigrationUtil.buildCatalog( + properties, CatalogMigrationUtil.CatalogType.HIVE, "catalogName", null, null); Assertions.assertThat(catalog).isInstanceOf(HiveCatalog.class); - 
Assertions.assertThat(catalog.name()).isEqualTo("HIVE"); + Assertions.assertThat(catalog.name()).isEqualTo("catalogName"); } } diff --git a/codestyle/errorprone-rules.properties b/codestyle/errorprone-rules.properties index 665676d..afd59e7 100644 --- a/codestyle/errorprone-rules.properties +++ b/codestyle/errorprone-rules.properties @@ -1648,5 +1648,5 @@ Slf4jLoggerShouldBeFinal=ERROR Slf4jLoggerShouldBeNonStatic=OFF Slf4jIllegalPassedClass=ERROR #Slf4jSignOnlyFormat=OFF -#Slf4jDoNotLogMessageOfExceptionExplicitly=ERROR +Slf4jDoNotLogMessageOfExceptionExplicitly=OFF diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index a1954f0..3634574 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -33,6 +33,7 @@ checkstyle = { module = "com.puppycrawl.tools:checkstyle", version.ref = "checks errorprone-annotations = { module = "com.google.errorprone:error_prone_annotations", version.ref = "errorprone" } errorprone-core = { module = "com.google.errorprone:error_prone_core", version.ref = "errorprone" } errorprone-slf4j = { module = "jp.skypencil.errorprone.slf4j:errorprone-slf4j", version.ref = "errorproneSlf4j" } +findbugs-annotations = { module = "com.google.code.findbugs:annotations", version = "3.0.1" } findbugs-jsr305 = { module = "com.google.code.findbugs:jsr305", version = "3.0.2" } google-java-format = { module = "com.google.googlejavaformat:google-java-format", version.ref = "googleJavaFormat" } guava = { module = "com.google.guava:guava", version.ref = "guava" } From 6b0aa1cadf44e9eb1f531ecaa1d17c3794456c3d Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Tue, 21 Mar 2023 23:23:18 +0530 Subject: [PATCH 23/31] self review --- README.md | 66 +++++++--------- api/build.gradle.kts | 2 + .../migration/api/CatalogMigrator.java | 9 +-- .../api/AbstractTestCatalogMigrator.java | 51 +++++++++++- cli/build.gradle.kts | 1 + .../migration/cli/BaseRegisterCommand.java | 31 ++++---- .../migration/cli/IdentifierOptions.java | 8 +- 
.../catalog/migration/cli/PromptUtil.java | 10 +-- .../migration/cli/SourceCatalogOptions.java | 2 +- .../migration/cli/TargetCatalogOptions.java | 2 +- .../cli/AbstractCLIMigrationTest.java | 79 +++++++++++++++++-- .../migration/cli/ProcessIdentifiersTest.java | 69 ++++++++++++++++ gradle/libs.versions.toml | 5 +- 13 files changed, 261 insertions(+), 74 deletions(-) create mode 100644 cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java diff --git a/README.md b/README.md index d1fe3bf..70d1987 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # Objective -Introduce a CLI tool to bulk migrate Iceberg tables from one catalog to another without a data copy. +Introduce a command-line tool that enables bulk migration of Iceberg tables from one catalog to another without the need to copy the data. -Users may want to move away from one catalog and use the other catalog with their existing Iceberg tables for the following reasons: +There are various reasons why users may want to move their Iceberg tables to a different catalog. For instance, * They were using hadoop catalog and later realized that it is not production recommended. So, they want to move tables to other production ready catalogs. * They just heard about the awesome Arctic catalog (or Nessie) and want to move their existing iceberg tables to Dremio Arctic. * They had an on-premise Hive catalog, but want to move tables to a cloud-based catalog as part of their cloud migration strategy. -Before the `1.1.0` Iceberg release, the only way to achieve this was **by copying the data** using `insert into catalog1.db.tableName as select * from catalog2.db.tableName`. +Previously, before the Iceberg `1.1.0` release, the only way to migrate tables was by copying the data using the command `insert into catalog1.db.tableName as select * from catalog2.db.tableName`. 
After the iceberg `1.1.0` release, all Iceberg Catalogs supports register table with the `catalog#registerTable()` API. However, custom code is needed to migrate all the tables in bulk. **Hence, we introduce a CLI tool to migrate Iceberg tables in bulk from one Iceberg Catalog to another without a data copy.** @@ -156,8 +156,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ # Scenarios ## A. User need to try out new catalog -They can use a new catalog with a fresh table to explore the capabilities of the new catalog. -No need for a catalog migration tool. +Users can use a new catalog by creating a fresh table to test the new catalog's capabilities, without requiring a tool to migrate the catalog. ## B. Users need to move away from one catalog (example: Hadoop) to another (example: Nessie) with all the tables. @@ -173,9 +172,9 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register \ --dry-run ``` -All the inputs will be validated and a list of identified table identifiers for migration will be printed on the console -along with the total count. Output will be written to _dry_run.txt_ file. -which can be used for actual migration using the `--identifiers-from-file` option without listing tables again from the catalog. +After validating all inputs, the console will display a list of table identifiers that have been identified for migration along with the total count. +This information will also be written to a file called `dry_run.txt`, +which can be used for actual migration using the `--identifiers-from-file` option, thus eliminating the need to list tables from the catalog again. ### B.2) executes the migration of all 1000 tables and all the tables are successfully migrated. 
@@ -188,8 +187,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse ``` -Once the input validations are done, users will be prompted with this message. -They have an ability to abort or continue the operation. +After input validation, users will receive a prompt message with the option to either abort or continue the operation. ``` Configured source catalog: HADOOP @@ -198,15 +196,15 @@ Configured target catalog: NESSIE [WARNING] a) Executing catalog migration when the source catalog has some in-progress commits -can lead to a data loss as the in-progress commit will not be considered for migration. -So, while using this tool please make sure there are no in-progress commits for the source catalog +can lead to a data loss as the in-progress commits will not be considered for migration. +So, while using this tool please make sure there are no in-progress commits for the source catalog. b) After the migration, successfully migrated tables will be deleted from the source catalog and can only be accessed from the target catalog. -Have you read the above warnings and are you sure you want to continue? (yes/no): +Are you certain that you wish to proceed, after reading the above warnings? (yes/no): ``` -Once the user wants to continue, other information will be printed on the console. +If the user chooses to continue, additional information will be displayed on the console. ``` Continuing... @@ -236,8 +234,8 @@ Details: [foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3, …, …,bar.tbl-1000] ``` -Note: a log file will also be generated which prints “successfully migrated table X” on every table migration. It also captures table level failures if there are any. - +Please note that a log file will be created, which will print "successfully migrated table X" for every table migration, +and also log any table level failures, if present. 
### B.3) executes the migration and out of 1000 tables 10 tables have failed to migrate because the target catalog had the same table and namespace (maybe different schema).Remaining 990 tables were successfully migrated. @@ -247,7 +245,8 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--stacktrace ``` Console output will be same as B.2) till summary because even in case of failure, @@ -266,11 +265,9 @@ Details: [bar.tbl-201, foo.tbl-202, …, …,bar.tbl-210] ``` -Note: -A log file will also be generated which prints “successfully migrated table X” on every table migration. It also captures table level failures if there are any. -So from the details or from _failed_identifiers.txt_ file, users can get the failed table names and search in the log. -It will have a 10 stacktrace with `TableAlreadyExists` exception for 10 tables. Which gives an idea for the user about why it failed. -Users can rename the tables in the source catalog and migrate only these 10 tables by using any one of the identifier options in the argument. +Please note that a log file will be generated, which will print "successfully migrated table X" for every table migration and log any table-level failures in the `failed_identifiers.txt` file. +Users can use this file to identify failed tables and search for them in the log, which will contain a stacktrace with the `TableAlreadyExists` exception for up to 10 tables. +This can help users understand why the migration failed. In such cases, users can rename the tables in the source catalog and migrate only those 10 tables using any of the identifier options available in the argument. 
### B.4) executes the migration and out of 1000 tables 900 tables have failed to migrate because the target/source catalog connection went off. Only 100 tables were successfully migrated. @@ -281,7 +278,8 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse +--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ +--stacktrace ``` Console output will be same as B.2) till summary because even in case of failure, @@ -300,11 +298,9 @@ Details: [bar.tbl-201, foo.tbl-202, …, …,bar.tbl-1000] ``` -Note: -A log file will also be generated which prints “successfully migrated table X” on every table migration. It also captures table level failures if there are any. -So from the details or from _failed_identifiers.txt_ file, users can get the failed table names and search in the log. -It will have a 900 stack trace with `ConnectionTimeOut` exception for 900 tables. Which gives an idea for the user about why it failed. -As these were timeout exceptions, users can retry migration of only these 900 tables using the `--identifiers-from-file` option with _failed_identifiers.txt_. +Please note that a log file will be generated, which will print "successfully migrated table X" for every table migration and log any table-level failures in the `failed_identifiers.txt` file. +Users can use this file to identify failed tables and search for them in the log, which may contain a stacktrace with the `ConnectionTimeOut` exception for up to 900 tables. +This can help users understand why the migration failed. Since these are timeout exceptions, users can retry migrating only those 900 tables using the `--identifiers-from-file` option with the `failed_identifiers.txt` file. 
### B.5) executes the migration and out of 1000 tables. Where all the 1000 tables were migrated successfully but deletion of 200 tables from the source catalog has failed due to network issues. @@ -317,9 +313,9 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse ``` -Console output will be same as B.2) till summary because even in case of failure, -all the identified tables will be attempted for migration. -These failed to delete tables are stored in _failed_to_delete.txt_ and the user has to delete them manually or stop using them from the source catalog. (console will print this warning) +The console output will be the same as in B.2) until the summary because, even in case of failure, all the identified tables will be attempted for migration. +However, any tables that fail to delete will be stored in the `failed_to_delete.txt` file, and the user will have to delete them manually or stop using them from the source catalog. +The console will print this warning. ``` Summary: @@ -338,11 +334,9 @@ Users should manually drop the table entry from the source catalog in this case ### B.6) executes the migration and out of 1000 tables. But manually aborts the migration by killing the process. -User has to go through the log to figure out how many tables have migrated so far. -Users can also do `listTables()` at the target catalog to see how many tables migrated. -There can be a chance that tables that are migrated to the target catalog may not be cleaned in the source catalog due to abort. -Users should not operate them from source catalog and can manually drop them from source catalog. -Users can also try bulk migration again, which will attempt to migrate all the tables in the source catalog. +To determine the number of migrated tables, the user can either review the log or use the listTables() function in the target catalog. 
+In the event of an abort, migrated tables may not be deleted from the source catalog, and users should avoid manipulating them from there. +If necessary, users can manually remove these tables from the source catalog or attempt a bulk migration to transfer all tables from the source catalog. ### B.7) Users need to move away from one catalog to another with selective tables (maybe want to move only the production tables, test tables, etc) diff --git a/api/build.gradle.kts b/api/build.gradle.kts index 27d7f23..73da6e6 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts @@ -33,8 +33,10 @@ dependencies { testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) + testImplementation(libs.junit.vintage.engine) testImplementation(libs.assertj) testImplementation(libs.hadoop.common) + testImplementation(libs.logcaptor) testImplementation(project(":iceberg-catalog-migrator-api-test")) diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index aba6c01..8b4e830 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -42,10 +42,10 @@ public abstract class CatalogMigrator { /** Source {@link Catalog} from which the tables are chosen. */ public abstract Catalog sourceCatalog(); - /** Target {@link Catalog} to which the tables need to be migrated. */ + /** Target {@link Catalog} to which the tables need to be registered or migrated. */ public abstract Catalog targetCatalog(); - /** Delete the table entries from source catalog after successful migration. */ + /** Delete the table entries from the source catalog after successful registration. 
*/ public abstract boolean deleteEntriesFromSourceCatalog(); /** Enable the stacktrace in logs in case of failures. */ @@ -57,7 +57,6 @@ public boolean enableStacktrace() { private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrator.class); private final ImmutableCatalogMigrationResult.Builder resultBuilder = ImmutableCatalogMigrationResult.builder(); - private final Set processedNamespaces = new HashSet<>(); /** @@ -120,7 +119,7 @@ public CatalogMigrator registerTables(List identifiers) { } if (identifiers.isEmpty()) { - LOG.info("Identifiers list is empty"); + LOG.warn("Identifiers list is empty"); return this; } @@ -170,7 +169,7 @@ private boolean registerTable(TableIdentifier tableIdentifier) { // register the table to the target catalog TableOperations ops = ((BaseTable) sourceCatalog().loadTable(tableIdentifier)).operations(); targetCatalog().registerTable(tableIdentifier, ops.current().metadataFileLocation()); - LOG.info("Successfully migrated the table {}", tableIdentifier); + LOG.info("Successfully registered the table {}", tableIdentifier); return true; } catch (Exception ex) { if (enableStacktrace()) { diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java index 90e81cf..d80b5a2 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java @@ -19,8 +19,11 @@ import java.util.Collections; import java.util.List; import java.util.stream.IntStream; +import nl.altindag.log.LogCaptor; +import nl.altindag.log.model.LogEvent; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.HadoopCatalog; import 
org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterEach; @@ -28,6 +31,7 @@ import org.junit.jupiter.api.Order; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; import org.projectnessie.tools.catalog.migration.api.test.AbstractTest; @@ -174,7 +178,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); } - // register all the tables from source catalog again + // register all the tables from source catalog again. So that `foo.tbl2` will fail to register. result = registerAllTables(deleteSourceTables); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactlyInAnyOrder( @@ -244,6 +248,7 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E public void testListingTableIdentifiers(boolean deleteSourceTables) { CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); + // should list all the tables from all the namespace when regex is null. List matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); Assertions.assertThat(matchingTableIdentifiers) @@ -253,10 +258,15 @@ public void testListingTableIdentifiers(boolean deleteSourceTables) { TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + // list the tables whose identifier starts with "foo." matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers("^foo\\..*"); Assertions.assertThat(matchingTableIdentifiers) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + + // test filter that doesn't match any table. 
+ matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers("^dev\\..*"); + Assertions.assertThat(matchingTableIdentifiers).isEmpty(); } @Order(7) @@ -280,6 +290,45 @@ public void testRegisterWithNewNamespace(boolean deleteSourceTables) { .containsExactly(TableIdentifier.parse("db1.tbl5")); } + @Order(7) + @ParameterizedTest + @CsvSource(value = {"false,false", "false,true", "true,false", "true,true"}) + public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) { + ImmutableCatalogMigrator migrator = + ImmutableCatalogMigrator.builder() + .sourceCatalog(catalog1) + .targetCatalog(catalog2) + .deleteEntriesFromSourceCatalog(deleteSourceTables) + .enableStacktrace(enableStacktrace) + .build(); + try (LogCaptor logCaptor = LogCaptor.forClass(CatalogMigrator.class)) { + CatalogMigrationResult result = + migrator + .registerTables(Collections.singletonList(TableIdentifier.parse("db.dummy_table"))) + .result(); + Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .containsExactly(TableIdentifier.parse("db.dummy_table")); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); + + Assertions.assertThat(logCaptor.getLogEvents()).hasSize(1); + LogEvent logEvent = logCaptor.getLogEvents().get(0); + if (enableStacktrace) { + Assertions.assertThat(logEvent.getFormattedMessage()) + .isEqualTo("Unable to register the table db.dummy_table"); + Assertions.assertThat(logEvent.getThrowable()) + .isPresent() + .get() + .isInstanceOf(NoSuchTableException.class); + } else { + Assertions.assertThat(logEvent.getFormattedMessage()) + .isEqualTo( + "Unable to register the table db.dummy_table : Table does not exist: db.dummy_table"); + Assertions.assertThat(logEvent.getThrowable()).isEmpty(); + } + } + } + protected CatalogMigrator catalogMigratorWithDefaultArgs(boolean deleteSourceTables) { return ImmutableCatalogMigrator.builder() 
.sourceCatalog(catalog1) diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index 60bee5b..75b789d 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -46,6 +46,7 @@ dependencies { testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) + testImplementation(libs.junit.vintage.engine) testImplementation(libs.assertj) testImplementation(libs.logcaptor) diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index 0974e0f..e7563b1 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -56,8 +56,8 @@ public abstract class BaseRegisterCommand implements Callable { defaultValue = "", description = { "Optional local output directory path to write CLI output files like `failed_identifiers.txt`, " - + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. If not specified, uses present working " - + "directory.", + + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. " + + "If not specified, uses the present working directory.", "Example: --output-dir /tmp/output/", " --output-dir $PWD/output_folder" }) @@ -79,7 +79,7 @@ public abstract class BaseRegisterCommand implements Callable { names = {"--stacktrace"}, description = "Optional configuration to enable capturing stacktrace in logs in case of failures.") - boolean enableStackTrace; + protected boolean enableStackTrace; private static final int BATCH_SIZE = 100; public static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; @@ -137,16 +137,23 @@ public Integer call() { "User has not specified the table identifiers." 
+ " Selecting all the tables from all the namespaces from the source catalog."); } + identifiers = catalogMigrator.getMatchingTableIdentifiers(identifierRegEx); + if (identifiers.isEmpty()) { + consoleLog.info( + "No tables are identified for {}. Please check `catalog_migration.log` file for more info.", + operation()); + return 0; + } } - consoleLog.info("Identified {} tables for {}.", identifiers.size(), operation()); - if (isDryRun) { handleDryRunResult(identifiers); return 0; } + consoleLog.info("Identified {} tables for {}.", identifiers.size(), operation()); + consoleLog.info("Started {} ...", operation()); List> identifierBatches = Lists.partition(identifiers, BATCH_SIZE); @@ -172,7 +179,6 @@ private void handleResults(CatalogMigrationResult result) { writeToFile( outputDirPath.resolve(FAILED_TO_DELETE_AT_SOURCE_FILE), result.failedToDeleteTableIdentifiers()); - consoleLog.info("Finished {} ...", operation()); printSummary(result); printDetails(result); @@ -210,7 +216,7 @@ private void printSummary(CatalogMigrationResult result) { if (!result.failedToDeleteTableIdentifiers().isEmpty()) { consoleLog.info( "Failed to delete {} tables from {} catalog. " - + "Please check the `catalog_migration.log` file for the reason. " + + "Please check the `catalog_migration.log` file for the failure reason. " + "{}Failed to delete identifiers are written into `{}`.", result.failedToDeleteTableIdentifiers().size(), sourceCatalogOptions.type.name(), @@ -247,18 +253,12 @@ private void printDetails(CatalogMigrationResult result) { private void printDryRunResult(List result) { consoleLog.info("Summary: "); - if (result.isEmpty()) { - consoleLog.info( - "No tables are identified for {}. Please check logs for more info.", operation()); - return; - } consoleLog.info( "Identified {} tables for {} by dry-run. These identifiers are also written into {}. 
" - + "You can use this file with `--identifiers-from-file` option.", + + "This file can be used with `--identifiers-from-file` option for an actual run.", result.size(), operation(), DRY_RUN_FILE); - consoleLog.info( "Details: {}Identified these tables for {} by dry-run:{}{}", System.lineSeparator(), @@ -268,6 +268,9 @@ private void printDryRunResult(List result) { } private static void writeToFile(Path filePath, List identifiers) { + if (identifiers.isEmpty()) { + return; + } List identifiersString = identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); try { diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java index b39f1ea..ce2fb62 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java @@ -16,6 +16,7 @@ package org.projectnessie.tools.catalog.migration.cli; import java.io.IOException; +import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; @@ -38,7 +39,7 @@ public class IdentifierOptions { + "use the `--identifiers-from-file` or `--identifiers-regex` option.", "Example: --identifiers foo.t1,bar.t2" }) - private List identifiers = new ArrayList<>(); + protected List identifiers = new ArrayList<>(); @CommandLine.Option( names = {"--identifiers-from-file"}, @@ -47,7 +48,7 @@ public class IdentifierOptions { + "used with `--identifiers` or `--identifiers-regex` option.", "Example: --identifiers-from-file /tmp/files/ids.txt" }) - private String identifiersFromFile; + protected String identifiersFromFile; @CommandLine.Option( names = {"--identifiers-regex"}, @@ -74,7 +75,8 @@ protected List processIdentifiersInput() { .map(TableIdentifier::parse) .collect(Collectors.toList()); } catch (IOException e) { - 
throw new RuntimeException("Failed to read the file:", e); + throw new UncheckedIOException( + String.format("Failed to read the file: %s", identifiersFromFile), e); } } else if (!identifiers.isEmpty()) { tableIdentifiers = diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java index 949cf68..42f3b73 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java @@ -30,9 +30,9 @@ static boolean proceedForRegistration() { consoleLog.warn( "{}" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "{}\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + + "{}\tcan lead to a data loss as the in-progress commits will not be considered for migration. " + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog{}" + + "catalog.{}" + "{}" + "\tb) After the registration, successfully registered tables will be present in both source and target " + "catalog. " @@ -54,9 +54,9 @@ static boolean proceedForMigration() { consoleLog.warn( "{}" + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "{}\tcan lead to a data loss as the in-progress commit will not be considered for migration. " + + "{}\tcan lead to a data loss as the in-progress commits will not be considered for migration. 
" + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog{}" + + "catalog.{}" + "{}" + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + "{}\tand can only be accessed from the target catalog.", @@ -73,7 +73,7 @@ private static boolean proceed() { Console console = System.console(); while (true) { consoleLog.info( - "Have you read the above warnings and are you sure you want to continue? (yes/no):"); + "Are you certain that you wish to proceed, after reading the above warnings? (yes/no):"); String input = console.readLine(); if (input.equalsIgnoreCase("yes")) { diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java index 3b93f10..89e5966 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java @@ -42,7 +42,7 @@ public class SourceCatalogOptions { "Example: --source-catalog-properties warehouse=/tmp/warehouseHadoop,type=hadoop", " --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie" }) - protected Map properties; + private Map properties; @CommandLine.Option( names = "--source-catalog-hadoop-conf", diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java index 02e66ff..6791134 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java @@ -42,7 +42,7 @@ public class TargetCatalogOptions { "Example: --target-catalog-properties 
warehouse=/tmp/warehouseHadoop,type=hadoop", " --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie" }) - protected Map properties; + private Map properties; @CommandLine.Option( names = "--target-catalog-hadoop-conf", diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java index 670d061..705044e 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java @@ -23,12 +23,15 @@ import java.util.Collections; import java.util.List; import java.util.stream.IntStream; +import nl.altindag.log.LogCaptor; +import nl.altindag.log.model.LogEvent; import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; import org.apache.iceberg.aws.glue.GlueCatalog; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.dell.ecs.EcsCatalog; +import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.hive.HiveCatalog; import org.apache.iceberg.jdbc.JdbcCatalog; @@ -40,7 +43,9 @@ import org.junit.jupiter.api.Order; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; import org.projectnessie.tools.catalog.migration.api.test.AbstractTest; public abstract class AbstractCLIMigrationTest extends AbstractTest { @@ -110,6 +115,24 @@ public void testRegister(boolean deleteSourceTables) throws Exception { Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) 
.containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + catalog1.tableExists(TableIdentifier.parse("foo.tbl1")); + + if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { + // table should be deleted after migration from source catalog + Assertions.assertThat(catalog1.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))).isEmpty(); + return; + } + // tables should be present in source catalog. + Assertions.assertThat(catalog1.listTables(Namespace.of("foo"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } @Order(1) @@ -206,7 +229,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl4"), TableIdentifier.parse("bar.tbl3")); - // using --identifiers-regex option which matches all the tables starts with "foo." + // using `--identifiers-regex` option which matches all the tables starts with "foo." run = runCLI( deleteSourceTables, @@ -373,7 +396,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); } - // register all the tables from source catalog again + // register all the tables from source catalog again. So that registering `foo.tbl2` will fail. 
run = runCLI( deleteSourceTables, @@ -426,7 +449,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); - // retry the failed tables using --identifiers-from-file + // retry the failed tables using `--identifiers-from-file` run = runCLI( deleteSourceTables, @@ -482,7 +505,10 @@ public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { Assertions.assertThat(run.getExitCode()).isEqualTo(0); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 0 tables for %s.", operation)); + .contains( + String.format( + "No tables are identified for %s. Please check `catalog_migration.log` file for more info.", + operation)); } @Order(5) @@ -509,7 +535,7 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { // should not prompt for dry run Assertions.assertThat(run.getOut()) .doesNotContain( - "Have you read the above warnings and are you sure you want to continue? (yes/no):"); + "Are you certain that you wish to proceed, after reading the above warnings? (yes/no):"); Assertions.assertThat(run.getOut()).contains("Dry run is completed."); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -518,7 +544,7 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { "Summary: %n" + "Identified 4 tables for %s by dry-run. " + "These identifiers are also written into dry_run_identifiers.txt. 
" - + "You can use this file with `--identifiers-from-file` option.", + + "This file can be used with `--identifiers-from-file` option for an actual run.", operation)); Assertions.assertThat(run.getOut()) .contains( @@ -567,6 +593,47 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } + @Order(7) + @ParameterizedTest + @CsvSource(value = {"false,false", "false,true", "true,false", "true,true"}) + public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) + throws Exception { + try (LogCaptor logCaptor = LogCaptor.forClass(CatalogMigrator.class)) { + runCLI( + deleteSourceTables, + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--identifiers", + "db.dummy_table", + "--output-dir", + outputDir.toAbsolutePath().toString(), + "--disable-safety-prompts", + "--stacktrace=" + enableStacktrace); + + Assertions.assertThat(logCaptor.getLogEvents()).hasSize(1); + LogEvent logEvent = logCaptor.getLogEvents().get(0); + if (enableStacktrace) { + Assertions.assertThat(logEvent.getFormattedMessage()) + .isEqualTo("Unable to register the table db.dummy_table"); + Assertions.assertThat(logEvent.getThrowable()) + .isPresent() + .get() + .isInstanceOf(NoSuchTableException.class); + } else { + Assertions.assertThat(logEvent.getFormattedMessage()) + .isEqualTo( + "Unable to register the table db.dummy_table : Table does not exist: db.dummy_table"); + Assertions.assertThat(logEvent.getThrowable()).isEmpty(); + } + } + } + private static String[] registerAllTablesArgs() { ArrayList args = Lists.newArrayList( diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java 
b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java new file mode 100644 index 0000000..fc8fa67 --- /dev/null +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration.cli; + +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import org.apache.iceberg.catalog.TableIdentifier; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class ProcessIdentifiersTest { + + protected static @TempDir Path logDir; + + @BeforeAll + protected static void initLogDir() { + System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); + } + + @Test + public void testOptions() throws Exception { + Assertions.assertThat(new IdentifierOptions().processIdentifiersInput()).isEmpty(); + + IdentifierOptions identifierOptions = new IdentifierOptions(); + identifierOptions.identifiers = Arrays.asList("foo.abc", "bar.def"); + Assertions.assertThat(identifierOptions.processIdentifiersInput()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.abc"), TableIdentifier.parse("bar.def")); + + Path identifierFile = 
logDir.resolve("file_with_ids.txt"); + Files.write(identifierFile, Arrays.asList("db1.t1", "db2.t2", "db123.t5")); + IdentifierOptions newOptions = new IdentifierOptions(); + newOptions.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + Assertions.assertThat(newOptions.processIdentifiersInput()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("db1.t1"), + TableIdentifier.parse("db2.t2"), + TableIdentifier.parse("db123.t5")); + + identifierFile.toFile().setReadable(false); + Assertions.assertThatThrownBy(newOptions::processIdentifiersInput) + .isInstanceOf(UncheckedIOException.class) + .hasMessageContaining("Failed to read the file: " + identifierFile); + identifierFile.toFile().setReadable(true); + + IdentifierOptions options = new IdentifierOptions(); + options.identifiersFromFile = "path/to/file"; + Assertions.assertThatThrownBy(options::processIdentifiersInput) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("File specified in `--identifiers-from-file` option does not exist"); + } +} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 3634574..a365fa0 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -8,14 +8,14 @@ googleJavaFormat = "1.16.0" guava = "31.1-jre" hadoop = "3.2.4" hive = "2.3.8" # this is in mapping with iceberg repo. 
Later versions have junit depedency problem -iceberg = "1.1.0" +iceberg = "1.2.0" immutables = "2.9.3" jacoco = "0.8.8" jandex = "3.0.5" junit = "5.9.2" logback = "1.4.5" logcaptor = "2.8.0" -nessie = "0.51.1" +nessie = "0.52.3" nessieBuildPlugins = "0.2.19" nessieRunner = "0.29.0" picocli = "4.7.1" @@ -50,6 +50,7 @@ junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } +junit-vintage-engine = { module = "org.junit.vintage:junit-vintage-engine" } logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } logcaptor = { module = "io.github.hakky54:logcaptor", version.ref = "logcaptor" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } From a8fd104e7de15d2565b84d80ac4454708753f61f Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Thu, 23 Mar 2023 11:59:20 +0530 Subject: [PATCH 24/31] Addressing the comments from March 22 --- README.md | 41 ++-- .../migration/api/test/AbstractTest.java | 32 ++-- api/build.gradle.kts | 8 +- .../migration/api/CatalogMigrator.java | 133 ++++++++----- .../api/AbstractTestCatalogMigrator.java | 94 +++++----- .../api/CatalogMigratorParamsTest.java | 32 +++- .../api/CustomCatalogMigratorTest.java | 11 +- .../api/HadoopCatalogMigratorTest.java | 131 ++++++++++--- .../api/ITHadoopToHiveCatalogMigrator.java | 37 ++-- .../api/ITHadoopToNessieCatalogMigrator.java | 160 ++++++++++++---- .../api/ITHiveToHadoopCatalogMigrator.java | 12 +- .../api/ITHiveToNessieCatalogMigrator.java | 12 +- .../api/ITNessieToHiveCatalogMigrator.java | 33 +++- .../api/UnsupportedNamespaceTest.java | 28 +-- buildSrc/build.gradle.kts | 8 - buildSrc/src/main/kotlin/CodeCoverage.kt | 42 +++++ buildSrc/src/main/kotlin/Testing.kt | 1 - 
buildSrc/src/main/kotlin/Utilities.kt | 2 - .../main/kotlin/build-conventions.gradle.kts | 7 +- cli/build.gradle.kts | 4 +- .../migration/cli/BaseRegisterCommand.java | 175 ++++++++++++------ .../migration/cli/CatalogMigrationCLI.java | 25 ++- .../migration/cli/CatalogMigrationUtil.java | 2 - .../migration/cli/IdentifierOptions.java | 26 +-- .../catalog/migration/cli/MigrateCommand.java | 43 +++-- .../catalog/migration/cli/PromptUtil.java | 90 --------- .../migration/cli/RegisterCommand.java | 30 ++- .../migration/cli/SourceCatalogOptions.java | 5 +- .../migration/cli/TargetCatalogOptions.java | 5 +- .../cli/AbstractCLIMigrationTest.java | 139 ++++++-------- .../catalog/migration/cli/CLIOptionsTest.java | 50 ++--- .../cli/CatalogMigrationUtilTest.java | 24 --- .../migration/cli/HadoopCLIMigrationTest.java | 13 +- .../cli/ITHadoopToHiveCLIMigrationTest.java | 21 +-- .../cli/ITHiveToHadoopCLIMigrationTest.java | 21 +-- .../cli/ITHiveToNessieCLIMigrationTest.java | 65 +++++-- .../cli/ITNessieToHiveCLIMigrationTest.java | 21 +-- .../migration/cli/ProcessIdentifiersTest.java | 45 ++++- .../tools/catalog/migration/cli/RunCLI.java | 15 +- gradle/baselibs.versions.toml | 13 +- gradle/libs.versions.toml | 6 +- 41 files changed, 976 insertions(+), 686 deletions(-) create mode 100644 buildSrc/src/main/kotlin/CodeCoverage.kt delete mode 100644 cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java diff --git a/README.md b/README.md index 70d1987..f83af8c 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ There are various reasons why users may want to move their Iceberg tables to a d * They just heard about the awesome Arctic catalog (or Nessie) and want to move their existing iceberg tables to Dremio Arctic. * They had an on-premise Hive catalog, but want to move tables to a cloud-based catalog as part of their cloud migration strategy. 
-Previously, before the Iceberg `1.1.0` release, the only way to migrate tables was by copying the data using the command `insert into catalog1.db.tableName as select * from catalog2.db.tableName`. +Previously, before the Iceberg `1.1.0` release, the only way to migrate tables was by copying the data using the command `insert into targetCatalog.db.tableName as select * from sourceCatalog.db.tableName`. After the iceberg `1.1.0` release, all Iceberg Catalogs supports register table with the `catalog#registerTable()` API. However, custom code is needed to migrate all the tables in bulk. **Hence, we introduce a CLI tool to migrate Iceberg tables in bulk from one Iceberg Catalog to another without a data copy.** @@ -28,20 +28,21 @@ Commands: ``` $ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register -h -Usage: iceberg-catalog-migrator register [-hV] [--disable-safety-prompts] [--dry-run] [--stacktrace] [--output-dir=] - (--source-catalog-type= --source-catalog-properties=[,...] - [--source-catalog-properties=[,...]]... - [--source-catalog-hadoop-conf=[,...]]... - [--source-custom-catalog-impl=]) (--target-catalog-type= - --target-catalog-properties=[,...] [--target-catalog-properties= - [,...]]... [--target-catalog-hadoop-conf=[,...]]... - [--target-custom-catalog-impl=]) [--identifiers=[,...] - [--identifiers=[,...]]... | --identifiers-from-file= | - --identifiers-regex=] -Bulk register the iceberg tables from source catalog to target catalog without data copy. +Usage: iceberg-catalog-migrator migrate [-hV] [--disable-safety-prompts] [--dry-run] [--stacktrace] [--output-dir=] + (--source-catalog-type= --source-catalog-properties=[,...] + [--source-catalog-properties=[,...]]... + [--source-catalog-hadoop-conf=[,...]]... + [--source-custom-catalog-impl=]) (--target-catalog-type= + --target-catalog-properties=[,...] [--target-catalog-properties= + [,...]]... [--target-catalog-hadoop-conf=[,...]]... + [--target-custom-catalog-impl=]) [--identifiers=[,...] 
+ [--identifiers=[,...]]... | --identifiers-from-file= | + --identifiers-regex=] +Bulk migrate the iceberg tables from source catalog to target catalog without data copy. Table entries from source catalog will be deleted after the +successful migration to the target catalog. --output-dir= Optional local output directory path to write CLI output files like `failed_identifiers.txt`, `failed_to_delete_at_source.txt`, - `dry_run_identifiers.txt`. If not specified, uses present working directory. + `dry_run_identifiers.txt`. If not specified, uses the present working directory. Example: --output-dir /tmp/output/ --output-dir $PWD/output_folder --dry-run Optional configuration to simulate the registration without actually registering. Can learn about a list of tables that will be @@ -57,9 +58,8 @@ Source catalog options: Example: --source-catalog-type GLUE --source-catalog-type NESSIE --source-catalog-properties=[,...] - Catalog properties for source catalog (like uri, warehouse, etc). - Example: --source-catalog-properties warehouse=/tmp/warehouseHadoop,type=hadoop - --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie + Iceberg catalog properties for source catalog (like uri, warehouse, etc). + Example: --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie --source-catalog-hadoop-conf=[,...] Optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. Example: --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY @@ -73,9 +73,8 @@ Target catalog options: Example: --target-catalog-type GLUE --target-catalog-type NESSIE --target-catalog-properties=[,...] - Catalog properties for target catalog (like uri, warehouse, etc). 
- Example: --target-catalog-properties warehouse=/tmp/warehouseHadoop,type=hadoop - --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie + Iceberg catalog properties for target catalog (like uri, warehouse, etc). + Example: --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie --target-catalog-hadoop-conf=[,...] Optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. Example: --target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY @@ -85,12 +84,12 @@ Target catalog options: Example: --target-custom-catalog-impl org.apache.iceberg.AwesomeCatalog Identifier options: --identifiers=[,...] - Optional selective list of identifiers to register. If not specified, all the tables will be registered. Use this when there are + Optional selective set of identifiers to register. If not specified, all the tables will be registered. Use this when there are few identifiers that need to be registered. For a large number of identifiers, use the `--identifiers-from-file` or `--identifiers-regex` option. Example: --identifiers foo.t1,bar.t2 --identifiers-from-file= - Optional text file path that contains a list of table identifiers (one per line) to register. Should not be used with + Optional text file path that contains a set of table identifiers (one per line) to register. Should not be used with `--identifiers` or `--identifiers-regex` option. 
Example: --identifiers-from-file /tmp/files/ids.txt --identifiers-regex= diff --git a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java index 3c2d4f1..5c4d805 100644 --- a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java +++ b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java @@ -31,6 +31,7 @@ import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.nessie.NessieCatalog; import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.io.TempDir; @@ -46,23 +47,31 @@ protected static void initLogDir() { System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); } - protected static Catalog catalog1; + protected static Catalog sourceCatalog; + + protected static Catalog targetCatalog; - protected static Catalog catalog2; protected static final Schema schema = new Schema( Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())).fields()); + protected void validateAssumptionForHadoopCatalogAsSource(boolean deleteSourceTables) { + Assumptions.assumeFalse( + deleteSourceTables && sourceCatalog instanceof HadoopCatalog, + "deleting source tables is unsupported for HadoopCatalog"); + } + protected static void createNamespaces() { - namespaceList.forEach(namespace -> ((SupportsNamespaces) catalog1).createNamespace(namespace)); - // don't create "db1" namespace in catalog2 + namespaceList.forEach( + namespace -> ((SupportsNamespaces) sourceCatalog).createNamespace(namespace)); + // don't create "db1" namespace in targetCatalog namespaceList .subList(0, 2) - .forEach(namespace -> ((SupportsNamespaces) catalog2).createNamespace(namespace)); + .forEach(namespace -> ((SupportsNamespaces) 
targetCatalog).createNamespace(namespace)); } protected static void dropNamespaces() { - Stream.of(catalog1, catalog2) + Stream.of(sourceCatalog, targetCatalog) .map(catalog -> (SupportsNamespaces) catalog) .forEach( catalog -> @@ -73,15 +82,15 @@ protected static void dropNamespaces() { protected static void createTables() { // two tables in 'foo' namespace - catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl1"), schema); - catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + sourceCatalog.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl1"), schema); + sourceCatalog.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); // two tables in 'bar' namespace - catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl3"), schema); - catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); + sourceCatalog.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl3"), schema); + sourceCatalog.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); } protected static void dropTables() { - Stream.of(catalog1, catalog2) + Stream.of(sourceCatalog, targetCatalog) .forEach( catalog -> namespaceList.stream() @@ -93,7 +102,6 @@ protected static void dropTables() { protected static Catalog createHadoopCatalog(String warehousePath, String name) { Map properties = new HashMap<>(); properties.put("warehouse", warehousePath); - properties.put("type", "hadoop"); return CatalogUtil.loadCatalog( HadoopCatalog.class.getName(), name, properties, new Configuration()); } diff --git a/api/build.gradle.kts b/api/build.gradle.kts index 73da6e6..29809b1 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts @@ -26,14 +26,13 @@ dependencies { implementation(libs.slf4j) implementation(libs.iceberg.spark.runtime) - annotationProcessor(libs.immutables) - compileOnly(libs.immutables) + compileOnly(libs.immutables.value.annotations) + annotationProcessor(libs.immutables.value.processor) 
testRuntimeOnly(libs.logback.classic) testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) - testImplementation(libs.junit.vintage.engine) testImplementation(libs.assertj) testImplementation(libs.hadoop.common) testImplementation(libs.logcaptor) @@ -44,6 +43,9 @@ dependencies { testImplementation( "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" ) + // this junit4 dependency is needed for above Iceberg's TestHiveMetastore + testRuntimeOnly("junit:junit:4.12") + testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { // these are taken from iceberg repo configurations exclude("org.apache.avro", "avro") diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index 8b4e830..4fbad4a 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -17,6 +17,7 @@ import com.google.common.base.Preconditions; import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Objects; @@ -54,6 +55,24 @@ public boolean enableStacktrace() { return false; } + @Value.Check + void check() { + Preconditions.checkArgument( + !targetCatalog().equals(sourceCatalog()), "target catalog is same as source catalog"); + + if (!(targetCatalog() instanceof SupportsNamespaces)) { + throw new UnsupportedOperationException( + String.format( + "target catalog %s doesn't implement SupportsNamespaces to create missing namespaces.", + targetCatalog().name())); + } + + if (deleteEntriesFromSourceCatalog() && sourceCatalog() instanceof HadoopCatalog) { + throw new UnsupportedOperationException( + "Source catalog is a Hadoop catalog and it doesn't support 
deleting the table entries just from the catalog. Please configure `deleteEntriesFromSourceCatalog` as `false`"); + } + } + private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrator.class); private final ImmutableCatalogMigrationResult.Builder resultBuilder = ImmutableCatalogMigrationResult.builder(); @@ -65,9 +84,9 @@ public boolean enableStacktrace() { * * @param identifierRegex regular expression pattern. If null, fetches all the table identifiers * from all the namespaces. - * @return List of table identifiers. + * @return Set of table identifiers. */ - public List getMatchingTableIdentifiers(String identifierRegex) { + public Set getMatchingTableIdentifiers(String identifierRegex) { Catalog sourceCatalog = sourceCatalog(); if (!(sourceCatalog instanceof SupportsNamespaces)) { throw new UnsupportedOperationException( @@ -76,7 +95,9 @@ public List getMatchingTableIdentifiers(String identifierRegex) sourceCatalog.name())); } LOG.info("Collecting all the namespaces from source catalog..."); - List namespaces = ((SupportsNamespaces) sourceCatalog).listNamespaces(); + Set namespaces = new HashSet<>(); + getAllNamespacesFromSourceCatalog(Namespace.empty(), namespaces); + Predicate matchedIdentifiersPredicate; if (identifierRegex == null) { LOG.info("Collecting all the tables from all the namespaces of source catalog..."); @@ -90,12 +111,28 @@ public List getMatchingTableIdentifiers(String identifierRegex) matchedIdentifiersPredicate = tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); } - return namespaces.stream() - .filter(Objects::nonNull) - .flatMap( - namespace -> - sourceCatalog.listTables(namespace).stream().filter(matchedIdentifiersPredicate)) - .collect(Collectors.toList()); + Set identifiers = + namespaces.stream() + .filter(Objects::nonNull) + .flatMap( + namespace -> + sourceCatalog.listTables(namespace).stream() + .filter(matchedIdentifiersPredicate)) + .collect(Collectors.toSet()); + + // add the tables from 
default namespace + try { + List fromDefaultNamespace = + sourceCatalog.listTables(Namespace.empty()).stream() + .filter(matchedIdentifiersPredicate) + .collect(Collectors.toList()); + identifiers.addAll(fromDefaultNamespace); + } catch (Exception exception) { + // some catalogs don't support default namespace. Hence, just log the warning and ignore the + // exception. + LOG.warn("Failed to identify tables from default namespace: {}", exception.getMessage()); + } + return identifiers; } /** @@ -104,19 +141,11 @@ public List getMatchingTableIdentifiers(String identifierRegex) *

Users must make sure that no in-progress commits on the tables of source catalog during * registration. * - * @param identifiers List of table identifiers to register or migrate + * @param identifiers collection of table identifiers to register or migrate * @return {@code this} for use in a chained invocation */ - public CatalogMigrator registerTables(List identifiers) { + public CatalogMigrator registerTables(Collection identifiers) { Preconditions.checkArgument(identifiers != null, "Identifiers list is null"); - Preconditions.checkArgument( - !targetCatalog().equals(sourceCatalog()), "target catalog is same as source catalog"); - if (!(targetCatalog() instanceof SupportsNamespaces)) { - throw new UnsupportedOperationException( - String.format( - "target catalog %s doesn't implement SupportsNamespaces to create missing namespaces.", - targetCatalog().name())); - } if (identifiers.isEmpty()) { LOG.warn("Identifiers list is empty"); @@ -132,23 +161,19 @@ public CatalogMigrator registerTables(List identifiers) { resultBuilder.addFailedToRegisterTableIdentifiers(tableIdentifier); } - // HadoopCatalog dropTable will delete the table files completely even when purge is - // false. So, skip dropTable for HadoopCatalog. 
- boolean deleteTableFromSourceCatalog = - !(sourceCatalog() instanceof HadoopCatalog) - && isRegistered - && deleteEntriesFromSourceCatalog(); try { - if (deleteTableFromSourceCatalog + if (isRegistered + && deleteEntriesFromSourceCatalog() && !sourceCatalog().dropTable(tableIdentifier, false)) { resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); } } catch (Exception exception) { resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); if (enableStacktrace()) { - LOG.warn("Failed to delete the table after migration {}", tableIdentifier, exception); + LOG.error( + "Failed to delete the table after migration {}", tableIdentifier, exception); } else { - LOG.warn( + LOG.error( "Failed to delete the table after migration {} : {}", tableIdentifier, exception.getMessage()); @@ -163,9 +188,38 @@ public CatalogMigrationResult result() { return resultBuilder.build(); } + protected void createNamespacesIfNotExistOnTargetCatalog(Namespace identifierNamespace) { + if (!processedNamespaces.contains(identifierNamespace)) { + String[] levels = identifierNamespace.levels(); + for (int index = 0; index < levels.length; index++) { + Namespace namespace = Namespace.of(Arrays.copyOfRange(levels, 0, index + 1)); + if (processedNamespaces.add(namespace)) { + try { + ((SupportsNamespaces) targetCatalog()).createNamespace(namespace); + } catch (AlreadyExistsException ex) { + LOG.debug( + "{}.Ignoring the error as forcefully creating the namespace even if it exists to avoid " + + "namespaceExists() check.", + ex.getMessage()); + } + } + } + } + } + + protected void getAllNamespacesFromSourceCatalog(Namespace namespace, Set visited) { + if (!namespace.isEmpty() && !visited.add(namespace)) { + return; + } + List children = ((SupportsNamespaces) sourceCatalog()).listNamespaces(namespace); + for (Namespace child : children) { + getAllNamespacesFromSourceCatalog(child, visited); + } + } + private boolean registerTable(TableIdentifier tableIdentifier) { try { - 
createNamespacesIfNotExist(tableIdentifier.namespace()); + createNamespacesIfNotExistOnTargetCatalog(tableIdentifier.namespace()); // register the table to the target catalog TableOperations ops = ((BaseTable) sourceCatalog().loadTable(tableIdentifier)).operations(); targetCatalog().registerTable(tableIdentifier, ops.current().metadataFileLocation()); @@ -173,28 +227,11 @@ private boolean registerTable(TableIdentifier tableIdentifier) { return true; } catch (Exception ex) { if (enableStacktrace()) { - LOG.warn("Unable to register the table {}", tableIdentifier, ex); + LOG.error("Unable to register the table {}", tableIdentifier, ex); } else { - LOG.warn("Unable to register the table {} : {}", tableIdentifier, ex.getMessage()); + LOG.error("Unable to register the table {} : {}", tableIdentifier, ex.getMessage()); } return false; } } - - private void createNamespacesIfNotExist(Namespace identifierNamespace) { - if (!processedNamespaces.contains(identifierNamespace)) { - String[] levels = identifierNamespace.levels(); - for (int index = 0; index < levels.length; index++) { - Namespace namespace = Namespace.of(Arrays.copyOfRange(levels, 0, index + 1)); - if (processedNamespaces.add(namespace)) { - try { - ((SupportsNamespaces) targetCatalog()).createNamespace(namespace); - } catch (AlreadyExistsException ex) { - // ignore the error as forcefully creating the namespace even if it exists to avoid - // namespaceExists() check. 
- } - } - } - } - } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java index d80b5a2..e0d5c5f 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java @@ -17,7 +17,7 @@ import java.nio.file.Path; import java.util.Collections; -import java.util.List; +import java.util.Set; import java.util.stream.IntStream; import nl.altindag.log.LogCaptor; import nl.altindag.log.model.LogEvent; @@ -27,8 +27,8 @@ import org.apache.iceberg.hadoop.HadoopCatalog; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Order; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -51,10 +51,11 @@ protected void afterEach() { dropTables(); } - @Order(0) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegister(boolean deleteSourceTables) { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + CatalogMigrationResult result = registerAllTables(deleteSourceTables); Assertions.assertThat(result.registeredTableIdentifiers()) @@ -66,32 +67,33 @@ public void testRegister(boolean deleteSourceTables) { Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - 
Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); - if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { + if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { // table should be deleted after migration from source catalog - Assertions.assertThat(catalog1.listTables(Namespace.of("foo"))).isEmpty(); - Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))).isEmpty(); + Assertions.assertThat(sourceCatalog.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(sourceCatalog.listTables(Namespace.of("bar"))).isEmpty(); return; } // tables should be present in source catalog. - Assertions.assertThat(catalog1.listTables(Namespace.of("foo"))) + Assertions.assertThat(sourceCatalog.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))) + Assertions.assertThat(sourceCatalog.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } - @Order(1) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterSelectedTables(boolean deleteSourceTables) { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + // using `--identifiers` option CatalogMigrationResult result = catalogMigratorWithDefaultArgs(deleteSourceTables) @@ -102,8 +104,8 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).isEmpty(); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + 
Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactly(TableIdentifier.parse("bar.tbl3")); // using --identifiers-regex option which matches all the tables starts with "foo." @@ -118,17 +120,18 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactly(TableIdentifier.parse("bar.tbl3")); } - @Order(2) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterError(boolean deleteSourceTables) { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + // use invalid namespace which leads to NoSuchTableException CatalogMigrationResult result = catalogMigratorWithDefaultArgs(deleteSourceTables) @@ -159,10 +162,11 @@ public void testRegisterError(boolean deleteSourceTables) { Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); } - @Order(3) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterWithFewFailures(boolean deleteSourceTables) { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + // register only foo.tbl2 CatalogMigrationResult result = catalogMigratorWithDefaultArgs(deleteSourceTables) @@ -173,9 +177,9 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); 
Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { + if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { // create a table with the same name in source catalog which got deleted. - catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + sourceCatalog.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); } // register all the tables from source catalog again. So that `foo.tbl2` will fail to register. @@ -189,26 +193,28 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { .contains(TableIdentifier.parse("foo.tbl2")); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } - @Order(4) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterNoTables(boolean deleteSourceTables) { - // source catalog is catalog2 which has no tables. + // use source catalog as targetCatalog which has no tables. 
+ Assumptions.assumeFalse( + deleteSourceTables && targetCatalog instanceof HadoopCatalog, + "deleting source tables is unsupported for HadoopCatalog"); CatalogMigrator catalogMigrator = ImmutableCatalogMigrator.builder() - .sourceCatalog(catalog2) - .targetCatalog(catalog1) + .sourceCatalog(targetCatalog) + .targetCatalog(sourceCatalog) .deleteEntriesFromSourceCatalog(deleteSourceTables) .build(); - List matchingTableIdentifiers = + Set matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); Assertions.assertThat(matchingTableIdentifiers).isEmpty(); CatalogMigrationResult result = @@ -218,15 +224,16 @@ public void testRegisterNoTables(boolean deleteSourceTables) { Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); } - @Order(5) @ParameterizedTest @ValueSource(booleans = {true, false}) - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + // additionally create 240 tables along with 4 tables created in beforeEach() IntStream.range(0, 240) .forEach( val -> - catalog1.createTable( + sourceCatalog.createTable( TableIdentifier.of(Namespace.of("foo"), "tblx" + val), schema)); CatalogMigrationResult result; @@ -236,20 +243,21 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).hasSize(242); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).hasSize(242); + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), 
TableIdentifier.parse("bar.tbl4")); } - @Order(6) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testListingTableIdentifiers(boolean deleteSourceTables) { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); // should list all the tables from all the namespace when regex is null. - List matchingTableIdentifiers = + Set matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); Assertions.assertThat(matchingTableIdentifiers) .containsExactlyInAnyOrder( @@ -269,12 +277,13 @@ public void testListingTableIdentifiers(boolean deleteSourceTables) { Assertions.assertThat(matchingTableIdentifiers).isEmpty(); } - @Order(7) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterWithNewNamespace(boolean deleteSourceTables) { - // catalog2 doesn't have a namespace "db1" - catalog1.createTable(TableIdentifier.of(Namespace.of("db1"), "tbl5"), schema); + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + // create namespace "db1" only in source catalog + sourceCatalog.createTable(TableIdentifier.of(Namespace.of("db1"), "tbl5"), schema); CatalogMigrationResult result = catalogMigratorWithDefaultArgs(deleteSourceTables) @@ -286,18 +295,19 @@ public void testRegisterWithNewNamespace(boolean deleteSourceTables) { Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(catalog2.listTables(Namespace.of("db1"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("db1"))) .containsExactly(TableIdentifier.parse("db1.tbl5")); } - @Order(7) @ParameterizedTest @CsvSource(value = {"false,false", "false,true", "true,false", "true,true"}) public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); 
+ ImmutableCatalogMigrator migrator = ImmutableCatalogMigrator.builder() - .sourceCatalog(catalog1) - .targetCatalog(catalog2) + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) .deleteEntriesFromSourceCatalog(deleteSourceTables) .enableStacktrace(enableStacktrace) .build(); @@ -331,8 +341,8 @@ public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) protected CatalogMigrator catalogMigratorWithDefaultArgs(boolean deleteSourceTables) { return ImmutableCatalogMigrator.builder() - .sourceCatalog(catalog1) - .targetCatalog(catalog2) + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) .deleteEntriesFromSourceCatalog(deleteSourceTables) .build(); } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java index 69d8e16..24c9039 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java @@ -36,14 +36,14 @@ protected static void initLogDir() { @Test public void testInvalidArgs() { - Catalog catalog1 = new HadoopCatalog(); - Catalog catalog2 = new HadoopCatalog(); + Catalog sourceCatalog = new HadoopCatalog(); + Catalog targetCatalog = new HadoopCatalog(); Assertions.assertThatThrownBy( () -> ImmutableCatalogMigrator.builder() - .sourceCatalog(catalog2) // source-catalog is same as target catalog - .targetCatalog(catalog2) + .sourceCatalog(targetCatalog) // source-catalog is same as target catalog + .targetCatalog(targetCatalog) .deleteEntriesFromSourceCatalog(true) .build() .registerTables(Collections.singletonList(TableIdentifier.parse("foo.abc")))) @@ -53,9 +53,9 @@ public void testInvalidArgs() { Assertions.assertThatThrownBy( () -> ImmutableCatalogMigrator.builder() - .sourceCatalog(catalog1) - .targetCatalog(catalog2) - 
.deleteEntriesFromSourceCatalog(true) + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(false) .build() .registerTables(null)) .isInstanceOf(IllegalArgumentException.class) @@ -64,7 +64,7 @@ public void testInvalidArgs() { Assertions.assertThatThrownBy( () -> ImmutableCatalogMigrator.builder() - .sourceCatalog(catalog1) + .sourceCatalog(sourceCatalog) .targetCatalog(null) // target-catalog is null .deleteEntriesFromSourceCatalog(true) .build()) @@ -75,10 +75,24 @@ public void testInvalidArgs() { () -> ImmutableCatalogMigrator.builder() .sourceCatalog(null) // source-catalog is null - .targetCatalog(catalog2) + .targetCatalog(targetCatalog) .deleteEntriesFromSourceCatalog(true) .build()) .isInstanceOf(NullPointerException.class) .hasMessageContaining("sourceCatalog"); + + // test source catalog as hadoop with `deleteEntriesFromSourceCatalog` as true. + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigrator.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(true) + .build() + .registerTables(Collections.emptyList())) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining( + "Source catalog is a Hadoop catalog and it doesn't support deleting the table entries just from the catalog. 
" + + "Please configure `deleteEntriesFromSourceCatalog` as `false`"); } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java index 6488872..63e71ca 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java @@ -39,8 +39,8 @@ public class CustomCatalogMigratorTest extends AbstractTest { @BeforeAll protected static void setup() { - catalog1 = createCustomCatalog(warehouse1.toAbsolutePath().toString(), "catalog1"); - catalog2 = createCustomCatalog(warehouse2.toAbsolutePath().toString(), "catalog2"); + sourceCatalog = createCustomCatalog(warehouse1.toAbsolutePath().toString(), "sourceCatalog"); + targetCatalog = createCustomCatalog(warehouse2.toAbsolutePath().toString(), "targetCatalog"); createNamespaces(); } @@ -64,9 +64,9 @@ protected static void tearDown() { public void testRegister() { CatalogMigrator catalogMigrator = ImmutableCatalogMigrator.builder() - .sourceCatalog(catalog1) - .targetCatalog(catalog2) - .deleteEntriesFromSourceCatalog(true) + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(false) .build(); // should fail to register as catalog doesn't support register table operations. 
CatalogMigrationResult result = @@ -91,7 +91,6 @@ public Table registerTable(TableIdentifier identifier, String metadataFileLocati Map properties = new HashMap<>(); properties.put("warehouse", warehousePath); - properties.put("type", "hadoop"); TestCatalog testCatalog = new TestCatalog(); testCatalog.setConf(new Configuration()); testCatalog.initialize(name, properties); diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java index 6c658b6..fe95316 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java @@ -15,23 +15,25 @@ */ package org.projectnessie.tools.catalog.migration.api; +import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Order; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import org.junit.jupiter.api.Test; public class HadoopCatalogMigratorTest extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() { - catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "catalog1"); - catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "catalog2"); + sourceCatalog = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "sourceCatalog"); + targetCatalog = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "targetCatalog"); createNamespaces(); } @@ -41,30 +43,111 @@ 
protected static void tearDown() { dropNamespaces(); } - @Order(8) - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testRegisterWithNewNestedNamespace(boolean deleteSourceTables) { - // catalog2 doesn't have a namespace "a.b.c" - Namespace namespace = Namespace.of("a.b.c"); - String tableName = "tbl5_" + deleteSourceTables; - TableIdentifier tableIdentifier = TableIdentifier.parse("a.b.c." + tableName); - ((SupportsNamespaces) catalog1).createNamespace(namespace); - catalog1.createTable(tableIdentifier, schema); + @Test + public void testRegisterWithNewNestedNamespaces() { + List namespaceList = + Arrays.asList( + Namespace.of("ns1"), + Namespace.of("ns2"), + Namespace.of("ns3"), + Namespace.of("ns1", "ns2"), + Namespace.of("ns1", "ns3"), + Namespace.of("ns1", "ns2", "ns3")); + List identifiers = + Arrays.asList( + TableIdentifier.parse("tblz"), + TableIdentifier.parse("ns1.tblz"), + TableIdentifier.parse("ns2.tblz"), + TableIdentifier.parse("ns3.tblz"), + TableIdentifier.of(Namespace.of("ns1", "ns2"), "tblz"), + TableIdentifier.of(Namespace.of("ns1", "ns3"), "tblz"), + TableIdentifier.of(Namespace.of("ns1", "ns2", "ns3"), "tblz")); + namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::createNamespace); + identifiers.forEach(identifier -> sourceCatalog.createTable(identifier, schema)); + + CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(false); + Set matchingTableIdentifiers = + catalogMigrator.getMatchingTableIdentifiers(null); + // HadoopCatalog supports implicit namespaces. + // Hence, No concept of default namespace too. So, cannot list the tables from default + // namespaces. + // Can only load tables in default namespace using identifiers. 
+ Assertions.assertThat(matchingTableIdentifiers) + .containsAll(identifiers.subList(1, 7)); // without "tblz" + Assertions.assertThat(matchingTableIdentifiers).doesNotContain(identifiers.get(0)); CatalogMigrationResult result = - catalogMigratorWithDefaultArgs(deleteSourceTables) - .registerTables(Collections.singletonList(tableIdentifier)) - .result(); + catalogMigrator.registerTables(matchingTableIdentifiers).result(); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsAll(identifiers.subList(1, 7)); // without "tblz" + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(tableIdentifier); + // manually register the table from default namespace + catalogMigrator = catalogMigratorWithDefaultArgs(false); + result = + catalogMigrator + .registerTables(Collections.singletonList(TableIdentifier.of("tblz"))) + .result(); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactly(TableIdentifier.of("tblz")); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(catalog2.loadTable(tableIdentifier)).isNotNull(); - catalog2.dropTable(tableIdentifier); - ((SupportsNamespaces) catalog2).dropNamespace(namespace); - catalog1.dropTable(tableIdentifier); - ((SupportsNamespaces) catalog1).dropNamespace(namespace); + Collections.reverse(namespaceList); + identifiers.forEach(sourceCatalog::dropTable); + namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::dropNamespace); + identifiers.forEach(targetCatalog::dropTable); + namespaceList.forEach(((SupportsNamespaces) targetCatalog)::dropNamespace); + } + + @Test + public void testCreateAndListNamespaces() { + ImmutableCatalogMigrator catalogMigrator = + ImmutableCatalogMigrator.builder() + 
.sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(false) + .build(); + + List namespaceList = + Arrays.asList( + Namespace.of("a"), + Namespace.of("a", "b"), + Namespace.of("a", "b", "c"), + Namespace.of("a", "b", "c", "d"), + Namespace.of("a", "b", "c", "d", "e"), + Namespace.of("a", "c")); + catalogMigrator.createNamespacesIfNotExistOnTargetCatalog( + namespaceList.get(4)); // try creating "a.b.c.d.e" + catalogMigrator.createNamespacesIfNotExistOnTargetCatalog( + namespaceList.get(5)); // try creating "a.c" + // should create all the levels of missing namespaces on target catalog + Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces()) + .contains(namespaceList.get(0)) + .doesNotContainAnyElementsOf(namespaceList.subList(1, 6)); + Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(0))) + .containsExactlyInAnyOrder(namespaceList.get(1), namespaceList.get(5)); + Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(1))) + .containsExactly(namespaceList.get(2)); + Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(2))) + .containsExactly(namespaceList.get(3)); + Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(3))) + .containsExactly(namespaceList.get(4)); + Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(4))) + .isEmpty(); + Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(5))) + .isEmpty(); + + namespaceList.forEach( + namespace -> ((SupportsNamespaces) sourceCatalog).createNamespace(namespace)); + Set listedNamespaces = new HashSet<>(); + // collect all the namespaces from all levels + catalogMigrator.getAllNamespacesFromSourceCatalog(Namespace.empty(), listedNamespaces); + Assertions.assertThat(listedNamespaces).containsAll(namespaceList); + + 
Collections.reverse(namespaceList); + namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::dropNamespace); + namespaceList.forEach(((SupportsNamespaces) targetCatalog)::dropNamespace); } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java index 72e3354..b8a640f 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java @@ -22,10 +22,7 @@ import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Order; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import org.junit.jupiter.api.Test; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHadoopToHiveCatalogMigrator extends AbstractTestCatalogMigrator { @@ -34,8 +31,8 @@ public class ITHadoopToHiveCatalogMigrator extends AbstractTestCatalogMigrator { protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "hadoop"); - catalog2 = HiveMetaStoreRunner.hiveCatalog(); + sourceCatalog = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "hadoop"); + targetCatalog = HiveMetaStoreRunner.hiveCatalog(); createNamespaces(); } @@ -46,26 +43,16 @@ protected static void tearDown() throws Exception { HiveMetaStoreRunner.stopMetastore(); } - // disable large table test for IT to save CI time. It will be executed only for UT. 
- @Override - @Disabled - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); - } - - @Order(8) - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testRegisterWithNewNestedNamespace(boolean deleteSourceTables) { - // catalog2 doesn't have a namespace "a.b.c" + @Test + public void testRegisterWithNewNestedNamespace() { Namespace namespace = Namespace.of("a.b.c"); - String tableName = "tbl5_" + deleteSourceTables; - TableIdentifier tableIdentifier = TableIdentifier.parse("a.b.c." + tableName); - ((SupportsNamespaces) catalog1).createNamespace(namespace); - catalog1.createTable(tableIdentifier, schema); + TableIdentifier tableIdentifier = TableIdentifier.parse("a.b.c.tbl5"); + // create namespace "a.b.c" only in source catalog + ((SupportsNamespaces) sourceCatalog).createNamespace(namespace); + sourceCatalog.createTable(tableIdentifier, schema); CatalogMigrationResult result = - catalogMigratorWithDefaultArgs(deleteSourceTables) + catalogMigratorWithDefaultArgs(false) .registerTables(Collections.singletonList(tableIdentifier)) .result(); @@ -75,7 +62,7 @@ public void testRegisterWithNewNestedNamespace(boolean deleteSourceTables) { .containsExactly(tableIdentifier); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - catalog1.dropTable(tableIdentifier); - ((SupportsNamespaces) catalog1).dropNamespace(namespace); + sourceCatalog.dropTable(tableIdentifier); + ((SupportsNamespaces) sourceCatalog).dropNamespace(namespace); } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java index 8a00484..e795006 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java +++ 
b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java @@ -15,17 +15,19 @@ */ package org.projectnessie.tools.catalog.migration.api; +import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Order; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import org.junit.jupiter.api.Test; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHadoopToNessieCatalogMigrator extends AbstractTestCatalogMigrator { @@ -38,8 +40,8 @@ public class ITHadoopToNessieCatalogMigrator extends AbstractTestCatalogMigrator protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "hadoop"); - catalog2 = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); + sourceCatalog = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "hadoop"); + targetCatalog = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); createNamespaces(); } @@ -50,40 +52,132 @@ protected static void tearDown() throws Exception { HiveMetaStoreRunner.stopMetastore(); } - // disable large table test for IT to save CI time. It will be executed only for UT. 
- @Override - @Disabled - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); - } + @Test + public void testRegisterWithNewNestedNamespaces() { + List namespaceList = + Arrays.asList( + Namespace.of("ns1"), + Namespace.of("ns2"), + Namespace.of("ns3"), + Namespace.of("ns1", "ns2"), + Namespace.of("ns1", "ns3"), + Namespace.of("ns1", "ns2", "ns3")); + List identifiers = + Arrays.asList( + TableIdentifier.parse("tblz"), // table from default namespace + TableIdentifier.parse("ns1.tblz"), + TableIdentifier.parse("ns2.tblz"), + TableIdentifier.parse("ns3.tblz"), + TableIdentifier.of(Namespace.of("ns1", "ns2"), "tblz"), + TableIdentifier.of(Namespace.of("ns1", "ns3"), "tblz"), + TableIdentifier.of(Namespace.of("ns1", "ns2", "ns3"), "tblz")); + namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::createNamespace); + identifiers.forEach(identifier -> sourceCatalog.createTable(identifier, schema)); - @Order(8) - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testRegisterWithNewNestedNamespace(boolean deleteSourceTables) { - // catalog2 doesn't have a namespace "a.b.c" - Namespace namespace = Namespace.of("a.b.c"); - String tableName = "tbl5_" + deleteSourceTables; - TableIdentifier tableIdentifier = TableIdentifier.parse("a.b.c." + tableName); - ((SupportsNamespaces) catalog1).createNamespace(namespace); - catalog1.createTable(tableIdentifier, schema); + CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(false); + Set matchingTableIdentifiers = + catalogMigrator.getMatchingTableIdentifiers(null); + // HadoopCatalog supports implicit namespaces. + // Hence, No concept of default namespace too. So, cannot list the tables from default + // namespaces. + // Can only load tables in default namespace using identifiers. 
+ Assertions.assertThat(matchingTableIdentifiers) + .containsAll(identifiers.subList(1, 7)); // without "tblz" + Assertions.assertThat(matchingTableIdentifiers).doesNotContain(identifiers.get(0)); CatalogMigrationResult result = - catalogMigratorWithDefaultArgs(deleteSourceTables) - .registerTables(Collections.singletonList(tableIdentifier)) - .result(); + catalogMigrator.registerTables(matchingTableIdentifiers).result(); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsAll(identifiers.subList(1, 7)); // without "tblz" + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(tableIdentifier); + // manually register the table from default namespace + catalogMigrator = catalogMigratorWithDefaultArgs(false); + result = + catalogMigrator + .registerTables(Collections.singletonList(TableIdentifier.of("tblz"))) + .result(); + Assertions.assertThat(result.registeredTableIdentifiers()) + .containsExactly(TableIdentifier.of("tblz")); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(catalog2.loadTable(tableIdentifier)).isNotNull(); - Assertions.assertThat(((SupportsNamespaces) catalog2).listNamespaces()) - .contains(Namespace.of("a"), Namespace.of("a", "b"), Namespace.of("a", "b", "c")); + Collections.reverse(namespaceList); + identifiers.forEach(sourceCatalog::dropTable); + namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::dropNamespace); + identifiers.forEach(targetCatalog::dropTable); + namespaceList.forEach(((SupportsNamespaces) targetCatalog)::dropNamespace); + } + + @Test + public void testCreateMissingNamespaces() { + ImmutableCatalogMigrator catalogMigrator = + ImmutableCatalogMigrator.builder() + .sourceCatalog(sourceCatalog) + 
.targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(false) + .build(); + + List namespaceList = + Arrays.asList( + Namespace.of("a"), + Namespace.of("a", "b"), + Namespace.of("a", "b", "c"), + Namespace.of("a", "b", "c", "d"), + Namespace.of("a", "b", "c", "d", "e"), + Namespace.of("a", "c")); + catalogMigrator.createNamespacesIfNotExistOnTargetCatalog( + namespaceList.get(4)); // try creating "a.b.c.d.e" + catalogMigrator.createNamespacesIfNotExistOnTargetCatalog( + namespaceList.get(5)); // try creating "a.c" + + // should create all the levels of missing namespaces on target catalog. + // Ideally, listNamespaces() should return top level namespaces. But due to bug in Nessie code, + // It returns all the namespaces. Should be fixed by https://github.com/apache/iceberg/pull/7146 + Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces()) + .containsAll(namespaceList); + + namespaceList.forEach( + namespace -> ((SupportsNamespaces) sourceCatalog).createNamespace(namespace)); + Set listedNamespaces = new HashSet<>(); + // collect all the namespaces from all levels + catalogMigrator.getAllNamespacesFromSourceCatalog(Namespace.empty(), listedNamespaces); + Assertions.assertThat(listedNamespaces).containsAll(namespaceList); + + Collections.reverse(namespaceList); + namespaceList.forEach(((SupportsNamespaces) targetCatalog)::dropNamespace); + namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::dropNamespace); + } + + @Test + public void testListingNamespacesFromNessie() { + Catalog nessie = targetCatalog; + Catalog hadoop = sourceCatalog; + + ImmutableCatalogMigrator catalogMigrator = + ImmutableCatalogMigrator.builder() + .sourceCatalog(nessie) + .targetCatalog(hadoop) + .deleteEntriesFromSourceCatalog(false) + .build(); + + List namespaceList = + Arrays.asList( + Namespace.of("a"), + Namespace.of("a", "b"), + Namespace.of("a", "b", "c"), + Namespace.of("a", "b", "c", "d"), + Namespace.of("a", "b", "c", "d", "e"), + 
Namespace.of("a", "c")); + + namespaceList.forEach(namespace -> ((SupportsNamespaces) nessie).createNamespace(namespace)); + Set listedNamespaces = new HashSet<>(); + // collect all the namespaces from all levels + catalogMigrator.getAllNamespacesFromSourceCatalog(Namespace.empty(), listedNamespaces); + Assertions.assertThat(listedNamespaces).containsAll(namespaceList); - catalog2.dropTable(tableIdentifier); - ((SupportsNamespaces) catalog2).dropNamespace(namespace); - catalog1.dropTable(tableIdentifier); - ((SupportsNamespaces) catalog1).dropNamespace(namespace); + Collections.reverse(namespaceList); + namespaceList.forEach(((SupportsNamespaces) nessie)::dropNamespace); } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java index 236bce0..5ce0391 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java @@ -17,7 +17,6 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToHadoopCatalogMigrator extends AbstractTestCatalogMigrator { @@ -26,8 +25,8 @@ public class ITHiveToHadoopCatalogMigrator extends AbstractTestCatalogMigrator { protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - catalog1 = HiveMetaStoreRunner.hiveCatalog(); - catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); + sourceCatalog = HiveMetaStoreRunner.hiveCatalog(); + targetCatalog = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); createNamespaces(); } @@ -37,11 +36,4 @@ protected static void tearDown() throws Exception { 
dropNamespaces(); HiveMetaStoreRunner.stopMetastore(); } - - // disable large table test for IT to save CI time. It will be executed only for UT. - @Override - @Disabled - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); - } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java index 1e31214..afdd5e6 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java @@ -17,7 +17,6 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToNessieCatalogMigrator extends AbstractTestCatalogMigrator { @@ -30,8 +29,8 @@ public class ITHiveToNessieCatalogMigrator extends AbstractTestCatalogMigrator { protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - catalog1 = HiveMetaStoreRunner.hiveCatalog(); - catalog2 = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); + sourceCatalog = HiveMetaStoreRunner.hiveCatalog(); + targetCatalog = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); createNamespaces(); } @@ -41,11 +40,4 @@ protected static void tearDown() throws Exception { dropNamespaces(); HiveMetaStoreRunner.stopMetastore(); } - - // disable large table test for IT to save CI time. It will be executed only for UT. 
- @Override - @Disabled - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); - } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java index 3e86556..d65bd74 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java @@ -15,9 +15,12 @@ */ package org.projectnessie.tools.catalog.migration.api; +import java.util.Set; +import org.apache.iceberg.catalog.TableIdentifier; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITNessieToHiveCatalogMigrator extends AbstractTestCatalogMigrator { @@ -30,8 +33,8 @@ public class ITNessieToHiveCatalogMigrator extends AbstractTestCatalogMigrator { protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - catalog1 = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); - catalog2 = HiveMetaStoreRunner.hiveCatalog(); + sourceCatalog = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); + targetCatalog = HiveMetaStoreRunner.hiveCatalog(); createNamespaces(); } @@ -42,10 +45,24 @@ protected static void tearDown() throws Exception { HiveMetaStoreRunner.stopMetastore(); } - // disable large table test for IT to save CI time. It will be executed only for UT. 
- @Override - @Disabled - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); + @Test + public void testRegisterWithDefaultNamespace() { + sourceCatalog.createTable(TableIdentifier.of("tblx"), schema); + + CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(false); + // should also include table from default namespace + Set matchingTableIdentifiers = + catalogMigrator.getMatchingTableIdentifiers(null); + Assertions.assertThat(matchingTableIdentifiers).contains(TableIdentifier.parse("tblx")); + + CatalogMigrationResult result = + catalogMigrator.registerTables(matchingTableIdentifiers).result(); + // hive will not support default namespace (namespace with level = 0). Hence, register will + // fail. + Assertions.assertThat(result.registeredTableIdentifiers()) + .doesNotContain(TableIdentifier.parse("tblx")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()) + .containsExactly(TableIdentifier.parse("tblx")); + Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java index ee487e1..1ceaa8b 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java @@ -16,13 +16,13 @@ package org.projectnessie.tools.catalog.migration.api; import java.nio.file.Path; -import java.util.Collections; import java.util.List; import org.apache.iceberg.BaseMetastoreCatalog; import org.apache.iceberg.TableOperations; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; 
import org.assertj.core.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -66,24 +66,30 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) { public void renameTable(TableIdentifier from, TableIdentifier to) {} } - Catalog catalog1 = new TestCatalog(); - Catalog catalog2 = new TestCatalog(); + Catalog sourceCatalog = new TestCatalog(); + Catalog targetCatalog = new TestCatalog(); + + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigrator.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(false) + .build()) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining( + "target catalog TestCatalog{} doesn't implement SupportsNamespaces to create missing namespaces."); CatalogMigrator catalogMigrator = ImmutableCatalogMigrator.builder() - .sourceCatalog(catalog1) - .targetCatalog(catalog2) - .deleteEntriesFromSourceCatalog(true) + .sourceCatalog(sourceCatalog) + .targetCatalog(new HadoopCatalog()) + .deleteEntriesFromSourceCatalog(false) .build(); Assertions.assertThatThrownBy(() -> catalogMigrator.getMatchingTableIdentifiers(null)) .isInstanceOf(UnsupportedOperationException.class) .hasMessageContaining( "source catalog TestCatalog{} doesn't implement SupportsNamespaces to list all namespaces."); - - Assertions.assertThatThrownBy(() -> catalogMigrator.registerTables(Collections.emptyList())) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageContaining( - "target catalog TestCatalog{} doesn't implement SupportsNamespaces to create missing namespaces."); } } diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts index aedfd90..32d8d52 100644 --- a/buildSrc/build.gradle.kts +++ b/buildSrc/build.gradle.kts @@ -31,14 +31,6 @@ dependencies { implementation(baselibs.idea.ext) implementation(baselibs.shadow) implementation(baselibs.errorprone) - implementation(baselibs.nessie.buildsupport.jacoco) - 
implementation(baselibs.nessie.buildsupport.reflectionconfig) - - testImplementation(platform(baselibs.junit.bom)) - testImplementation(baselibs.assertj.core) - testImplementation(baselibs.junit.jupiter.api) - testImplementation(baselibs.junit.jupiter.params) - testRuntimeOnly(baselibs.junit.jupiter.engine) } java { diff --git a/buildSrc/src/main/kotlin/CodeCoverage.kt b/buildSrc/src/main/kotlin/CodeCoverage.kt new file mode 100644 index 0000000..808f5f2 --- /dev/null +++ b/buildSrc/src/main/kotlin/CodeCoverage.kt @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.gradle.api.Plugin +import org.gradle.api.Project +import org.gradle.kotlin.dsl.apply +import org.gradle.kotlin.dsl.configure +import org.gradle.kotlin.dsl.withType +import org.gradle.testing.jacoco.plugins.JacocoPlugin +import org.gradle.testing.jacoco.plugins.JacocoPluginExtension +import org.gradle.testing.jacoco.plugins.JacocoReportAggregationPlugin +import org.gradle.testing.jacoco.tasks.JacocoReport + +class CodeCoveragePlugin : Plugin { + override fun apply(project: Project): Unit = + project.run { + apply() + apply() + + tasks.withType().configureEach { + reports { + html.required.set(true) + xml.required.set(true) + } + } + + configure { toolVersion = libsRequiredVersion("jacoco") } + } +} \ No newline at end of file diff --git a/buildSrc/src/main/kotlin/Testing.kt b/buildSrc/src/main/kotlin/Testing.kt index 379a1b7..094998d 100644 --- a/buildSrc/src/main/kotlin/Testing.kt +++ b/buildSrc/src/main/kotlin/Testing.kt @@ -36,7 +36,6 @@ fun Project.configureTestTasks() { systemProperty("user.language", "en") systemProperty("user.country", "US") systemProperty("user.variant", "") - systemProperty("test.log.level", testLogLevel()) filter { isFailOnNoMatchingTests = false when (name) { diff --git a/buildSrc/src/main/kotlin/Utilities.kt b/buildSrc/src/main/kotlin/Utilities.kt index c1c35b4..3504b56 100644 --- a/buildSrc/src/main/kotlin/Utilities.kt +++ b/buildSrc/src/main/kotlin/Utilities.kt @@ -54,8 +54,6 @@ fun Project.libsRequiredVersion(name: String): String { return reqVer } -fun Project.testLogLevel() = System.getProperty("test.log.level", "WARN") - fun Project.applyShadowJar() { plugins.apply(ShadowPlugin::class.java) diff --git a/buildSrc/src/main/kotlin/build-conventions.gradle.kts b/buildSrc/src/main/kotlin/build-conventions.gradle.kts index 32e0d22..b8e4694 100644 --- a/buildSrc/src/main/kotlin/build-conventions.gradle.kts +++ b/buildSrc/src/main/kotlin/build-conventions.gradle.kts @@ -14,11 +14,6 @@ * limitations under the License. 
*/ -plugins { - id("org.projectnessie.buildsupport.jacoco") - `eclipse` -} - val hasSrcMain = projectDir.resolve("src/main").exists() val hasSrcTest = projectDir.resolve("src/test").exists() @@ -32,6 +27,8 @@ configureJandex() configureJava() +apply() + if (hasSrcMain || hasSrcTest) { configureCheckstyle() diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index 75b789d..6d2dc4a 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -46,7 +46,6 @@ dependencies { testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) - testImplementation(libs.junit.vintage.engine) testImplementation(libs.assertj) testImplementation(libs.logcaptor) @@ -56,6 +55,9 @@ dependencies { testImplementation( "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" ) + // this junit4 dependency is needed for above Iceberg's TestHiveMetastore + testRuntimeOnly("junit:junit:4.12") + testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { // these are taken from iceberg repo configurations exclude("org.apache.avro", "avro") diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index e7563b1..683da62 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -16,13 +16,16 @@ package org.projectnessie.tools.catalog.migration.cli; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import com.google.common.collect.Iterables; +import java.io.Console; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Collection; import java.util.Collections; import 
java.util.List; +import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; @@ -40,7 +43,7 @@ public abstract class BaseRegisterCommand implements Callable { exclusive = false, multiplicity = "1", heading = "Source catalog options: %n") - private SourceCatalogOptions sourceCatalogOptions; + protected SourceCatalogOptions sourceCatalogOptions; @CommandLine.ArgGroup( exclusive = false, @@ -79,18 +82,19 @@ public abstract class BaseRegisterCommand implements Callable { names = {"--stacktrace"}, description = "Optional configuration to enable capturing stacktrace in logs in case of failures.") - protected boolean enableStackTrace; + private boolean enableStackTrace; private static final int BATCH_SIZE = 100; public static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt"; public static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt"; public static final String DRY_RUN_FILE = "dry_run_identifiers.txt"; - private final Logger consoleLog = LoggerFactory.getLogger("console-log"); + private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); public BaseRegisterCommand() {} - protected abstract CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetCatalog); + protected abstract CatalogMigrator catalogMigrator( + Catalog sourceCatalog, Catalog targetCatalog, boolean enableStackTrace); protected abstract boolean canProceed(Catalog sourceCatalog); @@ -102,15 +106,15 @@ public BaseRegisterCommand() {} @Override public Integer call() { - List identifiers = Collections.emptyList(); + Set identifiers = Collections.emptySet(); + String identifierRegEx = identifierOptions != null ? 
identifierOptions.identifiersRegEx : null; + if (identifierOptions != null) { identifiers = identifierOptions.processIdentifiersInput(); } + checkAndWarnAboutIdentifiers(identifiers, identifierRegEx); - Preconditions.checkArgument( - Files.exists(outputDirPath), "path specified in `--output-dir` does not exist"); - Preconditions.checkArgument( - Files.isWritable(outputDirPath), "path specified in `--output-dir` is not writable"); + validateOutputDir(); Catalog sourceCatalog = sourceCatalogOptions.build(); consoleLog.info("Configured source catalog: {}", sourceCatalog.name()); @@ -119,35 +123,25 @@ public Integer call() { consoleLog.info("Configured target catalog: {}", targetCatalog.name()); if (!isDryRun && !disablePrompts && !canProceed(sourceCatalog)) { - return 0; + return 2; } - CatalogMigrator catalogMigrator = catalogMigrator(sourceCatalog, targetCatalog); + CatalogMigrator catalogMigrator = + catalogMigrator(sourceCatalog, targetCatalog, enableStackTrace); - String identifierRegEx = identifierOptions != null ? identifierOptions.identifiersRegEx : null; if (identifiers.isEmpty()) { - if (identifierRegEx != null) { - consoleLog.info( - "User has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog " - + "which matches the regex pattern:{}", - identifierRegEx); - } else { - consoleLog.info( - "User has not specified the table identifiers." - + " Selecting all the tables from all the namespaces from the source catalog."); - } - + consoleLog.info("Identifying tables for {} ...", operation()); identifiers = catalogMigrator.getMatchingTableIdentifiers(identifierRegEx); if (identifiers.isEmpty()) { - consoleLog.info( - "No tables are identified for {}. Please check `catalog_migration.log` file for more info.", + consoleLog.warn( + "No tables were identified for {}. 
Please check `catalog_migration.log` file for more info.", operation()); - return 0; + return 2; } } if (isDryRun) { + consoleLog.info("Dry run is completed."); handleDryRunResult(identifiers); return 0; } @@ -156,38 +150,80 @@ public Integer call() { consoleLog.info("Started {} ...", operation()); - List> identifierBatches = Lists.partition(identifiers, BATCH_SIZE); - int totalIdentifiers = identifiers.size(); - AtomicInteger counter = new AtomicInteger(); - identifierBatches.forEach( - identifierBatch -> { - catalogMigrator.registerTables(identifierBatch); - consoleLog.info( - "Attempted {} for {} tables out of {} tables.", - operation(), - counter.addAndGet(identifierBatch.size()), - totalIdentifiers); - }); - - handleResults(catalogMigrator.result()); + CatalogMigrationResult result; + try { + Iterable> identifierBatches = + Iterables.partition(identifiers, BATCH_SIZE); + int totalIdentifiers = identifiers.size(); + AtomicInteger counter = new AtomicInteger(); + identifierBatches.forEach( + identifierBatch -> { + catalogMigrator.registerTables(identifierBatch); + consoleLog.info( + "Attempted {} for {} tables out of {} tables.", + operation(), + counter.addAndGet(identifierBatch.size()), + totalIdentifiers); + }); + } finally { + consoleLog.info("Finished {} ...", operation()); + result = catalogMigrator.result(); + handleResults(result); + } + + if (!result.failedToRegisterTableIdentifiers().isEmpty() + || !result.failedToDeleteTableIdentifiers().isEmpty() + || result.registeredTableIdentifiers().isEmpty()) { + return 1; + } + return 0; } + private void checkAndWarnAboutIdentifiers( + Set identifiers, String identifierRegEx) { + if (identifiers.isEmpty()) { + if (identifierRegEx != null) { + consoleLog.warn( + "User has not specified the table identifiers." 
+ + " Will be selecting all the tables from all the namespaces from the source catalog " + + "which matches the regex pattern:{}", + identifierRegEx); + } else { + consoleLog.warn( + "User has not specified the table identifiers." + + " Will be selecting all the tables from all the namespaces from the source catalog."); + } + } + } + + private void validateOutputDir() { + Preconditions.checkArgument( + Files.exists(outputDirPath), "Path specified in `--output-dir` does not exist"); + Preconditions.checkArgument( + Files.isWritable(outputDirPath), "Path specified in `--output-dir` is not writable"); + } + private void handleResults(CatalogMigrationResult result) { - writeToFile( - outputDirPath.resolve(FAILED_IDENTIFIERS_FILE), result.failedToRegisterTableIdentifiers()); - writeToFile( - outputDirPath.resolve(FAILED_TO_DELETE_AT_SOURCE_FILE), - result.failedToDeleteTableIdentifiers()); - consoleLog.info("Finished {} ...", operation()); - printSummary(result); - printDetails(result); + try { + writeToFile( + outputDirPath.resolve(FAILED_IDENTIFIERS_FILE), + result.failedToRegisterTableIdentifiers()); + writeToFile( + outputDirPath.resolve(FAILED_TO_DELETE_AT_SOURCE_FILE), + result.failedToDeleteTableIdentifiers()); + } finally { + printSummary(result); + printDetails(result); + } } - private void handleDryRunResult(List identifiers) { - writeToFile(outputDirPath.resolve(DRY_RUN_FILE), identifiers); - consoleLog.info("Dry run is completed."); - printDryRunResult(identifiers); + private void handleDryRunResult(Set identifiers) { + try { + writeToFile(outputDirPath.resolve(DRY_RUN_FILE), identifiers); + } finally { + printDryRunResult(identifiers); + } } private void printSummary(CatalogMigrationResult result) { @@ -201,7 +237,7 @@ private void printSummary(CatalogMigrationResult result) { targetCatalogOptions.type.name()); } if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - consoleLog.info( + consoleLog.error( "Failed to {} {} tables from {} catalog to {} 
catalog. " + "Please check the `catalog_migration.log` file for the failure reason. " + "Failed identifiers are written into `{}`. " @@ -214,7 +250,7 @@ private void printSummary(CatalogMigrationResult result) { FAILED_IDENTIFIERS_FILE); } if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - consoleLog.info( + consoleLog.error( "Failed to delete {} tables from {} catalog. " + "Please check the `catalog_migration.log` file for the failure reason. " + "{}Failed to delete identifiers are written into `{}`.", @@ -236,7 +272,7 @@ private void printDetails(CatalogMigrationResult result) { } if (!result.failedToRegisterTableIdentifiers().isEmpty()) { - consoleLog.info( + consoleLog.error( "Failed to {} these tables:{}{}", operate(), System.lineSeparator(), @@ -244,14 +280,14 @@ private void printDetails(CatalogMigrationResult result) { } if (!result.failedToDeleteTableIdentifiers().isEmpty()) { - consoleLog.warn( + consoleLog.error( "Failed to delete these tables from source catalog:{}{}", System.lineSeparator(), result.failedToDeleteTableIdentifiers()); } } - private void printDryRunResult(List result) { + private void printDryRunResult(Set result) { consoleLog.info("Summary: "); consoleLog.info( "Identified {} tables for {} by dry-run. These identifiers are also written into {}. " @@ -267,7 +303,7 @@ private void printDryRunResult(List result) { result); } - private static void writeToFile(Path filePath, List identifiers) { + private static void writeToFile(Path filePath, Collection identifiers) { if (identifiers.isEmpty()) { return; } @@ -279,4 +315,23 @@ private static void writeToFile(Path filePath, List identifiers throw new UncheckedIOException("Failed to write the file:" + filePath, e); } } + + protected boolean proceed() { + Console console = System.console(); + while (true) { + consoleLog.info( + "Are you certain that you wish to proceed, after reading the above warnings? 
(yes/no):"); + String input = console.readLine(); + + if (input.equalsIgnoreCase("yes")) { + consoleLog.info("Continuing..."); + return true; + } else if (input.equalsIgnoreCase("no")) { + consoleLog.info("Aborting..."); + return false; + } else { + consoleLog.info("Invalid input. Please enter 'yes' or 'no'."); + } + } + } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java index 9ea8add..9349d3b 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java @@ -15,6 +15,8 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import picocli.CommandLine; @CommandLine.Command( @@ -26,10 +28,31 @@ public class CatalogMigrationCLI { public CatalogMigrationCLI() {} + private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); + public static void main(String... args) { - CommandLine commandLine = new CommandLine(new CatalogMigrationCLI()); + CommandLine commandLine = + new CommandLine(new CatalogMigrationCLI()) + .setExecutionExceptionHandler( + (ex, cmd, parseResult) -> { + if (enableStacktrace(args)) { + cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); + } else { + consoleLog.error("Error during CLI execution: {}", ex.getMessage()); + } + return 1; + }); commandLine.setUsageHelpWidth(150); int exitCode = commandLine.execute(args); System.exit(exitCode); } + + private static boolean enableStacktrace(String... 
args) { + for (String arg : args) { + if (arg.equalsIgnoreCase("--stacktrace")) { + return true; + } + } + return false; + } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java index 54f6380..7a81139 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java @@ -52,8 +52,6 @@ static Catalog buildCatalog( Map hadoopConf) { Preconditions.checkArgument(catalogProperties != null, "catalog properties is null"); Preconditions.checkArgument(catalogType != null, "catalog type is null"); - Preconditions.checkArgument(catalogName != null, "catalog name is null"); - Preconditions.checkArgument(!catalogName.trim().isEmpty(), "catalog name is empty"); Configuration catalogConf = new Configuration(); if (hadoopConf != null) { hadoopConf.forEach(catalogConf::set); diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java index ce2fb62..9ed9fb0 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java @@ -19,9 +19,9 @@ import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.ArrayList; import java.util.Collections; -import java.util.List; +import java.util.HashSet; +import java.util.Set; import java.util.stream.Collectors; import org.apache.iceberg.catalog.TableIdentifier; import org.slf4j.Logger; @@ -34,17 +34,17 @@ public class IdentifierOptions { names = {"--identifiers"}, split = ",", description = { - "Optional selective list of identifiers to register. 
If not specified, all the tables will be registered. " + "Optional selective set of identifiers to register. If not specified, all the tables will be registered. " + "Use this when there are few identifiers that need to be registered. For a large number of identifiers, " + "use the `--identifiers-from-file` or `--identifiers-regex` option.", "Example: --identifiers foo.t1,bar.t2" }) - protected List identifiers = new ArrayList<>(); + protected Set identifiers = new HashSet<>(); @CommandLine.Option( names = {"--identifiers-from-file"}, description = { - "Optional text file path that contains a list of table identifiers (one per line) to register. Should not be " + "Optional text file path that contains a set of table identifiers (one per line) to register. Should not be " + "used with `--identifiers` or `--identifiers-regex` option.", "Example: --identifiers-from-file /tmp/files/ids.txt" }) @@ -59,30 +59,32 @@ public class IdentifierOptions { }) protected String identifiersRegEx; - private final Logger consoleLog = LoggerFactory.getLogger("console-log"); + private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); - protected List processIdentifiersInput() { + protected Set processIdentifiersInput() { if (identifiersFromFile != null && !Files.exists(Paths.get(identifiersFromFile))) { throw new IllegalArgumentException( "File specified in `--identifiers-from-file` option does not exist."); } - List tableIdentifiers; + Set tableIdentifiers; if (identifiersFromFile != null) { try { - consoleLog.info("Collecting identifiers from the file {}...", identifiersFromFile); + consoleLog.info("Collecting identifiers from the file {} ...", identifiersFromFile); tableIdentifiers = Files.readAllLines(Paths.get(identifiersFromFile)).stream() + .map(String::trim) + .filter(string -> !string.isEmpty()) .map(TableIdentifier::parse) - .collect(Collectors.toList()); + .collect(Collectors.toSet()); } catch (IOException e) { throw new UncheckedIOException( 
String.format("Failed to read the file: %s", identifiersFromFile), e); } } else if (!identifiers.isEmpty()) { tableIdentifiers = - identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toList()); + identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toSet()); } else { - tableIdentifiers = Collections.emptyList(); + tableIdentifiers = Collections.emptySet(); } return tableIdentifiers; } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java index af38dbf..70259ba 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java @@ -16,7 +16,6 @@ package org.projectnessie.tools.catalog.migration.cli; import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.hadoop.HadoopCatalog; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrator; import org.slf4j.Logger; @@ -37,10 +36,13 @@ + "catalog.") public class MigrateCommand extends BaseRegisterCommand { - private final Logger consoleLog = LoggerFactory.getLogger("console-log"); + private static final String newLine = System.lineSeparator(); + private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); @Override - protected CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetCatalog) { + protected CatalogMigrator catalogMigrator( + Catalog sourceCatalog, Catalog targetCatalog, boolean enableStackTrace) { + return ImmutableCatalogMigrator.builder() .sourceCatalog(sourceCatalog) .targetCatalog(targetCatalog) @@ -50,15 +52,34 @@ protected CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetC } @Override - protected boolean canProceed(Catalog sourceCatalog) { - if (sourceCatalog instanceof 
HadoopCatalog) { - consoleLog.warn( - "Source catalog type is HADOOP and it doesn't support dropping tables just from " - + "catalog. {}Avoid operating the migrated tables from the source catalog after migration. " - + "Use the tables from target catalog.", - System.lineSeparator()); + public Integer call() { + if (sourceCatalogOptions.type == CatalogMigrationUtil.CatalogType.HADOOP) { + consoleLog.error( + "Source catalog is a Hadoop catalog and it doesn't support deleting the table entries just from the catalog. " + + "Please use 'register' command instead."); + return 2; } - return PromptUtil.proceedForMigration(); + return super.call(); + } + + @Override + protected boolean canProceed(Catalog sourceCatalog) { + consoleLog.warn( + "{}" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "{}\tcan lead to a data loss as the in-progress commits will not be considered for migration. " + + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog.{}" + + "{}" + + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + + "{}\tand can only be accessed from the target catalog.", + newLine, + newLine, + newLine, + newLine, + newLine, + newLine); + return proceed(); } @Override diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java deleted file mode 100644 index 42f3b73..0000000 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/PromptUtil.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.projectnessie.tools.catalog.migration.cli; - -import java.io.Console; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public final class PromptUtil { - - private PromptUtil() {} - - private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); - private static final String newLine = System.lineSeparator(); - - static boolean proceedForRegistration() { - consoleLog.warn( - "{}" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "{}\tcan lead to a data loss as the in-progress commits will not be considered for migration. " - + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog.{}" - + "{}" - + "\tb) After the registration, successfully registered tables will be present in both source and target " - + "catalog. " - + "{}\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " - + "loss of data, and table corruption. " - + "{}\tUse `migrate` command to automatically delete the table from source catalog after " - + "migration.", - newLine, - newLine, - newLine, - newLine, - newLine, - newLine, - newLine); - return proceed(); - } - - static boolean proceedForMigration() { - consoleLog.warn( - "{}" - + "\ta) Executing catalog migration when the source catalog has some in-progress commits " - + "{}\tcan lead to a data loss as the in-progress commits will not be considered for migration. 
" - + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " - + "catalog.{}" - + "{}" - + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " - + "{}\tand can only be accessed from the target catalog.", - newLine, - newLine, - newLine, - newLine, - newLine, - newLine); - return proceed(); - } - - private static boolean proceed() { - Console console = System.console(); - while (true) { - consoleLog.info( - "Are you certain that you wish to proceed, after reading the above warnings? (yes/no):"); - String input = console.readLine(); - - if (input.equalsIgnoreCase("yes")) { - consoleLog.info("Continuing..."); - return true; - } else if (input.equalsIgnoreCase("no")) { - consoleLog.info("Aborting..."); - return false; - } else { - consoleLog.info("Invalid input. Please enter 'yes' or 'no'."); - } - } - } -} diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java index db9fac0..2bd7250 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/RegisterCommand.java @@ -18,6 +18,8 @@ import org.apache.iceberg.catalog.Catalog; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; import org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import picocli.CommandLine; @CommandLine.Command( @@ -32,8 +34,12 @@ "Bulk register the iceberg tables from source catalog to target catalog without data copy.") public class RegisterCommand extends BaseRegisterCommand { + private static final String newLine = System.lineSeparator(); + private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); + @Override - protected CatalogMigrator 
catalogMigrator(Catalog sourceCatalog, Catalog targetCatalog) { + protected CatalogMigrator catalogMigrator( + Catalog sourceCatalog, Catalog targetCatalog, boolean enableStackTrace) { return ImmutableCatalogMigrator.builder() .sourceCatalog(sourceCatalog) .targetCatalog(targetCatalog) @@ -44,7 +50,27 @@ protected CatalogMigrator catalogMigrator(Catalog sourceCatalog, Catalog targetC @Override protected boolean canProceed(Catalog sourceCatalog) { - return PromptUtil.proceedForRegistration(); + consoleLog.warn( + "{}" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "{}\tcan lead to a data loss as the in-progress commits will not be considered for migration. " + + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog.{}" + + "{}" + + "\tb) After the registration, successfully registered tables will be present in both source and target " + + "catalog. " + + "{}\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " + + "loss of data, and table corruption. 
" + + "{}\tUse `migrate` command to automatically delete the table from source catalog after " + + "migration.", + newLine, + newLine, + newLine, + newLine, + newLine, + newLine, + newLine); + return proceed(); } @Override diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java index 89e5966..eb2b13a 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java @@ -38,9 +38,8 @@ public class SourceCatalogOptions { required = true, split = ",", description = { - "Catalog properties for source catalog (like uri, warehouse, etc).", - "Example: --source-catalog-properties warehouse=/tmp/warehouseHadoop,type=hadoop", - " --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie" + "Iceberg catalog properties for source catalog (like uri, warehouse, etc).", + "Example: --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie" }) private Map properties; diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java index 6791134..ca4d74e 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java @@ -38,9 +38,8 @@ public class TargetCatalogOptions { required = true, split = ",", description = { - "Catalog properties for target catalog (like uri, warehouse, etc).", - "Example: --target-catalog-properties warehouse=/tmp/warehouseHadoop,type=hadoop", - " --target-catalog-properties 
uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie" + "Iceberg catalog properties for target catalog (like uri, warehouse, etc).", + "Example: --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie" }) private Map properties; diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java index 705044e..a0cac54 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java @@ -15,6 +15,9 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; +import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; + import com.google.common.collect.Lists; import java.io.IOException; import java.nio.file.Files; @@ -22,7 +25,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.stream.IntStream; import nl.altindag.log.LogCaptor; import nl.altindag.log.model.LogEvent; import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; @@ -39,8 +41,9 @@ import org.apache.iceberg.rest.RESTCatalog; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Order; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -65,6 +68,12 @@ public abstract class AbstractCLIMigrationTest extends AbstractTest { protected static String sourceCatalogType; protected static String targetCatalogType; + 
@BeforeAll + protected static void initFilesPaths() { + dryRunFile = outputDir.resolve(DRY_RUN_FILE); + failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); + } + @BeforeEach protected void beforeEach() { createTables(); @@ -75,8 +84,8 @@ protected void afterEach() throws IOException { // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 // create table will call refresh internally. - catalog1.createTable(TableIdentifier.of(Namespace.of("bar"), "tblx"), schema).refresh(); - catalog2.createTable(TableIdentifier.of(Namespace.of("bar"), "tblx"), schema).refresh(); + sourceCatalog.createTable(TableIdentifier.of(Namespace.of("bar"), "tblx"), schema).refresh(); + targetCatalog.createTable(TableIdentifier.of(Namespace.of("bar"), "tblx"), schema).refresh(); dropTables(); Files.deleteIfExists(dryRunFile); @@ -86,13 +95,15 @@ protected void afterEach() throws IOException { @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegister(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + RunCLI run = runCLI(deleteSourceTables, registerAllTablesArgs()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) .contains( "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); + + "Will be selecting all the tables from all the namespaces from the source catalog."); String operation = deleteSourceTables ? 
"migration" : "registration"; Assertions.assertThat(run.getOut()) .contains(String.format("Identified 4 tables for %s.", operation)); @@ -107,38 +118,39 @@ public void testRegister(boolean deleteSourceTables) throws Exception { // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - catalog2.loadTable(TableIdentifier.parse("foo.tbl1")).refresh(); + targetCatalog.loadTable(TableIdentifier.parse("foo.tbl1")).refresh(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - catalog1.tableExists(TableIdentifier.parse("foo.tbl1")); + sourceCatalog.tableExists(TableIdentifier.parse("foo.tbl1")); - if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { + if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { // table should be deleted after migration from source catalog - Assertions.assertThat(catalog1.listTables(Namespace.of("foo"))).isEmpty(); - Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))).isEmpty(); + Assertions.assertThat(sourceCatalog.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(sourceCatalog.listTables(Namespace.of("bar"))).isEmpty(); return; } // tables should be present in source catalog. 
- Assertions.assertThat(catalog1.listTables(Namespace.of("foo"))) + Assertions.assertThat(sourceCatalog.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(catalog1.listTables(Namespace.of("bar"))) + Assertions.assertThat(sourceCatalog.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } - @Order(1) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterSelectedTables(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + // using `--identifiers` option RunCLI run = runCLI( @@ -175,10 +187,10 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - catalog2.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).isEmpty(); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactly(TableIdentifier.parse("bar.tbl3")); Path identifierFile = outputDir.resolve("ids.txt"); @@ -222,10 +234,10 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - catalog2.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).isEmpty(); - 
Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).isEmpty(); + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl4"), TableIdentifier.parse("bar.tbl3")); @@ -249,7 +261,7 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) .contains( - "User has not specified the table identifiers. Selecting all the tables from all the namespaces " + "User has not specified the table identifiers. Will be selecting all the tables from all the namespaces " + "from the source catalog which matches the regex pattern:^foo\\..*"); operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) @@ -265,20 +277,20 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) throws Except // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - catalog2.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } - @Order(2) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterError(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); // use invalid namespace which leads to 
NoSuchTableException RunCLI run = runCLI( @@ -296,7 +308,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "--output-dir", outputDir.toAbsolutePath().toString(), "--disable-safety-prompts"); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) .contains(String.format("Identified 1 tables for %s.", operation)); @@ -342,7 +354,7 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { "--output-dir", outputDir.toAbsolutePath().toString(), "--disable-safety-prompts"); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) .contains(String.format("Identified 1 tables for %s.", operation)); @@ -357,10 +369,10 @@ public void testRegisterError(boolean deleteSourceTables) throws Exception { .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", operation)); } - @Order(3) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); // register only foo.tbl2 RunCLI run = runCLI( @@ -391,9 +403,9 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep Assertions.assertThat(run.getOut()) .contains(String.format("Details: %nSuccessfully %s these tables:%n[foo.tbl2]", operation)); - if (deleteSourceTables && !(catalog1 instanceof HadoopCatalog)) { + if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { // create a table with the same name in source catalog which got deleted. 
- catalog1.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + sourceCatalog.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); } // register all the tables from source catalog again. So that registering `foo.tbl2` will fail. @@ -411,7 +423,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep "--output-dir", outputDir.toAbsolutePath().toString(), "--disable-safety-prompts"); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) .contains(String.format("Identified 4 tables for %s.", operation)); @@ -440,12 +452,12 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - catalog2.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) .containsExactlyInAnyOrder( TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); @@ -482,11 +494,13 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep Assertions.assertThat(Files.readAllLines(failedIdentifiersFile)).containsExactly("foo.tbl2"); } - @Order(4) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { - // source catalog is catalog2 which has no tables. 
+ // use source catalog as targetCatalog which has no tables. + Assumptions.assumeFalse( + deleteSourceTables && targetCatalog instanceof HadoopCatalog, + "deleting source tables is unsupported for HadoopCatalog"); RunCLI run = runCLI( deleteSourceTables, @@ -502,19 +516,19 @@ public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { outputDir.toAbsolutePath().toString(), "--disable-safety-prompts"); - Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getExitCode()).isEqualTo(2); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) .contains( String.format( - "No tables are identified for %s. Please check `catalog_migration.log` file for more info.", + "No tables were identified for %s. Please check `catalog_migration.log` file for more info.", operation)); } - @Order(5) @ParameterizedTest @ValueSource(booleans = {true, false}) public void testDryRun(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); RunCLI run = runCLI( deleteSourceTables, @@ -554,50 +568,11 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); } - @Order(6) - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - // additionally create 240 tables along with 4 tables created in beforeEach() - IntStream.range(0, 240) - .forEach( - val -> - catalog1.createTable( - TableIdentifier.of(Namespace.of("foo"), "tblx" + val), schema)); - - RunCLI run = runCLI(deleteSourceTables, registerAllTablesArgs()); - - Assertions.assertThat(run.getExitCode()).isEqualTo(0); - String operation = deleteSourceTables ? 
"migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 244 tables for %s.", operation)); - operation = deleteSourceTables ? "migrated" : "registered"; - Assertions.assertThat(run.getOut()) - .contains( - String.format( - "Summary: %nSuccessfully %s 244 tables from %s catalog to" + " %s catalog.", - operation, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); - - operation = deleteSourceTables ? "migration" : "registration"; - // validate intermediate output - Assertions.assertThat(run.getOut()) - .contains(String.format("Attempted %s for 100 tables out of 244 tables.", operation)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Attempted %s for 200 tables out of 244 tables.", operation)); - - Assertions.assertThat(catalog2.listTables(Namespace.of("foo"))).hasSize(242); - Assertions.assertThat(catalog2.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); - } - - @Order(7) @ParameterizedTest @CsvSource(value = {"false,false", "false,true", "true,false", "true,true"}) public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); try (LogCaptor logCaptor = LogCaptor.forClass(CatalogMigrator.class)) { runCLI( deleteSourceTables, @@ -634,7 +609,7 @@ public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) } } - private static String[] registerAllTablesArgs() { + protected static String[] registerAllTablesArgs() { ArrayList args = Lists.newArrayList( "--source-catalog-type", @@ -651,7 +626,7 @@ private static String[] registerAllTablesArgs() { return args.toArray(new String[0]); } - private static RunCLI runCLI(boolean deleteSourceTables, String... 
args) throws Exception { + protected static RunCLI runCLI(boolean deleteSourceTables, String... args) throws Exception { List argsList = Lists.newArrayList(args); if (!deleteSourceTables) { argsList.add(0, "register"); diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java index 05637e5..dfd3ad2 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java @@ -26,7 +26,6 @@ import java.util.stream.Stream; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; @@ -146,7 +145,6 @@ private static Stream optionErrors() { @ParameterizedTest @MethodSource("optionErrors") - @Order(0) public void testOptionErrorsForRegister(List args, String expectedMessage) throws Exception { executeAndValidateResults("register", args, expectedMessage, 2); @@ -154,7 +152,6 @@ public void testOptionErrorsForRegister(List args, String expectedMessag @ParameterizedTest @MethodSource("optionErrors") - @Order(1) public void testOptionErrorsForMigrate(List args, String expectedMessage) throws Exception { executeAndValidateResults("migrate", args, expectedMessage, 2); @@ -165,61 +162,72 @@ private static Stream invalidArgs() { arguments( Lists.newArrayList( "--source-catalog-type", - "HADOOP", + "HIVE", "--source-catalog-properties", "k1=v1,k2=v2", "--target-catalog-type", - "HIVE", + "HADOOP", "--target-catalog-properties", "k3=v3, k4=v4"), - "java.lang.IllegalArgumentException: Cannot initialize HadoopCatalog " + "Error during CLI execution: Cannot initialize HadoopCatalog " + "because warehousePath must not be null or empty"), arguments( Lists.newArrayList( 
"--source-catalog-type", - "HADOOP", + "HIVE", "--source-catalog-properties", "k1=v1,k2=v2", "--target-catalog-type", - "HIVE", + "HADOOP", "--target-catalog-properties", "k3=v3, k4=v4", "--identifiers-from-file", "file.txt"), - "java.lang.IllegalArgumentException: " - + "File specified in `--identifiers-from-file` option does not exist."), + "Error during CLI execution: File specified in `--identifiers-from-file` option does not exist."), arguments( Lists.newArrayList( "--source-catalog-type", - "HADOOP", + "HIVE", "--source-catalog-properties", "k1=v1,k2=v2", "--target-catalog-type", - "HIVE", + "HADOOP", "--target-catalog-properties", "k3=v3, k4=v4", "--output-dir", "/path/to/file"), - "java.lang.IllegalArgumentException: " - + "path specified in `--output-dir` does not exist"), + "Error during CLI execution: Path specified in `--output-dir` does not exist"), arguments( Lists.newArrayList( "--source-catalog-type", - "HADOOP", + "HIVE", "--source-catalog-properties", "k1=v1,k2=v2", "--target-catalog-type", - "HIVE", + "HADOOP", "--target-catalog-properties", "k3=v3, k4=v4", "--output-dir", readOnlyDirLocation()), - "java.lang.IllegalArgumentException: " - + "path specified in `--output-dir` is not writable")); + "Error during CLI execution: Path specified in `--output-dir` is not writable"), + // test with stacktrace + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HADOOP", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--output-dir", + readOnlyDirLocation(), + "--stacktrace"), + "java.lang.IllegalArgumentException: Path specified in `--output-dir` is not writable")); } @ParameterizedTest - @Order(2) @MethodSource("invalidArgs") public void testInvalidArgsForRegister(List args, String expectedMessage) throws Exception { @@ -227,7 +235,6 @@ public void testInvalidArgsForRegister(List args, String expectedMessage } @ParameterizedTest - @Order(2) 
@MethodSource("invalidArgs") public void testInvalidArgsForMigrate(List args, String expectedMessage) throws Exception { @@ -235,7 +242,6 @@ public void testInvalidArgsForMigrate(List args, String expectedMessage) } @Test - @Order(4) public void version() throws Exception { RunCLI run = RunCLI.runWithPrintWriter("--version"); Assertions.assertThat(run.getExitCode()).isEqualTo(0); @@ -259,7 +265,7 @@ private static String readOnlyDirLocation() { } catch (IOException e) { throw new RuntimeException(e); } - readOnly.toFile().setWritable(false); + Assertions.assertThat(readOnly.toFile().setWritable(false)).isTrue(); return readOnly.toAbsolutePath().toString(); } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java index 1bd3039..eb251c6 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java @@ -30,7 +30,6 @@ import org.apache.iceberg.types.Types; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; @@ -49,7 +48,6 @@ static Stream blankOrNullStrings() { return Stream.of("", " ", null); } - @Order(0) @ParameterizedTest() @MethodSource("blankOrNullStrings") public void testCustomCatalogWithoutImpl(String impl) { @@ -66,7 +64,6 @@ public void testCustomCatalogWithoutImpl(String impl) { "Need to specify the fully qualified class name of the custom catalog impl"); } - @Order(1) @Test public void testInvalidArgs() { Assertions.assertThatThrownBy( @@ -79,24 +76,6 @@ public void testInvalidArgs() { .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("catalog type is null"); - 
Assertions.assertThatThrownBy( - () -> - CatalogMigrationUtil.buildCatalog( - Collections.emptyMap(), - CatalogMigrationUtil.CatalogType.HIVE, - null, - null, - null)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("catalog name is null"); - - Assertions.assertThatThrownBy( - () -> - CatalogMigrationUtil.buildCatalog( - Collections.emptyMap(), CatalogMigrationUtil.CatalogType.HIVE, " ", null, null)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("catalog name is empty"); - Assertions.assertThatThrownBy( () -> CatalogMigrationUtil.buildCatalog( @@ -111,7 +90,6 @@ public void testInvalidArgs() { } @Test - @Order(2) public void testBuildHadoopCatalog() { Map properties = new HashMap<>(); properties.put("warehouse", logDir.toAbsolutePath().toString()); @@ -137,7 +115,6 @@ public void testBuildHadoopCatalog() { } @Test - @Order(3) public void testBuildNessieCatalog() { Map properties = new HashMap<>(); properties.put("warehouse", logDir.toAbsolutePath().toString()); @@ -153,7 +130,6 @@ public void testBuildNessieCatalog() { } @Test - @Order(4) public void testBuildHiveCatalog() { Map properties = new HashMap<>(); properties.put("warehouse", logDir.toAbsolutePath().toString()); diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java index b15101c..f5a8d95 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java @@ -15,9 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.cli; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; - import org.junit.jupiter.api.AfterAll; import 
org.junit.jupiter.api.BeforeAll; @@ -25,16 +22,14 @@ public class HadoopCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() { - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); sourceCatalogProperties = "warehouse=" + warehouse1.toAbsolutePath() + ",type=hadoop"; targetCatalogProperties = "warehouse=" + warehouse2.toAbsolutePath() + ",type=hadoop"; - catalog1 = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "catalog1"); - catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "catalog2"); + sourceCatalog = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "sourceCatalog"); + targetCatalog = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "targetCatalog"); - sourceCatalogType = catalogType(catalog1); - targetCatalogType = catalogType(catalog2); + sourceCatalogType = catalogType(sourceCatalog); + targetCatalogType = catalogType(targetCatalog); createNamespaces(); } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java index 4a9ec34..2b89da3 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java @@ -15,12 +15,8 @@ */ package org.projectnessie.tools.catalog.migration.cli; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class 
ITHadoopToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @@ -28,8 +24,6 @@ public class ITHadoopToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); sourceCatalogProperties = "warehouse=" + warehouse1.toAbsolutePath() + ",type=hadoop"; targetCatalogProperties = "warehouse=" @@ -37,11 +31,11 @@ protected static void setup() throws Exception { + ",uri=" + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - catalog1 = createHadoopCatalog(warehouse1.toString(), "catalog1"); - catalog2 = HiveMetaStoreRunner.hiveCatalog(); + sourceCatalog = createHadoopCatalog(warehouse1.toString(), "sourceCatalog"); + targetCatalog = HiveMetaStoreRunner.hiveCatalog(); - sourceCatalogType = catalogType(catalog1); - targetCatalogType = catalogType(catalog2); + sourceCatalogType = catalogType(sourceCatalog); + targetCatalogType = catalogType(targetCatalog); createNamespaces(); } @@ -51,11 +45,4 @@ protected static void tearDown() throws Exception { dropNamespaces(); HiveMetaStoreRunner.stopMetastore(); } - - // disable large table test for IT to save CI time. It will be executed only for UT. 
- @Override - @Disabled - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); - } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java index 223ad9c..76901a8 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java @@ -15,12 +15,8 @@ */ package org.projectnessie.tools.catalog.migration.cli; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToHadoopCLIMigrationTest extends AbstractCLIMigrationTest { @@ -28,8 +24,6 @@ public class ITHiveToHadoopCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); sourceCatalogProperties = "warehouse=" + warehouse1.toAbsolutePath() @@ -37,11 +31,11 @@ protected static void setup() throws Exception { + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); targetCatalogProperties = "warehouse=" + warehouse2.toAbsolutePath() + ",type=hadoop"; - catalog1 = HiveMetaStoreRunner.hiveCatalog(); - catalog2 = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); + sourceCatalog = HiveMetaStoreRunner.hiveCatalog(); + 
targetCatalog = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); - sourceCatalogType = catalogType(catalog1); - targetCatalogType = catalogType(catalog2); + sourceCatalogType = catalogType(sourceCatalog); + targetCatalogType = catalogType(targetCatalog); createNamespaces(); } @@ -51,11 +45,4 @@ protected static void tearDown() throws Exception { dropNamespaces(); HiveMetaStoreRunner.stopMetastore(); } - - // disable large table test for IT to save CI time. It will be executed only for UT. - @Override - @Disabled - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); - } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java index ae38082..2b79b78 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java @@ -15,12 +15,14 @@ */ package org.projectnessie.tools.catalog.migration.cli; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; - +import java.util.stream.IntStream; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToNessieCLIMigrationTest extends AbstractCLIMigrationTest { @@ 
-32,8 +34,6 @@ public class ITHiveToNessieCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); sourceCatalogProperties = "warehouse=" + warehouse1.toAbsolutePath() @@ -42,11 +42,11 @@ protected static void setup() throws Exception { targetCatalogProperties = "uri=" + nessieUri + ",ref=main,warehouse=" + warehouse2.toAbsolutePath(); - catalog1 = HiveMetaStoreRunner.hiveCatalog(); - catalog2 = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); + sourceCatalog = HiveMetaStoreRunner.hiveCatalog(); + targetCatalog = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); - sourceCatalogType = catalogType(catalog1); - targetCatalogType = catalogType(catalog2); + sourceCatalogType = catalogType(sourceCatalog); + targetCatalogType = catalogType(targetCatalog); createNamespaces(); } @@ -57,10 +57,47 @@ protected static void tearDown() throws Exception { HiveMetaStoreRunner.stopMetastore(); } - // disable large table test for IT to save CI time. It will be executed only for UT. - @Override - @Disabled + // Executing migration of large number of tables for only one set of catalogs to save CI time. 
+ @ParameterizedTest + @ValueSource(booleans = {true, false}) public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + // additionally create 240 tables along with 4 tables created in beforeEach() + IntStream.range(0, 240) + .forEach( + val -> + sourceCatalog.createTable( + TableIdentifier.of(Namespace.of("foo"), "tblx" + val), schema)); + + RunCLI run = runCLI(deleteSourceTables, registerAllTablesArgs()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 244 tables for %s.", operation)); + operation = deleteSourceTables ? "migrated" : "registered"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: %nSuccessfully %s 244 tables from %s catalog to" + " %s catalog.", + operation, sourceCatalogType, targetCatalogType)); + Assertions.assertThat(run.getOut()) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); + + operation = deleteSourceTables ? 
"migration" : "registration"; + // validate intermediate output + Assertions.assertThat(run.getOut()) + .contains(String.format("Attempted %s for 100 tables out of 244 tables.", operation)); + Assertions.assertThat(run.getOut()) + .contains(String.format("Attempted %s for 200 tables out of 244 tables.", operation)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + + Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).hasSize(242); + Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) + .containsExactlyInAnyOrder( + TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java index dbcb4cd..adf19dd 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java @@ -15,12 +15,8 @@ */ package org.projectnessie.tools.catalog.migration.cli; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; -import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITNessieToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @@ -32,8 +28,6 @@ public class ITNessieToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - 
dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); sourceCatalogProperties = "uri=" + nessieUri + ",ref=main,warehouse=" + warehouse1; targetCatalogProperties = "warehouse=" @@ -41,11 +35,11 @@ protected static void setup() throws Exception { + ",uri=" + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - catalog1 = createNessieCatalog(warehouse1.toAbsolutePath().toString(), nessieUri); - catalog2 = HiveMetaStoreRunner.hiveCatalog(); + sourceCatalog = createNessieCatalog(warehouse1.toAbsolutePath().toString(), nessieUri); + targetCatalog = HiveMetaStoreRunner.hiveCatalog(); - sourceCatalogType = catalogType(catalog1); - targetCatalogType = catalogType(catalog2); + sourceCatalogType = catalogType(sourceCatalog); + targetCatalogType = catalogType(targetCatalog); createNamespaces(); } @@ -55,11 +49,4 @@ protected static void tearDown() throws Exception { dropNamespaces(); HiveMetaStoreRunner.stopMetastore(); } - - // disable large table test for IT to save CI time. It will be executed only for UT. 
- @Override - @Disabled - public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { - super.testRegisterLargeNumberOfTables(deleteSourceTables); - } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java index fc8fa67..782db41 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java @@ -15,10 +15,12 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import com.google.common.collect.Sets; import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; +import java.util.Set; import org.apache.iceberg.catalog.TableIdentifier; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -27,11 +29,11 @@ public class ProcessIdentifiersTest { - protected static @TempDir Path logDir; + protected static @TempDir Path tempDir; @BeforeAll protected static void initLogDir() { - System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); + System.setProperty("catalog.migration.log.dir", tempDir.toAbsolutePath().toString()); } @Test @@ -39,12 +41,12 @@ public void testOptions() throws Exception { Assertions.assertThat(new IdentifierOptions().processIdentifiersInput()).isEmpty(); IdentifierOptions identifierOptions = new IdentifierOptions(); - identifierOptions.identifiers = Arrays.asList("foo.abc", "bar.def"); + identifierOptions.identifiers = Sets.newHashSet("foo.abc", "bar.def"); Assertions.assertThat(identifierOptions.processIdentifiersInput()) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.abc"), TableIdentifier.parse("bar.def")); - Path identifierFile = logDir.resolve("file_with_ids.txt"); + Path identifierFile = 
tempDir.resolve("file_with_ids.txt"); Files.write(identifierFile, Arrays.asList("db1.t1", "db2.t2", "db123.t5")); IdentifierOptions newOptions = new IdentifierOptions(); newOptions.identifiersFromFile = identifierFile.toAbsolutePath().toString(); @@ -54,16 +56,47 @@ public void testOptions() throws Exception { TableIdentifier.parse("db2.t2"), TableIdentifier.parse("db123.t5")); - identifierFile.toFile().setReadable(false); + Assertions.assertThat(identifierFile.toFile().setReadable(false)).isTrue(); Assertions.assertThatThrownBy(newOptions::processIdentifiersInput) .isInstanceOf(UncheckedIOException.class) .hasMessageContaining("Failed to read the file: " + identifierFile); - identifierFile.toFile().setReadable(true); + Assertions.assertThat(identifierFile.toFile().setReadable(true)).isTrue(); IdentifierOptions options = new IdentifierOptions(); options.identifiersFromFile = "path/to/file"; Assertions.assertThatThrownBy(options::processIdentifiersInput) .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("File specified in `--identifiers-from-file` option does not exist"); + + // empty file + identifierFile = tempDir.resolve("ids1.txt"); + Files.createFile(identifierFile); + options = new IdentifierOptions(); + newOptions.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + Assertions.assertThat(options.processIdentifiersInput()).isEmpty(); + + // with some blanks + identifierFile = tempDir.resolve("ids2.txt"); + String[] lines = {"abc. def", " abc 123 ", "", "", " xyz%n123"}; + Files.writeString(identifierFile, String.join(System.lineSeparator(), lines)); + options = new IdentifierOptions(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + Set identifiers = options.processIdentifiersInput(); + Assertions.assertThat(identifiers) + .containsExactlyInAnyOrder( + TableIdentifier.parse("abc. 
def"), + TableIdentifier.parse("abc 123"), + TableIdentifier.parse("xyz%n123")); + + // with duplicate entries + identifierFile = tempDir.resolve("ids3.txt"); + String[] ids = {"abc.def", "xx.yy", "abc.def", "abc.def", "abc.def ", " xx.yy"}; + Files.writeString(identifierFile, String.join(System.lineSeparator(), ids)); + options = new IdentifierOptions(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + identifiers = options.processIdentifiersInput(); + Assertions.assertThat(identifiers) + .containsExactlyInAnyOrder( + TableIdentifier.parse("abc.def"), TableIdentifier.parse("xx.yy")); } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/RunCLI.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/RunCLI.java index 562cbfc..41877df 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/RunCLI.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/RunCLI.java @@ -66,7 +66,11 @@ private static int runMain(PrintWriter out, PrintWriter err, String... arguments new CommandLine(new CatalogMigrationCLI()) .setExecutionExceptionHandler( (ex, cmd, parseResult) -> { - cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); + if (enableStacktrace(arguments)) { + cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); + } else { + cmd.getErr().println("Error during CLI execution: " + ex.getMessage()); + } return cmd.getExitCodeExceptionMapper() != null ? cmd.getExitCodeExceptionMapper().getExitCode(ex) : cmd.getCommandSpec().exitCodeOnExecutionException(); @@ -96,6 +100,15 @@ public String getErr() { return err; } + private static boolean enableStacktrace(String... 
args) { + for (String arg : args) { + if (arg.equalsIgnoreCase("--stacktrace")) { + return true; + } + } + return false; + } + @Override public String toString() { return String.format( diff --git a/gradle/baselibs.versions.toml b/gradle/baselibs.versions.toml index 028df8b..4427ecd 100644 --- a/gradle/baselibs.versions.toml +++ b/gradle/baselibs.versions.toml @@ -2,23 +2,14 @@ [versions] errorpronePlugin = "3.0.1" +ideaExt = "1.1.7" jandexPlugin = "1.86" -junit = "5.9.2" -nessieBuildPlugins = "0.2.20" shadowPlugin = "8.1.0" spotlessPlugin = "6.16.0" [libraries] -assertj-core = { module = "org.assertj:assertj-core", version = "3.24.2" } errorprone = { module = "net.ltgt.gradle:gradle-errorprone-plugin", version.ref = "errorpronePlugin" } -idea-ext = { module = "gradle.plugin.org.jetbrains.gradle.plugin.idea-ext:gradle-idea-ext", version = "1.1.7" } +idea-ext = { module = "gradle.plugin.org.jetbrains.gradle.plugin.idea-ext:gradle-idea-ext", version.ref = "ideaExt" } jandex = { module = "com.github.vlsi.gradle:jandex-plugin", version.ref = "jandexPlugin" } -junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } -junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api" } -junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } -junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } -nessie-buildsupport-jacoco = { module = "org.projectnessie.buildsupport:jacoco", version.ref = "nessieBuildPlugins" } -nessie-buildsupport-jacoco-aggregator = { module = "org.projectnessie.buildsupport:jacoco-aggregator", version.ref = "nessieBuildPlugins" } -nessie-buildsupport-reflectionconfig = { module = "org.projectnessie.buildsupport:reflection-config", version.ref = "nessieBuildPlugins" } shadow = { module = "com.github.johnrengelman:shadow", version.ref = "shadowPlugin" } spotless = { module = "com.diffplug.spotless:spotless-plugin-gradle", version.ref = "spotlessPlugin" } diff --git a/gradle/libs.versions.toml 
b/gradle/libs.versions.toml index a365fa0..c6e16ce 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -16,7 +16,6 @@ junit = "5.9.2" logback = "1.4.5" logcaptor = "2.8.0" nessie = "0.52.3" -nessieBuildPlugins = "0.2.19" nessieRunner = "0.29.0" picocli = "4.7.1" shadowPlugin = "7.1.2" @@ -41,7 +40,9 @@ hadoop-aws = { module = "org.apache.hadoop:hadoop-aws", version.ref = "hadoop" } hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" } iceberg-dell = { module = "org.apache.iceberg:iceberg-dell", version.ref = "iceberg" } iceberg-spark-runtime = { module = "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12", version.ref = "iceberg" } -immutables = { module = "org.immutables:value", version.ref = "immutables" } +immutables-builder = { module = "org.immutables:builder", version.ref = "immutables" } +immutables-value-annotations = { module = "org.immutables:value-annotations", version.ref = "immutables" } +immutables-value-processor = { module = "org.immutables:value-processor", version.ref = "immutables" } jacoco-ant = { module = "org.jacoco:org.jacoco.ant", version.ref = "jacoco" } jacoco-report = { module = "org.jacoco:org.jacoco.report", version.ref = "jacoco" } jacoco-maven-plugin = { module = "org.jacoco:jacoco-maven-plugin", version.ref = "jacoco" } @@ -50,7 +51,6 @@ junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } junit-jupiter-api = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } -junit-vintage-engine = { module = "org.junit.vintage:junit-vintage-engine" } logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } logcaptor = { module = "io.github.hakky54:logcaptor", version.ref = "logcaptor" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } From 
9c90731b68ffa6ea31555609415f2e9a96031d50 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Wed, 29 Mar 2023 23:05:31 +0530 Subject: [PATCH 25/31] Fix dependency issues --- buildSrc/src/main/kotlin/Testing.kt | 1 + cli/build.gradle.kts | 57 ++++++++++++++++--- .../migration/cli/CatalogMigrationUtil.java | 3 +- gradle/libs.versions.toml | 18 ++++-- 4 files changed, 65 insertions(+), 14 deletions(-) diff --git a/buildSrc/src/main/kotlin/Testing.kt b/buildSrc/src/main/kotlin/Testing.kt index 094998d..5d90be8 100644 --- a/buildSrc/src/main/kotlin/Testing.kt +++ b/buildSrc/src/main/kotlin/Testing.kt @@ -18,6 +18,7 @@ import org.gradle.api.Project import org.gradle.api.services.BuildService import org.gradle.api.services.BuildServiceParameters import org.gradle.api.tasks.testing.Test +import org.gradle.api.tasks.testing.logging.TestExceptionFormat import org.gradle.kotlin.dsl.named import org.gradle.kotlin.dsl.provideDelegate import org.gradle.kotlin.dsl.register diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index 6d2dc4a..ef53c03 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -29,19 +29,62 @@ applyShadowJar() dependencies { implementation(project(":iceberg-catalog-migrator-api")) + implementation(libs.guava) implementation(libs.slf4j) runtimeOnly(libs.logback.classic) implementation(libs.picocli) implementation(libs.iceberg.spark.runtime) implementation(libs.iceberg.dell) implementation(libs.hadoop.aws) { exclude("com.amazonaws", "aws-java-sdk-bundle") } - implementation(libs.hadoop.common) + implementation(libs.hadoop.common) { + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("javax.servlet", "servlet-api") + exclude("com.google.code.gson", "gson") + exclude("commons-beanutils") + } // AWS dependencies based on https://iceberg.apache.org/docs/latest/aws/#enabling-aws-integration - implementation(libs.aws.sdk.glue) - implementation(libs.aws.sdk.s3) - implementation(libs.aws.sdk.dynamo) - 
implementation(libs.aws.sdk.kms) - implementation(libs.aws.sdk.sts) + runtimeOnly(libs.aws.sdk.apache.client) + runtimeOnly(libs.aws.sdk.auth) + runtimeOnly(libs.aws.sdk.glue) + runtimeOnly(libs.aws.sdk.s3) + runtimeOnly(libs.aws.sdk.dynamo) + runtimeOnly(libs.aws.sdk.kms) + runtimeOnly(libs.aws.sdk.lakeformation) + runtimeOnly(libs.aws.sdk.sts) + runtimeOnly(libs.aws.sdk.url.connection.client) + runtimeOnly(libs.ecs.bundle) + + // needed for Hive catalog + runtimeOnly("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hbase") + exclude("org.apache.logging.log4j") + exclude("co.cask.tephra") + exclude("com.google.code.findbugs", "jsr305") + exclude("org.eclipse.jetty.aggregate", "jetty-all") + exclude("org.eclipse.jetty.orbit", "javax.servlet") + exclude("org.apache.parquet", "parquet-hadoop-bundle") + exclude("com.tdunning", "json") + exclude("javax.transaction", "transaction-api") + exclude("com.zaxxer", "HikariCP") + } + runtimeOnly("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hive", "hive-llap-tez") + exclude("org.apache.logging.log4j") + exclude("com.google.protobuf", "protobuf-java") + exclude("org.apache.calcite") + exclude("org.apache.calcite.avatica") + exclude("com.google.code.findbugs", "jsr305") + } + runtimeOnly("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") testImplementation(libs.junit.jupiter.params) testImplementation(libs.junit.jupiter.api) @@ -108,7 +151,7 @@ val processResources = val mainClassName = "org.projectnessie.tools.catalog.migration.cli.CatalogMigrationCLI" -val shadowJar = 
tasks.named("shadowJar") +val shadowJar = tasks.named("shadowJar") { isZip64 = true } val unixExecutable by tasks.registering { diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java index 7a81139..b132267 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java @@ -18,6 +18,7 @@ import com.google.common.base.Preconditions; import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; import org.apache.iceberg.aws.glue.GlueCatalog; import org.apache.iceberg.catalog.Catalog; @@ -56,7 +57,7 @@ static Catalog buildCatalog( if (hadoopConf != null) { hadoopConf.forEach(catalogConf::set); } - return org.apache.iceberg.CatalogUtil.loadCatalog( + return CatalogUtil.loadCatalog( catalogImpl(catalogType, customCatalogImpl), catalogName, catalogProperties, catalogConf); } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index c6e16ce..d837757 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,7 +1,8 @@ [versions] assertj = "3.24.2" -aws = "1.12.415" +aws = "2.20.18" checkstyle = "10.8.0" +ecs = "3.3.2" errorprone = "2.18.0" errorproneSlf4j = "0.1.18" googleJavaFormat = "1.16.0" @@ -23,15 +24,20 @@ slf4j = "1.7.36" [libraries] assertj = { module = "org.assertj:assertj-core", version.ref = "assertj" } -aws-sdk-dynamo = { module = "com.amazonaws:aws-java-sdk-dynamodb", version.ref = "aws" } -aws-sdk-glue = { module = "com.amazonaws:aws-java-sdk-glue", version.ref = "aws" } -aws-sdk-kms = { module = "com.amazonaws:aws-java-sdk-kms", version.ref = "aws" } -aws-sdk-sts = { module = "com.amazonaws:aws-java-sdk-sts", version.ref = "aws" } -aws-sdk-s3 = { module = 
"com.amazonaws:aws-java-sdk-s3", version.ref = "aws" } +aws-sdk-apache-client = { module = "software.amazon.awssdk:apache-client", version.ref = "aws" } +aws-sdk-auth = { module = "software.amazon.awssdk:auth", version.ref = "aws" } +aws-sdk-dynamo = { module = "software.amazon.awssdk:dynamodb", version.ref = "aws" } +aws-sdk-glue = { module = "software.amazon.awssdk:glue", version.ref = "aws" } +aws-sdk-kms = { module = "software.amazon.awssdk:kms", version.ref = "aws" } +aws-sdk-lakeformation = { module = "software.amazon.awssdk:lakeformation", version.ref = "aws" } +aws-sdk-sts = { module = "software.amazon.awssdk:sts", version.ref = "aws" } +aws-sdk-s3 = { module = "software.amazon.awssdk:s3", version.ref = "aws" } +aws-sdk-url-connection-client = { module = "software.amazon.awssdk:url-connection-client", version.ref = "aws" } checkstyle = { module = "com.puppycrawl.tools:checkstyle", version.ref = "checkstyle" } errorprone-annotations = { module = "com.google.errorprone:error_prone_annotations", version.ref = "errorprone" } errorprone-core = { module = "com.google.errorprone:error_prone_core", version.ref = "errorprone" } errorprone-slf4j = { module = "jp.skypencil.errorprone.slf4j:errorprone-slf4j", version.ref = "errorproneSlf4j" } +ecs-bundle = { module = "com.emc.ecs:object-client-bundle", version.ref = "ecs" } findbugs-annotations = { module = "com.google.code.findbugs:annotations", version = "3.0.1" } findbugs-jsr305 = { module = "com.google.code.findbugs:jsr305", version = "3.0.2" } google-java-format = { module = "com.google.googlejavaformat:google-java-format", version.ref = "googleJavaFormat" } From 05eccfeb6ff0448003e3d6baa2d1606fca8683d9 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Thu, 30 Mar 2023 21:22:11 +0530 Subject: [PATCH 26/31] Findings from all catalog test --- README.md | 40 +++++++++++++++++++ api-test/build.gradle.kts | 8 +++- api/build.gradle.kts | 8 +++- cli/build.gradle.kts | 10 +---- .../migration/cli/CatalogMigrationUtil.java 
| 6 +++ gradle/libs.versions.toml | 8 ++-- 6 files changed, 67 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index f83af8c..998a108 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,46 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --identifiers foo.t1,foo.t2 ``` +## Migrate all tables from GLUE catalog to Arctic catalog (main branch) +```shell +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ +--source-catalog-type GLUE \ +--source-catalog-properties warehouse=s3a://ajantha-test/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT \ +--target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +``` + +## Migrate all tables from HIVE catalog to Arctic catalog (main branch) +```shell +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ +--source-catalog-type HIVE \ +--source-catalog-properties warehouse=s3a://ajantha-test/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT \ +--target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +``` + +## Migrate all tables from DYNAMODB catalog to Arctic catalog (main branch) +```shell +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ +--source-catalog-type DYNAMODB \ +--source-catalog-properties warehouse=s3a://ajantha-test/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \ +--target-catalog-type NESSIE \ +--target-catalog-properties 
uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT \ +--target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +``` + +## Migrate all tables from JDBC catalog to Arctic catalog (main branch) +```shell +java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ +--source-catalog-type JDBC \ +--source-catalog-properties warehouse=/tmp/warehouseJdbc,jdbc.user=root,jdbc.password=pass,uri=jdbc:mysql://localhost:3306/db1,name=catalogName \ +--target-catalog-type NESSIE \ +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT \ +--target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +``` + # Scenarios ## A. User need to try out new catalog Users can use a new catalog by creating a fresh table to test the new catalog's capabilities, without requiring a tool to migrate the catalog. 
diff --git a/api-test/build.gradle.kts b/api-test/build.gradle.kts index 6223883..0c29bd8 100644 --- a/api-test/build.gradle.kts +++ b/api-test/build.gradle.kts @@ -22,7 +22,13 @@ plugins { dependencies { implementation(libs.guava) - implementation(libs.hadoop.common) + implementation(libs.hadoop.common) { + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("javax.servlet", "servlet-api") + exclude("com.google.code.gson", "gson") + exclude("commons-beanutils") + } implementation(libs.iceberg.spark.runtime) implementation(libs.junit.jupiter.api) implementation("org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests") diff --git a/api/build.gradle.kts b/api/build.gradle.kts index 29809b1..377a543 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts @@ -34,7 +34,13 @@ dependencies { testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) testImplementation(libs.assertj) - testImplementation(libs.hadoop.common) + testImplementation(libs.hadoop.common) { + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("javax.servlet", "servlet-api") + exclude("com.google.code.gson", "gson") + exclude("commons-beanutils") + } testImplementation(libs.logcaptor) testImplementation(project(":iceberg-catalog-migrator-api-test")) diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index ef53c03..1c6c789 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -36,13 +36,8 @@ dependencies { implementation(libs.iceberg.spark.runtime) implementation(libs.iceberg.dell) implementation(libs.hadoop.aws) { exclude("com.amazonaws", "aws-java-sdk-bundle") } - implementation(libs.hadoop.common) { - exclude("org.apache.avro", "avro") - exclude("org.slf4j", "slf4j-log4j12") - exclude("javax.servlet", "servlet-api") - exclude("com.google.code.gson", "gson") - exclude("commons-beanutils") - } + runtimeOnly(libs.ecs.bundle) + runtimeOnly(libs.mysql.driver) // 
AWS dependencies based on https://iceberg.apache.org/docs/latest/aws/#enabling-aws-integration runtimeOnly(libs.aws.sdk.apache.client) runtimeOnly(libs.aws.sdk.auth) @@ -53,7 +48,6 @@ dependencies { runtimeOnly(libs.aws.sdk.lakeformation) runtimeOnly(libs.aws.sdk.sts) runtimeOnly(libs.aws.sdk.url.connection.client) - runtimeOnly(libs.ecs.bundle) // needed for Hive catalog runtimeOnly("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java index b132267..0319512 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java @@ -57,6 +57,12 @@ static Catalog buildCatalog( if (hadoopConf != null) { hadoopConf.forEach(catalogConf::set); } + if (catalogProperties.get("name") != null) { + // Some catalogs like jdbc stores the catalog name from the client when the namespace or table + // is created. + // Hence, when accessing the tables from another client, catalog name should match. + catalogName = catalogProperties.get("name"); + } return CatalogUtil.loadCatalog( catalogImpl(catalogType, customCatalogImpl), catalogName, catalogProperties, catalogConf); } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index d837757..8852836 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,14 +1,14 @@ [versions] assertj = "3.24.2" -aws = "2.20.18" +aws = "2.20.18" # this is in mapping with iceberg repo. checkstyle = "10.8.0" ecs = "3.3.2" errorprone = "2.18.0" errorproneSlf4j = "0.1.18" googleJavaFormat = "1.16.0" guava = "31.1-jre" -hadoop = "3.2.4" -hive = "2.3.8" # this is in mapping with iceberg repo. Later versions have junit depedency problem +hadoop = "2.7.3" # this is in mapping with iceberg repo. 
+hive = "2.3.8" # this is in mapping with iceberg repo. iceberg = "1.2.0" immutables = "2.9.3" jacoco = "0.8.8" @@ -16,6 +16,7 @@ jandex = "3.0.5" junit = "5.9.2" logback = "1.4.5" logcaptor = "2.8.0" +mysqlDriver = "8.0.32" nessie = "0.52.3" nessieRunner = "0.29.0" picocli = "4.7.1" @@ -59,6 +60,7 @@ junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } logcaptor = { module = "io.github.hakky54:logcaptor", version.ref = "logcaptor" } +mysql-driver = { module = "mysql:mysql-connector-java", version.ref = "mysqlDriver" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } From 07157ccc26a2d12d996f73cc961c569f3c84c2c8 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Thu, 6 Apr 2023 17:10:57 +0530 Subject: [PATCH 27/31] Address comments from April 6 --- .../migration/api/test/AbstractTest.java | 59 +- api/build.gradle.kts | 15 +- .../migration/api}/CatalogMigrationUtil.java | 4 +- .../migration/api/CatalogMigrator.java | 52 +- .../api/AbstractTestCatalogMigrator.java | 207 ++++--- .../api}/CatalogMigrationUtilTest.java | 9 +- .../api/CatalogMigratorParamsTest.java | 12 - .../api/CustomCatalogMigratorTest.java | 6 +- .../api/HadoopCatalogMigratorTest.java | 40 +- .../api/ITHadoopToHiveCatalogMigrator.java | 10 +- .../api/ITHadoopToNessieCatalogMigrator.java | 50 +- .../api/ITHiveToHadoopCatalogMigrator.java | 8 +- .../api/ITHiveToNessieCatalogMigrator.java | 7 +- .../api/ITNessieToHiveCatalogMigrator.java | 17 +- buildSrc/src/main/kotlin/Java.kt | 1 - buildSrc/src/main/kotlin/Testing.kt | 3 - buildSrc/src/main/kotlin/Utilities.kt | 21 - cli/build.gradle.kts | 3 +- .../migration/cli/IdentifierOptions.java | 7 +- .../catalog/migration/cli/MigrateCommand.java | 1 + 
.../migration/cli/SourceCatalogOptions.java | 3 +- .../migration/cli/TargetCatalogOptions.java | 3 +- .../cli/AbstractCLIMigrationTest.java | 575 +++++++----------- .../migration/cli/HadoopCLIMigrationTest.java | 12 +- .../cli/ITHadoopToHiveCLIMigrationTest.java | 18 +- .../cli/ITHiveToHadoopCLIMigrationTest.java | 18 +- .../cli/ITHiveToNessieCLIMigrationTest.java | 63 +- .../cli/ITNessieToHiveCLIMigrationTest.java | 22 +- 28 files changed, 500 insertions(+), 746 deletions(-) rename {cli/src/main/java/org/projectnessie/tools/catalog/migration/cli => api/src/main/java/org/projectnessie/tools/catalog/migration/api}/CatalogMigrationUtil.java (97%) rename {cli/src/test/java/org/projectnessie/tools/catalog/migration/cli => api/src/test/java/org/projectnessie/tools/catalog/migration/api}/CatalogMigrationUtilTest.java (95%) diff --git a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java index 5c4d805..ad0c3ad 100644 --- a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java +++ b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java @@ -21,15 +21,12 @@ import java.util.List; import java.util.Map; import java.util.stream.Stream; -import org.apache.hadoop.conf.Configuration; -import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.Schema; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; -import org.apache.iceberg.nessie.NessieCatalog; import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeAll; @@ -37,14 +34,26 @@ public abstract class AbstractTest { - protected static @TempDir Path logDir; + protected static 
@TempDir Path tempDir; - private static final List namespaceList = - Arrays.asList(Namespace.of("foo"), Namespace.of("bar"), Namespace.of("db1")); + public static final Namespace FOO = Namespace.of("foo"); + public static final Namespace BAR = Namespace.of("bar"); + public static final Namespace DB1 = Namespace.of("db1"); + public static final TableIdentifier FOO_TBL1 = TableIdentifier.of(FOO, "tbl1"); + public static final TableIdentifier FOO_TBL2 = TableIdentifier.of(FOO, "tbl2"); + public static final TableIdentifier BAR_TBL3 = TableIdentifier.of(BAR, "tbl3"); + public static final TableIdentifier BAR_TBL4 = TableIdentifier.of(BAR, "tbl4"); + + private static final List namespaceList = Arrays.asList(FOO, BAR, DB1); + + private static String sourceCatalogWarehouse; + private static String targetCatalogWarehouse; @BeforeAll protected static void initLogDir() { - System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); + System.setProperty("catalog.migration.log.dir", tempDir.toAbsolutePath().toString()); + sourceCatalogWarehouse = tempDir.resolve("sourceCatalogWarehouse").toAbsolutePath().toString(); + targetCatalogWarehouse = tempDir.resolve("targetCatalogWarehouse").toAbsolutePath().toString(); } protected static Catalog sourceCatalog; @@ -82,11 +91,11 @@ protected static void dropNamespaces() { protected static void createTables() { // two tables in 'foo' namespace - sourceCatalog.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl1"), schema); - sourceCatalog.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + sourceCatalog.createTable(FOO_TBL1, schema); + sourceCatalog.createTable(FOO_TBL2, schema); // two tables in 'bar' namespace - sourceCatalog.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl3"), schema); - sourceCatalog.createTable(TableIdentifier.of(Namespace.of("bar"), "tbl4"), schema); + sourceCatalog.createTable(BAR_TBL3, schema); + sourceCatalog.createTable(BAR_TBL4, schema); } protected 
static void dropTables() { @@ -99,19 +108,27 @@ protected static void dropTables() { namespace -> catalog.listTables(namespace).forEach(catalog::dropTable))); } - protected static Catalog createHadoopCatalog(String warehousePath, String name) { + protected static Map nessieCatalogProperties(boolean isSourceCatalog) { Map properties = new HashMap<>(); - properties.put("warehouse", warehousePath); - return CatalogUtil.loadCatalog( - HadoopCatalog.class.getName(), name, properties, new Configuration()); + Integer nessiePort = Integer.getInteger("quarkus.http.test-port", 19121); + String nessieUri = String.format("http://localhost:%d/api/v1", nessiePort); + properties.put("uri", nessieUri); + properties.put("warehouse", isSourceCatalog ? sourceCatalogWarehouse : targetCatalogWarehouse); + properties.put("ref", "main"); + return properties; } - protected static Catalog createNessieCatalog(String warehousePath, String uri) { + protected static Map hadoopCatalogProperties(boolean isSourceCatalog) { Map properties = new HashMap<>(); - properties.put("warehouse", warehousePath); - properties.put("ref", "main"); - properties.put("uri", uri); - return CatalogUtil.loadCatalog( - NessieCatalog.class.getName(), "nessie", properties, new Configuration()); + properties.put("warehouse", isSourceCatalog ? sourceCatalogWarehouse : targetCatalogWarehouse); + return properties; + } + + protected static Map hiveCatalogProperties( + boolean isSourceCatalog, Map dynamicProperties) { + Map properties = new HashMap<>(); + properties.put("warehouse", isSourceCatalog ? 
sourceCatalogWarehouse : targetCatalogWarehouse); + properties.putAll(dynamicProperties); + return properties; } } diff --git a/api/build.gradle.kts b/api/build.gradle.kts index 377a543..581ab1a 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts @@ -25,6 +25,14 @@ dependencies { implementation(libs.guava) implementation(libs.slf4j) implementation(libs.iceberg.spark.runtime) + implementation(libs.iceberg.dell) + implementation(libs.hadoop.common) { + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("javax.servlet", "servlet-api") + exclude("com.google.code.gson", "gson") + exclude("commons-beanutils") + } compileOnly(libs.immutables.value.annotations) annotationProcessor(libs.immutables.value.processor) @@ -34,13 +42,6 @@ dependencies { testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.engine) testImplementation(libs.assertj) - testImplementation(libs.hadoop.common) { - exclude("org.apache.avro", "avro") - exclude("org.slf4j", "slf4j-log4j12") - exclude("javax.servlet", "servlet-api") - exclude("com.google.code.gson", "gson") - exclude("commons-beanutils") - } testImplementation(libs.logcaptor) testImplementation(project(":iceberg-catalog-migrator-api-test")) diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java similarity index 97% rename from cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java rename to api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java index 0319512..7c7de1b 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtil.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations 
under the License. */ -package org.projectnessie.tools.catalog.migration.cli; +package org.projectnessie.tools.catalog.migration.api; import com.google.common.base.Preconditions; import java.util.Map; @@ -45,7 +45,7 @@ public enum CatalogType { REST } - static Catalog buildCatalog( + public static Catalog buildCatalog( Map catalogProperties, CatalogType catalogType, String catalogName, diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index 4fbad4a..0bc243e 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -19,12 +19,13 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; -import java.util.Objects; import java.util.Set; import java.util.function.Predicate; import java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.iceberg.BaseTable; import org.apache.iceberg.TableOperations; import org.apache.iceberg.catalog.Catalog; @@ -57,9 +58,6 @@ public boolean enableStacktrace() { @Value.Check void check() { - Preconditions.checkArgument( - !targetCatalog().equals(sourceCatalog()), "target catalog is same as source catalog"); - if (!(targetCatalog() instanceof SupportsNamespaces)) { throw new UnsupportedOperationException( String.format( @@ -95,7 +93,7 @@ public Set getMatchingTableIdentifiers(String identifierRegex) sourceCatalog.name())); } LOG.info("Collecting all the namespaces from source catalog..."); - Set namespaces = new HashSet<>(); + Set namespaces = new LinkedHashSet<>(); getAllNamespacesFromSourceCatalog(Namespace.empty(), namespaces); Predicate matchedIdentifiersPredicate; @@ -111,28 +109,26 @@ public Set getMatchingTableIdentifiers(String 
identifierRegex) matchedIdentifiersPredicate = tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches(); } - Set identifiers = - namespaces.stream() - .filter(Objects::nonNull) - .flatMap( - namespace -> - sourceCatalog.listTables(namespace).stream() - .filter(matchedIdentifiersPredicate)) - .collect(Collectors.toSet()); - - // add the tables from default namespace - try { - List fromDefaultNamespace = - sourceCatalog.listTables(Namespace.empty()).stream() - .filter(matchedIdentifiersPredicate) - .collect(Collectors.toList()); - identifiers.addAll(fromDefaultNamespace); - } catch (Exception exception) { - // some catalogs don't support default namespace. Hence, just log the warning and ignore the - // exception. - LOG.warn("Failed to identify tables from default namespace: {}", exception.getMessage()); - } - return identifiers; + return namespaces.stream() + .flatMap( + namespace -> { + try { + return sourceCatalog.listTables(namespace).stream() + .filter(matchedIdentifiersPredicate); + } catch (Exception exception) { + if (namespace.isEmpty()) { + // some catalogs don't support default namespace. + // Hence, just log the warning and ignore the exception. 
+ LOG.warn( + "Failed to identify tables from default namespace: {}", + exception.getMessage()); + return Stream.empty(); + } else { + throw exception; + } + } + }) + .collect(Collectors.toCollection(LinkedHashSet::new)); } /** @@ -208,7 +204,7 @@ protected void createNamespacesIfNotExistOnTargetCatalog(Namespace identifierNam } protected void getAllNamespacesFromSourceCatalog(Namespace namespace, Set visited) { - if (!namespace.isEmpty() && !visited.add(namespace)) { + if (!visited.add(namespace)) { return; } List children = ((SupportsNamespaces) sourceCatalog()).listNamespaces(namespace); diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java index e0d5c5f..89cc037 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java @@ -15,12 +15,13 @@ */ package org.projectnessie.tools.catalog.migration.api; -import java.nio.file.Path; import java.util.Collections; +import java.util.Map; import java.util.Set; import java.util.stream.IntStream; import nl.altindag.log.LogCaptor; import nl.altindag.log.model.LogEvent; +import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchTableException; @@ -29,7 +30,6 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; @@ -37,9 +37,27 @@ public abstract class AbstractTestCatalogMigrator extends AbstractTest { - protected static @TempDir Path 
warehouse1; - - protected static @TempDir Path warehouse2; + protected static final Namespace NS_A = Namespace.of("a"); + protected static final Namespace NS_A_B = Namespace.of("a", "b"); + protected static final Namespace NS_A_C = Namespace.of("a", "c"); + protected static final Namespace NS_A_B_C = Namespace.of("a", "b", "c"); + protected static final Namespace NS_A_B_C_D = Namespace.of("a", "b", "c", "d"); + protected static final Namespace NS_A_B_C_D_E = Namespace.of("a", "b", "c", "d", "e"); + + protected static final Namespace NS1 = Namespace.of("ns1"); + protected static final Namespace NS2 = Namespace.of("ns2"); + protected static final Namespace NS3 = Namespace.of("ns3"); + protected static final Namespace NS1_NS2 = Namespace.of("ns1", "ns2"); + protected static final Namespace NS1_NS3 = Namespace.of("ns1", "ns3"); + protected static final Namespace NS1_NS2_NS3 = Namespace.of("ns1", "ns2", "ns3"); + + protected static final TableIdentifier TBL = TableIdentifier.parse("tblz"); + protected static final TableIdentifier NS1_TBL = TableIdentifier.of(NS1, "tblz"); + protected static final TableIdentifier NS2_TBL = TableIdentifier.of(NS2, "tblz"); + protected static final TableIdentifier NS3_TBL = TableIdentifier.of(NS3, "tblz"); + protected static final TableIdentifier NS1_NS2_TBL = TableIdentifier.of(NS1_NS2, "tblz"); + protected static final TableIdentifier NS1_NS3_TBL = TableIdentifier.of(NS1_NS3, "tblz"); + protected static final TableIdentifier NS1_NS2_NS3_TBL = TableIdentifier.of(NS1_NS2_NS3, "tblz"); @BeforeEach protected void beforeEach() { @@ -51,6 +69,49 @@ protected void afterEach() { dropTables(); } + protected static void initializeSourceCatalog( + CatalogMigrationUtil.CatalogType catalogType, Map additionalProp) { + initializeCatalog(true, catalogType, additionalProp); + } + + protected static void initializeTargetCatalog( + CatalogMigrationUtil.CatalogType catalogType, Map additionalProp) { + initializeCatalog(false, catalogType, additionalProp); 
+ } + + private static void initializeCatalog( + boolean isSourceCatalog, + CatalogMigrationUtil.CatalogType catalogType, + Map additionalProp) { + Map properties; + switch (catalogType) { + case HADOOP: + properties = hadoopCatalogProperties(isSourceCatalog); + break; + case NESSIE: + properties = nessieCatalogProperties(isSourceCatalog); + break; + case HIVE: + properties = hiveCatalogProperties(isSourceCatalog, additionalProp); + break; + default: + throw new UnsupportedOperationException( + String.format("Unsupported for catalog type: %s", catalogType)); + } + Catalog catalog = + CatalogMigrationUtil.buildCatalog( + properties, + catalogType, + (isSourceCatalog ? "sourceCatalog" : "targetCatalog") + "_" + catalogType, + null, + null); + if (isSourceCatalog) { + sourceCatalog = catalog; + } else { + targetCatalog = catalog; + } + } + @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegister(boolean deleteSourceTables) { @@ -59,34 +120,26 @@ public void testRegister(boolean deleteSourceTables) { CatalogMigrationResult result = registerAllTables(deleteSourceTables); Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), - TableIdentifier.parse("foo.tbl2"), - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4")); + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2, BAR_TBL3, BAR_TBL4); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(targetCatalog.listTables(FOO)) +
.containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { // table should be deleted after migration from source catalog - Assertions.assertThat(sourceCatalog.listTables(Namespace.of("foo"))).isEmpty(); - Assertions.assertThat(sourceCatalog.listTables(Namespace.of("bar"))).isEmpty(); + Assertions.assertThat(sourceCatalog.listTables(FOO)).isEmpty(); + Assertions.assertThat(sourceCatalog.listTables(BAR)).isEmpty(); return; } // tables should be present in source catalog. - Assertions.assertThat(sourceCatalog.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(sourceCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(sourceCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(sourceCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } @ParameterizedTest @@ -97,16 +150,14 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { // using `--identifiers` option CatalogMigrationResult result = catalogMigratorWithDefaultArgs(deleteSourceTables) - .registerTables(Collections.singletonList(TableIdentifier.parse("bar.tbl3"))) + .registerTables(Collections.singletonList(BAR_TBL3)) .result(); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactly(TableIdentifier.parse("bar.tbl3")); + Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(BAR_TBL3); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - 
Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).isEmpty(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactly(TableIdentifier.parse("bar.tbl3")); + Assertions.assertThat(targetCatalog.listTables(FOO)).isEmpty(); + Assertions.assertThat(targetCatalog.listTables(BAR)).containsExactly(BAR_TBL3); // using --identifiers-regex option which matches all the tables starts with "foo." CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); @@ -115,16 +166,13 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { .registerTables(catalogMigrator.getMatchingTableIdentifiers("^foo\\..*")) .result(); Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactly(TableIdentifier.parse("bar.tbl3")); + Assertions.assertThat(targetCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)).containsExactly(BAR_TBL3); } @ParameterizedTest @@ -133,32 +181,30 @@ public void testRegisterError(boolean deleteSourceTables) { validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); // use invalid namespace which leads to NoSuchTableException + TableIdentifier identifier = TableIdentifier.parse("dummy.tbl3"); CatalogMigrationResult result = catalogMigratorWithDefaultArgs(deleteSourceTables) - 
.registerTables(Collections.singletonList(TableIdentifier.parse("dummy.tbl3"))) + .registerTables(Collections.singletonList(identifier)) .result(); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()) - .containsExactly(TableIdentifier.parse("dummy.tbl3")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).containsExactly(identifier); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); // try to register same table twice which leads to AlreadyExistsException result = catalogMigratorWithDefaultArgs(deleteSourceTables) - .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))) + .registerTables(Collections.singletonList(FOO_TBL2)) .result(); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactly(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(FOO_TBL2); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); result = catalogMigratorWithDefaultArgs(deleteSourceTables) - .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))) + .registerTables(Collections.singletonList(FOO_TBL2)) .result(); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()) - .contains(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).contains(FOO_TBL2); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); } @@ -170,35 +216,28 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { // register only foo.tbl2 CatalogMigrationResult result = catalogMigratorWithDefaultArgs(deleteSourceTables) - .registerTables(Collections.singletonList(TableIdentifier.parse("foo.tbl2"))) + 
.registerTables(Collections.singletonList(FOO_TBL2)) .result(); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactly(TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(FOO_TBL2); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { // create a table with the same name in source catalog which got deleted. - sourceCatalog.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + sourceCatalog.createTable(FOO_TBL2, schema); } // register all the tables from source catalog again. So that `foo.tbl2` will fail to register. result = registerAllTables(deleteSourceTables); Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4")); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()) - .contains(TableIdentifier.parse("foo.tbl2")); + .containsExactlyInAnyOrder(FOO_TBL1, BAR_TBL3, BAR_TBL4); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).contains(FOO_TBL2); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(targetCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } @ParameterizedTest @@ -231,10 +270,7 @@ public void 
testRegisterLargeNumberOfTables(boolean deleteSourceTables) { // additionally create 240 tables along with 4 tables created in beforeEach() IntStream.range(0, 240) - .forEach( - val -> - sourceCatalog.createTable( - TableIdentifier.of(Namespace.of("foo"), "tblx" + val), schema)); + .forEach(val -> sourceCatalog.createTable(TableIdentifier.of(FOO, "tblx" + val), schema)); CatalogMigrationResult result; result = registerAllTables(deleteSourceTables); @@ -243,10 +279,9 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) { Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).hasSize(242); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(targetCatalog.listTables(FOO)).hasSize(242); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } @ParameterizedTest @@ -260,17 +295,11 @@ public void testListingTableIdentifiers(boolean deleteSourceTables) { Set matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); Assertions.assertThat(matchingTableIdentifiers) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), - TableIdentifier.parse("foo.tbl2"), - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4")); + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2, BAR_TBL3, BAR_TBL4); // list the tables whose identifier starts with "foo." 
matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers("^foo\\..*"); - Assertions.assertThat(matchingTableIdentifiers) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); + Assertions.assertThat(matchingTableIdentifiers).containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); // test filter that doesn't match any table. matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers("^dev\\..*"); @@ -282,21 +311,20 @@ public void testListingTableIdentifiers(boolean deleteSourceTables) { public void testRegisterWithNewNamespace(boolean deleteSourceTables) { validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); - // create namespace "db1" only in source catalog - sourceCatalog.createTable(TableIdentifier.of(Namespace.of("db1"), "tbl5"), schema); + TableIdentifier tbl5 = TableIdentifier.of(DB1, "tbl5"); + // namespace "db1" exists only in source catalog + sourceCatalog.createTable(tbl5, schema); CatalogMigrationResult result = catalogMigratorWithDefaultArgs(deleteSourceTables) - .registerTables(Collections.singletonList(TableIdentifier.parse("db1.tbl5"))) + .registerTables(Collections.singletonList(tbl5)) .result(); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactly(TableIdentifier.parse("db1.tbl5")); + Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(tbl5); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("db1"))) - .containsExactly(TableIdentifier.parse("db1.tbl5")); + Assertions.assertThat(targetCatalog.listTables(DB1)).containsExactly(tbl5); } @ParameterizedTest @@ -304,6 +332,8 @@ public void testRegisterWithNewNamespace(boolean deleteSourceTables) { public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) { 
validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + TableIdentifier identifier = TableIdentifier.parse("db.dummy_table"); + ImmutableCatalogMigrator migrator = ImmutableCatalogMigrator.builder() .sourceCatalog(sourceCatalog) @@ -313,12 +343,9 @@ public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) .build(); try (LogCaptor logCaptor = LogCaptor.forClass(CatalogMigrator.class)) { CatalogMigrationResult result = - migrator - .registerTables(Collections.singletonList(TableIdentifier.parse("db.dummy_table"))) - .result(); + migrator.registerTables(Collections.singletonList(identifier)).result(); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()) - .containsExactly(TableIdentifier.parse("db.dummy_table")); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).containsExactly(identifier); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); Assertions.assertThat(logCaptor.getLogEvents()).hasSize(1); diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java similarity index 95% rename from cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java rename to api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java index eb251c6..078df2c 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationUtilTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java @@ -13,7 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.projectnessie.tools.catalog.migration.cli; +package org.projectnessie.tools.catalog.migration.api; + +import static org.projectnessie.tools.catalog.migration.api.test.AbstractTest.FOO_TBL1; import java.nio.file.Path; import java.util.Collections; @@ -23,7 +25,6 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.Table; import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.hive.HiveCatalog; import org.apache.iceberg.nessie.NessieCatalog; @@ -109,9 +110,9 @@ public void testBuildHadoopCatalog() { new Schema( Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())) .fields()); - Table table = catalog.createTable(TableIdentifier.parse("foo.tbl1"), schema); + Table table = catalog.createTable(FOO_TBL1, schema); Assertions.assertThat(table.location()).contains(logDir.toAbsolutePath().toString()); - catalog.dropTable(TableIdentifier.parse("foo.tbl1")); + catalog.dropTable(FOO_TBL1); } @Test diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java index 24c9039..8e45fe7 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java @@ -18,7 +18,6 @@ import java.nio.file.Path; import java.util.Collections; import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -39,17 +38,6 @@ public void testInvalidArgs() { Catalog sourceCatalog = new HadoopCatalog(); Catalog targetCatalog = new HadoopCatalog(); - Assertions.assertThatThrownBy( - () -> - 
ImmutableCatalogMigrator.builder() - .sourceCatalog(targetCatalog) // source-catalog is same as target catalog - .targetCatalog(targetCatalog) - .deleteEntriesFromSourceCatalog(true) - .build() - .registerTables(Collections.singletonList(TableIdentifier.parse("foo.abc")))) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("target catalog is same as source catalog"); - Assertions.assertThatThrownBy( () -> ImmutableCatalogMigrator.builder() diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java index 63e71ca..a44bb1a 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java @@ -73,11 +73,7 @@ public void testRegister() { catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers(null)).result(); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToRegisterTableIdentifiers()) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), - TableIdentifier.parse("foo.tbl2"), - TableIdentifier.parse("bar.tbl3"), - TableIdentifier.parse("bar.tbl4")); + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2, BAR_TBL3, BAR_TBL4); } private static Catalog createCustomCatalog(String warehousePath, String name) { diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java index fe95316..f376e91 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java @@ -32,8 +32,8 @@ public class 
HadoopCatalogMigratorTest extends AbstractTestCatalogMigrator { @BeforeAll protected static void setup() { - sourceCatalog = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "sourceCatalog"); - targetCatalog = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "targetCatalog"); + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); createNamespaces(); } @@ -45,23 +45,11 @@ protected static void tearDown() { @Test public void testRegisterWithNewNestedNamespaces() { - List namespaceList = - Arrays.asList( - Namespace.of("ns1"), - Namespace.of("ns2"), - Namespace.of("ns3"), - Namespace.of("ns1", "ns2"), - Namespace.of("ns1", "ns3"), - Namespace.of("ns1", "ns2", "ns3")); + List namespaceList = Arrays.asList(NS1, NS2, NS3, NS1_NS2, NS1_NS3, NS1_NS2_NS3); + List identifiers = - Arrays.asList( - TableIdentifier.parse("tblz"), - TableIdentifier.parse("ns1.tblz"), - TableIdentifier.parse("ns2.tblz"), - TableIdentifier.parse("ns3.tblz"), - TableIdentifier.of(Namespace.of("ns1", "ns2"), "tblz"), - TableIdentifier.of(Namespace.of("ns1", "ns3"), "tblz"), - TableIdentifier.of(Namespace.of("ns1", "ns2", "ns3"), "tblz")); + Arrays.asList(TBL, NS1_TBL, NS2_TBL, NS3_TBL, NS1_NS2_TBL, NS1_NS3_TBL, NS1_NS2_NS3_TBL); + namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::createNamespace); identifiers.forEach(identifier -> sourceCatalog.createTable(identifier, schema)); @@ -85,12 +73,8 @@ public void testRegisterWithNewNestedNamespaces() { // manually register the table from default namespace catalogMigrator = catalogMigratorWithDefaultArgs(false); - result = - catalogMigrator - .registerTables(Collections.singletonList(TableIdentifier.of("tblz"))) - .result(); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactly(TableIdentifier.of("tblz")); + result = 
catalogMigrator.registerTables(Collections.singletonList(TBL)).result(); + Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(TBL); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); @@ -111,13 +95,7 @@ public void testCreateAndListNamespaces() { .build(); List namespaceList = - Arrays.asList( - Namespace.of("a"), - Namespace.of("a", "b"), - Namespace.of("a", "b", "c"), - Namespace.of("a", "b", "c", "d"), - Namespace.of("a", "b", "c", "d", "e"), - Namespace.of("a", "c")); + Arrays.asList(NS_A, NS_A_B, NS_A_B_C, NS_A_B_C_D, NS_A_B_C_D_E, NS_A_C); catalogMigrator.createNamespacesIfNotExistOnTargetCatalog( namespaceList.get(4)); // try creating "a.b.c.d.e" catalogMigrator.createNamespacesIfNotExistOnTargetCatalog( diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java index b8a640f..a8fe17d 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java @@ -16,7 +16,6 @@ package org.projectnessie.tools.catalog.migration.api; import java.util.Collections; -import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; import org.assertj.core.api.Assertions; @@ -31,7 +30,7 @@ public class ITHadoopToHiveCatalogMigrator extends AbstractTestCatalogMigrator { protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - sourceCatalog = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "hadoop"); + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); targetCatalog = 
HiveMetaStoreRunner.hiveCatalog(); createNamespaces(); @@ -45,10 +44,9 @@ protected static void tearDown() throws Exception { @Test public void testRegisterWithNewNestedNamespace() { - Namespace namespace = Namespace.of("a.b.c"); - TableIdentifier tableIdentifier = TableIdentifier.parse("a.b.c.tbl5"); + TableIdentifier tableIdentifier = TableIdentifier.of(NS_A_B_C, "tbl5"); // create namespace "a.b.c" only in source catalog - ((SupportsNamespaces) sourceCatalog).createNamespace(namespace); + ((SupportsNamespaces) sourceCatalog).createNamespace(NS_A_B_C); sourceCatalog.createTable(tableIdentifier, schema); CatalogMigrationResult result = @@ -63,6 +61,6 @@ public void testRegisterWithNewNestedNamespace() { Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); sourceCatalog.dropTable(tableIdentifier); - ((SupportsNamespaces) sourceCatalog).dropNamespace(namespace); + ((SupportsNamespaces) sourceCatalog).dropNamespace(NS_A_B_C); } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java index e795006..a9bd51c 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java @@ -32,16 +32,12 @@ public class ITHadoopToNessieCatalogMigrator extends AbstractTestCatalogMigrator { - protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); - - protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); - @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - sourceCatalog = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "hadoop"); - targetCatalog = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); 
+ initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); createNamespaces(); } @@ -54,23 +50,9 @@ protected static void tearDown() throws Exception { @Test public void testRegisterWithNewNestedNamespaces() { - List namespaceList = - Arrays.asList( - Namespace.of("ns1"), - Namespace.of("ns2"), - Namespace.of("ns3"), - Namespace.of("ns1", "ns2"), - Namespace.of("ns1", "ns3"), - Namespace.of("ns1", "ns2", "ns3")); + List namespaceList = Arrays.asList(NS1, NS2, NS3, NS1_NS2, NS1_NS3, NS1_NS2_NS3); List identifiers = - Arrays.asList( - TableIdentifier.parse("tblz"), // table from default namespace - TableIdentifier.parse("ns1.tblz"), - TableIdentifier.parse("ns2.tblz"), - TableIdentifier.parse("ns3.tblz"), - TableIdentifier.of(Namespace.of("ns1", "ns2"), "tblz"), - TableIdentifier.of(Namespace.of("ns1", "ns3"), "tblz"), - TableIdentifier.of(Namespace.of("ns1", "ns2", "ns3"), "tblz")); + Arrays.asList(TBL, NS1_TBL, NS2_TBL, NS3_TBL, NS1_NS2_TBL, NS1_NS3_TBL, NS1_NS2_NS3_TBL); namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::createNamespace); identifiers.forEach(identifier -> sourceCatalog.createTable(identifier, schema)); @@ -94,12 +76,8 @@ public void testRegisterWithNewNestedNamespaces() { // manually register the table from default namespace catalogMigrator = catalogMigratorWithDefaultArgs(false); - result = - catalogMigrator - .registerTables(Collections.singletonList(TableIdentifier.of("tblz"))) - .result(); - Assertions.assertThat(result.registeredTableIdentifiers()) - .containsExactly(TableIdentifier.of("tblz")); + result = catalogMigrator.registerTables(Collections.singletonList(TBL)).result(); + Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(TBL); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); @@ 
-120,13 +98,7 @@ public void testCreateMissingNamespaces() { .build(); List namespaceList = - Arrays.asList( - Namespace.of("a"), - Namespace.of("a", "b"), - Namespace.of("a", "b", "c"), - Namespace.of("a", "b", "c", "d"), - Namespace.of("a", "b", "c", "d", "e"), - Namespace.of("a", "c")); + Arrays.asList(NS_A, NS_A_B, NS_A_B_C, NS_A_B_C_D, NS_A_B_C_D_E, NS_A_C); catalogMigrator.createNamespacesIfNotExistOnTargetCatalog( namespaceList.get(4)); // try creating "a.b.c.d.e" catalogMigrator.createNamespacesIfNotExistOnTargetCatalog( @@ -163,13 +135,7 @@ public void testListingNamespacesFromNessie() { .build(); List namespaceList = - Arrays.asList( - Namespace.of("a"), - Namespace.of("a", "b"), - Namespace.of("a", "b", "c"), - Namespace.of("a", "b", "c", "d"), - Namespace.of("a", "b", "c", "d", "e"), - Namespace.of("a", "c")); + Arrays.asList(NS_A, NS_A_B, NS_A_B_C, NS_A_B_C_D, NS_A_B_C_D_E, NS_A_C); namespaceList.forEach(namespace -> ((SupportsNamespaces) nessie).createNamespace(namespace)); Set listedNamespaces = new HashSet<>(); diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java index 5ce0391..d3269c7 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java @@ -15,6 +15,7 @@ */ package org.projectnessie.tools.catalog.migration.api; +import java.util.Collections; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; @@ -25,8 +26,11 @@ public class ITHiveToHadoopCatalogMigrator extends AbstractTestCatalogMigrator { protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - sourceCatalog = 
HiveMetaStoreRunner.hiveCatalog(); - targetCatalog = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); + initializeSourceCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); createNamespaces(); } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java index afdd5e6..e5d1718 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java @@ -15,22 +15,19 @@ */ package org.projectnessie.tools.catalog.migration.api; +import java.util.Collections; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToNessieCatalogMigrator extends AbstractTestCatalogMigrator { - protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); - - protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); - @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); sourceCatalog = HiveMetaStoreRunner.hiveCatalog(); - targetCatalog = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); createNamespaces(); } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java index 
d65bd74..3e21e9f 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java @@ -15,6 +15,7 @@ */ package org.projectnessie.tools.catalog.migration.api; +import java.util.Collections; import java.util.Set; import org.apache.iceberg.catalog.TableIdentifier; import org.assertj.core.api.Assertions; @@ -25,15 +26,11 @@ public class ITNessieToHiveCatalogMigrator extends AbstractTestCatalogMigrator { - protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); - - protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); - @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - sourceCatalog = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); targetCatalog = HiveMetaStoreRunner.hiveCatalog(); createNamespaces(); @@ -47,22 +44,20 @@ protected static void tearDown() throws Exception { @Test public void testRegisterWithDefaultNamespace() { - sourceCatalog.createTable(TableIdentifier.of("tblx"), schema); + sourceCatalog.createTable(TBL, schema); CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(false); // should also include table from default namespace Set matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); - Assertions.assertThat(matchingTableIdentifiers).contains(TableIdentifier.parse("tblx")); + Assertions.assertThat(matchingTableIdentifiers).contains(TBL); CatalogMigrationResult result = catalogMigrator.registerTables(matchingTableIdentifiers).result(); // hive will not support default namespace (namespace with level = 0). Hence, register will // fail. 
- Assertions.assertThat(result.registeredTableIdentifiers()) - .doesNotContain(TableIdentifier.parse("tblx")); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()) - .containsExactly(TableIdentifier.parse("tblx")); + Assertions.assertThat(result.registeredTableIdentifiers()).doesNotContain(TBL); + Assertions.assertThat(result.failedToRegisterTableIdentifiers()).containsExactly(TBL); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); } } diff --git a/buildSrc/src/main/kotlin/Java.kt b/buildSrc/src/main/kotlin/Java.kt index 2a88991..3a20257 100644 --- a/buildSrc/src/main/kotlin/Java.kt +++ b/buildSrc/src/main/kotlin/Java.kt @@ -24,7 +24,6 @@ import org.gradle.api.tasks.compile.JavaCompile import org.gradle.api.tasks.javadoc.Javadoc import org.gradle.external.javadoc.CoreJavadocOptions import org.gradle.kotlin.dsl.configure -import org.gradle.kotlin.dsl.maven import org.gradle.kotlin.dsl.repositories import org.gradle.kotlin.dsl.withType diff --git a/buildSrc/src/main/kotlin/Testing.kt b/buildSrc/src/main/kotlin/Testing.kt index 5d90be8..7d520e0 100644 --- a/buildSrc/src/main/kotlin/Testing.kt +++ b/buildSrc/src/main/kotlin/Testing.kt @@ -15,10 +15,7 @@ */ import org.gradle.api.Project -import org.gradle.api.services.BuildService -import org.gradle.api.services.BuildServiceParameters import org.gradle.api.tasks.testing.Test -import org.gradle.api.tasks.testing.logging.TestExceptionFormat import org.gradle.kotlin.dsl.named import org.gradle.kotlin.dsl.provideDelegate import org.gradle.kotlin.dsl.register diff --git a/buildSrc/src/main/kotlin/Utilities.kt b/buildSrc/src/main/kotlin/Utilities.kt index 3504b56..3016e9b 100644 --- a/buildSrc/src/main/kotlin/Utilities.kt +++ b/buildSrc/src/main/kotlin/Utilities.kt @@ -16,32 +16,11 @@ import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar -import com.github.vlsi.jandex.JandexProcessResources -import java.io.File 
-import java.io.FileInputStream -import java.lang.IllegalStateException -import java.util.Properties -import org.gradle.api.JavaVersion import org.gradle.api.Project -import org.gradle.api.artifacts.Dependency -import org.gradle.api.artifacts.ExternalModuleDependency -import org.gradle.api.artifacts.ModuleDependency import org.gradle.api.artifacts.VersionCatalogsExtension -import org.gradle.api.plugins.JavaPluginExtension -import org.gradle.api.tasks.SourceSetContainer import org.gradle.api.tasks.bundling.Jar -import org.gradle.api.tasks.testing.Test -import org.gradle.jvm.toolchain.JavaLanguageVersion -import org.gradle.jvm.toolchain.JavaToolchainService -import org.gradle.kotlin.dsl.DependencyHandlerScope -import org.gradle.kotlin.dsl.exclude -import org.gradle.kotlin.dsl.extra -import org.gradle.kotlin.dsl.findByType import org.gradle.kotlin.dsl.getByType -import org.gradle.kotlin.dsl.module import org.gradle.kotlin.dsl.named -import org.gradle.kotlin.dsl.project -import org.gradle.kotlin.dsl.provideDelegate import org.gradle.kotlin.dsl.withType fun Project.libsRequiredVersion(name: String): String { diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index 1c6c789..05c09c5 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -34,7 +34,6 @@ dependencies { runtimeOnly(libs.logback.classic) implementation(libs.picocli) implementation(libs.iceberg.spark.runtime) - implementation(libs.iceberg.dell) implementation(libs.hadoop.aws) { exclude("com.amazonaws", "aws-java-sdk-bundle") } runtimeOnly(libs.ecs.bundle) runtimeOnly(libs.mysql.driver) @@ -93,7 +92,7 @@ dependencies { "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" ) // this junit4 dependency is needed for above Iceberg's TestHiveMetastore - testRuntimeOnly("junit:junit:4.12") + testRuntimeOnly("junit:junit:4.13.2") testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { // these are taken from iceberg repo configurations diff --git 
a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java index 9ed9fb0..24a5ed4 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java @@ -21,6 +21,7 @@ import java.nio.file.Paths; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.Set; import java.util.stream.Collectors; import org.apache.iceberg.catalog.TableIdentifier; @@ -75,14 +76,16 @@ protected Set processIdentifiersInput() { .map(String::trim) .filter(string -> !string.isEmpty()) .map(TableIdentifier::parse) - .collect(Collectors.toSet()); + .collect(Collectors.toCollection(LinkedHashSet::new)); } catch (IOException e) { throw new UncheckedIOException( String.format("Failed to read the file: %s", identifiersFromFile), e); } } else if (!identifiers.isEmpty()) { tableIdentifiers = - identifiers.stream().map(TableIdentifier::parse).collect(Collectors.toSet()); + identifiers.stream() + .map(TableIdentifier::parse) + .collect(Collectors.toCollection(LinkedHashSet::new)); } else { tableIdentifiers = Collections.emptySet(); } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java index 70259ba..1a8512f 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java @@ -16,6 +16,7 @@ package org.projectnessie.tools.catalog.migration.cli; import org.apache.iceberg.catalog.Catalog; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; import 
org.projectnessie.tools.catalog.migration.api.ImmutableCatalogMigrator; import org.slf4j.Logger; diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java index eb2b13a..4c4fc5f 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java @@ -18,6 +18,7 @@ import java.util.HashMap; import java.util.Map; import org.apache.iceberg.catalog.Catalog; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; import picocli.CommandLine; public class SourceCatalogOptions { @@ -51,7 +52,7 @@ public class SourceCatalogOptions { + "using an Iceberg FileIO.", "Example: --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY" }) - private Map hadoopConf = new HashMap<>(); + private final Map hadoopConf = new HashMap<>(); @CommandLine.Option( names = {"--source-custom-catalog-impl"}, diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java index ca4d74e..24e6378 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java @@ -18,6 +18,7 @@ import java.util.HashMap; import java.util.Map; import org.apache.iceberg.catalog.Catalog; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; import picocli.CommandLine; public class TargetCatalogOptions { @@ -51,7 +52,7 @@ public class TargetCatalogOptions { + "using an Iceberg FileIO.", "Example: --target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY" }) - private Map hadoopConf = new HashMap<>(); 
+ private final Map hadoopConf = new HashMap<>(); @CommandLine.Option( names = {"--target-custom-catalog-impl"}, diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java index a0cac54..a4af2b7 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java @@ -18,27 +18,20 @@ import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.DRY_RUN_FILE; import static org.projectnessie.tools.catalog.migration.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE; +import com.google.common.base.Joiner; import com.google.common.collect.Lists; -import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Map; import nl.altindag.log.LogCaptor; import nl.altindag.log.model.LogEvent; -import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog; -import org.apache.iceberg.aws.glue.GlueCatalog; import org.apache.iceberg.catalog.Catalog; -import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.dell.ecs.EcsCatalog; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.HadoopCatalog; -import org.apache.iceberg.hive.HiveCatalog; -import org.apache.iceberg.jdbc.JdbcCatalog; -import org.apache.iceberg.nessie.NessieCatalog; -import org.apache.iceberg.rest.RESTCatalog; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assumptions; @@ -48,17 +41,13 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; import 
org.junit.jupiter.params.provider.ValueSource; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; import org.projectnessie.tools.catalog.migration.api.CatalogMigrator; import org.projectnessie.tools.catalog.migration.api.test.AbstractTest; public abstract class AbstractCLIMigrationTest extends AbstractTest { - protected static @TempDir Path warehouse1; - - protected static @TempDir Path warehouse2; - protected static @TempDir Path outputDir; - protected static Path dryRunFile; protected static Path failedIdentifiersFile; @@ -68,6 +57,54 @@ public abstract class AbstractCLIMigrationTest extends AbstractTest { protected static String sourceCatalogType; protected static String targetCatalogType; + protected static void initializeSourceCatalog( + CatalogMigrationUtil.CatalogType catalogType, Map additionalProp) { + initializeCatalog(true, catalogType, additionalProp); + } + + protected static void initializeTargetCatalog( + CatalogMigrationUtil.CatalogType catalogType, Map additionalProp) { + initializeCatalog(false, catalogType, additionalProp); + } + + private static void initializeCatalog( + boolean isSourceCatalog, + CatalogMigrationUtil.CatalogType catalogType, + Map additionalProp) { + Map properties; + switch (catalogType) { + case HADOOP: + properties = hadoopCatalogProperties(isSourceCatalog); + break; + case NESSIE: + properties = nessieCatalogProperties(isSourceCatalog); + break; + case HIVE: + properties = hiveCatalogProperties(isSourceCatalog, additionalProp); + break; + default: + throw new UnsupportedOperationException( + String.format("Unsupported for catalog type: %s", catalogType)); + } + Catalog catalog = + CatalogMigrationUtil.buildCatalog( + properties, + catalogType, + isSourceCatalog ? 
"sourceCatalog" : "targetCatalog" + "_" + catalogType, + null, + null); + String propertiesStr = Joiner.on(",").withKeyValueSeparator("=").join(properties); + if (isSourceCatalog) { + sourceCatalog = catalog; + sourceCatalogProperties = propertiesStr; + sourceCatalogType = catalogType.name(); + } else { + targetCatalog = catalog; + targetCatalogProperties = propertiesStr; + targetCatalogType = catalogType.name(); + } + } + @BeforeAll protected static void initFilesPaths() { dryRunFile = outputDir.resolve(DRY_RUN_FILE); @@ -80,16 +117,14 @@ protected void beforeEach() { } @AfterEach - protected void afterEach() throws IOException { + protected void afterEach() { // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 // create table will call refresh internally. - sourceCatalog.createTable(TableIdentifier.of(Namespace.of("bar"), "tblx"), schema).refresh(); - targetCatalog.createTable(TableIdentifier.of(Namespace.of("bar"), "tblx"), schema).refresh(); + sourceCatalog.createTable(TableIdentifier.of(BAR, "tblx"), schema).refresh(); + targetCatalog.createTable(TableIdentifier.of(BAR, "tblx"), schema).refresh(); dropTables(); - Files.deleteIfExists(dryRunFile); - Files.deleteIfExists(failedIdentifiersFile); } @ParameterizedTest @@ -97,53 +132,48 @@ protected void afterEach() throws IOException { public void testRegister(boolean deleteSourceTables) throws Exception { validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); - RunCLI run = runCLI(deleteSourceTables, registerAllTablesArgs()); + String operation = deleteSourceTables ? "migration" : "registration"; + String operated = deleteSourceTables ? "migrated" : "registered"; + + // register or migrate all the tables + RunCLI run = runCLI(deleteSourceTables, defaultArgs()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) .contains( "User has not specified the table identifiers. 
" - + "Will be selecting all the tables from all the namespaces from the source catalog."); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 4 tables for %s.", operation)); - operation = deleteSourceTables ? "migrated" : "registered"; - Assertions.assertThat(run.getOut()) + + "Will be selecting all the tables from all the namespaces from the source catalog.") + .contains(String.format("Identified 4 tables for %s.", operation)) .contains( String.format( "Summary: %nSuccessfully %s 4 tables from %s catalog to %s catalog.", - operation, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - targetCatalog.loadTable(TableIdentifier.parse("foo.tbl1")).refresh(); + targetCatalog.loadTable(FOO_TBL1).refresh(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(targetCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - sourceCatalog.tableExists(TableIdentifier.parse("foo.tbl1")); + sourceCatalog.tableExists(FOO_TBL1); if (deleteSourceTables && !(sourceCatalog 
instanceof HadoopCatalog)) { // table should be deleted after migration from source catalog - Assertions.assertThat(sourceCatalog.listTables(Namespace.of("foo"))).isEmpty(); - Assertions.assertThat(sourceCatalog.listTables(Namespace.of("bar"))).isEmpty(); + Assertions.assertThat(sourceCatalog.listTables(FOO)).isEmpty(); + Assertions.assertThat(sourceCatalog.listTables(BAR)).isEmpty(); return; } // tables should be present in source catalog. - Assertions.assertThat(sourceCatalog.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(sourceCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(sourceCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(sourceCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } @ParameterizedTest @@ -151,285 +181,164 @@ public void testRegister(boolean deleteSourceTables) throws Exception { public void testRegisterSelectedTables(boolean deleteSourceTables) throws Exception { validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + String operation = deleteSourceTables ? "migration" : "registration"; + String operated = deleteSourceTables ? 
"migrated" : "registered"; + // using `--identifiers` option - RunCLI run = - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--identifiers", - "bar.tbl3", - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); + List argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers", "bar.tbl3")); + RunCLI run = runCLI(deleteSourceTables, argsList); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) .doesNotContain( "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 1 tables for %s.", operation)); - operation = deleteSourceTables ? 
"migrated" : "registered"; - Assertions.assertThat(run.getOut()) + + "Selecting all the tables from all the namespaces from the source catalog.") + .contains(String.format("Identified 1 tables for %s.", operation)) .contains( String.format( "Summary: %nSuccessfully %s 1 tables from %s catalog to %s catalog.", - operation, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nSuccessfully %s these tables:%n[bar.tbl3]", operation)); + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n[bar.tbl3]", operated)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + targetCatalog.loadTable(BAR_TBL3).refresh(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).isEmpty(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactly(TableIdentifier.parse("bar.tbl3")); + Assertions.assertThat(targetCatalog.listTables(FOO)).isEmpty(); + Assertions.assertThat(targetCatalog.listTables(BAR)).containsExactly(BAR_TBL3); Path identifierFile = outputDir.resolve("ids.txt"); // using `--identifiers-from-file` option Files.write(identifierFile, Collections.singletonList("bar.tbl4")); - run = - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--identifiers-from-file", - identifierFile.toAbsolutePath().toString(), - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); - Files.delete(identifierFile); + argsList = defaultArgs(); + argsList.addAll( + Arrays.asList("--identifiers-from-file", identifierFile.toAbsolutePath().toString())); + run = 
runCLI(deleteSourceTables, argsList); Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) .doesNotContain( "User has not specified the table identifiers. " - + "Selecting all the tables from all the namespaces from the source catalog."); - operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 1 tables for %s.", operation)); - operation = deleteSourceTables ? "migrated" : "registered"; - Assertions.assertThat(run.getOut()) + + "Selecting all the tables from all the namespaces from the source catalog.") + .contains(String.format("Identified 1 tables for %s.", operation)) .contains( String.format( "Summary: %nSuccessfully %s 1 tables from %s catalog to %s catalog.", - operation, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + targetCatalog.loadTable(BAR_TBL3).refresh(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).isEmpty(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl4"), TableIdentifier.parse("bar.tbl3")); + Assertions.assertThat(targetCatalog.listTables(FOO)).isEmpty(); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL4, BAR_TBL3); + Files.delete(identifierFile); // using `--identifiers-regex` option which matches all the tables starts with "foo." 
- run = - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--identifiers-regex", - "^foo\\..*", - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); + argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers-regex", "^foo\\..*")); + run = runCLI(deleteSourceTables, argsList); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); Assertions.assertThat(run.getOut()) .contains( "User has not specified the table identifiers. Will be selecting all the tables from all the namespaces " - + "from the source catalog which matches the regex pattern:^foo\\..*"); - operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 2 tables for %s.", operation)); - operation = deleteSourceTables ? 
"migrated" : "registered"; - Assertions.assertThat(run.getOut()) + + "from the source catalog which matches the regex pattern:^foo\\..*") + .contains(String.format("Identified 2 tables for %s.", operation)) .contains( String.format( "Summary: %nSuccessfully %s 2 tables from %s catalog to %s catalog.", - operation, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); - - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + targetCatalog.loadTable(BAR_TBL3).refresh(); + + Assertions.assertThat(targetCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterError(boolean deleteSourceTables) throws Exception { validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + String operation = deleteSourceTables ? "migration" : "registration"; + String operate = deleteSourceTables ? 
"migrate" : "register"; + // use invalid namespace which leads to NoSuchTableException - RunCLI run = - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--identifiers", - "dummy.tbl3", - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); + List argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers", "dummy.tbl3")); + RunCLI run = runCLI(deleteSourceTables, argsList); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 1 tables for %s.", operation)); - operation = deleteSourceTables ? "migrate" : "register"; Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)) .contains( String.format( "Summary: %nFailed to %s 1 tables from %s catalog to %s catalog." 
+ " Please check the `catalog_migration.log`", - operation, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nFailed to %s these tables:%n[dummy.tbl3]", operation)); + operate, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nFailed to %s these tables:%n[dummy.tbl3]", operate)); // try to register same table twice which leads to AlreadyExistsException - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--identifiers", - "foo.tbl2", - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); - run = - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--identifiers", - "foo.tbl2", - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); + argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers", "foo.tbl2")); + runCLI(deleteSourceTables, argsList); + run = RunCLI.run(argsList.toArray(new String[0])); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); - operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 1 tables for %s.", operation)); - operation = deleteSourceTables ? "migrate" : "register"; Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)) .contains( String.format( "Summary: %nFailed to %s 1 tables from %s catalog to %s catalog." 
+ " Please check the `catalog_migration.log`", - operation, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", operation)); + operate, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", operate)); } @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Exception { validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + String operation = deleteSourceTables ? "migration" : "registration"; + String operated = deleteSourceTables ? "migrated" : "registered"; + String operate = deleteSourceTables ? "migrate" : "register"; + // register only foo.tbl2 - RunCLI run = - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--identifiers", - "foo.tbl2", - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); + List argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers", "foo.tbl2")); + RunCLI run = runCLI(deleteSourceTables, argsList); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 1 tables for %s.", operation)); - operation = deleteSourceTables ? 
"migrated" : "registered"; Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)) .contains( String.format( "Summary: %nSuccessfully %s 1 tables from %s catalog to %s catalog.", - operation, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nSuccessfully %s these tables:%n[foo.tbl2]", operation)); + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n[foo.tbl2]", operated)); if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { // create a table with the same name in source catalog which got deleted. - sourceCatalog.createTable(TableIdentifier.of(Namespace.of("foo"), "tbl2"), schema); + sourceCatalog.createTable(FOO_TBL2, schema); } // register all the tables from source catalog again. So that registering `foo.tbl2` will fail. - run = - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); + run = runCLI(deleteSourceTables, defaultArgs()); + Assertions.assertThat(run.getExitCode()).isEqualTo(1); - operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 4 tables for %s.", operation)); - operation = deleteSourceTables ? "migrated" : "registered"; - String ops = deleteSourceTables ? "migrate" : "register"; Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 4 tables for %s.", operation)) .contains( String.format( "Summary: %n" @@ -439,45 +348,31 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep + "Failed identifiers are written into `failed_identifiers.txt`. 
" + "Retry with that file using `--identifiers-from-file` option " + "if the failure is because of network/connection timeouts.", - operation, + operated, sourceCatalogType, targetCatalogType, - ops, + operate, sourceCatalogType, - targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Failed to %s these tables:%n[foo.tbl2]", ops)); + targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)) + .contains(String.format("Failed to %s these tables:%n[foo.tbl2]", operate)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + targetCatalog.loadTable(BAR_TBL3).refresh(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("foo.tbl1"), TableIdentifier.parse("foo.tbl2")); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(targetCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); // retry the failed tables using `--identifiers-from-file` - run = - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--identifiers-from-file", - failedIdentifiersFile.toAbsolutePath().toString(), - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); + argsList = defaultArgs(); + 
argsList.addAll( + Arrays.asList( + "--identifiers-from-file", failedIdentifiersFile.toAbsolutePath().toString())); + run = runCLI(deleteSourceTables, argsList); + Assertions.assertThat(run.getOut()) .contains( String.format( @@ -487,9 +382,8 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep + "Failed identifiers are written into `failed_identifiers.txt`. " + "Retry with that file using `--identifiers-from-file` option " + "if the failure is because of network/connection timeouts.", - ops, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", ops)); + operate, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", operate)); Assertions.assertThat(Files.exists(failedIdentifiersFile)).isTrue(); Assertions.assertThat(Files.readAllLines(failedIdentifiersFile)).containsExactly("foo.tbl2"); } @@ -497,24 +391,26 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { - // use source catalog as targetCatalog which has no tables. Assumptions.assumeFalse( deleteSourceTables && targetCatalog instanceof HadoopCatalog, "deleting source tables is unsupported for HadoopCatalog"); + + // use source catalog as targetCatalog which has no tables. 
RunCLI run = runCLI( deleteSourceTables, - "--source-catalog-type", - targetCatalogType, - "--source-catalog-properties", - targetCatalogProperties, - "--target-catalog-type", - sourceCatalogType, - "--target-catalog-properties", - sourceCatalogProperties, - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); + Lists.newArrayList( + "--source-catalog-type", + targetCatalogType, + "--source-catalog-properties", + targetCatalogProperties, + "--target-catalog-type", + sourceCatalogType, + "--target-catalog-properties", + sourceCatalogProperties, + "--output-dir", + outputDir.toAbsolutePath().toString(), + "--disable-safety-prompts")); Assertions.assertThat(run.getExitCode()).isEqualTo(2); String operation = deleteSourceTables ? "migration" : "registration"; @@ -529,38 +425,25 @@ public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { @ValueSource(booleans = {true, false}) public void testDryRun(boolean deleteSourceTables) throws Exception { validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); - RunCLI run = - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--dry-run", - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); + + List argsList = defaultArgs(); + argsList.add("--dry-run"); + RunCLI run = runCLI(deleteSourceTables, argsList); Assertions.assertThat(run.getExitCode()).isEqualTo(0); + String operation = deleteSourceTables ? "migration" : "registration"; // should not prompt for dry run Assertions.assertThat(run.getOut()) .doesNotContain( - "Are you certain that you wish to proceed, after reading the above warnings? (yes/no):"); - Assertions.assertThat(run.getOut()).contains("Dry run is completed."); - String operation = deleteSourceTables ? 
"migration" : "registration"; - Assertions.assertThat(run.getOut()) + "Are you certain that you wish to proceed, after reading the above warnings? (yes/no):") + .contains("Dry run is completed.") .contains( String.format( "Summary: %n" + "Identified 4 tables for %s by dry-run. " + "These identifiers are also written into dry_run_identifiers.txt. " + "This file can be used with `--identifiers-from-file` option for an actual run.", - operation)); - Assertions.assertThat(run.getOut()) + operation)) .contains( String.format("Details: %nIdentified these tables for %s by dry-run:%n", operation)); Assertions.assertThat(Files.exists(dryRunFile)).isTrue(); @@ -574,22 +457,10 @@ public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) throws Exception { validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); try (LogCaptor logCaptor = LogCaptor.forClass(CatalogMigrator.class)) { - runCLI( - deleteSourceTables, - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - "--target-catalog-properties", - targetCatalogProperties, - "--identifiers", - "db.dummy_table", - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts", - "--stacktrace=" + enableStacktrace); + List argsList = defaultArgs(); + argsList.addAll( + Arrays.asList("--identifiers", "db.dummy_table", "--stacktrace=" + enableStacktrace)); + runCLI(deleteSourceTables, argsList); Assertions.assertThat(logCaptor.getLogEvents()).hasSize(1); LogEvent logEvent = logCaptor.getLogEvents().get(0); @@ -609,25 +480,23 @@ public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) } } - protected static String[] registerAllTablesArgs() { - ArrayList args = - Lists.newArrayList( - "--source-catalog-type", - sourceCatalogType, - "--source-catalog-properties", - sourceCatalogProperties, - "--target-catalog-type", - targetCatalogType, - 
"--target-catalog-properties", - targetCatalogProperties, - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts"); - return args.toArray(new String[0]); + protected static List defaultArgs() { + return Lists.newArrayList( + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--output-dir", + outputDir.toAbsolutePath().toString(), + "--disable-safety-prompts"); } - protected static RunCLI runCLI(boolean deleteSourceTables, String... args) throws Exception { - List argsList = Lists.newArrayList(args); + protected static RunCLI runCLI(boolean deleteSourceTables, List argsList) + throws Exception { if (!deleteSourceTables) { argsList.add(0, "register"); } else { @@ -635,26 +504,4 @@ protected static RunCLI runCLI(boolean deleteSourceTables, String... args) throw } return RunCLI.run(argsList.toArray(new String[0])); } - - protected static String catalogType(Catalog catalog) { - if (catalog instanceof DynamoDbCatalog) { - return CatalogMigrationUtil.CatalogType.DYNAMODB.name(); - } else if (catalog instanceof EcsCatalog) { - return CatalogMigrationUtil.CatalogType.ECS.name(); - } else if (catalog instanceof GlueCatalog) { - return CatalogMigrationUtil.CatalogType.GLUE.name(); - } else if (catalog instanceof HadoopCatalog) { - return CatalogMigrationUtil.CatalogType.HADOOP.name(); - } else if (catalog instanceof HiveCatalog) { - return CatalogMigrationUtil.CatalogType.HIVE.name(); - } else if (catalog instanceof JdbcCatalog) { - return CatalogMigrationUtil.CatalogType.JDBC.name(); - } else if (catalog instanceof NessieCatalog) { - return CatalogMigrationUtil.CatalogType.NESSIE.name(); - } else if (catalog instanceof RESTCatalog) { - return CatalogMigrationUtil.CatalogType.REST.name(); - } else { - return CatalogMigrationUtil.CatalogType.CUSTOM.name(); - } - } } diff --git 
a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java index f5a8d95..e8d3760 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java @@ -15,21 +15,17 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import java.util.Collections; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; public class HadoopCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() { - sourceCatalogProperties = "warehouse=" + warehouse1.toAbsolutePath() + ",type=hadoop"; - targetCatalogProperties = "warehouse=" + warehouse2.toAbsolutePath() + ",type=hadoop"; - - sourceCatalog = createHadoopCatalog(warehouse1.toAbsolutePath().toString(), "sourceCatalog"); - targetCatalog = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "targetCatalog"); - - sourceCatalogType = catalogType(sourceCatalog); - targetCatalogType = catalogType(targetCatalog); + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); createNamespaces(); } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java index 2b89da3..47a3653 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java @@ -15,8 +15,10 @@ */ package 
org.projectnessie.tools.catalog.migration.cli; +import java.util.Collections; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHadoopToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @@ -24,18 +26,12 @@ public class ITHadoopToHiveCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - sourceCatalogProperties = "warehouse=" + warehouse1.toAbsolutePath() + ",type=hadoop"; - targetCatalogProperties = - "warehouse=" - + warehouse2.toAbsolutePath() - + ",uri=" - + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - sourceCatalog = createHadoopCatalog(warehouse1.toString(), "sourceCatalog"); - targetCatalog = HiveMetaStoreRunner.hiveCatalog(); - - sourceCatalogType = catalogType(sourceCatalog); - targetCatalogType = catalogType(targetCatalog); + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + initializeTargetCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); createNamespaces(); } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java index 76901a8..1542d2f 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java @@ -15,8 +15,10 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import java.util.Collections; import org.junit.jupiter.api.AfterAll; import 
org.junit.jupiter.api.BeforeAll; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToHadoopCLIMigrationTest extends AbstractCLIMigrationTest { @@ -24,18 +26,12 @@ public class ITHiveToHadoopCLIMigrationTest extends AbstractCLIMigrationTest { @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - sourceCatalogProperties = - "warehouse=" - + warehouse1.toAbsolutePath() - + ",uri=" - + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - targetCatalogProperties = "warehouse=" + warehouse2.toAbsolutePath() + ",type=hadoop"; - sourceCatalog = HiveMetaStoreRunner.hiveCatalog(); - targetCatalog = createHadoopCatalog(warehouse2.toAbsolutePath().toString(), "hadoop"); - - sourceCatalogType = catalogType(sourceCatalog); - targetCatalogType = catalogType(targetCatalog); + initializeSourceCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); createNamespaces(); } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java index 2b79b78..ff27cd7 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java @@ -15,38 +15,28 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import java.util.Collections; import java.util.stream.IntStream; -import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.assertj.core.api.Assertions; import 
org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; import org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITHiveToNessieCLIMigrationTest extends AbstractCLIMigrationTest { - protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); - - protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); - @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - sourceCatalogProperties = - "warehouse=" - + warehouse1.toAbsolutePath() - + ",uri=" - + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - targetCatalogProperties = - "uri=" + nessieUri + ",ref=main,warehouse=" + warehouse2.toAbsolutePath(); - - sourceCatalog = HiveMetaStoreRunner.hiveCatalog(); - targetCatalog = createNessieCatalog(warehouse2.toAbsolutePath().toString(), nessieUri); - sourceCatalogType = catalogType(sourceCatalog); - targetCatalogType = catalogType(targetCatalog); + initializeSourceCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); createNamespaces(); } @@ -62,42 +52,35 @@ protected static void tearDown() throws Exception { @ValueSource(booleans = {true, false}) public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + String operation = deleteSourceTables ? "migration" : "registration"; + String operated = deleteSourceTables ? 
"migrated" : "registered"; + // additionally create 240 tables along with 4 tables created in beforeEach() IntStream.range(0, 240) - .forEach( - val -> - sourceCatalog.createTable( - TableIdentifier.of(Namespace.of("foo"), "tblx" + val), schema)); + .forEach(val -> sourceCatalog.createTable(TableIdentifier.of(FOO, "tblx" + val), schema)); - RunCLI run = runCLI(deleteSourceTables, registerAllTablesArgs()); + // register or migrate all the tables + RunCLI run = runCLI(deleteSourceTables, defaultArgs()); Assertions.assertThat(run.getExitCode()).isEqualTo(0); - String operation = deleteSourceTables ? "migration" : "registration"; - Assertions.assertThat(run.getOut()) - .contains(String.format("Identified 244 tables for %s.", operation)); - operation = deleteSourceTables ? "migrated" : "registered"; Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 244 tables for %s.", operation)) .contains( String.format( "Summary: %nSuccessfully %s 244 tables from %s catalog to" + " %s catalog.", - operation, sourceCatalogType, targetCatalogType)); - Assertions.assertThat(run.getOut()) - .contains(String.format("Details: %nSuccessfully %s these tables:%n", operation)); - - operation = deleteSourceTables ? 
"migration" : "registration"; - // validate intermediate output - Assertions.assertThat(run.getOut()) - .contains(String.format("Attempted %s for 100 tables out of 244 tables.", operation)); - Assertions.assertThat(run.getOut()) + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)) + // validate intermediate output + .contains(String.format("Attempted %s for 100 tables out of 244 tables.", operation)) .contains(String.format("Attempted %s for 200 tables out of 244 tables.", operation)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 - targetCatalog.loadTable(TableIdentifier.parse("bar.tbl3")).refresh(); + targetCatalog.loadTable(BAR_TBL3).refresh(); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("foo"))).hasSize(242); - Assertions.assertThat(targetCatalog.listTables(Namespace.of("bar"))) - .containsExactlyInAnyOrder( - TableIdentifier.parse("bar.tbl3"), TableIdentifier.parse("bar.tbl4")); + Assertions.assertThat(targetCatalog.listTables(FOO)).hasSize(242); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java index adf19dd..7fdc8a9 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java @@ -15,31 +15,23 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import java.util.Collections; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; import 
org.projectnessie.tools.catalog.migration.api.test.HiveMetaStoreRunner; public class ITNessieToHiveCLIMigrationTest extends AbstractCLIMigrationTest { - protected static final int NESSIE_PORT = Integer.getInteger("quarkus.http.test-port", 19121); - - protected static String nessieUri = String.format("http://localhost:%d/api/v1", NESSIE_PORT); - @BeforeAll protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - sourceCatalogProperties = "uri=" + nessieUri + ",ref=main,warehouse=" + warehouse1; - targetCatalogProperties = - "warehouse=" - + warehouse2 - + ",uri=" - + HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"); - - sourceCatalog = createNessieCatalog(warehouse1.toAbsolutePath().toString(), nessieUri); - targetCatalog = HiveMetaStoreRunner.hiveCatalog(); - sourceCatalogType = catalogType(sourceCatalog); - targetCatalogType = catalogType(targetCatalog); + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); + initializeTargetCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); createNamespaces(); } From 678d5ca057e3aa5bde02fe6a618148af5f3c3cf6 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Wed, 12 Apr 2023 19:46:01 +0530 Subject: [PATCH 28/31] Address comments from April 12 --- README.md | 149 +++++------------- .../migration/api/test/AbstractTest.java | 46 ++++-- api/build.gradle.kts | 2 +- .../migration/api/CatalogMigrationUtil.java | 13 ++ .../migration/api/CatalogMigrator.java | 3 +- .../api/AbstractTestCatalogMigrator.java | 49 +++--- .../api/CatalogMigrationUtilTest.java | 66 +++++--- .../api/CatalogMigratorParamsTest.java | 30 ++-- .../api/CustomCatalogMigratorTest.java | 8 - .../api/HadoopCatalogMigratorTest.java | 8 - .../api/ITHadoopToHiveCatalogMigrator.java | 7 +- .../api/ITHadoopToNessieCatalogMigrator.java | 2 - 
.../api/ITHiveToHadoopCatalogMigrator.java | 2 - .../api/ITHiveToNessieCatalogMigrator.java | 7 +- .../api/ITNessieToHiveCatalogMigrator.java | 7 +- build.gradle.kts | 17 -- .../migration/cli/BaseRegisterCommand.java | 117 ++++++++------ .../migration/cli/CatalogMigrationCLI.java | 4 +- .../migration/cli/IdentifierOptions.java | 49 +++--- .../catalog/migration/cli/MigrateCommand.java | 2 +- .../cli/AbstractCLIMigrationTest.java | 66 +++----- .../catalog/migration/cli/CLIOptionsTest.java | 2 +- .../migration/cli/HadoopCLIMigrationTest.java | 8 - .../cli/ITHadoopToHiveCLIMigrationTest.java | 2 - .../cli/ITHadoopToNessieCLIMigrationTest.java | 96 +++++++++++ .../cli/ITHiveToHadoopCLIMigrationTest.java | 2 - .../cli/ITHiveToNessieCLIMigrationTest.java | 6 +- .../cli/ITNessieToHiveCLIMigrationTest.java | 2 - .../migration/cli/ProcessIdentifiersTest.java | 70 +++++--- gradle/baselibs.versions.toml | 6 +- gradle/libs.versions.toml | 14 +- gradle/wrapper/gradle-wrapper.properties | 4 +- 32 files changed, 468 insertions(+), 398 deletions(-) delete mode 100644 build.gradle.kts create mode 100644 cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToNessieCLIMigrationTest.java diff --git a/README.md b/README.md index 998a108..4333fc8 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,22 @@ There are various reasons why users may want to move their Iceberg tables to a d * They just heard about the awesome Arctic catalog (or Nessie) and want to move their existing iceberg tables to Dremio Arctic. * They had an on-premise Hive catalog, but want to move tables to a cloud-based catalog as part of their cloud migration strategy. -Previously, before the Iceberg `1.1.0` release, the only way to migrate tables was by copying the data using the command `insert into targetCatalog.db.tableName as select * from sourceCatalog.db.tableName`. -After the iceberg `1.1.0` release, all Iceberg Catalogs supports register table with the `catalog#registerTable()` API. 
-However, custom code is needed to migrate all the tables in bulk. -**Hence, we introduce a CLI tool to migrate Iceberg tables in bulk from one Iceberg Catalog to another without a data copy.** +The CLI tool should support two commands +* migrate - To bulk migrate the iceberg tables from source catalog to target catalog without data copy. +Table entries from source catalog will be deleted after the successful migration to the target catalog. +* register - To bulk register the iceberg tables from source catalog to target catalog without data copy. + +> :warning: `register` command just registers the table. +Which means the table will be present in both the catalogs after registering. +**Operating same table from more than one catalog can lead to missing updates, loss of data and table corruption. +So, it is recommended to use the 'migrate' command in CLI to automatically delete the table from source catalog after registering +or avoid operating tables from the source catalog after registering if 'migrate' command is not used.** + +> :warning: **It is recommended to use this CLI tool when there is no in-progress commits for the tables in the source catalog. +In-progress commits may not make it into the target catalog if used. Which can lead to missing updates, loss of data and table corruption.** # Iceberg-catalog-migrator -Need to have java installed in your machine(JDK11 or later version) to use this CLI tool. +Need to have Java installed in your machine(JDK11 is recommended) to use this CLI tool. Below is the CLI syntax: ``` @@ -27,7 +36,7 @@ Commands: ``` ``` -$ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register -h +$ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate -h Usage: iceberg-catalog-migrator migrate [-hV] [--disable-safety-prompts] [--dry-run] [--stacktrace] [--output-dir=] (--source-catalog-type= --source-catalog-properties=[,...] [--source-catalog-properties=[,...]]... 
@@ -98,16 +107,7 @@ Identifier options: Example: --identifiers-regex ^foo\..* ``` -Note: Options for migrate command is exactly same as register command. - -> :warning: **It is recommended to use this CLI tool when there is no in-progress commits for the tables in the source catalog.** -In-progress commits may not make it into the target catalog if used. - -> :warning: `register` command just registers the table. -Which means the table will be present in both the catalogs after registering. -Operating same table from more than one catalog can lead to missing updates, loss of data and table corruption. -So, it is recommended to use the 'migrate' command in CLI to automatically delete the table from source catalog after registering -or avoid operating tables from the source catalog after registering if 'migrate' command is not used. +Note: Options for register command is exactly same as migrate command. # Sample Inputs ## Bulk migrating all the tables from Hadoop catalog to Nessie catalog (main branch) @@ -123,8 +123,9 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ ```shell export PAT=xxxxxxx -export SECRETKEY=xxxxxxx -export ACCESSKEY=xxxxxxx +export AWS_ACCESS_KEY_ID=xxxxxxx +export AWS_SECRET_ACCESS_KEY=xxxxxxx +export AWS_S3_ENDPOINT=xxxxxxx ``` ```shell @@ -132,16 +133,16 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar register \ --source-catalog-type HADOOP \ --source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ ---target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +--target-catalog-properties 
uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT ``` ## Migrate selected tables (t1,t2 in namespace foo) from Arctic catalog (main branch) to Hadoop catalog. ```shell export PAT=xxxxxxx -export SECRETKEY=xxxxxxx -export ACCESSKEY=xxxxxxx +export AWS_ACCESS_KEY_ID=xxxxxxx +export AWS_SECRET_ACCESS_KEY=xxxxxxx +export AWS_S3_ENDPOINT=xxxxxxx ``` ```shell @@ -149,7 +150,6 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type NESSIE \ --source-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \ --target-catalog-type HADOOP \ ---target-catalog-properties warehouse=/tmp/warehouse,type=hadoop --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY \ --identifiers foo.t1,foo.t2 ``` @@ -159,8 +159,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type GLUE \ --source-catalog-properties warehouse=s3a://ajantha-test/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT \ ---target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT ``` ## Migrate all tables from HIVE catalog to Arctic catalog (main branch) @@ -169,8 +168,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HIVE \ 
--source-catalog-properties warehouse=s3a://ajantha-test/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT \ ---target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT ``` ## Migrate all tables from DYNAMODB catalog to Arctic catalog (main branch) @@ -179,8 +177,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type DYNAMODB \ --source-catalog-properties warehouse=s3a://ajantha-test/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT \ ---target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT ``` ## Migrate all tables from JDBC catalog to Arctic catalog (main branch) @@ -189,8 +186,7 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type JDBC \ --source-catalog-properties warehouse=/tmp/warehouseJdbc,jdbc.user=root,jdbc.password=pass,uri=jdbc:mysql://localhost:3306/db1,name=catalogName \ --target-catalog-type NESSIE \ ---target-catalog-properties 
uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT \ ---target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT ``` # Scenarios @@ -276,7 +272,7 @@ Details: Please note that a log file will be created, which will print "successfully migrated table X" for every table migration, and also log any table level failures, if present. -### B.3) executes the migration and out of 1000 tables 10 tables have failed to migrate because the target catalog had the same table and namespace (maybe different schema).Remaining 990 tables were successfully migrated. +### B.3) executes the migration and out of 1000 tables 10 tables have failed to migrate because of some error. Remaining 990 tables were successfully migrated. Sample input: ```shell @@ -305,79 +301,20 @@ Details: ``` Please note that a log file will be generated, which will print "successfully migrated table X" for every table migration and log any table-level failures in the `failed_identifiers.txt` file. -Users can use this file to identify failed tables and search for them in the log, which will contain a stacktrace with the `TableAlreadyExists` exception for up to 10 tables. -This can help users understand why the migration failed. In such cases, users can rename the tables in the source catalog and migrate only those 10 tables using any of the identifier options available in the argument. - - -### B.4) executes the migration and out of 1000 tables 900 tables have failed to migrate because the target/source catalog connection went off. Only 100 tables were successfully migrated. 
- -Sample input: -```shell -java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ ---source-catalog-type HADOOP \ ---source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ ---target-catalog-type NESSIE \ ---target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \ ---stacktrace -``` - -Console output will be same as B.2) till summary because even in case of failure, -all the identified tables will be attempted for migration. - -``` -Summary: -- Successfully migrated 100 tables from HADOOP catalog to NESSIE catalog. -- Failed to migrate 900 tables from HADOOP catalog to NESSIE catalog. Please check the `catalog_migration.log` file for the failure reason. - Failed Identifiers are written to `failed_identifiers.txt`. Retry with that file using the `--identifiers-from-file` option if the failure is because of network/connection timeouts. - -Details: -- Successfully migrated these tables: - [foo.tbl-1, foo.tbl-2,…,bar.tbl-100] -- Failed to migrate these tables: - [bar.tbl-201, foo.tbl-202, …, …,bar.tbl-1000] -``` - -Please note that a log file will be generated, which will print "successfully migrated table X" for every table migration and log any table-level failures in the `failed_identifiers.txt` file. -Users can use this file to identify failed tables and search for them in the log, which may contain a stacktrace with the `ConnectionTimeOut` exception for up to 900 tables. -This can help users understand why the migration failed. Since these are timeout exceptions, users can retry migrating only those 900 tables using the `--identifiers-from-file` option with the `failed_identifiers.txt` file. - -### B.5) executes the migration and out of 1000 tables. Where all the 1000 tables were migrated successfully but deletion of 200 tables from the source catalog has failed due to network issues. 
- -Sample input: -```shell -java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ ---source-catalog-type HADOOP \ ---source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \ ---target-catalog-type NESSIE \ ---target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse -``` - -The console output will be the same as in B.2) until the summary because, even in case of failure, all the identified tables will be attempted for migration. -However, any tables that fail to delete will be stored in the `failed_to_delete.txt` file, and the user will have to delete them manually or stop using them from the source catalog. -The console will print this warning. - -``` -Summary: -- Successfully migrated 1000 tables from HADOOP catalog to NESSIE catalog. -- 200 tables were failed to delete from the source catalog due the reason captured in the logs. These table names are written into the `failed_to_delete.txt` file. Do not operate these tables from the source catalog. - -Details: -- Successfully migrated these tables: - [foo.tbl-1, foo.tbl-2,…,bar.tbl-1000] -- [WARNING] Failed to delete these tables from source catalog: - [bar.tbl-201, foo.tbl-202, …, …,bar.tbl-400] -``` - -Users should manually drop the table entry from the source catalog in this case or stop using these tables from the source catalog. - +Users can use this file to identify failed tables and search for them in the log, which will contain the exception stacktrace for those 10 tables. +This can help users understand why the migration failed. +* If the migration of those tables failed with `TableAlreadyExists` exception, users can rename the tables in the source catalog and migrate only those 10 tables using any of the identifier options available in the argument. 
+* If the migration of those tables failed with `ConnectionTimeOut` exception, users can retry migrating only those 10 tables using the `--identifiers-from-file` option with the `failed_identifiers.txt` file. +* If the migration is successful but deletion of some tables from the source catalog has failed, the summary will mention that these table names were written into the `failed_to_delete.txt` file and the logs will capture the failure reason. +Do not operate on these tables from the source catalog; the user will have to delete them manually. -### B.6) executes the migration and out of 1000 tables. But manually aborts the migration by killing the process. +### B.4) executes the migration and out of 1000 tables. But manually aborts the migration by killing the process. To determine the number of migrated tables, the user can either review the log or use the listTables() function in the target catalog. In the event of an abort, migrated tables may not be deleted from the source catalog, and users should avoid manipulating them from there. If necessary, users can manually remove these tables from the source catalog or attempt a bulk migration to transfer all tables from the source catalog. -### B.7) Users need to move away from one catalog to another with selective tables (maybe want to move only the production tables, test tables, etc) +### B.5) Users need to move away from one catalog to another with selective tables (maybe want to move only the production tables, test tables, etc) Users can provide the selective list of identifiers to migrate using any of these 3 options `--identifiers`, `--identifiers-from-file`, `--identifier-regex` and it can be used along with the dry-run option too. @@ -414,16 +351,4 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ ``` Console will clearly print that only these identifiers are used for table migration. -Rest of the behavior will be the same as mentioned in the previous sections. 
- -# Appendix A: Iceberg catalogs -Iceberg supports managing the iceberg tables using the following Iceberg Catalogs: -* CUSTOM (By plugging in the jar and providing implementation class name) -* DYNAMODB -* ECS -* GLUE -* HADOOP -* HIVE -* JDBC -* NESSIE (Arctic) -* REST \ No newline at end of file +Rest of the behavior will be the same as mentioned in the previous sections. \ No newline at end of file diff --git a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java index ad0c3ad..d31ac7a 100644 --- a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java +++ b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java @@ -28,6 +28,7 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.io.TempDir; @@ -44,11 +45,25 @@ public abstract class AbstractTest { public static final TableIdentifier BAR_TBL3 = TableIdentifier.of(BAR, "tbl3"); public static final TableIdentifier BAR_TBL4 = TableIdentifier.of(BAR, "tbl4"); - private static final List namespaceList = Arrays.asList(FOO, BAR, DB1); + private static final List defaultNamespaceList = Arrays.asList(FOO, BAR, DB1); + + protected static final Namespace NS_A = Namespace.of("a"); + protected static final Namespace NS_A_B = Namespace.of("a", "b"); + protected static final Namespace NS_A_C = Namespace.of("a", "c"); + protected static final Namespace NS_A_B_C = Namespace.of("a", "b", "c"); + protected static final Namespace NS_A_B_C_D = Namespace.of("a", "b", "c", "d"); + protected static final Namespace NS_A_B_C_D_E = Namespace.of("a", "b", "c", "d", "e"); private static String 
sourceCatalogWarehouse; private static String targetCatalogWarehouse; + protected static Catalog sourceCatalog; + protected static Catalog targetCatalog; + + protected static final Schema schema = + new Schema( + Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())).fields()); + @BeforeAll protected static void initLogDir() { System.setProperty("catalog.migration.log.dir", tempDir.toAbsolutePath().toString()); @@ -56,13 +71,15 @@ protected static void initLogDir() { targetCatalogWarehouse = tempDir.resolve("targetCatalogWarehouse").toAbsolutePath().toString(); } - protected static Catalog sourceCatalog; - - protected static Catalog targetCatalog; - - protected static final Schema schema = - new Schema( - Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())).fields()); + @AfterAll + protected static void close() throws Exception { + if (sourceCatalog instanceof AutoCloseable) { + ((AutoCloseable) sourceCatalog).close(); + } + if (targetCatalog instanceof AutoCloseable) { + ((AutoCloseable) targetCatalog).close(); + } + } protected void validateAssumptionForHadoopCatalogAsSource(boolean deleteSourceTables) { Assumptions.assumeFalse( @@ -70,11 +87,14 @@ protected void validateAssumptionForHadoopCatalogAsSource(boolean deleteSourceTa "deleting source tables is unsupported for HadoopCatalog"); } - protected static void createNamespaces() { - namespaceList.forEach( + protected static void createNamespacesForSourceCatalog() { + defaultNamespaceList.forEach( namespace -> ((SupportsNamespaces) sourceCatalog).createNamespace(namespace)); + } + + protected static void createNamespacesForTargetCatalog() { // don't create "db1" namespace in targetCatalog - namespaceList + defaultNamespaceList .subList(0, 2) .forEach(namespace -> ((SupportsNamespaces) targetCatalog).createNamespace(namespace)); } @@ -84,7 +104,7 @@ protected static void dropNamespaces() { .map(catalog -> (SupportsNamespaces) catalog) .forEach( catalog -> - 
namespaceList.stream() + defaultNamespaceList.stream() .filter(catalog::namespaceExists) .forEach(catalog::dropNamespace)); } @@ -102,7 +122,7 @@ protected static void dropTables() { Stream.of(sourceCatalog, targetCatalog) .forEach( catalog -> - namespaceList.stream() + defaultNamespaceList.stream() .filter(namespace -> ((SupportsNamespaces) catalog).namespaceExists(namespace)) .forEach( namespace -> catalog.listTables(namespace).forEach(catalog::dropTable))); diff --git a/api/build.gradle.kts b/api/build.gradle.kts index 581ab1a..e9620c2 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts @@ -51,7 +51,7 @@ dependencies { "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" ) // this junit4 dependency is needed for above Iceberg's TestHiveMetastore - testRuntimeOnly("junit:junit:4.12") + testRuntimeOnly("junit:junit:4.13.2") testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { // these are taken from iceberg repo configurations diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java index 7c7de1b..09a41e1 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java @@ -54,6 +54,7 @@ public static Catalog buildCatalog( Preconditions.checkArgument(catalogProperties != null, "catalog properties is null"); Preconditions.checkArgument(catalogType != null, "catalog type is null"); Configuration catalogConf = new Configuration(); + addAwsConfigFromEnv(catalogConf); if (hadoopConf != null) { hadoopConf.forEach(catalogConf::set); } @@ -94,4 +95,16 @@ private static String catalogImpl(CatalogType type, String customCatalogImpl) { throw new IllegalArgumentException("Unsupported type: " + type.name()); } } + + private static void 
addAwsConfigFromEnv(Configuration configuration) { + if (System.getenv("AWS_ACCESS_KEY_ID") != null) { + configuration.set("fs.s3a.access.key", System.getenv("AWS_ACCESS_KEY_ID")); + } + if (System.getenv("AWS_SECRET_ACCESS_KEY") != null) { + configuration.set("fs.s3a.secret.key", System.getenv("AWS_SECRET_ACCESS_KEY")); + } + if (System.getenv("AWS_S3_ENDPOINT") != null) { + configuration.set("fs.s3a.endpoint", System.getenv("AWS_S3_ENDPOINT")); + } + } } diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index 0bc243e..5a38ab0 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -33,6 +33,7 @@ import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.hadoop.HadoopCatalog; import org.immutables.value.Value; import org.slf4j.Logger; @@ -115,7 +116,7 @@ public Set getMatchingTableIdentifiers(String identifierRegex) try { return sourceCatalog.listTables(namespace).stream() .filter(matchedIdentifiersPredicate); - } catch (Exception exception) { + } catch (IllegalArgumentException | NoSuchNamespaceException exception) { if (namespace.isEmpty()) { // some catalogs don't support default namespace. // Hence, just log the warning and ignore the exception. 
diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java index 89cc037..cb839cd 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java @@ -25,10 +25,9 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchTableException; -import org.apache.iceberg.hadoop.HadoopCatalog; import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -37,13 +36,6 @@ public abstract class AbstractTestCatalogMigrator extends AbstractTest { - protected static final Namespace NS_A = Namespace.of("a"); - protected static final Namespace NS_A_B = Namespace.of("a", "b"); - protected static final Namespace NS_A_C = Namespace.of("a", "c"); - protected static final Namespace NS_A_B_C = Namespace.of("a", "b", "c"); - protected static final Namespace NS_A_B_C_D = Namespace.of("a", "b", "c", "d"); - protected static final Namespace NS_A_B_C_D_E = Namespace.of("a", "b", "c", "d", "e"); - protected static final Namespace NS1 = Namespace.of("ns1"); protected static final Namespace NS2 = Namespace.of("ns2"); protected static final Namespace NS3 = Namespace.of("ns3"); @@ -59,6 +51,11 @@ public abstract class AbstractTestCatalogMigrator extends AbstractTest { protected static final TableIdentifier NS1_NS3_TBL = TableIdentifier.of(NS1_NS3, "tblz"); protected static final TableIdentifier NS1_NS2_NS3_TBL = TableIdentifier.of(NS1_NS2_NS3, "tblz"); + @AfterAll + protected 
static void tearDown() throws Exception { + dropNamespaces(); + } + @BeforeEach protected void beforeEach() { createTables(); @@ -72,11 +69,13 @@ protected void afterEach() { protected static void initializeSourceCatalog( CatalogMigrationUtil.CatalogType catalogType, Map additionalProp) { initializeCatalog(true, catalogType, additionalProp); + createNamespacesForSourceCatalog(); } protected static void initializeTargetCatalog( CatalogMigrationUtil.CatalogType catalogType, Map additionalProp) { initializeCatalog(false, catalogType, additionalProp); + createNamespacesForTargetCatalog(); } private static void initializeCatalog( @@ -129,17 +128,17 @@ public void testRegister(boolean deleteSourceTables) { Assertions.assertThat(targetCatalog.listTables(BAR)) .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); - if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { + if (deleteSourceTables) { // table should be deleted after migration from source catalog Assertions.assertThat(sourceCatalog.listTables(FOO)).isEmpty(); Assertions.assertThat(sourceCatalog.listTables(BAR)).isEmpty(); - return; + } else { + // tables should be present in source catalog. + Assertions.assertThat(sourceCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(sourceCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } - // tables should be present in source catalog. 
- Assertions.assertThat(sourceCatalog.listTables(FOO)) - .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); - Assertions.assertThat(sourceCatalog.listTables(BAR)) - .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } @ParameterizedTest @@ -222,7 +221,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { + if (deleteSourceTables) { // create a table with the same name in source catalog which got deleted. sourceCatalog.createTable(FOO_TBL2, schema); } @@ -243,16 +242,12 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterNoTables(boolean deleteSourceTables) { - // use source catalog as targetCatalog which has no tables. - Assumptions.assumeFalse( - deleteSourceTables && targetCatalog instanceof HadoopCatalog, - "deleting source tables is unsupported for HadoopCatalog"); - CatalogMigrator catalogMigrator = - ImmutableCatalogMigrator.builder() - .sourceCatalog(targetCatalog) - .targetCatalog(sourceCatalog) - .deleteEntriesFromSourceCatalog(deleteSourceTables) - .build(); + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + // clean up the default tables present in the source catalog. 
+ dropTables(); + + CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); Set matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); Assertions.assertThat(matchingTableIdentifiers).isEmpty(); diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java index 078df2c..4b07e06 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java @@ -21,6 +21,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.UUID; import java.util.stream.Stream; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; @@ -38,11 +39,11 @@ public class CatalogMigrationUtilTest { - protected static @TempDir Path logDir; + protected static @TempDir Path tempDir; @BeforeAll protected static void initLogDir() { - System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); + System.setProperty("catalog.migration.log.dir", tempDir.toAbsolutePath().toString()); } static Stream blankOrNullStrings() { @@ -91,9 +92,10 @@ public void testInvalidArgs() { } @Test - public void testBuildHadoopCatalog() { + public void testBuildHadoopCatalog() throws Exception { Map properties = new HashMap<>(); - properties.put("warehouse", logDir.toAbsolutePath().toString()); + properties.put( + "warehouse", tempDir.resolve(UUID.randomUUID().toString()).toAbsolutePath().toString()); properties.put("type", "hadoop"); Map conf = new HashMap<>(); @@ -103,22 +105,29 @@ public void testBuildHadoopCatalog() { CatalogMigrationUtil.buildCatalog( properties, CatalogMigrationUtil.CatalogType.HADOOP, "catalogName", null, conf); - Assertions.assertThat(catalog).isInstanceOf(HadoopCatalog.class); - 
Assertions.assertThat(catalog.name()).isEqualTo("catalogName"); - Assertions.assertThat(((HadoopCatalog) catalog).getConf().get("k1")).isEqualTo("v1"); - Schema schema = - new Schema( - Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())) - .fields()); - Table table = catalog.createTable(FOO_TBL1, schema); - Assertions.assertThat(table.location()).contains(logDir.toAbsolutePath().toString()); - catalog.dropTable(FOO_TBL1); + try { + Assertions.assertThat(catalog).isInstanceOf(HadoopCatalog.class); + Assertions.assertThat(catalog.name()).isEqualTo("catalogName"); + Assertions.assertThat(((HadoopCatalog) catalog).getConf().get("k1")).isEqualTo("v1"); + Schema schema = + new Schema( + Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())) + .fields()); + Table table = catalog.createTable(FOO_TBL1, schema); + Assertions.assertThat(table.location()).contains(tempDir.toAbsolutePath().toString()); + catalog.dropTable(FOO_TBL1); + } finally { + if (catalog instanceof AutoCloseable) { + ((AutoCloseable) catalog).close(); + } + } } @Test - public void testBuildNessieCatalog() { + public void testBuildNessieCatalog() throws Exception { Map properties = new HashMap<>(); - properties.put("warehouse", logDir.toAbsolutePath().toString()); + properties.put( + "warehouse", tempDir.resolve(UUID.randomUUID().toString()).toAbsolutePath().toString()); properties.put("ref", "main"); properties.put("uri", "http://localhost:19120/api/v1"); @@ -126,14 +135,21 @@ public void testBuildNessieCatalog() { CatalogMigrationUtil.buildCatalog( properties, CatalogMigrationUtil.CatalogType.NESSIE, "catalogName", null, null); - Assertions.assertThat(catalog).isInstanceOf(NessieCatalog.class); - Assertions.assertThat(catalog.name()).isEqualTo("catalogName"); + try { + Assertions.assertThat(catalog).isInstanceOf(NessieCatalog.class); + Assertions.assertThat(catalog.name()).isEqualTo("catalogName"); + } finally { + if (catalog instanceof AutoCloseable) { 
+ ((AutoCloseable) catalog).close(); + } + } } @Test - public void testBuildHiveCatalog() { + public void testBuildHiveCatalog() throws Exception { Map properties = new HashMap<>(); - properties.put("warehouse", logDir.toAbsolutePath().toString()); + properties.put( + "warehouse", tempDir.resolve(UUID.randomUUID().toString()).toAbsolutePath().toString()); properties.put("type", "hive"); properties.put("uri", "thrift://localhost:9083"); @@ -141,7 +157,13 @@ public void testBuildHiveCatalog() { CatalogMigrationUtil.buildCatalog( properties, CatalogMigrationUtil.CatalogType.HIVE, "catalogName", null, null); - Assertions.assertThat(catalog).isInstanceOf(HiveCatalog.class); - Assertions.assertThat(catalog.name()).isEqualTo("catalogName"); + try { + Assertions.assertThat(catalog).isInstanceOf(HiveCatalog.class); + Assertions.assertThat(catalog.name()).isEqualTo("catalogName"); + } finally { + if (catalog instanceof AutoCloseable) { + ((AutoCloseable) catalog).close(); + } + } } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java index 8e45fe7..15413a2 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java @@ -15,28 +15,30 @@ */ package org.projectnessie.tools.catalog.migration.api; -import java.nio.file.Path; import java.util.Collections; -import org.apache.iceberg.catalog.Catalog; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.hadoop.HadoopCatalog; import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; +import org.projectnessie.tools.catalog.migration.api.test.AbstractTest; -public class 
CatalogMigratorParamsTest { - - protected static @TempDir Path logDir; - - @BeforeAll - protected static void initLogDir() { - System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); - } +public class CatalogMigratorParamsTest extends AbstractTest { @Test public void testInvalidArgs() { - Catalog sourceCatalog = new HadoopCatalog(); - Catalog targetCatalog = new HadoopCatalog(); + sourceCatalog = + CatalogUtil.loadCatalog( + HadoopCatalog.class.getName(), + "source", + hadoopCatalogProperties(true), + new Configuration()); + targetCatalog = + CatalogUtil.loadCatalog( + HadoopCatalog.class.getName(), + "target", + hadoopCatalogProperties(true), + new Configuration()); Assertions.assertThatThrownBy( () -> diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java index a44bb1a..542d845 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java @@ -24,7 +24,6 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -41,8 +40,6 @@ public class CustomCatalogMigratorTest extends AbstractTest { protected static void setup() { sourceCatalog = createCustomCatalog(warehouse1.toAbsolutePath().toString(), "sourceCatalog"); targetCatalog = createCustomCatalog(warehouse2.toAbsolutePath().toString(), "targetCatalog"); - - createNamespaces(); } @BeforeEach @@ -55,11 +52,6 @@ protected void afterEach() { dropTables(); } - @AfterAll - protected static void tearDown() { - dropNamespaces(); - } - @Test public void 
testRegister() { CatalogMigrator catalogMigrator = diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java index f376e91..21d72b7 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java @@ -24,7 +24,6 @@ import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -34,13 +33,6 @@ public class HadoopCatalogMigratorTest extends AbstractTestCatalogMigrator { protected static void setup() { initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); - - createNamespaces(); - } - - @AfterAll - protected static void tearDown() { - dropNamespaces(); } @Test diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java index a8fe17d..1f3c421 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java @@ -31,9 +31,10 @@ protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); - targetCatalog = HiveMetaStoreRunner.hiveCatalog(); - - createNamespaces(); + initializeTargetCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + 
Collections.singletonMap( + "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); } @AfterAll diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java index a9bd51c..b4af31d 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java @@ -38,8 +38,6 @@ protected static void setup() throws Exception { initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); - - createNamespaces(); } @AfterAll diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java index d3269c7..c6aad73 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToHadoopCatalogMigrator.java @@ -31,8 +31,6 @@ protected static void setup() throws Exception { Collections.singletonMap( "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); - - createNamespaces(); } @AfterAll diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java index e5d1718..e06ae37 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java +++ 
b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHiveToNessieCatalogMigrator.java @@ -26,10 +26,11 @@ public class ITHiveToNessieCatalogMigrator extends AbstractTestCatalogMigrator { protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); - sourceCatalog = HiveMetaStoreRunner.hiveCatalog(); + initializeSourceCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); - - createNamespaces(); } @AfterAll diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java index 3e21e9f..8cbeaee 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java @@ -31,9 +31,10 @@ protected static void setup() throws Exception { HiveMetaStoreRunner.startMetastore(); initializeSourceCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); - targetCatalog = HiveMetaStoreRunner.hiveCatalog(); - - createNamespaces(); + initializeTargetCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); } @AfterAll diff --git a/build.gradle.kts b/build.gradle.kts deleted file mode 100644 index 231cc69..0000000 --- a/build.gradle.kts +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2023 Dremio - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -extra["versionGoogleJavaFormat"] = libs.versions.googleJavaFormat.get() diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index 683da62..c0590d2 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -16,18 +16,17 @@ package org.projectnessie.tools.catalog.migration.cli; import com.google.common.base.Preconditions; -import com.google.common.collect.Iterables; import java.io.Console; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Set; import java.util.concurrent.Callable; -import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.TableIdentifier; @@ -116,68 +115,86 @@ public Integer call() { validateOutputDir(); - Catalog sourceCatalog = sourceCatalogOptions.build(); - consoleLog.info("Configured source catalog: {}", sourceCatalog.name()); + Catalog sourceCatalog = null; + Catalog targetCatalog = null; - Catalog targetCatalog = targetCatalogOptions.build(); - consoleLog.info("Configured target catalog: {}", targetCatalog.name()); + try { + sourceCatalog = 
sourceCatalogOptions.build(); + consoleLog.info("Configured source catalog: {}", sourceCatalog.name()); - if (!isDryRun && !disablePrompts && !canProceed(sourceCatalog)) { - return 2; - } + targetCatalog = targetCatalogOptions.build(); + consoleLog.info("Configured target catalog: {}", targetCatalog.name()); - CatalogMigrator catalogMigrator = - catalogMigrator(sourceCatalog, targetCatalog, enableStackTrace); + if (!isDryRun && !disablePrompts && !canProceed(sourceCatalog)) { + return 1; + } + + CatalogMigrator catalogMigrator = + catalogMigrator(sourceCatalog, targetCatalog, enableStackTrace); - if (identifiers.isEmpty()) { - consoleLog.info("Identifying tables for {} ...", operation()); - identifiers = catalogMigrator.getMatchingTableIdentifiers(identifierRegEx); if (identifiers.isEmpty()) { - consoleLog.warn( - "No tables were identified for {}. Please check `catalog_migration.log` file for more info.", - operation()); - return 2; + consoleLog.info("Identifying tables for {} ...", operation()); + identifiers = catalogMigrator.getMatchingTableIdentifiers(identifierRegEx); + if (identifiers.isEmpty()) { + consoleLog.warn( + "No tables were identified for {}. 
Please check `catalog_migration.log` file for more info.", + operation()); + return 1; + } } - } - if (isDryRun) { - consoleLog.info("Dry run is completed."); - handleDryRunResult(identifiers); - return 0; - } + if (isDryRun) { + consoleLog.info("Dry run is completed."); + handleDryRunResult(identifiers); + return 0; + } - consoleLog.info("Identified {} tables for {}.", identifiers.size(), operation()); + consoleLog.info("Identified {} tables for {}.", identifiers.size(), operation()); + + consoleLog.info("Started {} ...", operation()); + + CatalogMigrationResult result; + try { + List identifiersList = new ArrayList<>(identifiers); + int fromIndex = 0; + while (fromIndex < identifiersList.size()) { + int toIndex = Math.min(fromIndex + BATCH_SIZE, identifiersList.size()); + List identifierBatch = identifiersList.subList(fromIndex, toIndex); + catalogMigrator.registerTables(identifierBatch); + consoleLog.info( + "Attempted {} for {} tables out of {} tables.", + operation(), + toIndex, + identifiersList.size()); + fromIndex += BATCH_SIZE; + } + } finally { + consoleLog.info("Finished {} ...", operation()); + result = catalogMigrator.result(); + handleResults(result); + } - consoleLog.info("Started {} ...", operation()); + if (!result.failedToRegisterTableIdentifiers().isEmpty() + || !result.failedToDeleteTableIdentifiers().isEmpty() + || result.registeredTableIdentifiers().isEmpty()) { + return 1; + } - CatalogMigrationResult result; - try { - Iterable> identifierBatches = - Iterables.partition(identifiers, BATCH_SIZE); - int totalIdentifiers = identifiers.size(); - AtomicInteger counter = new AtomicInteger(); - identifierBatches.forEach( - identifierBatch -> { - catalogMigrator.registerTables(identifierBatch); - consoleLog.info( - "Attempted {} for {} tables out of {} tables.", - operation(), - counter.addAndGet(identifierBatch.size()), - totalIdentifiers); - }); + return 0; } finally { - consoleLog.info("Finished {} ...", operation()); - result = 
catalogMigrator.result(); - handleResults(result); + close(sourceCatalog); + close(targetCatalog); } + } - if (!result.failedToRegisterTableIdentifiers().isEmpty() - || !result.failedToDeleteTableIdentifiers().isEmpty() - || result.registeredTableIdentifiers().isEmpty()) { - return 1; + private void close(Catalog catalog) { + if (catalog instanceof AutoCloseable) { + try { + ((AutoCloseable) catalog).close(); + } catch (Exception e) { + throw new RuntimeException(e); + } } - - return 0; } private void checkAndWarnAboutIdentifiers( diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java index 9349d3b..e65f1cd 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/CatalogMigrationCLI.java @@ -38,7 +38,9 @@ public static void main(String... args) { if (enableStacktrace(args)) { cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); } else { - consoleLog.error("Error during CLI execution: {}", ex.getMessage()); + consoleLog.error( + "Error during CLI execution: {}. 
Please check `catalog_migration.log` file for more info.", + ex.getMessage()); } return 1; }); diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java index 24a5ed4..6f0d277 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/IdentifierOptions.java @@ -15,14 +15,17 @@ */ package org.projectnessie.tools.catalog.migration.cli; +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.Set; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import java.util.stream.Collectors; import org.apache.iceberg.catalog.TableIdentifier; import org.slf4j.Logger; @@ -63,32 +66,36 @@ public class IdentifierOptions { private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); protected Set processIdentifiersInput() { - if (identifiersFromFile != null && !Files.exists(Paths.get(identifiersFromFile))) { - throw new IllegalArgumentException( - "File specified in `--identifiers-from-file` option does not exist."); - } - Set tableIdentifiers; - if (identifiersFromFile != null) { + + if (!identifiers.isEmpty()) { + return identifiers.stream() + .map(TableIdentifier::parse) + .collect(Collectors.toCollection(LinkedHashSet::new)); + } else if (identifiersFromFile != null) { + Preconditions.checkArgument( + Files.exists(Paths.get(identifiersFromFile)), + "File specified in `--identifiers-from-file` option does not exist"); try { consoleLog.info("Collecting identifiers from the file {} ...", identifiersFromFile); - tableIdentifiers = - 
Files.readAllLines(Paths.get(identifiersFromFile)).stream() - .map(String::trim) - .filter(string -> !string.isEmpty()) - .map(TableIdentifier::parse) - .collect(Collectors.toCollection(LinkedHashSet::new)); + return Files.readAllLines(Paths.get(identifiersFromFile)).stream() + .map(String::trim) + .filter(string -> !string.isEmpty()) + .map(TableIdentifier::parse) + .collect(Collectors.toCollection(LinkedHashSet::new)); } catch (IOException e) { throw new UncheckedIOException( String.format("Failed to read the file: %s", identifiersFromFile), e); } - } else if (!identifiers.isEmpty()) { - tableIdentifiers = - identifiers.stream() - .map(TableIdentifier::parse) - .collect(Collectors.toCollection(LinkedHashSet::new)); - } else { - tableIdentifiers = Collections.emptySet(); + } else if (identifiersRegEx != null) { + Preconditions.checkArgument( + !identifiersRegEx.trim().isEmpty(), "--identifiers-regex should not be empty"); + // check whether pattern is compilable + try { + Pattern.compile(identifiersRegEx); + } catch (PatternSyntaxException ex) { + throw new IllegalArgumentException("--identifiers-regex pattern is not compilable", ex); + } } - return tableIdentifiers; + return Sets.newHashSet(); } } diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java index 1a8512f..c234784 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/MigrateCommand.java @@ -58,7 +58,7 @@ public Integer call() { consoleLog.error( "Source catalog is a Hadoop catalog and it doesn't support deleting the table entries just from the catalog. 
" + "Please use 'register' command instead."); - return 2; + return 1; } return super.call(); } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java index a4af2b7..1d5b10c 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/AbstractCLIMigrationTest.java @@ -31,11 +31,9 @@ import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchTableException; -import org.apache.iceberg.hadoop.HadoopCatalog; import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assumptions; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; @@ -48,8 +46,6 @@ public abstract class AbstractCLIMigrationTest extends AbstractTest { protected static @TempDir Path outputDir; - protected static Path dryRunFile; - protected static Path failedIdentifiersFile; protected static String sourceCatalogProperties; protected static String targetCatalogProperties; @@ -60,11 +56,13 @@ public abstract class AbstractCLIMigrationTest extends AbstractTest { protected static void initializeSourceCatalog( CatalogMigrationUtil.CatalogType catalogType, Map additionalProp) { initializeCatalog(true, catalogType, additionalProp); + createNamespacesForSourceCatalog(); } protected static void initializeTargetCatalog( CatalogMigrationUtil.CatalogType catalogType, Map additionalProp) { initializeCatalog(false, catalogType, additionalProp); + createNamespacesForTargetCatalog(); } private static void initializeCatalog( @@ -105,10 +103,9 @@ private static void 
initializeCatalog( } } - @BeforeAll - protected static void initFilesPaths() { - dryRunFile = outputDir.resolve(DRY_RUN_FILE); - failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); } @BeforeEach @@ -163,17 +160,17 @@ public void testRegister(boolean deleteSourceTables) throws Exception { // https://github.com/apache/iceberg/pull/6789 sourceCatalog.tableExists(FOO_TBL1); - if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { + if (deleteSourceTables) { // table should be deleted after migration from source catalog Assertions.assertThat(sourceCatalog.listTables(FOO)).isEmpty(); Assertions.assertThat(sourceCatalog.listTables(BAR)).isEmpty(); - return; + } else { + // tables should be present in source catalog. + Assertions.assertThat(sourceCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(sourceCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } - // tables should be present in source catalog. - Assertions.assertThat(sourceCatalog.listTables(FOO)) - .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); - Assertions.assertThat(sourceCatalog.listTables(BAR)) - .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } @ParameterizedTest @@ -328,7 +325,7 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep operated, sourceCatalogType, targetCatalogType)) .contains(String.format("Details: %nSuccessfully %s these tables:%n[foo.tbl2]", operated)); - if (deleteSourceTables && !(sourceCatalog instanceof HadoopCatalog)) { + if (deleteSourceTables) { // create a table with the same name in source catalog which got deleted. 
sourceCatalog.createTable(FOO_TBL2, schema); } @@ -366,6 +363,8 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep Assertions.assertThat(targetCatalog.listTables(BAR)) .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); + Path failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); + // retry the failed tables using `--identifiers-from-file` argsList = defaultArgs(); argsList.addAll( @@ -384,35 +383,21 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Excep + "if the failure is because of network/connection timeouts.", operate, sourceCatalogType, targetCatalogType)) .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", operate)); - Assertions.assertThat(Files.exists(failedIdentifiersFile)).isTrue(); + Assertions.assertThat(failedIdentifiersFile).exists(); Assertions.assertThat(Files.readAllLines(failedIdentifiersFile)).containsExactly("foo.tbl2"); } @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { - Assumptions.assumeFalse( - deleteSourceTables && targetCatalog instanceof HadoopCatalog, - "deleting source tables is unsupported for HadoopCatalog"); - - // use source catalog as targetCatalog which has no tables. - RunCLI run = - runCLI( - deleteSourceTables, - Lists.newArrayList( - "--source-catalog-type", - targetCatalogType, - "--source-catalog-properties", - targetCatalogProperties, - "--target-catalog-type", - sourceCatalogType, - "--target-catalog-properties", - sourceCatalogProperties, - "--output-dir", - outputDir.toAbsolutePath().toString(), - "--disable-safety-prompts")); + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + // clean up the default tables present in the source catalog. 
+ dropTables(); - Assertions.assertThat(run.getExitCode()).isEqualTo(2); + RunCLI run = runCLI(deleteSourceTables, defaultArgs()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(1); String operation = deleteSourceTables ? "migration" : "registration"; Assertions.assertThat(run.getOut()) .contains( @@ -446,7 +431,8 @@ public void testDryRun(boolean deleteSourceTables) throws Exception { operation)) .contains( String.format("Details: %nIdentified these tables for %s by dry-run:%n", operation)); - Assertions.assertThat(Files.exists(dryRunFile)).isTrue(); + Path dryRunFile = outputDir.resolve(DRY_RUN_FILE); + Assertions.assertThat(dryRunFile).exists(); Assertions.assertThat(Files.readAllLines(dryRunFile)) .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java index dfd3ad2..ccd6d0b 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java @@ -183,7 +183,7 @@ private static Stream invalidArgs() { "k3=v3, k4=v4", "--identifiers-from-file", "file.txt"), - "Error during CLI execution: File specified in `--identifiers-from-file` option does not exist."), + "Error during CLI execution: File specified in `--identifiers-from-file` option does not exist"), arguments( Lists.newArrayList( "--source-catalog-type", diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java index e8d3760..1684c70 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/HadoopCLIMigrationTest.java @@ -16,7 
+16,6 @@ package org.projectnessie.tools.catalog.migration.cli; import java.util.Collections; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; @@ -26,12 +25,5 @@ public class HadoopCLIMigrationTest extends AbstractCLIMigrationTest { protected static void setup() { initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); - - createNamespaces(); - } - - @AfterAll - protected static void tearDown() { - dropNamespaces(); } } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java index 47a3653..1c877ad 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToHiveCLIMigrationTest.java @@ -32,8 +32,6 @@ protected static void setup() throws Exception { CatalogMigrationUtil.CatalogType.HIVE, Collections.singletonMap( "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); - - createNamespaces(); } @AfterAll diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToNessieCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToNessieCLIMigrationTest.java new file mode 100644 index 0000000..836369f --- /dev/null +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHadoopToNessieCLIMigrationTest.java @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.projectnessie.tools.catalog.migration.cli; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.IntStream; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.projectnessie.tools.catalog.migration.api.CatalogMigrationUtil; + +public class ITHadoopToNessieCLIMigrationTest extends AbstractCLIMigrationTest { + + @BeforeAll + protected static void setup() { + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); + } + + @Test + public void testRegisterLargeNumberOfTablesWithNestedNamespaces() throws Exception { + List namespaceList = + Arrays.asList(NS_A, NS_A_B, NS_A_B_C, NS_A_B_C_D, NS_A_B_C_D_E, NS_A_C); + + // additionally create 240 tables along with 4 tables created in beforeEach() + namespaceList.forEach( + namespace -> { + ((SupportsNamespaces) sourceCatalog).createNamespace(namespace); + IntStream.range(0, 40) + .forEach( + val -> + sourceCatalog.createTable( + TableIdentifier.of(namespace, "tblx" + val), schema)); + }); + + // register or migrate all the tables + RunCLI run = runCLI(false, defaultArgs()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + 
.contains("Identified 244 tables for registration.") + .contains( + String.format( + "Summary: %nSuccessfully registered 244 tables from %s catalog to" + " %s catalog.", + sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully registered these tables:%n")) + // validate intermediate output + .contains("Attempted registration for 100 tables out of 244 tables.") + .contains("Attempted registration for 200 tables out of 244 tables.") + .contains("Attempted registration for 244 tables out of 244 tables."); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + targetCatalog.loadTable(BAR_TBL3).refresh(); + + Assertions.assertThat(targetCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); + + Collections.reverse(namespaceList); + namespaceList.forEach( + namespace -> { + List identifiers = targetCatalog.listTables(namespace); + + // validate tables count in each namespace. 
+ Assertions.assertThat(identifiers).hasSize(40); + + identifiers.forEach( + identifier -> { + targetCatalog.dropTable(identifier); + sourceCatalog.dropTable(identifier); + }); + ((SupportsNamespaces) sourceCatalog).dropNamespace(namespace); + ((SupportsNamespaces) targetCatalog).dropNamespace(namespace); + }); + } +} diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java index 1542d2f..1fde265 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToHadoopCLIMigrationTest.java @@ -32,8 +32,6 @@ protected static void setup() throws Exception { Collections.singletonMap( "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); - - createNamespaces(); } @AfterAll diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java index ff27cd7..786eebf 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITHiveToNessieCLIMigrationTest.java @@ -37,8 +37,6 @@ protected static void setup() throws Exception { Collections.singletonMap( "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); - - createNamespaces(); } @AfterAll @@ -47,7 +45,6 @@ protected static void tearDown() throws Exception { HiveMetaStoreRunner.stopMetastore(); } - // Executing migration of large number of tables for 
only one set of catalogs to save CI time. @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { @@ -73,7 +70,8 @@ public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws E .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)) // validate intermediate output .contains(String.format("Attempted %s for 100 tables out of 244 tables.", operation)) - .contains(String.format("Attempted %s for 200 tables out of 244 tables.", operation)); + .contains(String.format("Attempted %s for 200 tables out of 244 tables.", operation)) + .contains(String.format("Attempted %s for 244 tables out of 244 tables.", operation)); // manually refreshing catalog due to missing refresh in Nessie catalog // https://github.com/apache/iceberg/pull/6789 diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java index 7fdc8a9..8e652a7 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ITNessieToHiveCLIMigrationTest.java @@ -32,8 +32,6 @@ protected static void setup() throws Exception { CatalogMigrationUtil.CatalogType.HIVE, Collections.singletonMap( "uri", HiveMetaStoreRunner.hiveCatalog().getConf().get("hive.metastore.uris"))); - - createNamespaces(); } @AfterAll diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java index 782db41..5350132 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java +++ 
b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/ProcessIdentifiersTest.java @@ -37,45 +37,39 @@ protected static void initLogDir() { } @Test - public void testOptions() throws Exception { + public void testIdentifiersSet() { + // test empty set Assertions.assertThat(new IdentifierOptions().processIdentifiersInput()).isEmpty(); + // test valid elements IdentifierOptions identifierOptions = new IdentifierOptions(); identifierOptions.identifiers = Sets.newHashSet("foo.abc", "bar.def"); Assertions.assertThat(identifierOptions.processIdentifiersInput()) .containsExactlyInAnyOrder( TableIdentifier.parse("foo.abc"), TableIdentifier.parse("bar.def")); + } + @Test + public void testIdentifiersFromFile() throws Exception { + // valid file contents Path identifierFile = tempDir.resolve("file_with_ids.txt"); Files.write(identifierFile, Arrays.asList("db1.t1", "db2.t2", "db123.t5")); - IdentifierOptions newOptions = new IdentifierOptions(); - newOptions.identifiersFromFile = identifierFile.toAbsolutePath().toString(); - Assertions.assertThat(newOptions.processIdentifiersInput()) + IdentifierOptions options = new IdentifierOptions(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + Assertions.assertThat(options.processIdentifiersInput()) .containsExactlyInAnyOrder( TableIdentifier.parse("db1.t1"), TableIdentifier.parse("db2.t2"), TableIdentifier.parse("db123.t5")); - Assertions.assertThat(identifierFile.toFile().setReadable(false)).isTrue(); - Assertions.assertThatThrownBy(newOptions::processIdentifiersInput) - .isInstanceOf(UncheckedIOException.class) - .hasMessageContaining("Failed to read the file: " + identifierFile); - Assertions.assertThat(identifierFile.toFile().setReadable(true)).isTrue(); - - IdentifierOptions options = new IdentifierOptions(); - options.identifiersFromFile = "path/to/file"; - Assertions.assertThatThrownBy(options::processIdentifiersInput) - .isInstanceOf(IllegalArgumentException.class) - 
.hasMessageContaining("File specified in `--identifiers-from-file` option does not exist"); - // empty file identifierFile = tempDir.resolve("ids1.txt"); Files.createFile(identifierFile); options = new IdentifierOptions(); - newOptions.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); Assertions.assertThat(options.processIdentifiersInput()).isEmpty(); - // with some blanks + // file with some blanks contents identifierFile = tempDir.resolve("ids2.txt"); String[] lines = {"abc. def", " abc 123 ", "", "", " xyz%n123"}; Files.writeString(identifierFile, String.join(System.lineSeparator(), lines)); @@ -99,4 +93,44 @@ public void testOptions() throws Exception { .containsExactlyInAnyOrder( TableIdentifier.parse("abc.def"), TableIdentifier.parse("xx.yy")); } + + @Test + public void testIdentifiersFromFileInvalidInputs() throws Exception { + // file without permission to read + Path identifierFile = tempDir.resolve("non_readable_file.txt"); + Files.createFile(identifierFile); + Assertions.assertThat(identifierFile.toFile().setReadable(false)).isTrue(); + IdentifierOptions options = new IdentifierOptions(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + Assertions.assertThatThrownBy(options::processIdentifiersInput) + .isInstanceOf(UncheckedIOException.class) + .hasMessageContaining("Failed to read the file: " + identifierFile); + Assertions.assertThat(identifierFile.toFile().setReadable(true)).isTrue(); + + // file doesn't exist + options.identifiersFromFile = "path/to/file"; + Assertions.assertThatThrownBy(options::processIdentifiersInput) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("File specified in `--identifiers-from-file` option does not exist"); + } + + @Test + public void testIdentifiersRegEx() { + // test valid regex + IdentifierOptions options = new IdentifierOptions(); + options.identifiersRegEx = "^foo\\..*"; + 
Assertions.assertThat(options.processIdentifiersInput()).isEmpty(); + + // test invalid regex + options.identifiersRegEx = "(23erf423!"; + Assertions.assertThatThrownBy(options::processIdentifiersInput) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("--identifiers-regex pattern is not compilable"); + + options = new IdentifierOptions(); + options.identifiersRegEx = " "; + Assertions.assertThatThrownBy(options::processIdentifiersInput) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("--identifiers-regex should not be empty"); + } } diff --git a/gradle/baselibs.versions.toml b/gradle/baselibs.versions.toml index 4427ecd..7bfff2d 100644 --- a/gradle/baselibs.versions.toml +++ b/gradle/baselibs.versions.toml @@ -3,9 +3,9 @@ [versions] errorpronePlugin = "3.0.1" ideaExt = "1.1.7" -jandexPlugin = "1.86" -shadowPlugin = "8.1.0" -spotlessPlugin = "6.16.0" +jandexPlugin = "1.87" +shadowPlugin = "8.1.1" +spotlessPlugin = "6.18.0" [libraries] errorprone = { module = "net.ltgt.gradle:gradle-errorprone-plugin", version.ref = "errorpronePlugin" } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 8852836..e11224b 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,7 +1,7 @@ [versions] assertj = "3.24.2" aws = "2.20.18" # this is in mapping with iceberg repo. -checkstyle = "10.8.0" +checkstyle = "10.9.3" ecs = "3.3.2" errorprone = "2.18.0" errorproneSlf4j = "0.1.18" @@ -9,18 +9,18 @@ googleJavaFormat = "1.16.0" guava = "31.1-jre" hadoop = "2.7.3" # this is in mapping with iceberg repo. hive = "2.3.8" # this is in mapping with iceberg repo. 
-iceberg = "1.2.0" +iceberg = "1.2.1" immutables = "2.9.3" -jacoco = "0.8.8" -jandex = "3.0.5" +jacoco = "0.8.9" +jandex = "3.1.0" junit = "5.9.2" logback = "1.4.5" logcaptor = "2.8.0" mysqlDriver = "8.0.32" -nessie = "0.52.3" +nessie = "0.57.0" nessieRunner = "0.29.0" -picocli = "4.7.1" -shadowPlugin = "7.1.2" +picocli = "4.7.2" +shadowPlugin = "8.1.1" slf4j = "1.7.36" [libraries] diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 19acfb4..8196ca1 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=ff7bf6a86f09b9b2c40bb8f48b25fc19cf2b2664fd1d220cd7ab833ec758d0d7 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.0.2-bin.zip +distributionSha256Sum=a62c5f99585dd9e1f95dab7b9415a0e698fa9dd1e6c38537faa81ac078f4d23e +distributionUrl=https\://services.gradle.org/distributions/gradle-8.1-bin.zip networkTimeout=10000 zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists From e46aad33e23777b6f622fd50028502b15089f08a Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Sat, 15 Apr 2023 07:29:06 +0530 Subject: [PATCH 29/31] Handle leftover comments --- README.md | 90 +++++++++---------- .../migration/api/CatalogMigrationUtil.java | 13 --- .../migration/api/CatalogMigrator.java | 69 ++++++++------ .../api/AbstractTestCatalogMigrator.java | 30 ++----- .../api/CatalogMigratorParamsTest.java | 11 +++ build.gradle.kts | 24 +++++ buildSrc/src/main/kotlin/CodeCoverage.kt | 26 +++--- .../migration/cli/BaseRegisterCommand.java | 35 ++++---- .../catalog/migration/cli/CLIOptionsTest.java | 2 +- codestyle/copyright-header.txt | 13 +++ settings.gradle.kts | 26 +++--- 11 files changed, 185 insertions(+), 154 deletions(-) create mode 100644 build.gradle.kts create mode 100644 codestyle/copyright-header.txt diff --git a/README.md b/README.md index 4333fc8..bcbe52a 100644 
--- a/README.md +++ b/README.md @@ -157,27 +157,27 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ ```shell java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type GLUE \ ---source-catalog-properties warehouse=s3a://ajantha-test/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \ +--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=s3a://some-other-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,authentication.type=BEARER,authentication.token=$PAT ``` ## Migrate all tables from HIVE catalog to Arctic catalog (main branch) ```shell java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type HIVE \ ---source-catalog-properties warehouse=s3a://ajantha-test/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \ +--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=s3a://some-other-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,authentication.type=BEARER,authentication.token=$PAT ``` ## Migrate all tables from DYNAMODB catalog to Arctic catalog (main branch) ```shell java 
-jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ --source-catalog-type DYNAMODB \ ---source-catalog-properties warehouse=s3a://ajantha-test/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \ +--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \ --target-catalog-type NESSIE \ ---target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT +--target-catalog-properties uri=https://nessie.test1.dremio.site/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=s3a://some-other-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,authentication.type=BEARER,authentication.token=$PAT ``` ## Migrate all tables from JDBC catalog to Arctic catalog (main branch) @@ -225,48 +225,39 @@ java -jar iceberg-catalog-migrator-cli-0.1.0-SNAPSHOT.jar migrate \ After input validation, users will receive a prompt message with the option to either abort or continue the operation. ``` -Configured source catalog: HADOOP - -Configured target catalog: NESSIE - -[WARNING] -a) Executing catalog migration when the source catalog has some in-progress commits -can lead to a data loss as the in-progress commits will not be considered for migration. -So, while using this tool please make sure there are no in-progress commits for the source catalog. - -b) After the migration, successfully migrated tables will be deleted from the source catalog -and can only be accessed from the target catalog. -Are you certain that you wish to proceed, after reading the above warnings? (yes/no): +WARN - User has not specified the table identifiers. Will be selecting all the tables from all the namespaces from the source catalog. 
+INFO - Configured source catalog: SOURCE_CATALOG_HADOOP +INFO - Configured target catalog: TARGET_CATALOG_NESSIE +WARN - + a) Executing catalog migration when the source catalog has some in-progress commits + can lead to a data loss as the in-progress commits will not be considered for migration. + So, while using this tool please make sure there are no in-progress commits for the source catalog. + + b) After the migration, successfully migrated tables will be deleted from the source catalog + and can only be accessed from the target catalog. +INFO - Are you certain that you wish to proceed, after reading the above warnings? (yes/no): ``` If the user chooses to continue, additional information will be displayed on the console. ``` -Continuing... - -User has not specified the table identifiers. Selecting all the tables from all the namespaces from the source catalog. -Collecting all the namespaces from source catalog... -Collecting all the tables from all the namespaces of source catalog... - -Identified 1000 tables for migration. -Started migration ... - -Attempted Migration for 100 tables out of 1000 -Attempted Migration for 200 tables out of 1000 +INFO - Continuing... +INFO - Identifying tables for migration ... +INFO - Identified 1000 tables for migration. +INFO - Started migration ... +INFO - Attempted Migration for 100 tables out of 1000 tables. +INFO - Attempted Migration for 200 tables out of 1000 tables. . . . -Attempted Migration for 900 tables out of 1000 -Attempted Migration for 1000 tables out of 1000 - -Finished migration ... - -Summary: -- Successfully migrated 1000 tables from HADOOP catalog to NESSIE catalog. - -Details: -- Successfully migrated these tables: - [foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3, …, …,bar.tbl-1000] +INFO - Attempted Migration for 900 tables out of 1000 tables. +INFO - Attempted Migration for 1000 tables out of 1000 tables. +INFO - Finished migration ... 
+INFO - Summary: +INFO - Successfully migrated 1000 tables from HADOOP catalog to NESSIE catalog. +INFO - Details: +INFO - Successfully migrated these tables: +[foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3, …, …,bar.tbl-1000] ``` Please note that a log file will be created, which will print "successfully migrated table X" for every table migration, @@ -288,16 +279,15 @@ Console output will be same as B.2) till summary because even in case of failure all the identified tables will be attempted for migration. ``` -Summary: -- Successfully migrated 990 tables from HADOOP catalog to NESSIE catalog. -- Failed to migrate 10 tables from HADOOP catalog to NESSIE catalog. Please check the `catalog_migration.log` file for the failure reason. - Failed Identifiers are written to `failed_identifiers.txt`. Retry with that file using the `--identifiers-from-file` option if the failure is because of network/connection timeouts. - -Details: -- Successfully migrated these tables: - [foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3, …, …,bar.tbl-1000] -- Failed to migrate these tables: - [bar.tbl-201, foo.tbl-202, …, …,bar.tbl-210] +INFO - Summary: +INFO - Successfully migrated 990 tables from HADOOP catalog to NESSIE catalog. +ERROR - Failed to migrate 10 tables from HADOOP catalog to NESSIE catalog. Please check the `catalog_migration.log` file for the failure reason. +Failed Identifiers are written to `failed_identifiers.txt`. Retry with that file using the `--identifiers-from-file` option if the failure is because of network/connection timeouts. +INFO - Details: +INFO - Successfully migrated these tables: +[foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3, …, …,bar.tbl-1000] +ERROR - Failed to migrate these tables: +[bar.tbl-201, foo.tbl-202, …, …,bar.tbl-210] ``` Please note that a log file will be generated, which will print "successfully migrated table X" for every table migration and log any table-level failures in the `failed_identifiers.txt` file. 
diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java index 09a41e1..7c7de1b 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtil.java @@ -54,7 +54,6 @@ public static Catalog buildCatalog( Preconditions.checkArgument(catalogProperties != null, "catalog properties is null"); Preconditions.checkArgument(catalogType != null, "catalog type is null"); Configuration catalogConf = new Configuration(); - addAwsConfigFromEnv(catalogConf); if (hadoopConf != null) { hadoopConf.forEach(catalogConf::set); } @@ -95,16 +94,4 @@ private static String catalogImpl(CatalogType type, String customCatalogImpl) { throw new IllegalArgumentException("Unsupported type: " + type.name()); } } - - private static void addAwsConfigFromEnv(Configuration configuration) { - if (System.getenv("AWS_ACCESS_KEY_ID") != null) { - configuration.set("fs.s3a.access.key", System.getenv("AWS_ACCESS_KEY_ID")); - } - if (System.getenv("AWS_SECRET_ACCESS_KEY") != null) { - configuration.set("fs.s3a.secret.key", System.getenv("AWS_SECRET_ACCESS_KEY")); - } - if (System.getenv("AWS_S3_ENDPOINT") != null) { - configuration.set("fs.s3a.endpoint", System.getenv("AWS_S3_ENDPOINT")); - } - } } diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index 5a38ab0..97b1d0b 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -149,34 +149,47 @@ public CatalogMigrator registerTables(Collection identifiers) { return this; } - identifiers.forEach( - tableIdentifier -> { - boolean 
isRegistered = registerTable(tableIdentifier); - if (isRegistered) { - resultBuilder.addRegisteredTableIdentifiers(tableIdentifier); - } else { - resultBuilder.addFailedToRegisterTableIdentifiers(tableIdentifier); - } + identifiers.forEach(this::registerTable); + return this; + } - try { - if (isRegistered - && deleteEntriesFromSourceCatalog() - && !sourceCatalog().dropTable(tableIdentifier, false)) { - resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); - } - } catch (Exception exception) { - resultBuilder.addFailedToDeleteTableIdentifiers(tableIdentifier); - if (enableStacktrace()) { - LOG.error( - "Failed to delete the table after migration {}", tableIdentifier, exception); - } else { - LOG.error( - "Failed to delete the table after migration {} : {}", - tableIdentifier, - exception.getMessage()); - } - } - }); + /** + * Register or Migrate a single table from one catalog(source catalog) to another catalog(target + * catalog). + * + *

Users must make sure that no in-progress commits on the tables of source catalog during + * registration. + * + * @param identifier table identifier to register or migrate + * @return {@code this} for use in a chained invocation + */ + public CatalogMigrator registerTable(TableIdentifier identifier) { + Preconditions.checkArgument(identifier != null, "Identifier is null"); + + boolean isRegistered = registerTableToTargetCatalog(identifier); + if (isRegistered) { + resultBuilder.addRegisteredTableIdentifiers(identifier); + } else { + resultBuilder.addFailedToRegisterTableIdentifiers(identifier); + } + + try { + if (isRegistered + && deleteEntriesFromSourceCatalog() + && !sourceCatalog().dropTable(identifier, false)) { + resultBuilder.addFailedToDeleteTableIdentifiers(identifier); + } + } catch (Exception exception) { + resultBuilder.addFailedToDeleteTableIdentifiers(identifier); + if (enableStacktrace()) { + LOG.error("Failed to delete the table after migration {}", identifier, exception); + } else { + LOG.error( + "Failed to delete the table after migration {} : {}", + identifier, + exception.getMessage()); + } + } return this; } @@ -214,7 +227,7 @@ protected void getAllNamespacesFromSourceCatalog(Namespace namespace, Set + ImmutableCatalogMigrator.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(false) + .build() + .registerTable(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Identifier is null"); + Assertions.assertThatThrownBy( () -> ImmutableCatalogMigrator.builder() diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 0000000..62c049b --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2023 Dremio + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +plugins { `build-conventions` } + +spotless { + kotlinGradle { + // Must be repeated :( - there's no "addTarget" or so + target("*.gradle.kts", "buildSrc/*.gradle.kts") + } +} diff --git a/buildSrc/src/main/kotlin/CodeCoverage.kt b/buildSrc/src/main/kotlin/CodeCoverage.kt index 808f5f2..accafa4 100644 --- a/buildSrc/src/main/kotlin/CodeCoverage.kt +++ b/buildSrc/src/main/kotlin/CodeCoverage.kt @@ -25,18 +25,18 @@ import org.gradle.testing.jacoco.plugins.JacocoReportAggregationPlugin import org.gradle.testing.jacoco.tasks.JacocoReport class CodeCoveragePlugin : Plugin { - override fun apply(project: Project): Unit = - project.run { - apply() - apply() + override fun apply(project: Project): Unit = + project.run { + apply() + apply() - tasks.withType().configureEach { - reports { - html.required.set(true) - xml.required.set(true) - } - } - - configure { toolVersion = libsRequiredVersion("jacoco") } + tasks.withType().configureEach { + reports { + html.required.set(true) + xml.required.set(true) } -} \ No newline at end of file + } + + configure { toolVersion = libsRequiredVersion("jacoco") } + } +} diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index c0590d2..f5ecb94 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -21,7 +21,6 @@ import 
java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -155,18 +154,18 @@ public Integer call() { CatalogMigrationResult result; try { - List identifiersList = new ArrayList<>(identifiers); - int fromIndex = 0; - while (fromIndex < identifiersList.size()) { - int toIndex = Math.min(fromIndex + BATCH_SIZE, identifiersList.size()); - List identifierBatch = identifiersList.subList(fromIndex, toIndex); - catalogMigrator.registerTables(identifierBatch); - consoleLog.info( - "Attempted {} for {} tables out of {} tables.", - operation(), - toIndex, - identifiersList.size()); - fromIndex += BATCH_SIZE; + int processedIdentifiersCount = 0; + for (TableIdentifier identifier : identifiers) { + catalogMigrator.registerTable(identifier); + processedIdentifiersCount++; + if (processedIdentifiersCount % BATCH_SIZE == 0 + || processedIdentifiersCount == identifiers.size()) { + consoleLog.info( + "Attempted {} for {} tables out of {} tables.", + operation(), + processedIdentifiersCount, + identifiers.size()); + } } } finally { consoleLog.info("Finished {} ...", operation()); @@ -215,8 +214,14 @@ private void checkAndWarnAboutIdentifiers( } private void validateOutputDir() { - Preconditions.checkArgument( - Files.exists(outputDirPath), "Path specified in `--output-dir` does not exist"); + if (!Files.exists(outputDirPath)) { + try { + Files.createDirectories(outputDirPath); + } catch (IOException ex) { + throw new UncheckedIOException( + "Failed to create the output directory from the path specified in `--output-dir`", ex); + } + } Preconditions.checkArgument( Files.isWritable(outputDirPath), "Path specified in `--output-dir` is not writable"); } diff --git a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java index 
ccd6d0b..2c55bba 100644 --- a/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java +++ b/cli/src/test/java/org/projectnessie/tools/catalog/migration/cli/CLIOptionsTest.java @@ -196,7 +196,7 @@ private static Stream invalidArgs() { "k3=v3, k4=v4", "--output-dir", "/path/to/file"), - "Error during CLI execution: Path specified in `--output-dir` does not exist"), + "Error during CLI execution: Failed to create the output directory from the path specified in `--output-dir`"), arguments( Lists.newArrayList( "--source-catalog-type", diff --git a/codestyle/copyright-header.txt b/codestyle/copyright-header.txt new file mode 100644 index 0000000..9a795b8 --- /dev/null +++ b/codestyle/copyright-header.txt @@ -0,0 +1,13 @@ +Copyright (C) $today.year Dremio + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/settings.gradle.kts b/settings.gradle.kts index 71b23b6..045c94e 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -19,22 +19,26 @@ val baseVersion = file("version.txt").readText().trim() rootProject.name = "iceberg-catalog-migrator" gradle.beforeProject { - group = "org.projectnessie.tools.catalog.migration" - version = baseVersion - description = - when (name) { - "api" -> "Iceberg catalog migrator - api implementation" - "api-test" -> "Iceberg catalog migrator - common test implementation" - "cli" -> "Iceberg catalog migrator - CLI implementation" - else -> name - } + group = "org.projectnessie.tools.catalog.migration" + version = baseVersion + description = + when (name) { + "api" -> "Iceberg catalog migrator - api implementation" + "api-test" -> "Iceberg catalog migrator - common test implementation" + "cli" -> "Iceberg catalog migrator - CLI implementation" + else -> name + } } fun catalogMigratorProject(name: String) { - include("iceberg-catalog-migrator-$name") - project(":iceberg-catalog-migrator-$name").projectDir = file(name) + include("iceberg-catalog-migrator-$name") + project(":iceberg-catalog-migrator-$name").projectDir = file(name) } catalogMigratorProject("api") + catalogMigratorProject("api-test") + catalogMigratorProject("cli") + +catalogMigratorProject("bom") From b70dab8ea15adfbdb2f4afcaadd03258c96093f6 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Sat, 15 Apr 2023 17:21:04 +0530 Subject: [PATCH 30/31] Address comments from April 15 --- README.md | 13 +++++++------ .../catalog/migration/api/test/AbstractTest.java | 8 +++++--- .../catalog/migration/api/CatalogMigrator.java | 16 ++++++++-------- .../migration/api/CatalogMigrationUtilTest.java | 16 +++++++--------- .../migration/api/UnsupportedNamespaceTest.java | 15 +++++++-------- cli/build.gradle.kts | 2 -- .../migration/cli/SourceCatalogOptions.java | 5 ++--- .../migration/cli/TargetCatalogOptions.java | 5 ++--- gradle/libs.versions.toml | 4 ---- 9 files changed, 
38 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index bcbe52a..2709b99 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,9 @@ Which means the table will be present in both the catalogs after registering. So, it is recommended to use the 'migrate' command in CLI to automatically delete the table from source catalog after registering or avoid operating tables from the source catalog after registering if 'migrate' command is not used.** -> :warning: **It is recommended to use this CLI tool when there is no in-progress commits for the tables in the source catalog. -In-progress commits may not make it into the target catalog if used. Which can lead to missing updates, loss of data and table corruption.** +> :warning: **Avoid using this CLI tool when there are in-progress commits for tables in the source catalog +to prevent missing updates, data loss and table corruption in the target catalog. +In-progress commits may not be properly transferred and could compromise the integrity of your data.** # Iceberg-catalog-migrator Need to have Java installed in your machine(JDK11 is recommended) to use this CLI tool. @@ -70,8 +71,8 @@ Source catalog options: Iceberg catalog properties for source catalog (like uri, warehouse, etc). Example: --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie --source-catalog-hadoop-conf=[,...] - Optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. - Example: --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY + Optional source catalog Hadoop configurations required by the Iceberg catalog. + Example: --source-catalog-hadoop-conf key1=value1,key2=value2 --source-custom-catalog-impl= Optional fully qualified class name of the custom catalog implementation of the source catalog. Required when the catalog type is CUSTOM. 
@@ -85,8 +86,8 @@ Target catalog options: Iceberg catalog properties for target catalog (like uri, warehouse, etc). Example: --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie --target-catalog-hadoop-conf=[,...] - Optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when using an Iceberg FileIO. - Example: --target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY + Optional target catalog Hadoop configurations required by the Iceberg catalog. + Example: --target-catalog-hadoop-conf key1=value1,key2=value2 --target-custom-catalog-impl= Optional fully qualified class name of the custom catalog implementation of the target catalog. Required when the catalog type is CUSTOM. diff --git a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java index d31ac7a..0ca13a8 100644 --- a/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java +++ b/api-test/src/main/java/org/projectnessie/tools/catalog/migration/api/test/AbstractTest.java @@ -35,8 +35,6 @@ public abstract class AbstractTest { - protected static @TempDir Path tempDir; - public static final Namespace FOO = Namespace.of("foo"); public static final Namespace BAR = Namespace.of("bar"); public static final Namespace DB1 = Namespace.of("db1"); @@ -64,9 +62,13 @@ public abstract class AbstractTest { new Schema( Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())).fields()); + protected static @TempDir Path logDir; + + protected static @TempDir Path tempDir; + @BeforeAll protected static void initLogDir() { - System.setProperty("catalog.migration.log.dir", tempDir.toAbsolutePath().toString()); + System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); sourceCatalogWarehouse = 
tempDir.resolve("sourceCatalogWarehouse").toAbsolutePath().toString(); targetCatalogWarehouse = tempDir.resolve("targetCatalogWarehouse").toAbsolutePath().toString(); } diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index 97b1d0b..6bb0189 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -66,6 +66,13 @@ void check() { targetCatalog().name())); } + if (!(sourceCatalog() instanceof SupportsNamespaces)) { + throw new UnsupportedOperationException( + String.format( + "source catalog %s doesn't implement SupportsNamespaces to list all namespaces.", + sourceCatalog().name())); + } + if (deleteEntriesFromSourceCatalog() && sourceCatalog() instanceof HadoopCatalog) { throw new UnsupportedOperationException( "Source catalog is a Hadoop catalog and it doesn't support deleting the table entries just from the catalog. Please configure `deleteEntriesFromSourceCatalog` as `false`"); @@ -86,13 +93,6 @@ void check() { * @return Set of table identifiers. 
*/ public Set getMatchingTableIdentifiers(String identifierRegex) { - Catalog sourceCatalog = sourceCatalog(); - if (!(sourceCatalog instanceof SupportsNamespaces)) { - throw new UnsupportedOperationException( - String.format( - "source catalog %s doesn't implement SupportsNamespaces to list all namespaces.", - sourceCatalog.name())); - } LOG.info("Collecting all the namespaces from source catalog..."); Set namespaces = new LinkedHashSet<>(); getAllNamespacesFromSourceCatalog(Namespace.empty(), namespaces); @@ -114,7 +114,7 @@ public Set getMatchingTableIdentifiers(String identifierRegex) .flatMap( namespace -> { try { - return sourceCatalog.listTables(namespace).stream() + return sourceCatalog().listTables(namespace).stream() .filter(matchedIdentifiersPredicate); } catch (IllegalArgumentException | NoSuchNamespaceException exception) { if (namespace.isEmpty()) { diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java index 4b07e06..5f228f9 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrationUtilTest.java @@ -21,7 +21,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.UUID; import java.util.stream.Stream; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; @@ -39,11 +38,13 @@ public class CatalogMigrationUtilTest { - protected static @TempDir Path tempDir; + private static @TempDir Path logDir; + + private static @TempDir Path tempDir; @BeforeAll protected static void initLogDir() { - System.setProperty("catalog.migration.log.dir", tempDir.toAbsolutePath().toString()); + System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); } static Stream blankOrNullStrings() { @@ -94,8 +95,7 @@ public 
void testInvalidArgs() { @Test public void testBuildHadoopCatalog() throws Exception { Map properties = new HashMap<>(); - properties.put( - "warehouse", tempDir.resolve(UUID.randomUUID().toString()).toAbsolutePath().toString()); + properties.put("warehouse", tempDir.toAbsolutePath().toString()); properties.put("type", "hadoop"); Map conf = new HashMap<>(); @@ -126,8 +126,7 @@ public void testBuildHadoopCatalog() throws Exception { @Test public void testBuildNessieCatalog() throws Exception { Map properties = new HashMap<>(); - properties.put( - "warehouse", tempDir.resolve(UUID.randomUUID().toString()).toAbsolutePath().toString()); + properties.put("warehouse", tempDir.toAbsolutePath().toString()); properties.put("ref", "main"); properties.put("uri", "http://localhost:19120/api/v1"); @@ -148,8 +147,7 @@ public void testBuildNessieCatalog() throws Exception { @Test public void testBuildHiveCatalog() throws Exception { Map properties = new HashMap<>(); - properties.put( - "warehouse", tempDir.resolve(UUID.randomUUID().toString()).toAbsolutePath().toString()); + properties.put("warehouse", tempDir.toAbsolutePath().toString()); properties.put("type", "hive"); properties.put("uri", "thrift://localhost:9083"); diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java index 1ceaa8b..0926150 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/UnsupportedNamespaceTest.java @@ -80,14 +80,13 @@ public void renameTable(TableIdentifier from, TableIdentifier to) {} .hasMessageContaining( "target catalog TestCatalog{} doesn't implement SupportsNamespaces to create missing namespaces."); - CatalogMigrator catalogMigrator = - ImmutableCatalogMigrator.builder() - .sourceCatalog(sourceCatalog) - .targetCatalog(new 
HadoopCatalog()) - .deleteEntriesFromSourceCatalog(false) - .build(); - - Assertions.assertThatThrownBy(() -> catalogMigrator.getMatchingTableIdentifiers(null)) + Assertions.assertThatThrownBy( + () -> + ImmutableCatalogMigrator.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(new HadoopCatalog()) + .deleteEntriesFromSourceCatalog(false) + .build()) .isInstanceOf(UnsupportedOperationException.class) .hasMessageContaining( "source catalog TestCatalog{} doesn't implement SupportsNamespaces to list all namespaces."); diff --git a/cli/build.gradle.kts b/cli/build.gradle.kts index 05c09c5..3b125f8 100644 --- a/cli/build.gradle.kts +++ b/cli/build.gradle.kts @@ -35,8 +35,6 @@ dependencies { implementation(libs.picocli) implementation(libs.iceberg.spark.runtime) implementation(libs.hadoop.aws) { exclude("com.amazonaws", "aws-java-sdk-bundle") } - runtimeOnly(libs.ecs.bundle) - runtimeOnly(libs.mysql.driver) // AWS dependencies based on https://iceberg.apache.org/docs/latest/aws/#enabling-aws-integration runtimeOnly(libs.aws.sdk.apache.client) runtimeOnly(libs.aws.sdk.auth) diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java index 4c4fc5f..6c53bc6 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/SourceCatalogOptions.java @@ -48,9 +48,8 @@ public class SourceCatalogOptions { names = "--source-catalog-hadoop-conf", split = ",", description = { - "Optional source catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " - + "using an Iceberg FileIO.", - "Example: --source-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY" + "Optional source catalog Hadoop configurations required by the Iceberg catalog.", + "Example: --source-catalog-hadoop-conf 
key1=value1,key2=value2" }) private final Map hadoopConf = new HashMap<>(); diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java index 24e6378..d0c84a3 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/TargetCatalogOptions.java @@ -48,9 +48,8 @@ public class TargetCatalogOptions { names = "--target-catalog-hadoop-conf", split = ",", description = { - "Optional target catalog Hadoop configurations (like fs.s3a.secret.key, fs.s3a.access.key) required when " - + "using an Iceberg FileIO.", - "Example: --target-catalog-hadoop-conf fs.s3a.secret.key=$SECRETKEY,fs.s3a.access.key=$ACCESSKEY" + "Optional target catalog Hadoop configurations required by the Iceberg catalog.", + "Example: --target-catalog-hadoop-conf key1=value1,key2=value2" }) private final Map hadoopConf = new HashMap<>(); diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index e11224b..e775465 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -2,7 +2,6 @@ assertj = "3.24.2" aws = "2.20.18" # this is in mapping with iceberg repo. 
checkstyle = "10.9.3" -ecs = "3.3.2" errorprone = "2.18.0" errorproneSlf4j = "0.1.18" googleJavaFormat = "1.16.0" @@ -16,7 +15,6 @@ jandex = "3.1.0" junit = "5.9.2" logback = "1.4.5" logcaptor = "2.8.0" -mysqlDriver = "8.0.32" nessie = "0.57.0" nessieRunner = "0.29.0" picocli = "4.7.2" @@ -38,7 +36,6 @@ checkstyle = { module = "com.puppycrawl.tools:checkstyle", version.ref = "checks errorprone-annotations = { module = "com.google.errorprone:error_prone_annotations", version.ref = "errorprone" } errorprone-core = { module = "com.google.errorprone:error_prone_core", version.ref = "errorprone" } errorprone-slf4j = { module = "jp.skypencil.errorprone.slf4j:errorprone-slf4j", version.ref = "errorproneSlf4j" } -ecs-bundle = { module = "com.emc.ecs:object-client-bundle", version.ref = "ecs" } findbugs-annotations = { module = "com.google.code.findbugs:annotations", version = "3.0.1" } findbugs-jsr305 = { module = "com.google.code.findbugs:jsr305", version = "3.0.2" } google-java-format = { module = "com.google.googlejavaformat:google-java-format", version.ref = "googleJavaFormat" } @@ -60,7 +57,6 @@ junit-jupiter-engine = { module = "org.junit.jupiter:junit-jupiter-engine" } junit-jupiter-params = { module = "org.junit.jupiter:junit-jupiter-params" } logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } logcaptor = { module = "io.github.hakky54:logcaptor", version.ref = "logcaptor" } -mysql-driver = { module = "mysql:mysql-connector-java", version.ref = "mysqlDriver" } picocli = { module = "info.picocli:picocli", version.ref = "picocli" } slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" } From 3c3e40e93882141c6331de5125cd6f8cb636fb19 Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Mon, 17 Apr 2023 15:38:08 +0530 Subject: [PATCH 31/31] Handle comments from April 17 --- .../migration/api/CatalogMigrator.java | 26 ++------------- .../api/AbstractTestCatalogMigrator.java | 32 ++++--------------- 
.../api/CatalogMigratorParamsTest.java | 15 +-------- .../api/CustomCatalogMigratorTest.java | 4 +-- .../api/HadoopCatalogMigratorTest.java | 13 ++++---- .../api/ITHadoopToHiveCatalogMigrator.java | 4 +-- .../api/ITHadoopToNessieCatalogMigrator.java | 13 ++++---- .../api/ITNessieToHiveCatalogMigrator.java | 8 ++--- .../migration/cli/BaseRegisterCommand.java | 3 -- 9 files changed, 28 insertions(+), 90 deletions(-) diff --git a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java index 6bb0189..30b81f9 100644 --- a/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java +++ b/api/src/main/java/org/projectnessie/tools/catalog/migration/api/CatalogMigrator.java @@ -17,7 +17,6 @@ import com.google.common.base.Preconditions; import java.util.Arrays; -import java.util.Collection; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; @@ -118,10 +117,10 @@ public Set getMatchingTableIdentifiers(String identifierRegex) .filter(matchedIdentifiersPredicate); } catch (IllegalArgumentException | NoSuchNamespaceException exception) { if (namespace.isEmpty()) { - // some catalogs don't support default namespace. + // some catalogs don't support empty namespace. // Hence, just log the warning and ignore the exception. LOG.warn( - "Failed to identify tables from default namespace: {}", + "Failed to identify tables from empty namespace : {}", exception.getMessage()); return Stream.empty(); } else { @@ -132,27 +131,6 @@ public Set getMatchingTableIdentifiers(String identifierRegex) .collect(Collectors.toCollection(LinkedHashSet::new)); } - /** - * Register or Migrate tables from one catalog(source catalog) to another catalog(target catalog). - * - *

Users must make sure that no in-progress commits on the tables of source catalog during - * registration. - * - * @param identifiers collection of table identifiers to register or migrate - * @return {@code this} for use in a chained invocation - */ - public CatalogMigrator registerTables(Collection identifiers) { - Preconditions.checkArgument(identifiers != null, "Identifiers list is null"); - - if (identifiers.isEmpty()) { - LOG.warn("Identifiers list is empty"); - return this; - } - - identifiers.forEach(this::registerTable); - return this; - } - /** * Register or Migrate a single table from one catalog(source catalog) to another catalog(target * catalog). diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java index 5987dee..cecc3c8 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/AbstractTestCatalogMigrator.java @@ -157,10 +157,10 @@ public void testRegisterSelectedTables(boolean deleteSourceTables) { // using --identifiers-regex option which matches all the tables starts with "foo." 
CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); - result = - catalogMigrator - .registerTables(catalogMigrator.getMatchingTableIdentifiers("^foo\\..*")) - .result(); + catalogMigrator + .getMatchingTableIdentifiers("^foo\\..*") + .forEach(catalogMigrator::registerTable); + result = catalogMigrator.result(); Assertions.assertThat(result.registeredTableIdentifiers()) .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); @@ -226,25 +226,6 @@ public void testRegisterWithFewFailures(boolean deleteSourceTables) { .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); } - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testRegisterNoTables(boolean deleteSourceTables) { - validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); - - // clean up the default tables present in the source catalog. - dropTables(); - - CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); - Set matchingTableIdentifiers = - catalogMigrator.getMatchingTableIdentifiers(null); - Assertions.assertThat(matchingTableIdentifiers).isEmpty(); - CatalogMigrationResult result = - catalogMigrator.registerTables(matchingTableIdentifiers).result(); - Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); - Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - } - @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) { @@ -355,8 +336,7 @@ protected CatalogMigrator catalogMigratorWithDefaultArgs(boolean deleteSourceTab private CatalogMigrationResult registerAllTables(boolean deleteSourceTables) { CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables); - return catalogMigrator - 
.registerTables(catalogMigrator.getMatchingTableIdentifiers(null)) - .result(); + catalogMigrator.getMatchingTableIdentifiers(null).forEach(catalogMigrator::registerTable); + return catalogMigrator.result(); } } diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java index 2e70868..8baac56 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CatalogMigratorParamsTest.java @@ -15,7 +15,6 @@ */ package org.projectnessie.tools.catalog.migration.api; -import java.util.Collections; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.hadoop.HadoopCatalog; @@ -40,17 +39,6 @@ public void testInvalidArgs() { hadoopCatalogProperties(true), new Configuration()); - Assertions.assertThatThrownBy( - () -> - ImmutableCatalogMigrator.builder() - .sourceCatalog(sourceCatalog) - .targetCatalog(targetCatalog) - .deleteEntriesFromSourceCatalog(false) - .build() - .registerTables(null)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("Identifiers list is null"); - Assertions.assertThatThrownBy( () -> ImmutableCatalogMigrator.builder() @@ -89,8 +77,7 @@ public void testInvalidArgs() { .sourceCatalog(sourceCatalog) .targetCatalog(targetCatalog) .deleteEntriesFromSourceCatalog(true) - .build() - .registerTables(Collections.emptyList())) + .build()) .isInstanceOf(UnsupportedOperationException.class) .hasMessageContaining( "Source catalog is a Hadoop catalog and it doesn't support deleting the table entries just from the catalog. 
" diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java index 542d845..531fce3 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/CustomCatalogMigratorTest.java @@ -61,8 +61,8 @@ public void testRegister() { .deleteEntriesFromSourceCatalog(false) .build(); // should fail to register as catalog doesn't support register table operations. - CatalogMigrationResult result = - catalogMigrator.registerTables(catalogMigrator.getMatchingTableIdentifiers(null)).result(); + catalogMigrator.getMatchingTableIdentifiers(null).forEach(catalogMigrator::registerTable); + CatalogMigrationResult result = catalogMigrator.result(); Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToRegisterTableIdentifiers()) .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2, BAR_TBL3, BAR_TBL4); diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java index 21d72b7..2c33f3a 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/HadoopCatalogMigratorTest.java @@ -49,23 +49,22 @@ public void testRegisterWithNewNestedNamespaces() { Set matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); // HadoopCatalog supports implicit namespaces. - // Hence, No concept of default namespace too. So, cannot list the tables from default + // Hence, No concept of empty namespace too. So, cannot list the tables from empty // namespaces. - // Can only load tables in default namespace using identifiers. 
+ // Can only load tables in empty namespace using identifiers. Assertions.assertThat(matchingTableIdentifiers) .containsAll(identifiers.subList(1, 7)); // without "tblz" Assertions.assertThat(matchingTableIdentifiers).doesNotContain(identifiers.get(0)); - CatalogMigrationResult result = - catalogMigrator.registerTables(matchingTableIdentifiers).result(); + matchingTableIdentifiers.forEach(catalogMigrator::registerTable); + CatalogMigrationResult result = catalogMigrator.result(); Assertions.assertThat(result.registeredTableIdentifiers()) .containsAll(identifiers.subList(1, 7)); // without "tblz" Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - // manually register the table from default namespace - catalogMigrator = catalogMigratorWithDefaultArgs(false); - result = catalogMigrator.registerTables(Collections.singletonList(TBL)).result(); + // manually register the table from empty namespace + result = catalogMigratorWithDefaultArgs(false).registerTable(TBL).result(); Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(TBL); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java index 1f3c421..49d7b1f 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToHiveCatalogMigrator.java @@ -51,9 +51,7 @@ public void testRegisterWithNewNestedNamespace() { sourceCatalog.createTable(tableIdentifier, schema); CatalogMigrationResult result = - catalogMigratorWithDefaultArgs(false) - 
.registerTables(Collections.singletonList(tableIdentifier)) - .result(); + catalogMigratorWithDefaultArgs(false).registerTable(tableIdentifier).result(); // hive catalog doesn't support multipart namespace. Hence, table should fail to register. Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty(); diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java index b4af31d..292e213 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITHadoopToNessieCatalogMigrator.java @@ -58,23 +58,22 @@ public void testRegisterWithNewNestedNamespaces() { Set matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); // HadoopCatalog supports implicit namespaces. - // Hence, No concept of default namespace too. So, cannot list the tables from default + // Hence, No concept of empty namespace too. So, cannot list the tables from empty // namespaces. - // Can only load tables in default namespace using identifiers. + // Can only load tables in empty namespace using identifiers. 
Assertions.assertThat(matchingTableIdentifiers) .containsAll(identifiers.subList(1, 7)); // without "tblz" Assertions.assertThat(matchingTableIdentifiers).doesNotContain(identifiers.get(0)); - CatalogMigrationResult result = - catalogMigrator.registerTables(matchingTableIdentifiers).result(); + matchingTableIdentifiers.forEach(catalogMigrator::registerTable); + CatalogMigrationResult result = catalogMigrator.result(); Assertions.assertThat(result.registeredTableIdentifiers()) .containsAll(identifiers.subList(1, 7)); // without "tblz" Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); - // manually register the table from default namespace - catalogMigrator = catalogMigratorWithDefaultArgs(false); - result = catalogMigrator.registerTables(Collections.singletonList(TBL)).result(); + // manually register the table from empty namespace + result = catalogMigratorWithDefaultArgs(false).registerTable(TBL).result(); Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(TBL); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty(); Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty(); diff --git a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java index 8cbeaee..eca36e5 100644 --- a/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java +++ b/api/src/test/java/org/projectnessie/tools/catalog/migration/api/ITNessieToHiveCatalogMigrator.java @@ -48,14 +48,14 @@ public void testRegisterWithDefaultNamespace() { sourceCatalog.createTable(TBL, schema); CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(false); - // should also include table from default namespace + // should also include table from empty namespace Set 
matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers(null); Assertions.assertThat(matchingTableIdentifiers).contains(TBL); - CatalogMigrationResult result = - catalogMigrator.registerTables(matchingTableIdentifiers).result(); - // hive will not support default namespace (namespace with level = 0). Hence, register will + matchingTableIdentifiers.forEach(catalogMigrator::registerTable); + CatalogMigrationResult result = catalogMigrator.result(); + // hive will not support empty namespace (namespace with level = 0). Hence, register will // fail. Assertions.assertThat(result.registeredTableIdentifiers()).doesNotContain(TBL); Assertions.assertThat(result.failedToRegisterTableIdentifiers()).containsExactly(TBL); diff --git a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java index f5ecb94..b678baf 100644 --- a/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java +++ b/cli/src/main/java/org/projectnessie/tools/catalog/migration/cli/BaseRegisterCommand.java @@ -326,9 +326,6 @@ private void printDryRunResult(Set result) { } private static void writeToFile(Path filePath, Collection identifiers) { - if (identifiers.isEmpty()) { - return; - } List identifiersString = identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); try {