diff --git a/.github/renovate.json5 b/.github/renovate.json5
new file mode 100644
index 00000000..398468ea
--- /dev/null
+++ b/.github/renovate.json5
@@ -0,0 +1,42 @@
+{
+  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
+  extends: ["config:base"],
+
+  labels: ["dependencies"],
+
+  packageRules: [
+    // Check for minor/patch updates and merge them automatically
+    {
+      matchManagers: ["maven", "gradle", "gradle-wrapper"],
+      matchUpdateTypes: ["minor", "patch"],
+      automerge: true,
+      platformAutomerge: true,
+    },
+
+    // Special rule for Iceberg and related artifacts
+    {
+      matchManagers: ["maven", "gradle"],
+      matchUpdateTypes: ["minor", "patch"],
+      matchPackagePatterns: [
+        "^org[.]apache[.]hadoop:hadoop-aws$",
+        "^org[.]apache[.]hadoop:hadoop-common$",
+        "^org[.]apache[.]hive:.*$",
+        "^org[.]apache[.]iceberg:.*",
+        "^software[.]amazon[.]awssdk:.*",
+      ],
+      automerge: false,
+      prBodyNotes: [":warning: Only update org.apache.iceberg together with org.apache.hadoop and software.amazon.awssdk :warning:"]
+    },
+
+    // Check for major updates, but do not merge automatically
+    {
+      matchManagers: ["maven", "gradle", "gradle-wrapper"],
+      matchUpdateTypes: ["major"],
+      automerge: false,
+    },
+  ],
+
+  // Max 50 PRs in total, 10 per hour
+  prConcurrentLimit: 50,
+  prHourlyLimit: 10,
+}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00000000..dbe9de76
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: CI
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+
+jobs:
+  java:
+    name: Java/Gradle
+    runs-on: ubuntu-24.04
+    strategy:
+      max-parallel: 4
+      matrix:
+        java-version: [21, 23]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: 'true'
+
+      - name: Set up JDK
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: |
+            21
+            ${{ matrix.java-version != '21' && matrix.java-version || '' }}
+
+      - name: Setup Gradle
+        uses: gradle/actions/setup-gradle@v4
+
+      - name: Build & Check
+        run: ./gradlew --rerun-tasks assemble ${{ env.ADDITIONAL_GRADLE_OPTS }} check publishToMavenLocal
+
+      - name: Capture test results
+        uses: actions/upload-artifact@v4
+        if: failure()
+        with:
+          name: test-results
+          path: |
+            **/build/reports/*
+            **/build/test-results/*
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..a893541d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,74 @@
+
+### Java ###
+# Compiled class file
+*.class
+
+# Log file
+*.log
+
+# BlueJ files
+*.ctxt
+
+# Mobile Tools for Java (J2ME)
+.mtj.tmp/
+
+# Package Files #
+*.jar
+*.war
+*.nar
+*.ear
+*.zip
+*.tar.gz
+*.rar
+
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+
+# misc
+target/
+dependency-reduced-pom.xml
+*.patch
+*.DS_Store
+.DS_Store
+
+# intellij
+*.iml
+.idea
+*.ipr
+*.iws
+
+# vscode
+.vscode
+
+# node
+node_modules/
+ui/src/generated/
+
+# Eclipse IDE
+.classpath
+.factorypath
+.project
+.settings
+.checkstyle
+out/
+
+# gradle
+.gradle/
+build/
+gradle/wrapper/gradle-wrapper.jar
+version.txt
+
+# Python venv
+venv/
+
+# Maven flatten plugin
+.flattened-pom.xml
+
+# Site
+site/site
+
+# Ignore Gradle project-specific cache directory
+.gradle
+
+# Ignore Gradle build output directory
+build
diff --git a/LICENSE b/LICENSE
new file mode 100755
index 00000000..9501cd25
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,215 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+--------------------------------------------------------------------------------
+
+This product includes the Gradle wrapper.
+
+* gradlew
+* gradle/wrapper/gradle-wrapper.properties
+
+Copyright: 2010-2019 Gradle Authors.
+Home page: https://github.com/gradle/gradle
+License: https://www.apache.org/licenses/LICENSE-2.0
+
+--------------------------------------------------------------------------------
\ No newline at end of file
diff --git a/NOTICE b/NOTICE
new file mode 100755
index 00000000..f5c840ea
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,10 @@
+Apache Polaris (incubating)
+Copyright 2025 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+The initial code for the iceberg catalog migrator project was donated
+to the ASF by Dremio Corporation (https://www.dremio.com/), copyright 2022.
+
+-------------------------------------------------------------------------
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..930b47b5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,6 @@
+This repository contains tools developed by the [Apache Polaris community](https://polaris.apache.org/).
+
+Read more about the following tools:
+
+1. Iceberg catalog migrator: [README.md](/iceberg-catalog-migrator/README.md)
+2. Polaris Synchronizer/Migrator: [README.md](/polaris-synchronizer/README.md)
\ No newline at end of file
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000..da74663a
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,30 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# Reporting Security Issues
+
+The Apache Iceberg Project uses the standard process outlined by the [Apache
+Security Team](https://www.apache.org/security/) for reporting vulnerabilities.
+Note that vulnerabilities should not be publicly disclosed until the project has
+responded.
+
+To report a possible security vulnerability, please email security@iceberg.apache.org.
+
+# Verifying Signed Releases
+
+Please refer to the instructions on the [Release Verification](https://www.apache.org/info/verification.html) page.
diff --git a/build.gradle.kts b/build.gradle.kts
new file mode 100644
index 00000000..fbc0783c
--- /dev/null
+++ b/build.gradle.kts
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+plugins {
+  `maven-publish`
+  signing
+  `build-conventions`
+}
+
+spotless {
+  kotlinGradle {
+    // Must be repeated here - there is no "addTarget" or similar way to extend the default target
+    target("*.gradle.kts", "buildSrc/*.gradle.kts")
+  }
+}
diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts
new file mode 100644
index 00000000..96798761
--- /dev/null
+++ b/buildSrc/build.gradle.kts
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+plugins { `kotlin-dsl` }
+
+repositories {
+  mavenCentral()
+  gradlePluginPortal()
+  if (System.getProperty("withMavenLocal").toBoolean()) {
+    mavenLocal()
+  }
+}
+
+dependencies {
+  implementation(gradleKotlinDsl())
+  implementation(baselibs.spotless)
+  implementation(baselibs.jandex)
+  implementation(baselibs.idea.ext)
+  implementation(baselibs.shadow)
+  implementation(baselibs.errorprone)
+}
+
+java {
+  sourceCompatibility = JavaVersion.VERSION_21
+  targetCompatibility = JavaVersion.VERSION_21
+}
+
+kotlinDslPluginOptions { jvmTarget.set(JavaVersion.VERSION_21.toString()) }
+
+tasks.withType<Test>().configureEach { useJUnitPlatform() }
diff --git a/buildSrc/settings.gradle.kts b/buildSrc/settings.gradle.kts
new file mode 100644
index 00000000..14085355
--- /dev/null
+++ b/buildSrc/settings.gradle.kts
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+dependencyResolutionManagement {
+  versionCatalogs { create("baselibs") { from(files("../gradle/baselibs.versions.toml")) } }
+}
diff --git a/buildSrc/src/main/kotlin/Checkstyle.kt b/buildSrc/src/main/kotlin/Checkstyle.kt
new file mode 100644
index 00000000..8d893cf4
--- /dev/null
+++ b/buildSrc/src/main/kotlin/Checkstyle.kt
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.gradle.api.Project
+import org.gradle.api.plugins.quality.Checkstyle
+import org.gradle.api.plugins.quality.CheckstyleExtension
+import org.gradle.api.plugins.quality.CheckstylePlugin
+import org.gradle.kotlin.dsl.apply
+import org.gradle.kotlin.dsl.configure
+import org.gradle.kotlin.dsl.withType
+
+fun Project.configureCheckstyle() {
+  apply<CheckstylePlugin>()
+  configure<CheckstyleExtension> {
+    toolVersion = libsRequiredVersion("checkstyle")
+    config = resources.text.fromFile(rootProject.file("codestyle/checkstyle-config.xml"))
+    isShowViolations = true
+    isIgnoreFailures = false
+  }
+
+  tasks.withType<Checkstyle>().configureEach {
+    when (name) {
+      "checkstyleMain" -> dependsOn(tasks.named("processJandexIndex"))
+      "checkstyleTest" -> dependsOn(tasks.named("processTestJandexIndex"))
+      else -> {}
+    }
+    maxWarnings = 0 // treats warnings as errors
+  }
+}
diff --git a/buildSrc/src/main/kotlin/CodeCoverage.kt b/buildSrc/src/main/kotlin/CodeCoverage.kt
new file mode 100644
index 00000000..26eb6317
--- /dev/null
+++ b/buildSrc/src/main/kotlin/CodeCoverage.kt
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.gradle.api.Plugin
+import org.gradle.api.Project
+import org.gradle.kotlin.dsl.apply
+import org.gradle.kotlin.dsl.configure
+import org.gradle.kotlin.dsl.withType
+import org.gradle.testing.jacoco.plugins.JacocoPlugin
+import org.gradle.testing.jacoco.plugins.JacocoPluginExtension
+import org.gradle.testing.jacoco.plugins.JacocoReportAggregationPlugin
+import org.gradle.testing.jacoco.tasks.JacocoReport
+
+class CodeCoveragePlugin : Plugin<Project> {
+  override fun apply(project: Project): Unit =
+    project.run {
+      apply<JacocoPlugin>()
+      apply<JacocoReportAggregationPlugin>()
+
+      tasks.withType<JacocoReport>().configureEach {
+        reports {
+          html.required.set(true)
+          xml.required.set(true)
+        }
+      }
+
+      configure<JacocoPluginExtension> { toolVersion = libsRequiredVersion("jacoco") }
+    }
+}
diff --git a/buildSrc/src/main/kotlin/Errorprone.kt b/buildSrc/src/main/kotlin/Errorprone.kt
new file mode 100644
index 00000000..3220102e
--- /dev/null
+++ b/buildSrc/src/main/kotlin/Errorprone.kt
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.Properties
+import kotlin.collections.HashMap
+import net.ltgt.gradle.errorprone.CheckSeverity
+import net.ltgt.gradle.errorprone.ErrorPronePlugin
+import net.ltgt.gradle.errorprone.errorprone
+import org.gradle.api.Project
+import org.gradle.api.plugins.JavaPlugin
+import org.gradle.api.plugins.JavaPluginExtension
+import org.gradle.api.tasks.PathSensitivity
+import org.gradle.api.tasks.compile.JavaCompile
+import org.gradle.kotlin.dsl.apply
+import org.gradle.kotlin.dsl.configure
+import org.gradle.kotlin.dsl.dependencies
+import org.gradle.kotlin.dsl.withType
+
+fun Project.configureErrorprone() {
+  apply<ErrorPronePlugin>()
+  tasks.withType<JavaCompile>().configureEach {
+    options.errorprone.disableWarningsInGeneratedCode.set(true)
+
+    val errorproneRules = rootProject.projectDir.resolve("codestyle/errorprone-rules.properties")
+    inputs.file(errorproneRules).withPathSensitivity(PathSensitivity.RELATIVE)
+
+    val checksMapProperty =
+      objects
+        .mapProperty(String::class.java, CheckSeverity::class.java)
+        .convention(
+          provider {
+            val checksMap = HashMap<String, CheckSeverity>()
+            errorproneRules.reader().use {
+              val rules = Properties()
+              rules.load(it)
+              rules.forEach { k, v ->
+                val key = k as String
+                val value = v as String
+                if (key.isNotEmpty() && value.isNotEmpty()) {
+                  checksMap[key.trim()] = CheckSeverity.valueOf(value.trim())
+                }
+              }
+            }
+            checksMap
+          }
+        )
+
+    options.errorprone.checks.putAll(checksMapProperty)
+    options.errorprone.excludedPaths.set(".*/build/generated.*")
+  }
+  plugins.withType<JavaPlugin>().configureEach {
+    configure<JavaPluginExtension> {
+      sourceSets.configureEach {
+        dependencies {
+          add(
+            "errorprone",
+            "com.google.errorprone:error_prone_core:${libsRequiredVersion("errorprone")}",
+          )
+          add(
+            "errorprone",
+            "jp.skypencil.errorprone.slf4j:errorprone-slf4j:${libsRequiredVersion("errorproneSlf4j")}",
+          )
+        }
+      }
+    }
+  }
+}
diff --git a/buildSrc/src/main/kotlin/Ide.kt b/buildSrc/src/main/kotlin/Ide.kt
new file mode 100644
index 00000000..a8ed74e6
--- /dev/null
+++ b/buildSrc/src/main/kotlin/Ide.kt
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.gradle.api.Project
+import org.gradle.kotlin.dsl.apply
+import org.gradle.kotlin.dsl.configure
+import org.gradle.plugins.ide.eclipse.EclipsePlugin
+import org.gradle.plugins.ide.eclipse.model.EclipseModel
+import org.gradle.plugins.ide.idea.model.IdeaModel
+import org.jetbrains.gradle.ext.ActionDelegationConfig
+import org.jetbrains.gradle.ext.IdeaExtPlugin
+import org.jetbrains.gradle.ext.copyright
+import org.jetbrains.gradle.ext.delegateActions
+import org.jetbrains.gradle.ext.encodings
+import org.jetbrains.gradle.ext.runConfigurations
+import org.jetbrains.gradle.ext.settings
+
+fun Project.configureIde() {
+  apply<EclipsePlugin>()
+
+  if (this == rootProject) {
+
+    val projectName = rootProject.file("ide-name.txt").readText().trim()
+    val ideName =
+      "$projectName ${rootProject.version.toString().replace(Regex("^([0-9.]+).*"), "$1")}"
+
+    apply<IdeaExtPlugin>()
+    configure<IdeaModel> {
+      module {
+        name = ideName
+        isDownloadSources = true // this is the default BTW
+        inheritOutputDirs = true
+      }
+
+      project.settings {
+        copyright {
+          useDefault = "Nessie-ASF"
+          profiles.create("Nessie-ASF") {
+            // strip trailing LF
+            val copyrightText =
+              rootProject.file("codestyle/copyright-header.txt").readLines().joinToString("\n")
+            notice = copyrightText
+          }
+        }
+
+        encodings.encoding = "UTF-8"
+        encodings.properties.encoding = "UTF-8"
+
+        runConfigurations.register("Gradle", org.jetbrains.gradle.ext.Gradle::class.java) {
+          defaults = true
+
+          jvmArgs =
+            rootProject.projectDir
+              .resolve("gradle.properties")
+              .reader()
+              .use {
+                val rules = java.util.Properties()
+                rules.load(it)
+                rules
+              }
+              .map { e -> "-D${e.key}=${e.value}" }
+              .joinToString(" ")
+        }
+
+        delegateActions.testRunner = ActionDelegationConfig.TestRunner.CHOOSE_PER_TEST
+      }
+    }
+
+    // There's no proper way to set the name of the IDEA project (when "just importing" or syncing
+    // the Gradle project)
+    val ideaDir = projectDir.resolve(".idea")
+
+    if (ideaDir.isDirectory) {
+      ideaDir.resolve(".name").writeText(ideName)
+    }
+
+    configure<EclipseModel> { project { name = ideName } }
+  }
+}
diff --git a/buildSrc/src/main/kotlin/Jandex.kt b/buildSrc/src/main/kotlin/Jandex.kt
new file mode 100644
index 00000000..4780eabb
--- /dev/null
+++ b/buildSrc/src/main/kotlin/Jandex.kt
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import com.github.vlsi.jandex.JandexExtension
+import com.github.vlsi.jandex.JandexPlugin
+import org.gradle.api.Project
+import org.gradle.api.tasks.testing.Test
+import org.gradle.kotlin.dsl.apply
+import org.gradle.kotlin.dsl.configure
+import org.gradle.kotlin.dsl.withType
+
+fun Project.configureJandex() {
+  apply<JandexPlugin>()
+  configure<JandexExtension> { toolVersion.set(libsRequiredVersion("jandex")) }
+
+  tasks.withType<Test>().configureEach { dependsOn(tasks.named("processTestJandexIndex")) }
+}
diff --git a/buildSrc/src/main/kotlin/Java.kt b/buildSrc/src/main/kotlin/Java.kt
new file mode 100644
index 00000000..b301c15a
--- /dev/null
+++ b/buildSrc/src/main/kotlin/Java.kt
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.gradle.api.Project
+import org.gradle.api.file.DuplicatesStrategy
+import org.gradle.api.plugins.JavaPlugin
+import org.gradle.api.plugins.JavaPluginExtension
+import org.gradle.api.tasks.bundling.Jar
+import org.gradle.api.tasks.compile.AbstractCompile
+import org.gradle.api.tasks.compile.JavaCompile
+import org.gradle.api.tasks.javadoc.Javadoc
+import org.gradle.external.javadoc.CoreJavadocOptions
+import org.gradle.kotlin.dsl.configure
+import org.gradle.kotlin.dsl.repositories
+import org.gradle.kotlin.dsl.withType
+import org.gradle.language.jvm.tasks.ProcessResources
+
+fun Project.configureJava() {
+  tasks.withType<Jar>().configureEach {
+    manifest {
+      attributes["Implementation-Title"] = "iceberg-catalog-migrator"
+      attributes["Implementation-Version"] = project.version
+    }
+    duplicatesStrategy = DuplicatesStrategy.WARN
+  }
+
+  repositories {
+    mavenCentral()
+    if (System.getProperty("withMavenLocal").toBoolean()) {
+      mavenLocal()
+    }
+  }
+
+  tasks.withType<JavaCompile>().configureEach {
+    options.encoding = "UTF-8"
+    options.compilerArgs.add("-parameters")
+    options.release.set(21)
+
+    // Required to enable incremental compilation w/ immutables, see
+    // https://github.com/immutables/immutables/pull/858 and
+    // https://github.com/immutables/immutables/issues/804#issuecomment-487366544
+    options.compilerArgs.add("-Aimmutables.gradle.incremental")
+  }
+
+  tasks.withType<Javadoc>().configureEach {
+    val opt = options as CoreJavadocOptions
+    // don't spam log w/ "warning: no @param/@return"
+    opt.addStringOption("Xdoclint:-reference", "-quiet")
+  }
+
+  plugins.withType<JavaPlugin>().configureEach {
+    configure<JavaPluginExtension> {
+      withJavadocJar()
+      withSourcesJar()
+    }
+  }
+
+  tasks.register("compileAll").configure {
+    group = "build"
+    description = "Runs all compilation and jar tasks"
+    dependsOn(
+      tasks.withType(AbstractCompile::class.java),
+      tasks.withType(ProcessResources::class.java),
+    )
+  }
+}
diff --git a/buildSrc/src/main/kotlin/Spotless.kt b/buildSrc/src/main/kotlin/Spotless.kt
new file mode 100644
index 00000000..a2662190
--- /dev/null
+++ b/buildSrc/src/main/kotlin/Spotless.kt
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import com.diffplug.gradle.spotless.SpotlessExtension
+import com.diffplug.gradle.spotless.SpotlessPlugin
+import org.gradle.api.Project
+import org.gradle.kotlin.dsl.apply
+import org.gradle.kotlin.dsl.configure
+import org.gradle.kotlin.dsl.withType
+
+fun Project.configureSpotless() {
+
+  apply<SpotlessPlugin>()
+  if (!java.lang.Boolean.getBoolean("idea.sync.active")) {
+    plugins.withType<SpotlessPlugin>().configureEach {
+      configure<SpotlessExtension> {
+        format("xml") {
+          target("src/**/*.xml", "src/**/*.xsd")
+          eclipseWtp(com.diffplug.spotless.extra.wtp.EclipseWtpFormatterStep.XML)
+            .configFile(rootProject.projectDir.resolve("codestyle/org.eclipse.wst.xml.core.prefs"))
+        }
+        kotlinGradle {
+          ktfmt().googleStyle()
+          licenseHeaderFile(rootProject.file("codestyle/copyright-header-java.txt"), "$")
+          if (project == rootProject) {
+            target("*.gradle.kts", "buildSrc/*.gradle.kts")
+          }
+        }
+        if (project == rootProject) {
+          kotlin {
+            ktfmt().googleStyle()
+            licenseHeaderFile(rootProject.file("codestyle/copyright-header-java.txt"), "$")
+            target("buildSrc/src/**/kotlin/**")
+            targetExclude("buildSrc/build/**")
+          }
+        }
+
+        val dirsInSrc = projectDir.resolve("src").listFiles()
+        val sourceLangs =
+          if (dirsInSrc != null)
+            dirsInSrc
+              .filter { f -> f.isDirectory }
+              .map { f -> f.listFiles() }
+              .filterNotNull()
+              .flatMap { l -> l.filter { f -> f.isDirectory } }
+              .map { f -> f.name }
+              .distinct()
+          else listOf()
+
+        if (sourceLangs.contains("java")) {
+          java {
+            googleJavaFormat(libsRequiredVersion("googleJavaFormat"))
+            licenseHeaderFile(rootProject.file("codestyle/copyright-header-java.txt"))
+            target("src/**/java/**")
+            targetExclude("build/**")
+          }
+        }
+        if (sourceLangs.contains("kotlin")) {
+          kotlin {
+            ktfmt().googleStyle()
+            licenseHeaderFile(rootProject.file("codestyle/copyright-header-java.txt"), "$")
+            target("src/**/kotlin/**")
+            targetExclude("build/**")
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/buildSrc/src/main/kotlin/Testing.kt b/buildSrc/src/main/kotlin/Testing.kt
new file mode 100644
index 00000000..236fb80f
--- /dev/null
+++ b/buildSrc/src/main/kotlin/Testing.kt
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.gradle.api.Project
+import org.gradle.api.tasks.testing.Test
+import org.gradle.kotlin.dsl.named
+import org.gradle.kotlin.dsl.provideDelegate
+import org.gradle.kotlin.dsl.register
+import org.gradle.kotlin.dsl.withType
+
+fun Project.configureTestTasks() {
+  tasks.withType<Test>().configureEach {
+    useJUnitPlatform {}
+    val testJvmArgs: String? by project
+    val testHeapSize: String? by project
+    if (testJvmArgs != null) {
+      jvmArgs((testJvmArgs as String).split(" "))
+    }
+
+    systemProperty("file.encoding", "UTF-8")
+    systemProperty("user.language", "en")
+    systemProperty("user.country", "US")
+    systemProperty("user.variant", "")
+    filter {
+      isFailOnNoMatchingTests = false
+      when (name) {
+        "test" -> {
+          includeTestsMatching("*Test")
+          includeTestsMatching("Test*")
+          excludeTestsMatching("Abstract*")
+          excludeTestsMatching("IT*")
+        }
+        "intTest" -> includeTestsMatching("IT*")
+      }
+    }
+    if (name != "test") {
+      mustRunAfter(tasks.named("test"))
+    }
+
+    if (testHeapSize != null) {
+      setMinHeapSize(testHeapSize)
+      setMaxHeapSize(testHeapSize)
+    }
+  }
+  val intTest =
+    tasks.register<Test>("intTest") {
+      group = "verification"
+      description = "Runs the integration tests."
+    }
+  tasks.named("check") { dependsOn(intTest) }
+}
diff --git a/buildSrc/src/main/kotlin/Utilities.kt b/buildSrc/src/main/kotlin/Utilities.kt
new file mode 100644
index 00000000..d00a236e
--- /dev/null
+++ b/buildSrc/src/main/kotlin/Utilities.kt
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin
+import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
+import org.gradle.api.Project
+import org.gradle.api.artifacts.VersionCatalogsExtension
+import org.gradle.api.tasks.bundling.Jar
+import org.gradle.kotlin.dsl.getByType
+import org.gradle.kotlin.dsl.named
+import org.gradle.kotlin.dsl.withType
+
+fun Project.libsRequiredVersion(name: String): String {
+  val libVer =
+    extensions.getByType<VersionCatalogsExtension>().named("libs").findVersion(name).get()
+  val reqVer = libVer.requiredVersion
+  check(reqVer.isNotEmpty()) {
+    "libs-version for '$name' is empty, but must not be empty. strict: ${libVer.strictVersion}, required: ${libVer.requiredVersion}, preferred: ${libVer.preferredVersion}"
+  }
+  return reqVer
+}
+
+fun Project.applyShadowJar() {
+  plugins.apply(ShadowPlugin::class.java)
+
+  plugins.withType<ShadowPlugin>().configureEach {
+    val shadowJar =
+      tasks.named<ShadowJar>("shadowJar") {
+        outputs.cacheIf { false } // do not cache uber/shaded jars
+        archiveClassifier.set("")
+        mergeServiceFiles()
+      }
+
+    tasks.named<Jar>("jar") {
+      dependsOn(shadowJar)
+      archiveClassifier.set("raw")
+    }
+  }
+}
diff --git a/buildSrc/src/main/kotlin/build-conventions.gradle.kts b/buildSrc/src/main/kotlin/build-conventions.gradle.kts
new file mode 100644
index 00000000..4be6e6bc
--- /dev/null
+++ b/buildSrc/src/main/kotlin/build-conventions.gradle.kts
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+val hasSrcMain = projectDir.resolve("src/main").exists()
+val hasSrcTest = projectDir.resolve("src/test").exists()
+
+configureIde()
+
+configureSpotless()
+
+configureJandex()
+
+configureJava()
+
+apply<CodeCoveragePlugin>()
+
+if (hasSrcMain || hasSrcTest) {
+  configureCheckstyle()
+
+  configureErrorprone()
+
+  if (hasSrcTest) {
+    configureTestTasks()
+  }
+}
diff --git a/codestyle/checkstyle-config.xml b/codestyle/checkstyle-config.xml
new file mode 100644
index 00000000..a38bca20
--- /dev/null
+++ b/codestyle/checkstyle-config.xml
@@ -0,0 +1,183 @@
+<!-- 183 lines of Checkstyle module configuration (XML markup not recoverable from this rendering) -->
diff --git a/codestyle/copyright-header-java.txt b/codestyle/copyright-header-java.txt
new file mode 100644
index 00000000..042f3ce1
--- /dev/null
+++ b/codestyle/copyright-header-java.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
diff --git a/codestyle/copyright-header.txt b/codestyle/copyright-header.txt
new file mode 100644
index 00000000..90705e02
--- /dev/null
+++ b/codestyle/copyright-header.txt
@@ -0,0 +1,16 @@
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
\ No newline at end of file
diff --git a/codestyle/errorprone-rules.properties b/codestyle/errorprone-rules.properties
new file mode 100644
index 00000000..5f6a6222
--- /dev/null
+++ b/codestyle/errorprone-rules.properties
@@ -0,0 +1,1655 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Contains bug patterns up to Error Prone 2.15.0
+
+####################################################################################################
+# On by default : ERROR
+# See https://errorprone.info/bugpatterns
+####################################################################################################
+
+#AlwaysThrows=ERROR
+# Detects calls that will fail at runtime
+
+#AndroidInjectionBeforeSuper=ERROR
+# AndroidInjection.inject() should always be invoked before calling super.lifecycleMethod()
+
+#ArrayEquals=ERROR
+# Reference equality used to compare arrays
+
+#ArrayFillIncompatibleType=ERROR
+# Arrays.fill(Object[], Object) called with incompatible types.
+
+#ArrayHashCode=ERROR
+# hashcode method on array does not hash array contents
+
+#ArrayToString=ERROR
+# Calling toString on an array does not provide useful information
+
+#ArraysAsListPrimitiveArray=ERROR
+# Arrays.asList does not autobox primitive arrays, as one might expect.
+
+#AsyncCallableReturnsNull=ERROR
+# AsyncCallable should not return a null Future, only a Future whose result is null.
+
+#AsyncFunctionReturnsNull=ERROR
+# AsyncFunction should not return a null Future, only a Future whose result is null.
+
+#AutoValueBuilderDefaultsInConstructor=ERROR
+# Defaults for AutoValue Builders should be set in the factory method returning Builder instances, not the constructor
+
+#AutoValueConstructorOrderChecker=ERROR
+# Arguments to AutoValue constructor are in the wrong order
+
+#BadAnnotationImplementation=ERROR
+# Classes that implement Annotation must override equals and hashCode. Consider using AutoAnnotation instead of implementing Annotation by hand.
+
+#BadShiftAmount=ERROR
+# Shift by an amount that is out of range
+
+#BanJNDI=ERROR
+# Using JNDI may deserialize user input via the `Serializable` API which is extremely dangerous
+
+#BoxedPrimitiveEquality=ERROR
+# Comparison using reference equality instead of value equality. Reference equality of boxed primitive types is usually not useful, as they are value objects, and it is bug-prone, as instances are cached for some values but not others.
+
+#BundleDeserializationCast=ERROR
+# Object serialized in Bundle may have been flattened to base type.
+
+#ChainingConstructorIgnoresParameter=ERROR
+# The called constructor accepts a parameter with the same name and type as one of its caller's parameters, but its caller doesn't pass that parameter to it. It's likely that it was intended to.
+
+#CheckNotNullMultipleTimes=ERROR
+# A variable was checkNotNulled multiple times. Did you mean to check something else?
+
+#CheckReturnValue=ERROR
+# Ignored return value of method that is annotated with @CheckReturnValue
+
+#CollectionIncompatibleType=ERROR
+# Incompatible type as argument to Object-accepting Java collections method
+
+#CollectionToArraySafeParameter=ERROR
+# The type of the array parameter of Collection.toArray needs to be compatible with the array type
+
+#ComparableType=ERROR
+# Implementing 'Comparable<T>' where T is not the same as the implementing class is incorrect, since it violates the symmetry contract of compareTo.
+
+#ComparingThisWithNull=ERROR
+# this == null is always false, this != null is always true
+
+#ComparisonOutOfRange=ERROR
+# Comparison to value that is out of range for the compared type
+
+#CompatibleWithAnnotationMisuse=ERROR
+# @CompatibleWith's value is not a type argument.
+
+#CompileTimeConstant=ERROR
+# Non-compile-time constant expression passed to parameter with @CompileTimeConstant type annotation.
+
+#ComputeIfAbsentAmbiguousReference=ERROR
+# computeIfAbsent passes the map key to the provided class's constructor
+
+#ConditionalExpressionNumericPromotion=ERROR
+# A conditional expression with numeric operands of differing types will perform binary numeric promotion of the operands; when these operands are of reference types, the expression's result may not be of the expected type.
+
+#ConstantOverflow=ERROR
+# Compile-time constant expression overflows
+
+#DaggerProvidesNull=ERROR
+# Dagger @Provides methods may not return null unless annotated with @Nullable
+
+#DangerousLiteralNull=ERROR
+# This method is null-hostile: passing a null literal to it is always wrong
+
+#DeadException=ERROR
+# Exception created but not thrown
+
+#DeadThread=ERROR
+# Thread created but not started
+
+#DiscardedPostfixExpression=ERROR
+# The result of this unary operation on a lambda parameter is discarded
+
+#DoNotCall=ERROR
+# This method should not be called.
+
+#DoNotMock=ERROR
+# Identifies undesirable mocks.
+
+#DoubleBraceInitialization=ERROR
+# Prefer collection factory methods or builders to the double-brace initialization pattern.
+
+#DuplicateMapKeys=ERROR
+# Map#ofEntries will throw an IllegalArgumentException if there are any duplicate keys
+
+#DurationFrom=ERROR
+# Duration.from(Duration) returns itself; from(Period) throws a runtime exception.
+
+#DurationGetTemporalUnit=ERROR
+# Duration.get() only works with SECONDS or NANOS.
+
+#DurationTemporalUnit=ERROR
+# Duration APIs only work for DAYS or exact durations.
+
+#DurationToLongTimeUnit=ERROR
+# Unit mismatch when decomposing a Duration or Instant to call a <unit>-based API
+
+#EmptyTopLevelDeclaration=ERROR
+# Empty top-level type declarations should be omitted
+
+#EqualsHashCode=ERROR
+# Classes that override equals should also override hashCode.
+
+#EqualsNaN=ERROR
+# == NaN always returns false; use the isNaN methods instead
+
+#EqualsNull=ERROR
+# The contract of Object.equals() states that for any non-null reference value x, x.equals(null) should return false. If x is null, a NullPointerException is thrown. Consider replacing equals() with the == operator.
+
+#EqualsReference=ERROR
+# == must be used in equals method to check equality to itself or an infinite loop will occur.
+
+#EqualsWrongThing=ERROR
+# Comparing different pairs of fields/getters in an equals implementation is probably a mistake.
+
+#FloggerFormatString=ERROR
+# Invalid printf-style format string
+
+#FloggerLogString=ERROR
+# Arguments to log(String) must be compile-time constants or parameters annotated with @CompileTimeConstant. If possible, use Flogger's formatting log methods instead.
+
+#FloggerLogVarargs=ERROR
+# logVarargs should be used to pass through format strings and arguments.
+
+#FloggerSplitLogStatement=ERROR
+# Splitting log statements and using Api instances directly breaks logging.
+
+#ForOverride=ERROR
+# Method annotated @ForOverride must be protected or package-private and only invoked from declaring class, or from an override of the method
+
+#FormatString=ERROR
+# Invalid printf-style format string
+
+#FormatStringAnnotation=ERROR
+# Invalid format string passed to formatting method.
+
+#FromTemporalAccessor=ERROR
+# Certain combinations of javaTimeType.from(TemporalAccessor) will always throw a DateTimeException or return the parameter directly.
+
+#FunctionalInterfaceMethodChanged=ERROR
+# Casting a lambda to this @FunctionalInterface can cause a behavior change from casting to a functional superinterface, which is surprising to users. Prefer decorator methods to this surprising behavior.
+
+#FuturesGetCheckedIllegalExceptionType=ERROR
+# Futures.getChecked requires a checked exception type with a standard constructor.
+
+#FuzzyEqualsShouldNotBeUsedInEqualsMethod=ERROR
+# DoubleMath.fuzzyEquals should never be used in an Object.equals() method
+
+#GetClassOnAnnotation=ERROR
+# Calling getClass() on an annotation may return a proxy class
+
+#GetClassOnClass=ERROR
+# Calling getClass() on an object of type Class returns the Class object for java.lang.Class; you probably meant to operate on the object directly
+
+#GuardedBy=ERROR
+# Checks for unguarded accesses to fields and methods with @GuardedBy annotations
+
+#GuiceAssistedInjectScoping=ERROR
+# Scope annotation on implementation class of AssistedInject factory is not allowed
+
+#GuiceAssistedParameters=ERROR
+# A constructor cannot have two @Assisted parameters of the same type unless they are disambiguated with named @Assisted annotations.
+
+#GuiceInjectOnFinalField=ERROR
+# Although Guice allows injecting final fields, doing so is disallowed because the injected value may not be visible to other threads.
+
+#HashtableContains=ERROR
+# contains() is a legacy method that is equivalent to containsValue()
+
+#IdentityBinaryExpression=ERROR
+# A binary expression where both operands are the same is usually incorrect.
+
+#IdentityHashMapBoxing=ERROR
+# Using IdentityHashMap with a boxed type as the key is risky since boxing may produce distinct instances
+
+#IgnoredPureGetter=ERROR
+# Getters on AutoValues, AutoBuilders, and Protobuf Messages are side-effect free, so there is no point in calling them if the return value is ignored. While there are no side effects from the getter, the receiver may have side effects.
+
+#Immutable=ERROR
+# Type declaration annotated with @Immutable is not immutable
+
+#Incomparable=ERROR
+# Types contained in sorted collections must implement Comparable.
+
+#IncompatibleArgumentType=ERROR
+# Passing argument to a generic method with an incompatible type.
+
+#IncompatibleModifiers=ERROR
+# This annotation has incompatible modifiers as specified by its @IncompatibleModifiers annotation
+
+#IndexOfChar=ERROR
+# The first argument to indexOf is a Unicode code point, and the second is the index to start the search from
+
+#InexactVarargsConditional=ERROR
+# Conditional expression in varargs call contains array and non-array arguments
+
+#InfiniteRecursion=ERROR
+# This method always recurses, and will cause a StackOverflowError
+
+#InjectMoreThanOneScopeAnnotationOnClass=ERROR
+# A class can be annotated with at most one scope annotation.
+
+#InjectOnMemberAndConstructor=ERROR
+# Members shouldn't be annotated with @Inject if constructor is already annotated @Inject
+
+#InlineMeValidator=ERROR
+# Ensures that the @InlineMe annotation is used correctly.
+
+#InstantTemporalUnit=ERROR
+# Instant APIs only work for NANOS, MICROS, MILLIS, SECONDS, MINUTES, HOURS, HALF_DAYS and DAYS.
+
+#InvalidJavaTimeConstant=ERROR
+# This checker errors on calls to java.time methods using values that are guaranteed to throw a DateTimeException.
+
+#InvalidPatternSyntax=ERROR
+# Invalid syntax used for a regular expression
+
+#InvalidTimeZoneID=ERROR
+# Invalid time zone identifier. TimeZone.getTimeZone(String) will silently return GMT instead of the time zone you intended.
+
+#InvalidZoneId=ERROR
+# Invalid zone identifier. ZoneId.of(String) will throw exception at runtime.
+
+#IsInstanceIncompatibleType=ERROR
+# This use of isInstance will always evaluate to false.
+
+#IsInstanceOfClass=ERROR
+# The argument to Class#isInstance(Object) should not be a Class
+
+#IsLoggableTagLength=ERROR
+# Log tag too long, cannot exceed 23 characters.
+
+#JUnit3TestNotRun=ERROR
+# Test method will not be run; please correct method signature (Should be public, non-static, and method name should begin with "test").
+
+#JUnit4ClassAnnotationNonStatic=ERROR
+# This method should be static
+
+#JUnit4SetUpNotRun=ERROR
+# setUp() method will not be run; please add JUnit's @Before annotation
+
+#JUnit4TearDownNotRun=ERROR
+# tearDown() method will not be run; please add JUnit's @After annotation
+
+#JUnit4TestNotRun=ERROR
+# This looks like a test method but is not run; please add @Test and @Ignore, or, if this is a helper method, reduce its visibility.
+
+#JUnit4TestsNotRunWithinEnclosed=ERROR
+# This test is annotated @Test, but given it's within a class using the Enclosed runner, will not run.
+
+#JUnitAssertSameCheck=ERROR
+# An object is tested for reference equality to itself using JUnit library.
+
+#JUnitParameterMethodNotFound=ERROR
+# The method for providing parameters was not found.
+
+#JavaxInjectOnAbstractMethod=ERROR
+# Abstract and default methods are not injectable with javax.inject.Inject
+
+#JodaToSelf=ERROR
+# Use of Joda-Time's DateTime.toDateTime(), Duration.toDuration(), Instant.toInstant(), Interval.toInterval(), and Period.toPeriod() are not allowed.
+
+#LiteByteStringUtf8=ERROR
+# This pattern will silently corrupt certain byte sequences from the serialized protocol message. Use ByteString or byte[] directly
+
+#LocalDateTemporalAmount=ERROR
+# LocalDate.plus() and minus() do not work with Durations. LocalDate represents civil time (years/months/days), so java.time.Period is the appropriate thing to add or subtract instead.
+
+#LockOnBoxedPrimitive=ERROR
+# It is dangerous to use a boxed primitive as a lock as it can unintentionally lead to sharing a lock with another piece of code.
+
+#LoopConditionChecker=ERROR
+# Loop condition is never modified in loop body.
+
+#LossyPrimitiveCompare=ERROR
+# Using an unnecessarily-wide comparison method can lead to lossy comparison
+
+#MathRoundIntLong=ERROR
+# Math.round(Integer) results in truncation
+
+#MislabeledAndroidString=ERROR
+# Certain resources in `android.R.string` have names that do not match their content
+
+#MisplacedScopeAnnotations=ERROR
+# Scope annotations used as qualifier annotations don't have any effect. Move the scope annotation to the binding location or delete it.
+
+#MissingSuperCall=ERROR
+# Overriding method is missing a call to overridden super method
+
+#MissingTestCall=ERROR
+# A terminating method call is required for a test helper to have any effect.
+
+#MisusedDayOfYear=ERROR
+# Use of 'DD' (day of year) in a date pattern with 'MM' (month of year) is not likely to be intentional, as it would lead to dates like 'March 73rd'.
+
+#MisusedWeekYear=ERROR
+# Use of "YYYY" (week year) in a date pattern without "ww" (week in year). You probably meant to use "yyyy" (year) instead.
+
+#MixedDescriptors=ERROR
+# The field number passed into #getFieldByNumber belongs to a different proto to the Descriptor.
+
+#MockitoUsage=ERROR
+# Missing method call for verify(mock) here
+
+#ModifyingCollectionWithItself=ERROR
+# Using a collection function with itself as the argument.
+
+#MoreThanOneInjectableConstructor=ERROR
+# This class has more than one @Inject-annotated constructor. Please remove the @Inject annotation from all but one of them.
+
+MustBeClosedChecker=ERROR
+# This method returns a resource which must be managed carefully, not just left for garbage collection. If it is a constant that will persist for the lifetime of your program, move it to a private static final field. Otherwise, you should use it in a try-with-resources.
+
+#NCopiesOfChar=ERROR
+# The first argument to nCopies is the number of copies, and the second is the item to copy
+
+#NoCanIgnoreReturnValueOnClasses=ERROR
+# @CanIgnoreReturnValue should not be applied to classes as it almost always overmatches (as it
+# applies to constructors and all methods), and the CIRVness isn't conferred to its subclasses.
+
+#NonCanonicalStaticImport=ERROR
+# Static import of type uses non-canonical name
+
+#NonFinalCompileTimeConstant=ERROR
+# @CompileTimeConstant parameters should be final or effectively final
+
+#NonRuntimeAnnotation=ERROR
+# Calling getAnnotation on an annotation that is not retained at runtime.
+
+#NullArgumentForNonNullParameter=ERROR
+# Null is not permitted for this parameter.
+
+#NullTernary=ERROR
+# This conditional expression may evaluate to null, which will result in an NPE when the result is unboxed.
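+
+# Illustrative sketch (hypothetical names, not from the Error Prone docs) of the
+# try-with-resources pattern MustBeClosedChecker above (enabled at ERROR) enforces:
+#   @MustBeClosed
+#   static Stream<String> openLines(Path p) throws IOException { return Files.lines(p); }
+#   // at the call site:
+#   try (Stream<String> lines = openLines(path)) { lines.forEach(log::info); }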
+
+#OptionalEquality=ERROR
+# Comparison using reference equality instead of value equality
+
+#OptionalMapUnusedValue=ERROR
+# Optional.ifPresent is preferred over Optional.map when the return value is unused
+
+#OptionalOfRedundantMethod=ERROR
+# Optional.of() always returns a non-empty optional. Using ifPresent/isPresent/orElse/orElseGet/orElseThrow/or/orNull method on it is unnecessary and most probably a bug.
+
+#OverlappingQualifierAndScopeAnnotation=ERROR
+# Annotations cannot be both Scope annotations and Qualifier annotations: this causes confusion when trying to use them.
+
+#OverridesJavaxInjectableMethod=ERROR
+# This method is not annotated with @Inject, but it overrides a method that is annotated with @javax.inject.Inject. The method will not be Injected.
+
+#PackageInfo=ERROR
+# Declaring types inside package-info.java files is very bad form
+
+#ParametersButNotParameterized=ERROR
+# This test has @Parameters but is using the default JUnit4 runner. The parameters will have no effect.
+
+#ParcelableCreator=ERROR
+# Detects classes which implement Parcelable but don't have CREATOR
+
+#PeriodFrom=ERROR
+# Period.from(Period) returns itself; from(Duration) throws a runtime exception.
+
+#PeriodGetTemporalUnit=ERROR
+# Period.get() only works with YEARS, MONTHS, or DAYS.
+
+#PeriodTimeMath=ERROR
+# When adding or subtracting from a Period, Duration is incompatible.
+
+#PreconditionsInvalidPlaceholder=ERROR
+# Preconditions only accepts the %s placeholder in error message strings
+
+#PrivateSecurityContractProtoAccess=ERROR
+# Access to a private protocol buffer field is forbidden. This protocol buffer carries a security contract, and can only be created using an approved library. Direct access to the fields is forbidden.
+
+#ProtoBuilderReturnValueIgnored=ERROR
+# Unnecessary call to proto's #build() method. If you don't consume the return value of #build(), the result is discarded and the only effect is to verify that all required fields are set, which can be expressed more directly with #isInitialized().
+
+#ProtoFieldNullComparison=ERROR
+# Protobuf fields cannot be null.
+
+#ProtoStringFieldReferenceEquality=ERROR
+# Comparing protobuf fields of type String using reference equality
+
+#ProtoTruthMixedDescriptors=ERROR
+# The arguments passed to `ignoringFields` are inconsistent with the proto which is the subject of the assertion.
+
+#ProtocolBufferOrdinal=ERROR
+# To get the tag number of a protocol buffer enum, use getNumber() instead.
+
+#ProvidesMethodOutsideOfModule=ERROR
+# @Provides methods need to be declared in a Module to have any effect.
+
+#RandomCast=ERROR
+# Casting a random number in the range [0.0, 1.0) to an integer or long always results in 0.
+
+#RandomModInteger=ERROR
+# Use Random.nextInt(int). Random.nextInt() % n can have negative results
+
+#RectIntersectReturnValueIgnored=ERROR
+# Return value of android.graphics.Rect.intersect() must be checked
+
+#RequiredModifiers=ERROR
+# This annotation is missing required modifiers as specified by its @RequiredModifiers annotation
+
+#RestrictedApiChecker=ERROR
+# Check for non-allowlisted callers to RestrictedApiChecker.
+
+#ReturnValueIgnored=ERROR
+# Return value of this method must be used
+
+#SelfAssignment=ERROR
+# Variable assigned to itself
+
+#SelfComparison=ERROR
+# An object is compared to itself
+
+#SelfEquals=ERROR
+# Testing an object for equality with itself will always be true.
+
+#ShouldHaveEvenArgs=ERROR
+# This method must be called with an even number of arguments.
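+
+# Illustrative sketch (hypothetical variable, not from the Error Prone docs) of the
+# redundancy the OptionalOfRedundantMethod check above describes:
+#   Optional.of(value).isPresent();         // always true; likely a bug
+#   Optional.ofNullable(value).isPresent(); // probably what was intended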
+ +#SizeGreaterThanOrEqualsZero=ERROR +# Comparison of a size >= 0 is always true, did you intend to check for non-emptiness? + +#StreamToString=ERROR +# Calling toString on a Stream does not provide useful information + +#StringBuilderInitWithChar=ERROR +# StringBuilder does not have a char constructor; this invokes the int constructor. + +#SubstringOfZero=ERROR +# String.substring(0) returns the original String + +#SuppressWarningsDeprecated=ERROR +# Suppressing "deprecated" is probably a typo for "deprecation" + +#TemporalAccessorGetChronoField=ERROR +# TemporalAccessor.get() only works for certain values of ChronoField. + +#TestParametersNotInitialized=ERROR +# This test has @TestParameter fields but is using the default JUnit4 runner. The parameters will not be initialised beyond their default value. + +#TheoryButNoTheories=ERROR +# This test has members annotated with @Theory, @DataPoint, or @DataPoints but is using the default JUnit4 runner. + +#ThrowIfUncheckedKnownChecked=ERROR +# throwIfUnchecked(knownCheckedException) is a no-op. + +#ThrowNull=ERROR +# Throwing 'null' always results in a NullPointerException being thrown. + +#TreeToString=ERROR +# Tree#toString shouldn't be used for Trees deriving from the code being compiled, as it discards whitespace and comments. + +#TruthSelfEquals=ERROR +# isEqualTo should not be used to test an object for equality with itself; the assertion will never fail. + +#TryFailThrowable=ERROR +# Catching Throwable/Error masks failures from fail() or assert*() in the try block + +#TypeParameterQualifier=ERROR +# Type parameter used as type qualifier + +#UnicodeDirectionalityCharacters=ERROR +# Unicode directionality modifiers can be used to conceal code in many editors. + +UnicodeInCode=OFF +# Avoid using non-ASCII Unicode characters outside of comments and literals, as they can be confusing. + +#UnnecessaryCheckNotNull=ERROR +# This null check is unnecessary; the expression can never be null + +#UnnecessaryTypeArgument=ERROR +# Non-generic methods should not be invoked with type arguments + +#UnsafeWildcard=ERROR +# Certain wildcard types can confuse the compiler. + +#UnusedAnonymousClass=ERROR +# Instance created but never used + +#UnusedCollectionModifiedInPlace=ERROR +# Collection is modified in place, but the result is not used + +#VarTypeName=ERROR +# `var` should not be used as a type name. + +#WrongOneof=ERROR +# This field is guaranteed not to be set given it's within a switch over a one_of. + +#XorPower=ERROR +# The `^` operator is binary XOR, not a power operator. + +#ZoneIdOfZ=ERROR +# Use ZoneOffset.UTC instead of ZoneId.of("Z"). + +#################################################################################################### +# On by default : WARNING +# See https://errorprone.info/bugpatterns +#################################################################################################### + +#AlmostJavadoc=WARN +# This comment contains Javadoc or HTML tags, but isn't started with a double asterisk (/**); is it meant to be Javadoc? + +#AlreadyChecked=WARN +# This condition has already been checked. + +#AmbiguousMethodReference=WARN +# Method reference is ambiguous + +AnnotateFormatMethod=ERROR +# This method passes a pair of parameters through to String.format, but the enclosing method wasn't annotated @FormatMethod. Doing so gives compile-time rather than run-time protection against malformed format strings. + +#ArgumentSelectionDefectChecker=WARN +# Arguments are in the wrong order or could be commented for clarity. 
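+
+# Illustrative sketch (hypothetical helper, not from the Error Prone docs) of the
+# annotation the AnnotateFormatMethod check above (enabled at ERROR) asks for:
+#   @FormatMethod
+#   static void fail(@FormatString String format, Object... args) {
+#     throw new IllegalStateException(String.format(format, args));
+#   }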
+ +ArrayAsKeyOfSetOrMap=ERROR +# Arrays do not override equals() or hashCode, so comparisons will be done on reference equality only. If neither deduplication nor lookup are needed, consider using a List instead. Otherwise, use IdentityHashMap/Set, a Map from a library that handles object arrays, or an Iterable/List of pairs. + +AssertEqualsArgumentOrderChecker=ERROR +# Arguments are swapped in assertEquals-like call + +AssertThrowsMultipleStatements=ERROR +# The lambda passed to assertThrows should contain exactly one statement + +AssertionFailureIgnored=ERROR +# This assertion throws an AssertionError if it fails, which will be caught by an enclosing try block. + +#AssistedInjectAndInjectOnSameConstructor=WARN +# @AssistedInject and @Inject cannot be used on the same constructor. + +#AutoValueFinalMethods=WARN +# Make toString(), hashCode() and equals() final in AutoValue classes, so it is clear to readers that AutoValue is not overriding them + +#AutoValueImmutableFields=WARN +# AutoValue recommends using immutable collections + +#AutoValueSubclassLeaked=WARN +# Do not refer to the autogenerated AutoValue_ class outside the file containing the corresponding @AutoValue base class. + +#BadComparable=WARN +# Possible sign flip from narrowing conversion + +BadImport=ERROR +# Importing nested classes/static methods/static fields with commonly-used names can make code harder to read, because it may not be clear from the context exactly which type is being referred to. Qualifying the name with that of the containing class can make the code clearer. + +BadInstanceof=ERROR +# instanceof used in a way that is equivalent to a null check. + +BareDotMetacharacter=ERROR +# "." is rarely useful as a regex, as it matches any character. To match a literal '.' character, instead write "\.". + +BigDecimalEquals=ERROR +# BigDecimal#equals has surprising behavior: it also compares scale. + +BigDecimalLiteralDouble=ERROR +# new BigDecimal(double) loses precision in this case. + +BoxedPrimitiveConstructor=ERROR +# valueOf or autoboxing provides better time and space performance + +#BugPatternNaming=WARN +# Giving BugPatterns a name different to the enclosing class can be confusing + +#BuilderReturnThis=WARN +# Builder instance method does not return 'this' + +ByteBufferBackingArray=ERROR +# ByteBuffer.array() shouldn't be called unless ByteBuffer.arrayOffset() is used or if the ByteBuffer was initialized using ByteBuffer.wrap() or ByteBuffer.allocate(). + +#CacheLoaderNull=WARN +# The result of CacheLoader#load must be non-null. + +CanIgnoreReturnValueSuggester=OFF +# Methods that always 'return this' should be annotated with @CanIgnoreReturnValue + +#CannotMockFinalClass=WARN +# Mockito cannot mock final classes + +#CanonicalDuration=WARN +# Duration can be expressed more clearly with different units + +CatchAndPrintStackTrace=ERROR +# Logging or rethrowing exceptions should usually be preferred to catching and calling printStackTrace + +#CatchFail=WARN +# Ignoring exceptions and calling fail() is unnecessary, and makes test output less useful + +#ChainedAssertionLosesContext=WARN +# Inside a Subject, use check(…) instead of assert*() to preserve user-supplied messages and other settings. 
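+
+# Illustrative sketch (not from the Error Prone docs) of the BigDecimalEquals surprise
+# above: equals() also compares scale, while compareTo() compares the numeric value.
+#   new BigDecimal("1.0").equals(new BigDecimal("1.00"));         // false
+#   new BigDecimal("1.0").compareTo(new BigDecimal("1.00")) == 0; // true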
+ +#CharacterGetNumericValue=WARN +# getNumericValue has unexpected behaviour: it interprets A-Z as base-36 digits with values 10-35, but also supports non-arabic numerals and miscellaneous numeric unicode characters like ㊷; consider using Character.digit or UCharacter.getUnicodeNumericValue instead + +#ClassCanBeStatic=WARN +# Inner class is non-static but does not reference enclosing class + +ClassNewInstance=ERROR +# Class.newInstance() bypasses exception checking; prefer getDeclaredConstructor().newInstance() + +#CloseableProvides=WARN +# Providing Closeable resources makes their lifecycle unclear + +#CollectionUndefinedEquality=WARN +# This type does not have well-defined equals behavior. + +#CollectorShouldNotUseState=WARN +# Collector.of() should not use state + +#ComparableAndComparator=WARN +# Class should not implement both `Comparable` and `Comparator` + +#CompareToZero=WARN +# The result of #compareTo or #compare should only be compared to 0. It is an implementation detail whether a given type returns strictly the values {-1, 0, +1} or others. + +#ComplexBooleanConstant=WARN +# Non-trivial compile time constant boolean expressions shouldn't be used. + +#DateChecker=WARN +# Warns against suspect looking calls to java.util.Date APIs + +DateFormatConstant=ERROR +# DateFormat is not thread-safe, and should not be used as a constant field. + +DefaultCharset=ERROR +# Implicit use of the platform default charset, which can result in differing behaviour between JVM executions or incorrect behavior if the encoding of the data source doesn't match expectations. + +#DefaultPackage=WARN +# Java classes shouldn't use default package + +#DeprecatedVariable=WARN +# Applying the @Deprecated annotation to local variables or parameters has no effect + +#DirectInvocationOnMock=WARN +# Methods should not be directly invoked on mocks. Should this be part of a verify(..) call? + +DistinctVarargsChecker=ERROR +# Method expects distinct arguments at some/all positions + +#DoNotCallSuggester=WARN +# Consider annotating methods that always throw with @DoNotCall. Read more at https://errorprone.info/bugpattern/DoNotCall + +#DoNotClaimAnnotations=WARN +# Don't 'claim' annotations in annotation processors; Processor#process should unconditionally return `false` + +#DoNotMockAutoValue=WARN +# AutoValue classes represent pure data classes, so mocking them should not be necessary. Construct a real instance of the class instead. + +DoubleCheckedLocking=ERROR +# Double-checked locking on non-volatile fields is unsafe + +#EmptyBlockTag=WARN +# A block tag (@param, @return, @throws, @deprecated) has an empty description. Block tags without descriptions don't add much value for future readers of the code; consider removing the tag entirely or adding a description. + +#EmptyCatch=WARN +# Caught exceptions should not be ignored + +#EmptySetMultibindingContributions=WARN +# @Multibinds is a more efficient and declarative mechanism for ensuring that a set multibinding is present in the graph. + +EqualsGetClass=ERROR +# Prefer instanceof to getClass when implementing Object#equals. + +EqualsIncompatibleType=ERROR +# An equality test between objects with incompatible types always returns false + +EqualsUnsafeCast=ERROR +# The contract of #equals states that it should return false for incompatible types, while this implementation may throw ClassCastException. + +EqualsUsingHashCode=ERROR +# Implementing #equals by just comparing hashCodes is fragile. 
Hashes collide frequently, and this will lead to false positives in #equals. + +ErroneousBitwiseExpression=ERROR +# This expression evaluates to 0. If this isn't an error, consider expressing it as a literal 0. + +ErroneousThreadPoolConstructorChecker=ERROR +# Thread pool size will never go beyond corePoolSize if an unbounded queue is used + +EscapedEntity=ERROR +# HTML entities in @code/@literal tags will appear literally in the rendered javadoc. + +#ExtendingJUnitAssert=WARN +# When only using JUnit Assert's static methods, you should import statically instead of extending. + +#ExtendsObject=WARN +# `T extends Object` is redundant (unless you are using the Checker Framework). + +#FallThrough=WARN +# Switch case may fall through + +#Finally=WARN +# If you return or throw from a finally, then values returned or thrown from the try-catch block will be ignored. Consider using try-with-resources instead. + +FloatCast=ERROR +# Use parentheses to make the precedence explicit + +FloatingPointAssertionWithinEpsilon=ERROR +# This fuzzy equality check is using a tolerance less than the gap to the next number. You may want a less restrictive tolerance, or to assert equality. + +FloatingPointLiteralPrecision=ERROR +# Floating point literal loses precision + +#FloggerArgumentToString=WARN +# Use Flogger's printf-style formatting instead of explicitly converting arguments to strings + +#FloggerStringConcatenation=WARN +# Prefer string formatting using printf placeholders (e.g. %s) instead of string concatenation + +#FragmentInjection=WARN +# Classes extending PreferenceActivity must implement isValidFragment such that it does not unconditionally return true to prevent vulnerability to fragment injection attacks. + +#FragmentNotInstantiable=WARN +# Subclasses of Fragment must be instantiable via Class#newInstance(): the class must be public, static and have a public nullary constructor + +FutureReturnValueIgnored=ERROR +# Return value of methods returning Future must be checked. Ignoring returned Futures suppresses exceptions thrown from the code that completes the Future. + +GetClassOnEnum=ERROR +# Calling getClass() on an enum may return a subclass of the enum type + +#HidingField=WARN +# Hiding fields of superclasses may cause confusion and errors + +#IdentityHashMapUsage=WARN +# IdentityHashMap usage shouldn't be intermingled with Map + +#ImmutableAnnotationChecker=WARN +# Annotations should always be immutable + +#ImmutableEnumChecker=WARN +# Enums should always be immutable + +#InconsistentCapitalization=WARN +# It is confusing to have a field and a parameter under the same scope that differ only in capitalization. + +InconsistentHashCode=ERROR +# Including fields in hashCode which are not compared in equals violates the contract of hashCode. + +#IncorrectMainMethod=WARN +# 'main' methods must be public, static, and void + +#IncrementInForLoopAndHeader=WARN +# This for loop increments the same variable in the header and in the body + +#InheritDoc=WARN +# Invalid use of @inheritDoc. + +#InjectInvalidTargetingOnScopingAnnotation=WARN +# A scoping annotation's Target should include TYPE and METHOD. + +#InjectOnConstructorOfAbstractClass=WARN +# Constructors on abstract classes are never directly @Inject'ed, only the constructors of their subclasses can be @Inject'ed. 
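+
+# Illustrative sketch (hypothetical executor/task names, not from the Error Prone
+# docs) of the FutureReturnValueIgnored check above (enabled at ERROR):
+#   executor.submit(task);                     // flagged: exceptions in task are silently lost
+#   Future<?> result = executor.submit(task);  // keep the Future and check/await it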
+
+#InjectScopeAnnotationOnInterfaceOrAbstractClass=WARN
+# Scope annotation on an interface or abstract class is not allowed
+
+#InjectedConstructorAnnotations=WARN
+# Injected constructors cannot be optional nor have binding annotations
+
+#InlineFormatString=WARN
+# Prefer to create format strings inline, instead of extracting them to a single-use constant
+
+#InlineMeInliner=WARN
+# Callers of this API should be inlined.
+
+#InlineMeSuggester=WARN
+# This deprecated API looks inlineable. If you'd like the body of the API to be inlined to its callers, please annotate it with @InlineMe.
+
+#InputStreamSlowMultibyteRead=WARN
+# Please also override int read(byte[], int, int), otherwise multi-byte reads from this input stream are likely to be slow.
+
+#InstanceOfAndCastMatchWrongType=WARN
+# Casting inside an if block should be plausibly consistent with the instanceof type
+
+IntLongMath=ERROR
+# Expression of type int may overflow before being assigned to a long
+
+#InvalidBlockTag=WARN
+# This tag is invalid.
+
+#InvalidInlineTag=WARN
+# This tag is invalid.
+
+#InvalidLink=WARN
+# This @link tag looks wrong.
+
+#InvalidParam=WARN
+# This @param tag doesn't refer to a parameter of the method.
+
+#InvalidThrows=WARN
+# The documented method doesn't actually throw this checked exception.
+
+#InvalidThrowsLink=WARN
+# Javadoc links to exceptions in @throws without a @link tag (@throws Exception, not @throws {@link Exception}).
+
+#IterableAndIterator=WARN
+# Class should not implement both `Iterable` and `Iterator`
+
+#JUnit3FloatingPointComparisonWithoutDelta=WARN
+# Floating-point comparison without error tolerance
+
+#JUnit4ClassUsedInJUnit3=WARN
+# Some JUnit4 construct cannot be used in a JUnit3 context. Convert your class to JUnit4 style to use them.
+
+#JUnitAmbiguousTestClass=WARN
+# Test class inherits from JUnit 3's TestCase but has JUnit 4 @Test or @RunWith annotations.
+
+#JavaDurationGetSecondsGetNano=WARN
+# duration.getNano() only accesses the underlying nanosecond adjustment from the whole second.
+
+#JavaDurationWithNanos=WARN
+# Use of java.time.Duration.withNanos(int) is not allowed.
+
+#JavaDurationWithSeconds=WARN
+# Use of java.time.Duration.withSeconds(long) is not allowed.
+
+#JavaInstantGetSecondsGetNano=WARN
+# instant.getNano() only accesses the underlying nanosecond adjustment from the whole second.
+
+JavaLangClash=ERROR
+# Never reuse class names from java.lang
+
+#JavaLocalDateTimeGetNano=WARN
+# localDateTime.getNano() only accesses the nanos-of-second field. It's rare to only use getNano() without a nearby getSecond() call.
+
+#JavaLocalTimeGetNano=WARN
+# localTime.getNano() only accesses the nanos-of-second field. It's rare to only use getNano() without a nearby getSecond() call.
+
+#JavaPeriodGetDays=WARN
+# period.getDays() only accesses the "days" portion of the Period, and doesn't represent the total span of time of the period. Consider using org.threeten.extra.Days to extract the difference between two civil dates if you want the whole time.
+
+#JavaTimeDefaultTimeZone=WARN
+# java.time APIs that silently use the default system time-zone are not allowed.
+
+#JavaUtilDate=WARN
+# Date has a bad API that leads to bugs; prefer java.time.Instant or LocalDate.
+
+#JavaxInjectOnFinalField=WARN
+# @javax.inject.Inject cannot be put on a final field.
+
+JdkObsolete=ERROR
+# Suggests alternatives to obsolete JDK classes.
+
+#JodaConstructors=WARN
+# Use of certain JodaTime constructors are not allowed.
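+
+# Illustrative sketch (not from the Error Prone docs) of the overflow the IntLongMath
+# check above (enabled at ERROR) warns about, assuming an int variable named millis:
+#   long micros = millis * 1000;  // int multiplication can overflow before widening
+#   long micros = millis * 1000L; // force the arithmetic to be done in long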
+
+#JodaDateTimeConstants=WARN
+# Using the `PER` constants in `DateTimeConstants` is problematic because they encourage manual date/time math.
+
+#JodaDurationWithMillis=WARN
+# Use of duration.withMillis(long) is not allowed. Please use Duration.millis(long) instead.
+
+#JodaInstantWithMillis=WARN
+# Use of instant.withMillis(long) is not allowed. Please use new Instant(long) instead.
+
+#JodaNewPeriod=WARN
+# This may have surprising semantics, e.g. new Period(LocalDate.parse("1970-01-01"), LocalDate.parse("1970-02-02")).getDays() == 1, not 32.
+
+#JodaPlusMinusLong=WARN
+# Use of JodaTime's <type>.plus(long) or <type>.minus(long) is not allowed (where <type> = {Duration,Instant,DateTime,DateMidnight}). Please use <type>.plus(Duration.millis(long)) or <type>.minus(Duration.millis(long)) instead.
+
+#JodaTimeConverterManager=WARN
+# Joda-Time's ConverterManager makes the semantics of DateTime/Instant/etc construction subject to global static state. If you need to define your own converters, use a helper.
+
+#JodaWithDurationAddedLong=WARN
+# Use of JodaTime's <type>.withDurationAdded(long, int) (where <type> = {Duration,Instant,DateTime}). Please use <type>.withDurationAdded(Duration.millis(long), int) instead.
+
+#LiteEnumValueOf=WARN
+# Instead of converting enums to string and back, its numeric value should be used instead as it is the stable part of the protocol defined by the enum.
+
+#LiteProtoToString=WARN
+# toString() on lite protos will not generate a useful representation of the proto from optimized builds. Consider whether using some subset of fields instead would provide useful information.
+
+LockNotBeforeTry=ERROR
+# Calls to Lock#lock should be immediately followed by a try block which releases the lock.
+
+#LogicalAssignment=WARN
+# Assignment where a boolean expression was expected; use == if this assignment wasn't expected or add parentheses for clarity.
+
+LongDoubleConversion=ERROR
+# Conversion from long to double may lose precision; use an explicit cast to double if this was intentional
+
+LongFloatConversion=ERROR
+# Conversion from long to float may lose precision; use an explicit cast to float if this was intentional
+
+#LoopOverCharArray=WARN
+# toCharArray allocates a new array, using charAt is more efficient
+
+#MalformedInlineTag=WARN
+# This Javadoc tag is malformed. The correct syntax is {@tag and not @{tag.
+
+#MathAbsoluteRandom=WARN
+# Math.abs does not always give a positive result. Please consider other methods for positive random numbers.
+
+#MemoizeConstantVisitorStateLookups=WARN
+# Anytime you need to look up a constant value from VisitorState, improve performance by creating a cache for it with VisitorState.memoize
+
+#MissingCasesInEnumSwitch=WARN
+# Switches on enum types should either handle all values, or have a default case.
+
+#MissingFail=WARN
+# Not calling fail() when expecting an exception masks bugs
+
+#MissingImplementsComparable=WARN
+# Classes implementing valid compareTo function should implement Comparable interface
+
+MissingOverride=ERROR
+# method overrides method in supertype; expected @Override
+
+#MissingSummary=WARN
+# A summary line is required on public/protected Javadocs.
+
+#MixedMutabilityReturnType=WARN
+# This method returns both mutable and immutable collections or maps from different paths. This may be confusing for users of the method.
+
+#ModifiedButNotUsed=WARN
+# A collection or proto builder was created, but its values were never accessed.
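+
+# Illustrative sketch (not from the Error Prone docs) of the shape the
+# LockNotBeforeTry check above (enabled at ERROR) expects:
+#   lock.lock();
+#   try {
+#     // guarded work
+#   } finally {
+#     lock.unlock();
+#   }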
+ +#MockNotUsedInProduction=WARN +# This mock is instantiated and configured, but is never passed to production code. It should be +# either removed or used. + +#ModifyCollectionInEnhancedForLoop=WARN +# Modifying a collection while iterating over it in a loop may cause a ConcurrentModificationException to be thrown or lead to undefined behavior. + +#ModifySourceCollectionInStream=WARN +# Modifying the backing source during stream operations may cause unintended results. + +#MultipleParallelOrSequentialCalls=WARN +# Multiple calls to either parallel or sequential are unnecessary and cause confusion. + +#MultipleUnaryOperatorsInMethodCall=WARN +# Avoid having multiple unary operators acting on the same variable in a method call + +#MutablePublicArray=WARN +# Non-empty arrays are mutable, so this `public static final` array is not a constant and can be modified by clients of this class. Prefer an ImmutableList, or provide an accessor method that returns a defensive copy. + +#NarrowCalculation=WARN +# This calculation may lose precision compared to its target type. + +#NarrowingCompoundAssignment=WARN +# Compound assignments may hide dangerous casts + +#NegativeCharLiteral=WARN +# Casting a negative signed literal to an (unsigned) char might be misleading. + +#NestedInstanceOfConditions=WARN +# Nested instanceOf conditions of disjoint types create blocks of code that never execute + +#NonAtomicVolatileUpdate=WARN +# This update of a volatile variable is non-atomic + +#NonCanonicalType=WARN +# This type is referred to by a non-canonical name, which may be misleading. + +#NonOverridingEquals=WARN +# equals method doesn't override Object.equals + +#NullOptional=WARN +# Passing a literal null to an Optional parameter is almost certainly a mistake. Did you mean to provide an empty Optional? + +#NullableConstructor=WARN +# Constructors should not be annotated with @Nullable since they cannot return null + +#NullablePrimitive=WARN +# @Nullable should not be used for primitive types since they cannot be null + +#NullablePrimitiveArray=WARN +# @Nullable type annotations should not be used for primitive types since they cannot be null + +#NullableVoid=WARN +# void-returning methods should not be annotated with @Nullable, since they cannot return null + +ObjectEqualsForPrimitives=ERROR +# Avoid unnecessary boxing by using plain == for primitive types. + +#ObjectToString=WARN +# Calling toString on Objects that don't override toString() doesn't provide useful information + +#ObjectsHashCodePrimitive=WARN +# Objects.hashCode(Object o) should not be passed a primitive value + +OperatorPrecedence=ERROR +# Use grouping parenthesis to make the operator precedence explicit + +#OptionalMapToOptional=WARN +# Mapping to another Optional will yield a nested Optional. Did you mean flatMap? + +#OptionalNotPresent=WARN +# This Optional has been confirmed to be empty at this point, so the call to `get` will throw. + +OrphanedFormatString=ERROR +# String literal contains format specifiers, but is not passed to a format method + +#OutlineNone=WARN +# Setting CSS outline style to none or 0 (while not otherwise providing visual focus indicators) is inaccessible for users navigating a web page without a mouse. + +#OverrideThrowableToString=WARN +# To return a custom message with a Throwable class, one should override getMessage() instead of toString(). 
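+
+# Illustrative sketch (not from the Error Prone docs) of the OperatorPrecedence
+# check above (enabled at ERROR):
+#   if (a || b && c) { ... }   // && binds tighter than ||; easy to misread
+#   if (a || (b && c)) { ... } // parentheses make the intent explicit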
+ +Overrides=ERROR +# Varargs doesn't agree for overridden method + +#OverridesGuiceInjectableMethod=WARN +# This method is not annotated with @Inject, but it overrides a method that is annotated with @com.google.inject.Inject. Guice will inject this method, and it is recommended to annotate it explicitly. + +#ParameterName=WARN +# Detects `/* name= */`-style comments on actual parameters where the name doesn't match the formal parameter + +#PreconditionsCheckNotNullRepeated=WARN +# Including the first argument of checkNotNull in the failure message is not useful, as it will always be `null`. + +#PrimitiveAtomicReference=WARN +# Using compareAndSet with boxed primitives is dangerous, as reference rather than value equality is used. Consider using AtomicInteger, AtomicLong, AtomicBoolean from JDK or AtomicDouble from Guava instead. + +#ProtectedMembersInFinalClass=WARN +# Protected members in final classes can be package-private + +#ProtoDurationGetSecondsGetNano=WARN +# getNanos() only accesses the underlying nanosecond-adjustment of the duration. + +#ProtoRedundantSet=WARN +# A field on a protocol buffer was set twice in the same chained expression. + +#ProtoTimestampGetSecondsGetNano=WARN +# getNanos() only accesses the underlying nanosecond-adjustment of the instant. + +#QualifierOrScopeOnInjectMethod=WARN +# Qualifiers/Scope annotations on @Inject methods don't have any effect. Move the qualifier annotation to the binding location. + +#ReachabilityFenceUsage=WARN +# reachabilityFence should always be called inside a finally block + +#ReferenceEquality=WARN +# Comparison using reference equality instead of value equality + +#RethrowReflectiveOperationExceptionAsLinkageError=WARN +# Prefer LinkageError for rethrowing ReflectiveOperationException as unchecked + +#ReturnFromVoid=WARN +# Void methods should not have a @return tag. + +#RobolectricShadowDirectlyOn=WARN +# Migrate off a deprecated overload of org.robolectric.shadow.api.Shadow#directlyOn + +#RxReturnValueIgnored=WARN +# Returned Rx objects must be checked. Ignoring a returned Rx value means it is never scheduled for execution + +#SameNameButDifferent=WARN +# This type name shadows another in a way that may be confusing. + +#SelfAlwaysReturnsThis=WARN +# Non-abstract instance methods named 'self()' that return the enclosing class must always 'return this'. + +#ShortCircuitBoolean=WARN +# Prefer the short-circuiting boolean operators && and || to & and |. + +StaticAssignmentInConstructor=ERROR +# This assignment is to a static field. Mutating static state from a constructor is highly error-prone. + +#StaticAssignmentOfThrowable=WARN +# Saving instances of Throwable in static fields is discouraged, prefer to create them on-demand when an exception is thrown + +#StaticGuardedByInstance=WARN +# Writes to static fields should not be guarded by instance locks + +#StaticMockMember=WARN +# @Mock members of test classes shouldn't share state between tests and preferably be non-static + +#StreamResourceLeak=WARN +# Streams that encapsulate a closeable resource should be closed using try-with-resources + +StreamToIterable=ERROR +# Using stream::iterator creates a one-shot Iterable, which may cause surprising failures. 
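+
+# Illustrative sketch (hypothetical stream variable, not from the Error Prone docs)
+# of the one-shot Iterable the StreamToIterable check above warns about:
+#   Iterable<String> once = stream::iterator;                 // a second for-each over it fails
+#   List<String> list = stream.collect(Collectors.toList());  // safe to iterate repeatedly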
+
+#StringSplitter=WARN
+# String.split(String) has surprising behavior
+
+#SwigMemoryLeak=WARN
+# SWIG generated code that can't call a C++ destructor will leak memory
+
+SynchronizeOnNonFinalField=ERROR
+# Synchronizing on non-final fields is not safe: if the field is ever updated, different threads may end up locking on different objects.
+
+#ThreadJoinLoop=WARN
+# Thread.join needs to be immediately surrounded by a loop until it succeeds. Consider using Uninterruptibles.joinUninterruptibly.
+
+ThreadLocalUsage=ERROR
+# ThreadLocals should be stored in static fields
+
+#ThreadPriorityCheck=WARN
+# Relying on the thread scheduler is discouraged.
+
+#ThreeLetterTimeZoneID=WARN
+# Three-letter time zone identifiers are deprecated, may be ambiguous, and might not do what you intend; the full IANA time zone ID should be used instead.
+
+#TimeUnitConversionChecker=WARN
+# This TimeUnit conversion looks buggy: converting from a smaller unit to a larger unit (and passing a constant), converting to/from the same TimeUnit, or converting TimeUnits where the result is statically known to be 0 or 1 are all buggy patterns.
+
+#ToStringReturnsNull=WARN
+# An implementation of Object.toString() should never return null.
+
+#TruthAssertExpected=WARN
+# The actual and expected values appear to be swapped, which results in poor assertion failure messages. The actual value should come first.
+
+#TruthConstantAsserts=WARN
+# Truth Library assert is called on a constant.
+
+#TruthGetOrDefault=WARN
+# Asserting on getOrDefault is unclear; prefer containsEntry or doesNotContainKey
+
+#TruthIncompatibleType=WARN
+# Argument is not compatible with the subject's type.
+
+#TypeEquals=WARN
+# com.sun.tools.javac.code.Type doesn't override Object.equals and instances are not interned by javac, so testing types for equality should be done with Types#isSameType instead
+
+#TypeNameShadowing=WARN
+# Type parameter declaration shadows another named type
+
+#TypeParameterShadowing=WARN
+# Type parameter declaration overrides another type parameter already declared
+
+#TypeParameterUnusedInFormals=WARN
+# Declaring a type parameter that is only used in the return type is a misuse of generics: operations on the type parameter are unchecked, it hides unsafe casts at invocations of the method, and it interacts badly with method overload resolution.
+
+URLEqualsHashCode=ERROR
+# Avoid hash-based containers of java.net.URL; the containers rely on equals() and hashCode(), which cause java.net.URL to make blocking internet connections.
+
+#UndefinedEquals=WARN
+# This type is not guaranteed to implement a useful #equals method.
+
+#UnescapedEntity=WARN
+# Javadoc is interpreted as HTML, so HTML entities such as &, <, > must be escaped. If this finding seems wrong (e.g. is within a @code or @literal tag), check whether the tag could be malformed and not recognised by the compiler.
+
+#UnicodeEscape=WARN
+# Using unicode escape sequences for printable ASCII characters is obfuscated, and potentially dangerous.
+
+#UnnecessaryAssignment=WARN
+# Fields annotated with @Inject/@Mock should not be manually assigned to, as they should be initialized by a framework. Remove the assignment if a framework is being used, or the annotation if one isn't.
+
+UnnecessaryLambda=ERROR
+# Returning a lambda from a helper method or saving it in a constant is unnecessary; prefer to implement the functional interface method directly and use a method reference instead.
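+
+# Illustrative sketch (hypothetical names, not from the Error Prone docs) of the
+# UnnecessaryLambda check above (enabled at ERROR):
+#   private static final Function<String, String> TRIM = s -> s.trim(); // flagged
+#   private static String trim(String s) { return s.trim(); }           // use Foo::trim instead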
+
+#UnnecessaryLongToIntConversion=WARN
+# Converting a long or Long to an int to pass as a long parameter is usually not necessary. If this conversion is intentional, consider `Longs.constrainToRange()` instead.
+
+#UnnecessaryMethodInvocationMatcher=WARN
+# It is not necessary to wrap a MethodMatcher with methodInvocation().
+
+#UnnecessaryMethodReference=WARN
+# This method reference is unnecessary, and can be replaced with the variable itself.
+
+#UnnecessaryParentheses=WARN
+# These grouping parentheses are unnecessary; it is unlikely the code will be misinterpreted without them
+
+#UnrecognisedJavadocTag=WARN
+# This Javadoc tag wasn't recognised by the parser. Is it malformed somehow, perhaps with mismatched braces?
+
+#UnsafeFinalization=WARN
+# Finalizer may run before native code finishes execution
+
+#UnsafeReflectiveConstructionCast=WARN
+# Prefer `asSubclass` instead of casting the result of `newInstance`, to detect classes of incorrect type before invoking their constructors. This way, if the class is of the incorrect type, it will throw an exception before invoking its constructor.
+
+#UnsynchronizedOverridesSynchronized=WARN
+# Unsynchronized method overrides a synchronized method.
+
+UnusedMethod=ERROR
+# Unused.
+
+#UnusedNestedClass=WARN
+# This nested class is unused, and can be removed.
+
+#UnusedTypeParameter=WARN
+# This type parameter is unused and can be removed.
+
+#UnusedVariable=WARN
+# Unused.
+
+#UseBinds=WARN
+# @Binds is a more efficient and declarative mechanism for delegating a binding.
+
+UseCorrectAssertInTests=ERROR
+# Java assert is used in test. For testing purposes Assert.* matchers should be used.
+
+#VariableNameSameAsType=WARN
+# variableName and type with the same name would refer to the static field instead of the class
+
+#WaitNotInLoop=WARN
+# Because of spurious wakeups, Object.wait() and Condition.await() must always be called in a loop
+
+#WakelockReleasedDangerously=WARN
+# A wakelock acquired with a timeout may be released by the system before calling `release`, even after checking `isHeld()`. If so, it will throw a RuntimeException. Please wrap in a try/catch block.
+
+#WithSignatureDiscouraged=WARN
+# withSignature is discouraged. Prefer .named and/or .withParameters where possible.
+
+####################################################################################################
+# Experimental : ERROR
+# See https://errorprone.info/bugpatterns
+####################################################################################################
+
+#AndroidJdkLibsChecker=ERROR
+# Use of class, field, or method that is not compatible with legacy Android devices
+
+#AutoFactoryAtInject=ERROR
+# @AutoFactory and @Inject should not be used in the same type.
+
+#BanSerializableRead=ERROR
+# Deserializing user input via the `Serializable` API is extremely dangerous
+
+#ClassName=ERROR
+# The source file name should match the name of the top-level class it contains
+
+#ComparisonContractViolated=ERROR
+# This comparison method violates the contract
+
+#DeduplicateConstants=ERROR
+# This expression was previously declared as a constant; consider replacing this occurrence.
+
+#DepAnn=ERROR
+# Item documented with a @deprecated javadoc note is not annotated with @Deprecated
+
+#EmptyIf=ERROR
+# Empty statement after if
+
+#ExtendsAutoValue=ERROR
+# Do not extend an @AutoValue/@AutoOneOf class in non-generated code.
+
+#InjectMoreThanOneQualifier=ERROR
+# Using more than one qualifier annotation on the same element is not allowed.
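+
+# Illustrative sketch (not from the Error Prone docs) of the UseCorrectAssertInTests
+# check above (enabled at ERROR):
+#   assert result == 4;             // may silently pass: -ea is not always set
+#   Assert.assertEquals(4, result); // the JUnit assertion always runs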
+ +#InjectScopeOrQualifierAnnotationRetention=ERROR +# Scoping and qualifier annotations must have runtime retention. + +#InsecureCryptoUsage=ERROR +# A standard cryptographic operation is used in a mode that is prone to vulnerabilities + +#IterablePathParameter=ERROR +# Path implements Iterable; prefer Collection for clarity + +#Java7ApiChecker=ERROR +# Use of class, field, or method that is not compatible with JDK 7 + +#Java8ApiChecker=ERROR +# Use of class, field, or method that is not compatible with JDK 8 + +#LockMethodChecker=ERROR +# This method does not acquire the locks specified by its @LockMethod annotation + +#LongLiteralLowerCaseSuffix=ERROR +# Prefer 'L' to 'l' for the suffix to long literals + +#NoAllocation=ERROR +# @NoAllocation was specified on this method, but something was found that would trigger an allocation + +#RefersToDaggerCodegen=ERROR +# Don't refer to Dagger's internal or generated code + +#StaticOrDefaultInterfaceMethod=ERROR +# Static and default interface methods are not natively supported on older Android devices. + +#StaticQualifiedUsingExpression=ERROR +# A static variable or method should be qualified with a class name, not expression + +#UnlockMethod=ERROR +# This method does not acquire the locks specified by its @UnlockMethod annotation + +#################################################################################################### +# Experimental : WARNING +# See https://errorprone.info/bugpatterns +#################################################################################################### + +#AnnotationPosition=WARN +# Annotations should be positioned after Javadocs, but before modifiers. + +#AssertFalse=WARN +# Assertions may be disabled at runtime and do not guarantee that execution will halt here; consider throwing an exception instead + +#AssistedInjectAndInjectOnConstructors=WARN +# @AssistedInject and @Inject should not be used on different constructors in the same class. + +#BinderIdentityRestoredDangerously=WARN +# A call to Binder.clearCallingIdentity() should be followed by Binder.restoreCallingIdentity() in a finally block. Otherwise the wrong Binder identity may be used by subsequent code. + +#BindingToUnqualifiedCommonType=WARN +# This code declares a binding for a common value type without a Qualifier annotation. + +#CannotMockFinalClass=WARN +# Mockito cannot mock final classes + +#CatchingUnchecked=WARN +# This catch block catches `Exception`, but can only catch unchecked exceptions. Consider catching RuntimeException (or something more specific) instead so it is more apparent that no checked exceptions are being handled. + +#CheckedExceptionNotThrown=WARN +# This method cannot throw a checked exception that it claims to. This may cause consumers of the API to incorrectly attempt to handle, or propagate, this exception. + +ConstantPatternCompile=WARN +# Variables initialized with Pattern#compile calls on constants can be constants + +#DifferentNameButSame=WARN +# This type is referred to in different ways within this file, which may be confusing. + +#EqualsBrokenForNull=WARN +# equals() implementation may throw NullPointerException when given null + +#ExpectedExceptionChecker=WARN +# Prefer assertThrows to ExpectedException + +#FloggerLogWithCause=WARN +# Setting the caught exception as the cause of the log message may provide more context for anyone debugging errors. 
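+
+# Illustrative sketch (hypothetical names, not from the Error Prone docs) of the
+# ConstantPatternCompile check above (enabled at WARN):
+#   boolean ok = Pattern.compile("[a-z]+").matcher(s).matches();  // recompiled on every call
+#   private static final Pattern WORD = Pattern.compile("[a-z]+"); // compile once, reuse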
+ +#FloggerMessageFormat=WARN +# Invalid message format-style format specifier ({0}), expected printf-style (%s) + +#FloggerPassedAround=WARN +# There is no advantage to passing around a logger rather than declaring one in the class that needs it. + +#FloggerRedundantIsEnabled=WARN +# Logger level check is already implied in the log() call. An explicit atLevel.isEnabled() check is redundant. + +#FloggerRequiredModifiers=WARN +# FluentLogger.forEnclosingClass should always be saved to a private static final field. + +#FloggerWithCause=WARN +# Calling withCause(Throwable) with an inline allocated Throwable is discouraged. Consider using withStackTrace(StackSize) instead, and specifying a reduced stack size (e.g. SMALL, MEDIUM or LARGE) instead of FULL, to improve performance. + +#FloggerWithoutCause=WARN +# Use withCause to associate Exceptions with log statements + +#FunctionalInterfaceClash=WARN +# Overloads will be ambiguous when passing lambda arguments. + +#HardCodedSdCardPath=WARN +# Hardcoded reference to /sdcard + +#InconsistentOverloads=WARN +# The ordering of parameters in overloaded methods should be as consistent as possible (when viewed from left to right) + +#InitializeInline=WARN +# Initializing variables in their declaring statement is clearer, where possible. + +#InterfaceWithOnlyStatics=WARN +# This interface only contains static fields and methods; consider making it a final class instead to prevent subclassing. + +#InterruptedExceptionSwallowed=WARN +# This catch block appears to be catching an explicitly declared InterruptedException as an Exception/Throwable and not handling the interruption separately. + +#MemberName=WARN +# Methods and non-static variables should be named in lowerCamelCase. + +#MissingDefault=WARN +# The Google Java Style Guide requires that each switch statement includes a default statement group, even if it contains no code. (This requirement is lifted for any switch statement that covers all values of an enum.) + +#NonCanonicalStaticMemberImport=WARN +# Static import of member uses non-canonical name + +#PreferJavaTimeOverload=WARN +# Prefer using java.time-based APIs when available. Note that this checker does not and cannot guarantee that the overloads have equivalent semantics, but that is generally the case with overloaded methods. + +#PreferredInterfaceType=WARN +# This type can be more specific. + +PrimitiveArrayPassedToVarargsMethod=WARN +# Passing a primitive array to a varargs method is usually wrong + +#QualifierWithTypeUse=WARN +# Injection frameworks currently don't understand Qualifiers in TYPE_PARAMETER or TYPE_USE contexts. + +RedundantOverride=ERROR +# This overriding method is redundant, and can be removed. + +RedundantThrows=ERROR +# Thrown exception is a subtype of another + +StronglyTypeByteString=WARN +# This primitive byte array is only used to construct ByteStrings. It would be clearer to strongly type the field instead. + +StronglyTypeTime=WARN +# This primitive integral type is only used to construct time types. It would be clearer to strongly type the field instead. + +#SuppressWarningsWithoutExplanation=WARN +# Use of @SuppressWarnings should be accompanied by a comment describing why the warning is safe to ignore. + +#SystemExitOutsideMain=WARN +# Code that contains System.exit() is untestable. 
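+
+# Illustrative sketch (hypothetical field names, not from the Error Prone docs) of
+# the StronglyTypeTime check above (enabled at WARN):
+#   private final long timeoutMillis = 100;                  // the unit lives only in the name
+#   private final Duration timeout = Duration.ofMillis(100); // the unit lives in the type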
+
+#SystemOut=WARN
+# Printing to standard output should only be used for debugging, not in production code
+
+#TestExceptionChecker=WARN
+# Using @Test(expected=…) is discouraged, since the test will pass if any statement in the test method throws the expected exception
+
+#ThrowSpecificExceptions=WARN
+# Base exception classes should be treated as abstract. If the exception is intended to be caught, throw a domain-specific exception. Otherwise, prefer a more specific exception for clarity. Common alternatives include: AssertionError, IllegalArgumentException, IllegalStateException, and (Guava's) VerifyException.
+
+#TimeUnitMismatch=WARN
+# A value that appears to be represented in one unit is used where another appears to be required (e.g., seconds where nanos are needed)
+
+#TooManyParameters=WARN
+# A large number of parameters on public APIs should be avoided.
+
+#TransientMisuse=WARN
+# Static fields are implicitly transient, so the explicit modifier is unnecessary
+
+#TryWithResourcesVariable=WARN
+# This variable is unnecessary, the try-with-resources resource can be a reference to a final or effectively final variable
+
+#UnnecessarilyFullyQualified=WARN
+# This fully qualified name is unambiguous to the compiler if imported.
+
+#UnnecessarilyVisible=WARN
+# Some methods (such as those annotated with @Inject or @Provides) are only intended to be called by a framework, and so should have default visibility.
+
+#UnnecessaryAnonymousClass=WARN
+# Implementing a functional interface is unnecessary; prefer to implement the functional interface method directly and use a method reference instead.
+
+#UnnecessaryDefaultInEnumSwitch=WARN
+# Switch handles all enum values: an explicit default case is unnecessary and defeats error checking for non-exhaustive switches.
+
+#UnnecessaryFinal=WARN
+# Since Java 8, it's been unnecessary to make local variables and parameters `final` for use in lambdas or anonymous classes. Marking them as `final` is weakly discouraged, as it adds a fair amount of noise for minimal benefit.
+
+#UnnecessaryOptionalGet=WARN
+# This code can be simplified by directly using the lambda parameters instead of calling get..() on optional.
+
+#UnsafeLocaleUsage=WARN
+# Possible unsafe operation related to the java.util.Locale library.
+
+#UnusedException=WARN
+# This catch block catches an exception and re-throws another, but swallows the caught exception rather than setting it as a cause. This can make debugging harder.
+
+#UrlInSee=WARN
+# URLs should not be used in @see tags; they are designed for Java elements which could be used with @link.
+
+#UsingJsr305CheckReturnValue=WARN
+# Prefer ErrorProne's @CheckReturnValue over JSR305's version.
+
+#Var=WARN
+# Non-constant variable missing @Var annotation
+
+#Varifier=WARN
+# Consider using `var` here to avoid boilerplate.
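+
+# Illustrative sketch (hypothetical names, not from the Error Prone docs) of the
+# mismatch the TimeUnitMismatch check above looks for:
+#   long timeoutSeconds = 30;
+#   future.get(timeoutSeconds, TimeUnit.MILLISECONDS); // name says seconds, unit says millis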
+
+####################################################################################################
+# Experimental : SUGGESTION
+# See https://errorprone.info/bugpatterns
+####################################################################################################
+
+#AnnotationMirrorToString=WARN
+# AnnotationMirror#toString doesn't use fully qualified type names, prefer auto-common's AnnotationMirrors#toString
+
+#AnnotationValueToString=WARN
+# AnnotationValue#toString doesn't use fully qualified type names, prefer auto-common's AnnotationValues#toString
+
+#BooleanParameter=WARN
+# Use parameter comments to document ambiguous literals
+
+#ClassNamedLikeTypeParameter=WARN
+# This class's name looks like a Type Parameter.
+
+#ConstantField=WARN
+# Fields with CONSTANT_CASE names should be both static and final
+
+#EqualsMissingNullable=WARN
+# Method overrides Object.equals but does not have @Nullable on its parameter
+
+#FieldCanBeFinal=WARN
+# This field is only assigned during initialization; consider making it final
+
+#FieldCanBeLocal=WARN
+# This field can be replaced with a local variable in the methods that use it.
+
+#FieldCanBeStatic=WARN
+# A final field initialized at compile-time with an instance of an immutable type can be static.
+
+#FieldMissingNullable=WARN
+# Field is assigned (or compared against) a definitely null value but is not annotated @Nullable
+
+#ForEachIterable=WARN
+# This loop can be replaced with an enhanced for loop.
+
+#ImmutableMemberCollection=WARN
+# If you don't intend to mutate a member collection prefer using Immutable types.
+
+#ImmutableRefactoring=WARN
+# Refactors uses of the JSR 305 @Immutable to Error Prone's annotation
+
+#ImmutableSetForContains=WARN
+# This private static ImmutableList is only used for contains, containsAll or isEmpty checks; prefer ImmutableSet.
+
+#ImplementAssertionWithChaining=WARN
+# Prefer check(…), which usually generates more readable failure messages.
+
+#LambdaFunctionalInterface=WARN
+# Use Java's utility functional interfaces instead of Function<A, B> for primitive types.
+
+#MethodCanBeStatic=WARN
+# A private method that does not reference the enclosing instance can be static
+
+#MissingBraces=WARN
+# The Google Java Style Guide requires braces to be used with if, else, for, do and while statements, even when the body is empty or contains only a single statement.
+
+#MixedArrayDimensions=WARN
+# C-style array declarations should not be used
+
+#MultiVariableDeclaration=WARN
+# Variable declarations should declare only one variable
+
+#MultipleTopLevelClasses=WARN
+# Source files should not contain multiple top-level class declarations
+
+#PackageLocation=WARN
+# Package names should match the directory they are declared in
+
+#ParameterComment=WARN
+# Non-standard parameter comment; prefer `/* paramName= */ arg`
+
+#ParameterMissingNullable=WARN
+# Parameter has handling for null but is not annotated @Nullable
+
+#PrivateConstructorForNoninstantiableModule=WARN
+# Add a private constructor to modules that will not be instantiated by Dagger.
+
+#PrivateConstructorForUtilityClass=WARN
+# Classes which are not intended to be instantiated should be made non-instantiable with a private constructor. This includes utility classes (classes with only static members), and the main class.
+
+#PublicApiNamedStreamShouldReturnStream=WARN
+# Public methods named stream() are generally expected to return a type whose name ends with Stream. Consider choosing a different method name instead.
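+
+# Illustrative sketch (hypothetical class, not from the Error Prone docs) of the
+# pattern the PrivateConstructorForUtilityClass check above suggests:
+#   final class IoUtils {
+#     private IoUtils() {} // prevents instantiation of the static-only utility class
+#     static String slurp(InputStream in) { /* read fully */ }
+#   }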
+ +#RemoveUnusedImports=WARN +# Unused imports + +#ReturnMissingNullable=WARN +# Method returns a definitely null value but is not annotated @Nullable + +#ReturnsNullCollection=WARN +# Method has a collection return type and returns {@code null} in some cases but does not annotate the method as @Nullable. See Effective Java 3rd Edition Item 54. + +#ScopeOnModule=WARN +# Scopes on modules have no function and will soon be an error. + +#SwitchDefault=WARN +# The default case of a switch should appear at the end of the last statement group + +#SymbolToString=WARN +# Symbol#toString shouldn't be used for comparison as it is expensive and fragile. + +#ThrowsUncheckedException=WARN +# Unchecked exceptions do not need to be declared in the method signature. + +TryFailRefactoring=WARN +# Prefer assertThrows to try/fail + +#TypeParameterNaming=WARN +# Type parameters must be a single letter with an optional numeric suffix, or an UpperCamelCase name followed by the letter 'T'. + +#TypeToString=WARN +# Type#toString shouldn't be used for comparison as it is expensive and fragile. + +#UngroupedOverloads=WARN +# Constructors and methods with the same name should appear sequentially with no other code in between, even when modifiers such as static or private differ between the methods. Please re-order or re-name methods. + +UnnecessaryBoxedAssignment=WARN +# This expression can be implicitly boxed. + +UnnecessaryBoxedVariable=ERROR +# It is unnecessary for this variable to be boxed. Use the primitive instead. + +#UnnecessarySetDefault=WARN +# Unnecessary call to NullPointerTester#setDefault + +#UnnecessaryStaticImport=WARN +# Using static imports for types is unnecessary + +UseEnumSwitch=WARN +# Prefer using a switch instead of a chained if-else for enums + +#VoidMissingNullable=WARN +# The type Void is not annotated @Nullable + +#WildcardImport=WARN +# Wildcard imports, static or otherwise, should not be used + +#################################################################################################### +# SLF4j +# See https://github.com/KengoTODA/errorprone-slf4j +#################################################################################################### + +Slf4jPlaceholderMismatch=ERROR +Slf4jFormatShouldBeConst=ERROR +Slf4jLoggerShouldBePrivate=ERROR +Slf4jLoggerShouldBeFinal=ERROR +Slf4jLoggerShouldBeNonStatic=OFF +Slf4jIllegalPassedClass=ERROR +#Slf4jSignOnlyFormat=OFF +Slf4jDoNotLogMessageOfExceptionExplicitly=OFF + diff --git a/codestyle/intellij-java-google-style.xml b/codestyle/intellij-java-google-style.xml new file mode 100644 index 00000000..f3a6743e --- /dev/null +++ b/codestyle/intellij-java-google-style.xml @@ -0,0 +1,598 @@ + + + + + + diff --git a/codestyle/org.eclipse.wst.xml.core.prefs b/codestyle/org.eclipse.wst.xml.core.prefs new file mode 100644 index 00000000..9fe6a375 --- /dev/null +++ b/codestyle/org.eclipse.wst.xml.core.prefs @@ -0,0 +1,7 @@ +eclipse.preferences.version=1 +formatCommentJoinLines=false +formatCommentText=false +indentationChar=space +indentationSize=2 +lineWidth=100 +spaceBeforeEmptyCloseTag=false diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 00000000..8f3a93cf --- /dev/null +++ b/gradle.properties @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# enable the Gradle build cache +org.gradle.caching=true +# enable Gradle parallel builds +org.gradle.parallel=true +# configure only necessary Gradle tasks +org.gradle.configureondemand=true +# explicitly disable the configuration cache +org.gradle.configuration-cache=false +#org.gradle.configuration-cache-problems=warn +# bump the Gradle daemon heap size (you can set bigger heap sizes as well) +org.gradle.jvmargs=-Xms2g -Xmx4g -XX:MaxMetaspaceSize=768m \ No newline at end of file diff --git a/gradle/baselibs.versions.toml b/gradle/baselibs.versions.toml new file mode 100644 index 00000000..18d8384a --- /dev/null +++ b/gradle/baselibs.versions.toml @@ -0,0 +1,15 @@ +# Dependencies needed by buildSrc/ + +[versions] +errorpronePlugin = "4.1.0" +ideaExt = "1.1.10" +jandexPlugin = "1.90" +shadowPlugin = "8.1.1" +spotlessPlugin = "7.0.2" + +[libraries] +errorprone = { module = "net.ltgt.gradle:gradle-errorprone-plugin", version.ref = "errorpronePlugin" } +idea-ext = { module = "gradle.plugin.org.jetbrains.gradle.plugin.idea-ext:gradle-idea-ext", version.ref = "ideaExt" } +jandex = { module = "com.github.vlsi.gradle:jandex-plugin", version.ref = "jandexPlugin" } +shadow = { module = "com.github.johnrengelman:shadow", version.ref = "shadowPlugin" } +spotless = { module = "com.diffplug.spotless:spotless-plugin-gradle", version.ref = "spotlessPlugin" } diff --git a/gradle/gradlew-include.sh b/gradle/gradlew-include.sh new file mode 100644 index 00000000..38251e36 --- /dev/null +++ b/gradle/gradlew-include.sh @@ -0,0 +1,65 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Downloads the gradle-wrapper.jar if necessary and verifies its integrity. +# Included from /.gradlew + +# Extract the Gradle version from gradle-wrapper.properties. 
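+# e.g. distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip yields GRADLE_DIST_VERSION=8.13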
+GRADLE_DIST_VERSION="$(grep distributionUrl= "$APP_HOME/gradle/wrapper/gradle-wrapper.properties" | sed 's/^.*gradle-\([0-9.]*\)-[a-z]*.zip$/\1/')"
+GRADLE_WRAPPER_SHA256="$APP_HOME/gradle/wrapper/gradle-wrapper-${GRADLE_DIST_VERSION}.jar.sha256"
+GRADLE_WRAPPER_JAR="$APP_HOME/gradle/wrapper/gradle-wrapper.jar"
+if [ -x "$(command -v sha256sum)" ] ; then
+  SHASUM="sha256sum"
+else
+  if [ -x "$(command -v shasum)" ] ; then
+    SHASUM="shasum -a 256"
+  else
+    echo "Neither sha256sum nor shasum is available, install one of them." > /dev/stderr
+    exit 1
+  fi
+fi
+if [ ! -e "${GRADLE_WRAPPER_SHA256}" ]; then
+  # Delete the wrapper jar if the checksum file does not exist.
+  rm -f "${GRADLE_WRAPPER_JAR}"
+fi
+if [ -e "${GRADLE_WRAPPER_JAR}" ]; then
+  # Verify the wrapper jar if it exists; delete the wrapper jar and the checksum file if the
+  # checksums do not match.
+  JAR_CHECKSUM="$(${SHASUM} "${GRADLE_WRAPPER_JAR}" | cut -d\ -f1)"
+  EXPECTED="$(cat "${GRADLE_WRAPPER_SHA256}")"
+  if [ "${JAR_CHECKSUM}" != "${EXPECTED}" ]; then
+    rm -f "${GRADLE_WRAPPER_JAR}" "${GRADLE_WRAPPER_SHA256}"
+  fi
+fi
+if [ ! -e "${GRADLE_WRAPPER_SHA256}" ]; then
+  curl --location --output "${GRADLE_WRAPPER_SHA256}" https://services.gradle.org/distributions/gradle-${GRADLE_DIST_VERSION}-wrapper.jar.sha256 || exit 1
+fi
+if [ ! -e "${GRADLE_WRAPPER_JAR}" ]; then
+  # The Gradle version extracted from the `distributionUrl` property does not contain ".0" patch
+  # versions. Need to append a ".0" in that case to download the wrapper jar.
+  GRADLE_VERSION="$(echo "$GRADLE_DIST_VERSION" | sed 's/^\([0-9]*[.][0-9]*\)$/\1.0/')"
+  curl --location --output "${GRADLE_WRAPPER_JAR}" https://raw.githubusercontent.com/gradle/gradle/v${GRADLE_VERSION}/gradle/wrapper/gradle-wrapper.jar || exit 1
+  JAR_CHECKSUM="$(${SHASUM} "${GRADLE_WRAPPER_JAR}" | cut -d\ -f1)"
+  EXPECTED="$(cat "${GRADLE_WRAPPER_SHA256}")"
+  if [ "${JAR_CHECKSUM}" != "${EXPECTED}" ]; then
+    # If the (just downloaded) checksum and the downloaded wrapper jar do not match, something
+    # really bad is going on.
+    echo "Expected sha256 of the downloaded gradle-wrapper.jar does not match the downloaded sha256!" > /dev/stderr
+    exit 1
+  fi
+fi
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
new file mode 100644
index 00000000..a2f7c282
--- /dev/null
+++ b/gradle/libs.versions.toml
@@ -0,0 +1,71 @@
+[versions]
+apacheCommonsCSV = "1.13.0"
+assertj = "3.27.3"
+aws = "2.30.3" # kept in sync with the iceberg repo
+checkstyle = "10.21.3"
+errorprone = "2.36.0"
+errorproneSlf4j = "0.1.28"
+googleJavaFormat = "1.25.2"
+guava = "33.4.0-jre"
+hadoop = "2.7.3" # kept in sync with the iceberg repo
+hive = "2.3.9" # kept in sync with the iceberg repo
+iceberg = "1.7.1" +immutables = "2.10.1" +jacoco = "0.8.12" +jakartaAnnotation = "2.1.1" +jandex = "3.2.3" +junit = "5.12.0" +logback = "1.5.17" +logcaptor = "2.10.1" +nessie = "0.101.3" +nessieRunner = "0.32.2" +openApiGenerator = "7.11.0" +picocli = "4.7.6" +shadowPlugin = "8.1.1" +slf4j = "1.7.36" + +[libraries] +apache-commons-csv = { module = "org.apache.commons:commons-csv", version.ref = "apacheCommonsCSV" } +assertj = { module = "org.assertj:assertj-core", version.ref = "assertj" } +aws-sdk-apache-client = { module = "software.amazon.awssdk:apache-client", version.ref = "aws" } +aws-sdk-auth = { module = "software.amazon.awssdk:auth", version.ref = "aws" } +aws-sdk-dynamo = { module = "software.amazon.awssdk:dynamodb", version.ref = "aws" } +aws-sdk-glue = { module = "software.amazon.awssdk:glue", version.ref = "aws" } +aws-sdk-kms = { module = "software.amazon.awssdk:kms", version.ref = "aws" } +aws-sdk-lakeformation = { module = "software.amazon.awssdk:lakeformation", version.ref = "aws" } +aws-sdk-sts = { module = "software.amazon.awssdk:sts", version.ref = "aws" } +aws-sdk-s3 = { module = "software.amazon.awssdk:s3", version.ref = "aws" } +aws-sdk-url-connection-client = { module = "software.amazon.awssdk:url-connection-client", version.ref = "aws" } +checkstyle = { module = "com.puppycrawl.tools:checkstyle", version.ref = "checkstyle" } +errorprone-annotations = { module = "com.google.errorprone:error_prone_annotations", version.ref = "errorprone" } +errorprone-core = { module = "com.google.errorprone:error_prone_core", version.ref = "errorprone" } +errorprone-slf4j = { module = "jp.skypencil.errorprone.slf4j:errorprone-slf4j", version.ref = "errorproneSlf4j" } +findbugs-annotations = { module = "com.google.code.findbugs:annotations", version = "3.0.1" } +findbugs-jsr305 = { module = "com.google.code.findbugs:jsr305", version = "3.0.2" } +google-java-format = { module = "com.google.googlejavaformat:google-java-format", version.ref = "googleJavaFormat" } +guava = { module = "com.google.guava:guava", version.ref = "guava" } +hadoop-aws = { module = "org.apache.hadoop:hadoop-aws", version.ref = "hadoop" } +hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" } +iceberg-dell = { module = "org.apache.iceberg:iceberg-dell", version.ref = "iceberg" } +iceberg-spark-runtime = { module = "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12", version.ref = "iceberg" } +immutables-builder = { module = "org.immutables:builder", version.ref = "immutables" } +immutables-value-annotations = { module = "org.immutables:value-annotations", version.ref = "immutables" } +immutables-value-processor = { module = "org.immutables:value-processor", version.ref = "immutables" } +jacoco-ant = { module = "org.jacoco:org.jacoco.ant", version.ref = "jacoco" } +jacoco-report = { module = "org.jacoco:org.jacoco.report", version.ref = "jacoco" } +jacoco-maven-plugin = { module = "org.jacoco:jacoco-maven-plugin", version.ref = "jacoco" } +jakarta-annotation = { module = "jakarta.annotation:jakarta.annotation-api", version.ref = "jakartaAnnotation" } +jandex = { module = "org.jboss:jandex", version.ref = "jandex" } +junit-bom = { module = "org.junit:junit-bom", version.ref = "junit" } +logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback" } +logcaptor = { module = "io.github.hakky54:logcaptor", version.ref = "logcaptor" } +picocli = { module = "info.picocli:picocli", version.ref = "picocli" } +slf4j = { module = "org.slf4j:log4j-over-slf4j", 
version.ref = "slf4j" } +openapi-generator = { module = "org.openapitools:openapi-generator", version.ref = "openApiGenerator" } + +[plugins] +errorprone = { id = "net.ltgt.errorprone", version = "4.1.0" } +nessie-run = { id = "org.projectnessie", version.ref = "nessieRunner" } +nexus-publish-plugin = { id = "io.github.gradle-nexus.publish-plugin", version = "2.0.0" } +openapi-generator-gradle-plugin = { id = "org.openapi.generator", version.ref = "openApiGenerator" } +shadow = { id = "com.github.johnrengelman.shadow", version.ref = "shadowPlugin" } diff --git a/gradle/wrapper/gradle-wrapper-8.13.jar.sha256 b/gradle/wrapper/gradle-wrapper-8.13.jar.sha256 new file mode 100644 index 00000000..b7daa59f --- /dev/null +++ b/gradle/wrapper/gradle-wrapper-8.13.jar.sha256 @@ -0,0 +1 @@ +81a82aaea5abcc8ff68b3dfcb58b3c3c429378efd98e7433460610fecd7ae45f \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000..36e4933e --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,8 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionSha256Sum=20f1b1176237254a6fc204d8434196fa11a4cfb387567519c61556e8710aed78 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 00000000..2741145b --- /dev/null +++ b/gradlew @@ -0,0 +1,253 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. 
+# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit + +. ${APP_HOME}/gradle/gradlew-include.sh + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
+ +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" \ No newline at end of file diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 00000000..9b42019c --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,94 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+@rem
+@rem SPDX-License-Identifier: Apache-2.0
+@rem
+
+@if "%DEBUG%"=="" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%"=="" set DIRNAME=.
+@rem This is normally unused
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if %ERRORLEVEL% equ 0 goto execute
+
+echo. 1>&2
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
+echo. 1>&2
+echo Please set the JAVA_HOME variable in your environment to match the 1>&2
+echo location of your Java installation. 1>&2
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo. 1>&2
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
+echo. 1>&2
+echo Please set the JAVA_HOME variable in your environment to match the 1>&2
+echo location of your Java installation. 1>&2
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if %ERRORLEVEL% equ 0 goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+set EXIT_CODE=%ERRORLEVEL%
+if %EXIT_CODE% equ 0 set EXIT_CODE=1
+if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
+exit /b %EXIT_CODE%

+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/iceberg-catalog-migrator/README.md b/iceberg-catalog-migrator/README.md
new file mode 100644
index 00000000..49681d26
--- /dev/null
+++ b/iceberg-catalog-migrator/README.md
@@ -0,0 +1,345 @@
+# Objective
+Introduce a command-line tool that enables bulk migration of Iceberg tables from one catalog to another without the need to copy the data.
+
+There are various reasons why users may want to move their Iceberg tables to a different catalog. For instance:
+* They were using a Hadoop catalog and later realized that it is not recommended for production, so they want to move their tables to a production-ready catalog.
+* They just heard about the awesome Arctic catalog (or Nessie) and want to move their existing Iceberg tables to Dremio Arctic.
+* They had an on-premises Hive catalog, but want to move tables to a cloud-based catalog as part of their cloud migration strategy.
+
+The CLI tool supports two commands:
+* migrate - Bulk migrate Iceberg tables from the source catalog to the target catalog without copying data.
+Table entries are deleted from the source catalog after a successful migration to the target catalog.
+* register - Bulk register Iceberg tables from the source catalog to the target catalog without copying data.
+
+> :warning: The `register` command just registers the table,
+which means the table will be present in both catalogs after registering.
+**Operating on the same table from more than one catalog can lead to missing updates, loss of data, and table corruption.
+It is therefore recommended to use the `migrate` command, which automatically deletes the table from the source catalog after registering,
+or, if the `migrate` command is not used, to avoid operating on tables from the source catalog after registering.**
+
+> :warning: **Avoid using this CLI tool when there are in-progress commits for tables in the source catalog,
+to prevent missing updates, data loss, and table corruption in the target catalog.
+In-progress commits may not be properly transferred and could compromise the integrity of your data.**
+
+# Iceberg-catalog-migrator
+You need Java installed on your machine (Java 21 is the minimum and recommended version) to use this CLI tool.
+
+Below is the CLI syntax:
+```
+$ java -jar iceberg-catalog-migrator-cli-0.3.0.jar -h
+Usage: iceberg-catalog-migrator [-hV] [COMMAND]
+  -h, --help      Show this help message and exit.
+  -V, --version   Print version information and exit.
+Commands:
+  migrate   Bulk migrate the iceberg tables from source catalog to target catalog without data copy. Table entries from source catalog will be
+              deleted after the successful migration to the target catalog.
+  register  Bulk register the iceberg tables from source catalog to target catalog without data copy.
+```
+
+```
+$ java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate -h
+Usage: iceberg-catalog-migrator migrate [-hV] [--disable-safety-prompts] [--dry-run] [--stacktrace] [--output-dir=]
+       (--source-catalog-type= --source-catalog-properties=[,...]
+       [--source-catalog-properties=[,...]]...
+       [--source-catalog-hadoop-conf=[,...]]...
+       [--source-custom-catalog-impl=]) (--target-catalog-type=
+       --target-catalog-properties=[,...] [--target-catalog-properties=
+       [,...]]... [--target-catalog-hadoop-conf=[,...]]...
+       [--target-custom-catalog-impl=]) [--identifiers=[,...]
+       [--identifiers=[,...]]... | --identifiers-from-file= |
+       --identifiers-regex=]
+Bulk migrate the iceberg tables from source catalog to target catalog without data copy. Table entries from source catalog will be deleted after the
+successful migration to the target catalog.
+      --output-dir=
+                  Optional local output directory path to write CLI output files like `failed_identifiers.txt`, `failed_to_delete_at_source.txt`,
+                    `dry_run_identifiers.txt`. If not specified, uses the present working directory.
+                    Example: --output-dir /tmp/output/
+                             --output-dir $PWD/output_folder
+      --dry-run   Optional configuration to simulate the registration without actually registering. Can learn about a list of tables that will be
+                    registered by running this.
+      --disable-safety-prompts
+                  Optional configuration to disable safety prompts which needs console input.
+      --stacktrace Optional configuration to enable capturing stacktrace in logs in case of failures.
+  -h, --help      Show this help message and exit.
+  -V, --version   Print version information and exit.
+Source catalog options:
+      --source-catalog-type=
+                  Source catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST].
+                    Example: --source-catalog-type GLUE
+                             --source-catalog-type NESSIE
+      --source-catalog-properties=[,...]
+                  Iceberg catalog properties for source catalog (like uri, warehouse, etc).
+                    Example: --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie
+      --source-catalog-hadoop-conf=[,...]
+                  Optional source catalog Hadoop configurations required by the Iceberg catalog.
+                    Example: --source-catalog-hadoop-conf key1=value1,key2=value2
+      --source-custom-catalog-impl=
+                  Optional fully qualified class name of the custom catalog implementation of the source catalog. Required when the catalog type
+                    is CUSTOM.
+                    Example: --source-custom-catalog-impl org.apache.iceberg.AwesomeCatalog
+Target catalog options:
+      --target-catalog-type=
+                  Target catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, NESSIE, REST].
+                    Example: --target-catalog-type GLUE
+                             --target-catalog-type NESSIE
+      --target-catalog-properties=[,...]
+                  Iceberg catalog properties for target catalog (like uri, warehouse, etc).
+                    Example: --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie
+      --target-catalog-hadoop-conf=[,...]
+                  Optional target catalog Hadoop configurations required by the Iceberg catalog.
+                    Example: --target-catalog-hadoop-conf key1=value1,key2=value2
+      --target-custom-catalog-impl=
+                  Optional fully qualified class name of the custom catalog implementation of the target catalog. Required when the catalog type
+                    is CUSTOM.
+                    Example: --target-custom-catalog-impl org.apache.iceberg.AwesomeCatalog
+Identifier options:
+      --identifiers=[,...]
+                  Optional selective set of identifiers to register. If not specified, all the tables will be registered. Use this when there are
+                    few identifiers that need to be registered. For a large number of identifiers, use the `--identifiers-from-file` or
+                    `--identifiers-regex` option.
+                    Example: --identifiers foo.t1,bar.t2
+      --identifiers-from-file=
+                  Optional text file path that contains a set of table identifiers (one per line) to register. Should not be used with
+                    `--identifiers` or `--identifiers-regex` option.
+                    Example: --identifiers-from-file /tmp/files/ids.txt
+      --identifiers-regex=
+                  Optional regular expression pattern used to register only the tables whose identifiers match this pattern. Should not be used
+                    with `--identifiers` or '--identifiers-from-file' option.
+                    Example: --identifiers-regex ^foo\..*
+```
+
+Note: Options for the `register` command are exactly the same as for the `migrate` command.
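+
+Since the two commands accept the same options, `--dry-run` (documented above for `migrate`) can be combined with `register` as well to preview the affected tables before touching anything. A minimal sketch, reusing the local Hadoop-to-Nessie setup from the samples below (the warehouse paths and Nessie URI are placeholder values):
+
+```shell
+# Preview which tables would be registered; neither catalog is modified.
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar register \
+--source-catalog-type HADOOP \
+--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \
+--dry-run
+```
+
+The identifiers selected by the dry run are written to the output directory (see `--output-dir` above) and can be fed back in via `--identifiers-from-file`.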
+
+# Sample Inputs
+## Bulk registering all the tables from Hadoop catalog to Nessie catalog (main branch)
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar register \
+--source-catalog-type HADOOP \
+--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse
+```
+
+## Register all the tables from Hadoop catalog to Arctic catalog (main branch)
+
+```shell
+export PAT=xxxxxxx
+export AWS_ACCESS_KEY_ID=xxxxxxx
+export AWS_SECRET_ACCESS_KEY=xxxxxxx
+export AWS_S3_ENDPOINT=xxxxxxx
+```
+
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar register \
+--source-catalog-type HADOOP \
+--source-catalog-properties warehouse=/tmp/warehouse,type=hadoop \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=https://nessie.dremio.cloud/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT
+```
+
+## Migrate selected tables (t1,t2 in namespace foo) from Arctic catalog (main branch) to Hadoop catalog.
+
+```shell
+export PAT=xxxxxxx
+export AWS_ACCESS_KEY_ID=xxxxxxx
+export AWS_SECRET_ACCESS_KEY=xxxxxxx
+export AWS_S3_ENDPOINT=xxxxxxx
+```
+
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type NESSIE \
+--source-catalog-properties uri=https://nessie.dremio.cloud/v1/repositories/8158e68a-5046-42c6-a7e4-c920d9ae2475,ref=main,warehouse=/tmp/warehouse,authentication.type=BEARER,authentication.token=$PAT \
+--target-catalog-type HADOOP \
+--target-catalog-properties warehouse=/tmp/warehouseHadoop \
+--identifiers foo.t1,foo.t2
+```
+
+## Migrate all tables from GLUE catalog to Arctic catalog (main branch)
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type GLUE \
+--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=https://nessie.dremio.cloud/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=s3a://some-other-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,authentication.type=BEARER,authentication.token=$PAT
+```
+
+## Migrate all tables from HIVE catalog to Arctic catalog (main branch)
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type HIVE \
+--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=https://nessie.dremio.cloud/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=s3a://some-other-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,authentication.type=BEARER,authentication.token=$PAT
+```
+
+## Migrate all tables from DYNAMODB catalog to Arctic catalog (main branch)
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type DYNAMODB \
+--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=https://nessie.dremio.cloud/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=s3a://some-other-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,authentication.type=BEARER,authentication.token=$PAT
+```
+
+## Migrate all tables from JDBC catalog to Arctic catalog (main branch)
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type JDBC \
+--source-catalog-properties warehouse=/tmp/warehouseJdbc,jdbc.user=root,jdbc.password=pass,uri=jdbc:mysql://localhost:3306/db1,name=catalogName \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=https://nessie.dremio.cloud/v1/repositories/612a4560-1178-493f-9c14-ab6b33dc31c5,ref=main,warehouse=/tmp/nessiewarehouse,authentication.type=BEARER,authentication.token=$PAT
+```
+
+# Scenarios
+## A. User wants to try out a new catalog
+Users can try out a new catalog by creating a fresh table in it and testing the catalog's capabilities.
+
+## B. User wants to move tables from one catalog (example: Hive) to another (example: Nessie).
+
+### B.1) Executes the `--dry-run` option to check which tables will get migrated.
+
+Sample input:
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type HIVE \
+--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \
+--dry-run
+```
+
+After validating all inputs, the console displays the list of table identifiers selected for migration, along with the total count.
+This information is also written to a file called `dry_run.txt`.
+The list of table identifiers in `dry_run.txt` can be altered (if needed) and reused for the actual migration using the `--identifiers-from-file` option, eliminating the need for the tool to list the tables from the catalog in the actual run.
+
+### B.2) Executes the migration of all 1000 tables and all the tables are successfully migrated.
+
+Sample input:
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type HIVE \
+--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse
+```
+
+After input validation, users will receive a prompt message with the option to either abort or continue the operation.
+
+```
+WARN - User has not specified the table identifiers. Will be selecting all the tables from all the namespaces from the source catalog.
+INFO - Configured source catalog: SOURCE_CATALOG_HIVE
+INFO - Configured target catalog: TARGET_CATALOG_NESSIE
+WARN -
+	a) Executing catalog migration when the source catalog has some in-progress commits
+	can lead to a data loss as the in-progress commits will not be considered for migration.
+	So, while using this tool please make sure there are no in-progress commits for the source catalog.
+
+	b) After the migration, successfully migrated tables will be deleted from the source catalog
+	and can only be accessed from the target catalog.
+INFO - Are you certain that you wish to proceed, after reading the above warnings? (yes/no):
+```
+
+If the user chooses to continue, additional information will be displayed on the console.
+
+```
+INFO - Continuing...
+INFO - Identifying tables for migration ...
+INFO - Identified 1000 tables for migration.
+INFO - Started migration ...
+INFO - Attempted Migration for 100 tables out of 1000 tables.
+INFO - Attempted Migration for 200 tables out of 1000 tables.
+.
+.
+.
+INFO - Attempted Migration for 900 tables out of 1000 tables.
+INFO - Attempted Migration for 1000 tables out of 1000 tables.
+INFO - Finished migration ...
+INFO - Summary:
+INFO - Successfully migrated 1000 tables from HIVE catalog to NESSIE catalog.
+INFO - Details:
+INFO - Successfully migrated these tables:
+[foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3, …, …,bar.tbl-1000]
+```
+
+Please note that a log file will be created, which logs "successfully migrated table X" for every migrated table and also records any table-level failures, if present.
+
+### B.3) Executes the migration and 10 out of 1000 tables fail to migrate because of some error; the remaining 990 tables are successfully migrated.
+
+Sample input:
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type HIVE \
+--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \
+--stacktrace
+```
+
+Console output will be the same as in B.2) up to the summary because, even in case of failures, migration is attempted for all the identified tables.
+
+```
+INFO - Summary:
+INFO - Successfully migrated 990 tables from HIVE catalog to NESSIE catalog.
+ERROR - Failed to migrate 10 tables from HIVE catalog to NESSIE catalog. Please check the `catalog_migration.log` file for the failure reason.
+Failed Identifiers are written to `failed_identifiers.txt`. Retry with that file using the `--identifiers-from-file` option if the failure is because of network/connection timeouts.
+INFO - Details:
+INFO - Successfully migrated these tables:
+[foo.tbl-1, foo.tbl-2, bar.tbl-4, bar.tbl-3, …, …,bar.tbl-1000]
+ERROR - Failed to migrate these tables:
+[bar.tbl-201, foo.tbl-202, …, …,bar.tbl-210]
```

+Please note that a log file will be generated, which logs "successfully migrated table X" for every migrated table, and that the table-level failures are written to the `failed_identifiers.txt` file.
+Users can use this file to identify the failed tables and search for them in the log, which will contain the exception stacktrace for those 10 tables.
+This can help users understand why the migration failed.
+* If the migration of those tables failed with a `TableAlreadyExists` exception, users can rename the tables in the source catalog and migrate only those 10 tables using any of the identifier options available in the argument.
+* If the migration of those tables failed with a `ConnectionTimeOut` exception, users can retry migrating only those 10 tables using the `--identifiers-from-file` option with the `failed_identifiers.txt` file.
+* If the migration succeeds but deletion of some tables from the source catalog fails, the summary will mention that these table names were written to the `failed_to_delete.txt` file, and the logs will capture the failure reason.
+Do not operate on these tables from the source catalog; they will have to be deleted manually.
+
+### B.4) Executes the migration of 1000 tables but manually aborts it by killing the process.
+
+To determine the number of migrated tables, the user can either review the log or use the `listTables()` function in the target catalog.
+In the event of an abort, migrated tables may not be deleted from the source catalog, and users should avoid manipulating them from there.
+To recover, users can manually remove these tables from the source catalog or attempt a bulk migration to transfer all tables from the source catalog.
+
+### B.5) User needs to move from one catalog to another with selected tables only (for example, only the production tables or only the test tables)
+
+Users can provide a selective list of identifiers to migrate using any of these three options:
+`--identifiers`, `--identifiers-from-file`, or `--identifiers-regex`; each can also be used along with the dry-run option.
+
+Sample input: (only migrate tables that start with "foo.")
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type HIVE \
+--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \
+--identifiers-regex ^foo\..*

+```
+
+Sample input: (migrate all tables listed in the file ids.txt, one identifier per line)
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type HIVE \
+--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \
+--identifiers-from-file ids.txt
+```
+
+Sample input: (migrate only two tables foo.tbl1, foo.tbl2)
+```shell
+java -jar iceberg-catalog-migrator-cli-0.3.0.jar migrate \
+--source-catalog-type HIVE \
+--source-catalog-properties warehouse=s3a://some-bucket/wh/,io-impl=org.apache.iceberg.aws.s3.S3FileIO,uri=thrift://localhost:9083 \
+--target-catalog-type NESSIE \
+--target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouse \
+--identifiers foo.tbl1,foo.tbl2
+```
+
+The console will clearly print that only these identifiers are used for table migration.
+The rest of the behavior is the same as described in the previous sections.
\ No newline at end of file
diff --git a/iceberg-catalog-migrator/api-test/build.gradle.kts b/iceberg-catalog-migrator/api-test/build.gradle.kts
new file mode 100644
index 00000000..d969bbc1
--- /dev/null
+++ b/iceberg-catalog-migrator/api-test/build.gradle.kts
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +plugins { + `java-library` + `maven-publish` + signing + `build-conventions` +} + +dependencies { + implementation(libs.guava) + implementation(libs.hadoop.common) { + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("javax.servlet", "servlet-api") + exclude("com.google.code.gson", "gson") + exclude("commons-beanutils") + } + implementation(libs.iceberg.spark.runtime) + implementation(platform(libs.junit.bom)) + implementation("org.junit.jupiter:junit-jupiter-api") + implementation("org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests") +} diff --git a/iceberg-catalog-migrator/api-test/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/test/AbstractTest.java b/iceberg-catalog-migrator/api-test/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/test/AbstractTest.java new file mode 100644 index 00000000..9bc7d3de --- /dev/null +++ b/iceberg-catalog-migrator/api-test/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/test/AbstractTest.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api.test;
+
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Stream;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.SupportsNamespaces;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.io.TempDir;
+
+public abstract class AbstractTest {
+
+  public static final Namespace FOO = Namespace.of("foo");
+  public static final Namespace BAR = Namespace.of("bar");
+  public static final Namespace DB1 = Namespace.of("db1");
+  public static final TableIdentifier FOO_TBL1 = TableIdentifier.of(FOO, "tbl1");
+  public static final TableIdentifier FOO_TBL2 = TableIdentifier.of(FOO, "tbl2");
+  public static final TableIdentifier BAR_TBL3 = TableIdentifier.of(BAR, "tbl3");
+  public static final TableIdentifier BAR_TBL4 = TableIdentifier.of(BAR, "tbl4");
+
+  private static final List<Namespace> defaultNamespaceList = Arrays.asList(FOO, BAR, DB1);
+
+  protected static final Namespace NS_A = Namespace.of("a");
+  protected static final Namespace NS_A_B = Namespace.of("a", "b");
+  protected static final Namespace NS_A_C = Namespace.of("a", "c");
+  protected static final Namespace NS_A_B_C = Namespace.of("a", "b", "c");
+  protected static final Namespace NS_A_B_C_D = Namespace.of("a", "b", "c", "d");
+  protected static final Namespace NS_A_B_C_D_E = Namespace.of("a", "b", "c", "d", "e");
+
+  private static String sourceCatalogWarehouse;
+  private static String targetCatalogWarehouse;
+
+  protected static Catalog sourceCatalog;
+  protected static Catalog targetCatalog;
+
+  protected static final Schema schema =
+      new Schema(
+          Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get())).fields());
+
+  protected static @TempDir Path logDir;
+
+  protected static @TempDir Path tempDir;
+
+  @BeforeAll
+  protected static void initLogDir() {
+    System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString());
+    sourceCatalogWarehouse = tempDir.resolve("sourceCatalogWarehouse").toAbsolutePath().toString();
+    targetCatalogWarehouse = tempDir.resolve("targetCatalogWarehouse").toAbsolutePath().toString();
+  }
+
+  @AfterAll
+  protected static void close() throws Exception {
+    if (sourceCatalog instanceof AutoCloseable) {
+      ((AutoCloseable) sourceCatalog).close();
+    }
+    if (targetCatalog instanceof AutoCloseable) {
+      ((AutoCloseable) targetCatalog).close();
+    }
+  }
+
+  protected void validateAssumptionForHadoopCatalogAsSource(boolean deleteSourceTables) {
+    Assumptions.assumeFalse(
+        deleteSourceTables && sourceCatalog instanceof HadoopCatalog,
+        "deleting source tables is unsupported for HadoopCatalog");
+  }
+
+  protected static void createNamespacesForSourceCatalog() {
+    defaultNamespaceList.forEach(
+        namespace -> ((SupportsNamespaces) sourceCatalog).createNamespace(namespace));
+  }
+
+  protected static void createNamespacesForTargetCatalog() {
+    // don't create "db1" namespace in targetCatalog
+    defaultNamespaceList
+        .subList(0, 2)
+        .forEach(namespace -> ((SupportsNamespaces) targetCatalog).createNamespace(namespace));
+  }
+
+  protected static void dropNamespaces() {
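+    // Drop the default namespaces from both catalogs, skipping any that do not exist.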
+    Stream.of(sourceCatalog, targetCatalog)
+        .map(catalog -> (SupportsNamespaces) catalog)
+        .forEach(
+            catalog ->
+                defaultNamespaceList.stream()
+                    .filter(catalog::namespaceExists)
+                    .forEach(catalog::dropNamespace));
+  }
+
+  protected static void createTables() {
+    // two tables in 'foo' namespace
+    sourceCatalog.createTable(FOO_TBL1, schema);
+    sourceCatalog.createTable(FOO_TBL2, schema);
+    // two tables in 'bar' namespace
+    sourceCatalog.createTable(BAR_TBL3, schema);
+    sourceCatalog.createTable(BAR_TBL4, schema);
+  }
+
+  protected static void dropTables() {
+    Stream.of(sourceCatalog, targetCatalog)
+        .forEach(
+            catalog ->
+                defaultNamespaceList.stream()
+                    .filter(namespace -> ((SupportsNamespaces) catalog).namespaceExists(namespace))
+                    .forEach(
+                        namespace -> catalog.listTables(namespace).forEach(catalog::dropTable)));
+  }
+
+  protected static Map<String, String> nessieCatalogProperties(boolean isSourceCatalog) {
+    Map<String, String> properties = new HashMap<>();
+    Integer nessiePort = Integer.getInteger("quarkus.http.test-port", 19121);
+    String nessieUri = String.format("http://localhost:%d/api/v1", nessiePort);
+    properties.put("uri", nessieUri);
+    properties.put("warehouse", isSourceCatalog ? sourceCatalogWarehouse : targetCatalogWarehouse);
+    properties.put("ref", "main");
+    return properties;
+  }
+
+  protected static Map<String, String> hadoopCatalogProperties(boolean isSourceCatalog) {
+    Map<String, String> properties = new HashMap<>();
+    properties.put("warehouse", isSourceCatalog ? sourceCatalogWarehouse : targetCatalogWarehouse);
+    return properties;
+  }
+
+  protected static Map<String, String> hiveCatalogProperties(
+      boolean isSourceCatalog, Map<String, String> dynamicProperties) {
+    Map<String, String> properties = new HashMap<>();
+    properties.put("warehouse", isSourceCatalog ? sourceCatalogWarehouse : targetCatalogWarehouse);
+    properties.putAll(dynamicProperties);
+    return properties;
+  }
+}
diff --git a/iceberg-catalog-migrator/api/build.gradle.kts b/iceberg-catalog-migrator/api/build.gradle.kts
new file mode 100644
index 00000000..e8d19578
--- /dev/null
+++ b/iceberg-catalog-migrator/api/build.gradle.kts
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+plugins {
+  `java-library`
+  `maven-publish`
+  signing
+  alias(libs.plugins.nessie.run)
+  `build-conventions`
+}
+
+dependencies {
+  implementation(libs.guava)
+  implementation(libs.slf4j)
+  implementation(libs.iceberg.spark.runtime)
+  implementation(libs.iceberg.dell)
+  implementation(libs.hadoop.common) {
+    exclude("org.apache.avro", "avro")
+    exclude("org.slf4j", "slf4j-log4j12")
+    exclude("javax.servlet", "servlet-api")
+    exclude("com.google.code.gson", "gson")
+    exclude("commons-beanutils")
+  }
+
+  compileOnly(libs.immutables.value.annotations)
+  annotationProcessor(libs.immutables.value.processor)
+
+  testRuntimeOnly(libs.logback.classic)
+  testImplementation(platform(libs.junit.bom))
+  testImplementation("org.junit.jupiter:junit-jupiter-params")
+  testImplementation("org.junit.jupiter:junit-jupiter-api")
+  testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine")
+  testRuntimeOnly("org.junit.platform:junit-platform-launcher")
+  testImplementation(libs.assertj)
+  testImplementation(libs.logcaptor)
+
+  testImplementation(project(":iceberg-catalog-migrator-api-test"))
+
+  // for integration tests
+  testImplementation(
+    "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests"
+  )
+  // this junit4 dependency is needed for above Iceberg's TestHiveMetastore
+  testRuntimeOnly("junit:junit:4.13.2")
+
+  testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") {
+    // these are taken from iceberg repo configurations
+    exclude("org.apache.avro", "avro")
+    exclude("org.slf4j", "slf4j-log4j12")
+    exclude("org.pentaho") // missing dependency
+    exclude("org.apache.hbase")
+    exclude("org.apache.logging.log4j")
+    exclude("co.cask.tephra")
+    exclude("com.google.code.findbugs", "jsr305")
+    exclude("org.eclipse.jetty.aggregate", "jetty-all")
+    exclude("org.eclipse.jetty.orbit", "javax.servlet")
+    exclude("org.apache.parquet", "parquet-hadoop-bundle")
+    exclude("com.tdunning", "json")
+    exclude("javax.transaction", "transaction-api")
+    exclude("com.zaxxer", "HikariCP")
+  }
+  testImplementation("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") {
+    // these are taken from iceberg repo configurations
+    exclude("org.apache.avro", "avro")
+    exclude("org.slf4j", "slf4j-log4j12")
+    exclude("org.pentaho") // missing dependency
+    exclude("org.apache.hive", "hive-llap-tez")
+    exclude("org.apache.logging.log4j")
+    exclude("com.google.protobuf", "protobuf-java")
+    exclude("org.apache.calcite")
+    exclude("org.apache.calcite.avatica")
+    exclude("com.google.code.findbugs", "jsr305")
+  }
+  testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}")
+
+  nessieQuarkusServer(
+    "org.projectnessie.nessie:nessie-quarkus:${libs.versions.nessie.get()}:runner"
+  )
+}
+
+nessieQuarkusApp { includeTask(tasks.named("intTest")) }
+
+tasks.withType<Test>().configureEach { systemProperty("java.security.manager", "allow") }
diff --git a/iceberg-catalog-migrator/api/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrationResult.java b/iceberg-catalog-migrator/api/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrationResult.java
new file mode 100644
index 00000000..523aea6b
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrationResult.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import java.util.List;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.immutables.value.Value;
+
+@Value.Immutable
+public interface CatalogMigrationResult {
+
+  List<TableIdentifier> registeredTableIdentifiers();
+
+  List<TableIdentifier> failedToRegisterTableIdentifiers();
+
+  List<TableIdentifier> failedToDeleteTableIdentifiers();
+}
diff --git a/iceberg-catalog-migrator/api/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrationUtil.java b/iceberg-catalog-migrator/api/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrationUtil.java
new file mode 100644
index 00000000..ff71508a
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrationUtil.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import com.google.common.base.Preconditions;
+import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog;
+import org.apache.iceberg.aws.glue.GlueCatalog;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.dell.ecs.EcsCatalog;
+import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.jdbc.JdbcCatalog;
+import org.apache.iceberg.nessie.NessieCatalog;
+import org.apache.iceberg.rest.RESTCatalog;
+
+public final class CatalogMigrationUtil {
+
+  private CatalogMigrationUtil() {}
+
+  public enum CatalogType {
+    CUSTOM,
+    DYNAMODB,
+    ECS,
+    GLUE,
+    HADOOP,
+    HIVE,
+    JDBC,
+    NESSIE,
+    REST
+  }
+
+  public static Catalog buildCatalog(
+      Map<String, String> catalogProperties,
+      CatalogType catalogType,
+      String catalogName,
+      String customCatalogImpl,
+      Map<String, String> hadoopConf) {
+    Preconditions.checkArgument(catalogProperties != null, "catalog properties is null");
+    Preconditions.checkArgument(catalogType != null, "catalog type is null");
+    Configuration catalogConf = new Configuration();
+    if (hadoopConf != null) {
+      hadoopConf.forEach(catalogConf::set);
+    }
+    if (catalogProperties.get("name") != null) {
+      // Some catalogs, like JDBC, store the catalog name supplied by the client when a namespace
+      // or table is created. Hence, the catalog name must match when the tables are accessed from
+      // another client.
+      catalogName = catalogProperties.get("name");
+    }
+    return CatalogUtil.loadCatalog(
+        catalogImpl(catalogType, customCatalogImpl), catalogName, catalogProperties, catalogConf);
+  }
+
+  private static String catalogImpl(CatalogType type, String customCatalogImpl) {
+    switch (type) {
+      case CUSTOM:
+        Preconditions.checkArgument(
+            customCatalogImpl != null && !customCatalogImpl.trim().isEmpty(),
+            "Need to specify the fully qualified class name of the custom catalog impl");
+        return customCatalogImpl;
+      case DYNAMODB:
+        return DynamoDbCatalog.class.getName();
+      case ECS:
+        return EcsCatalog.class.getName();
+      case GLUE:
+        return GlueCatalog.class.getName();
+      case HADOOP:
+        return HadoopCatalog.class.getName();
+      case HIVE:
+        return HiveCatalog.class.getName();
+      case JDBC:
+        return JdbcCatalog.class.getName();
+      case NESSIE:
+        return NessieCatalog.class.getName();
+      case REST:
+        return RESTCatalog.class.getName();
+      default:
+        throw new IllegalArgumentException("Unsupported type: " + type.name());
+    }
+  }
+}
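The factory above is a thin wrapper around Iceberg's CatalogUtil.loadCatalog, so any catalog that Iceberg can load from a properties map works here. A minimal usage sketch for building a Hadoop catalog through it, with imports as in the file above (the warehouse path and catalog name are illustrative placeholders, not part of this patch):

    // Hypothetical usage sketch; "/tmp/warehouse" and "demoCatalog" are placeholders.
    Map<String, String> props = new HashMap<>();
    props.put("type", "hadoop");
    props.put("warehouse", "/tmp/warehouse");
    Catalog catalog =
        CatalogMigrationUtil.buildCatalog(
            props, CatalogMigrationUtil.CatalogType.HADOOP, "demoCatalog", null, null);

This mirrors what testBuildHadoopCatalog below exercises; passing null for the Hadoop configuration map simply yields a default Configuration.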
diff --git a/iceberg-catalog-migrator/api/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrator.java b/iceberg-catalog-migrator/api/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrator.java
new file mode 100644
index 00000000..12d80a48
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/main/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrator.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import com.google.common.base.Preconditions;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Predicate;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.iceberg.BaseTable;
+import org.apache.iceberg.TableOperations;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.SupportsNamespaces;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.AlreadyExistsException;
+import org.apache.iceberg.exceptions.NoSuchNamespaceException;
+import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.immutables.value.Value;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Value.Immutable
+public abstract class CatalogMigrator {
+
+  /** Source {@link Catalog} from which the tables are chosen. */
+  public abstract Catalog sourceCatalog();
+
+  /** Target {@link Catalog} to which the tables need to be registered or migrated. */
+  public abstract Catalog targetCatalog();
+
+  /** Delete the table entries from the source catalog after successful registration. */
+  public abstract boolean deleteEntriesFromSourceCatalog();
+
+  /** Enable the stacktrace in logs in case of failures. */
+  @Value.Default
+  public boolean enableStacktrace() {
+    return false;
+  }
+
+  @Value.Check
+  void check() {
+    if (!(targetCatalog() instanceof SupportsNamespaces)) {
+      throw new UnsupportedOperationException(
+          String.format(
+              "target catalog %s doesn't implement SupportsNamespaces to create missing namespaces.",
+              targetCatalog().name()));
+    }
+
+    if (!(sourceCatalog() instanceof SupportsNamespaces)) {
+      throw new UnsupportedOperationException(
+          String.format(
+              "source catalog %s doesn't implement SupportsNamespaces to list all namespaces.",
+              sourceCatalog().name()));
+    }
+
+    if (deleteEntriesFromSourceCatalog() && sourceCatalog() instanceof HadoopCatalog) {
+      throw new UnsupportedOperationException(
+          "Source catalog is a Hadoop catalog and it doesn't support deleting the table entries just from the catalog. Please configure `deleteEntriesFromSourceCatalog` as `false`");
+    }
+  }
+
+  private static final Logger LOG = LoggerFactory.getLogger(CatalogMigrator.class);
+  private final ImmutableCatalogMigrationResult.Builder resultBuilder =
+      ImmutableCatalogMigrationResult.builder();
+  private final Set<Namespace> processedNamespaces = new HashSet<>();
+
+  /**
+   * Get the table identifiers from all the namespaces that match the given regular expression
+   * pattern.
+   *
+   * @param identifierRegex regular expression pattern. If null, fetches all the table identifiers
+   *     from all the namespaces.
+   * @return Set of table identifiers.
+   */
+  public Set<TableIdentifier> getMatchingTableIdentifiers(String identifierRegex) {
+    LOG.info("Collecting all the namespaces from source catalog...");
+    Set<Namespace> namespaces = new LinkedHashSet<>();
+    getAllNamespacesFromSourceCatalog(Namespace.empty(), namespaces);
+
+    Predicate<TableIdentifier> matchedIdentifiersPredicate;
+    if (identifierRegex == null) {
+      LOG.info("Collecting all the tables from all the namespaces of source catalog...");
+      matchedIdentifiersPredicate = tableIdentifier -> true;
+    } else {
+      LOG.info(
+          "Collecting all the tables from all the namespaces of source catalog"
+              + " which matches the regex pattern:{}",
+          identifierRegex);
+      Pattern pattern = Pattern.compile(identifierRegex);
+      matchedIdentifiersPredicate =
+          tableIdentifier -> pattern.matcher(tableIdentifier.toString()).matches();
+    }
+    return namespaces.stream()
+        .flatMap(
+            namespace -> {
+              try {
+                return sourceCatalog().listTables(namespace).stream()
+                    .filter(matchedIdentifiersPredicate);
+              } catch (IllegalArgumentException | NoSuchNamespaceException exception) {
+                if (namespace.isEmpty()) {
+                  // Some catalogs don't support the empty namespace. Hence, just log a warning
+                  // and ignore the exception.
+                  LOG.warn(
+                      "Failed to identify tables from empty namespace : {}",
+                      exception.getMessage());
+                  return Stream.empty();
+                } else {
+                  throw exception;
+                }
+              }
+            })
+        .collect(Collectors.toCollection(LinkedHashSet::new));
+  }
+
+  /**
+   * Register or migrate a single table from one catalog (the source catalog) to another catalog
+   * (the target catalog).
+   *
+   * <p>Users must ensure that there are no in-progress commits on the tables of the source
+   * catalog during registration.
+   *
+   * @param identifier table identifier to register or migrate
+   * @return {@code this} for use in a chained invocation
+   */
+  public CatalogMigrator registerTable(TableIdentifier identifier) {
+    Preconditions.checkArgument(identifier != null, "Identifier is null");
+
+    boolean isRegistered = registerTableToTargetCatalog(identifier);
+    if (isRegistered) {
+      resultBuilder.addRegisteredTableIdentifiers(identifier);
+    } else {
+      resultBuilder.addFailedToRegisterTableIdentifiers(identifier);
+    }
+
+    try {
+      if (isRegistered
+          && deleteEntriesFromSourceCatalog()
+          && !sourceCatalog().dropTable(identifier, false)) {
+        resultBuilder.addFailedToDeleteTableIdentifiers(identifier);
+      }
+    } catch (Exception exception) {
+      resultBuilder.addFailedToDeleteTableIdentifiers(identifier);
+      if (enableStacktrace()) {
+        LOG.error("Failed to delete the table after migration {}", identifier, exception);
+      } else {
+        LOG.error(
+            "Failed to delete the table after migration {} : {}",
+            identifier,
+            exception.getMessage());
+      }
+    }
+    return this;
+  }
+
+  public CatalogMigrationResult result() {
+    processedNamespaces.clear();
+    return resultBuilder.build();
+  }
+
+  protected void createNamespacesIfNotExistOnTargetCatalog(Namespace identifierNamespace) {
+    if (!processedNamespaces.contains(identifierNamespace)) {
+      String[] levels = identifierNamespace.levels();
+      for (int index = 0; index < levels.length; index++) {
+        Namespace namespace = Namespace.of(Arrays.copyOfRange(levels, 0, index + 1));
+        if (processedNamespaces.add(namespace)) {
+          try {
+            ((SupportsNamespaces) targetCatalog()).createNamespace(namespace);
+          } catch (AlreadyExistsException ex) {
+            // The namespace is created forcefully (even if it already exists) to avoid an extra
+            // namespaceExists() check, so this error is expected and ignored.
+            LOG.debug(
+                "{}. Ignoring the error, as the namespace is created forcefully (even if it "
+                    + "exists) to avoid a namespaceExists() check.",
+                ex.getMessage());
+          }
+        }
+      }
+    }
+  }
+
+  protected void getAllNamespacesFromSourceCatalog(Namespace namespace, Set<Namespace> visited) {
+    if (!visited.add(namespace)) {
+      return;
+    }
+    List<Namespace> children = ((SupportsNamespaces) sourceCatalog()).listNamespaces(namespace);
+    for (Namespace child : children) {
+      getAllNamespacesFromSourceCatalog(child, visited);
+    }
+  }
+
+  private boolean registerTableToTargetCatalog(TableIdentifier tableIdentifier) {
+    try {
+      createNamespacesIfNotExistOnTargetCatalog(tableIdentifier.namespace());
+      // register the table to the target catalog
+      TableOperations ops = ((BaseTable) sourceCatalog().loadTable(tableIdentifier)).operations();
+      targetCatalog().registerTable(tableIdentifier, ops.current().metadataFileLocation());
+      LOG.info("Successfully registered the table {}", tableIdentifier);
+      return true;
+    } catch (Exception ex) {
+      if (enableStacktrace()) {
+        LOG.error("Unable to register the table {}", tableIdentifier, ex);
+      } else {
+        LOG.error("Unable to register the table {} : {}", tableIdentifier, ex.getMessage());
+      }
+      return false;
+    }
+  }
+}
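The intended call sequence for the class above is: collect the matching identifiers, register each one, then read the accumulated result. A minimal end-to-end sketch, assuming `source` and `target` are catalogs already built via CatalogMigrationUtil.buildCatalog (both names are placeholders):

    // Hypothetical wiring; `source` and `target` are assumed to exist.
    CatalogMigrator migrator =
        ImmutableCatalogMigrator.builder()
            .sourceCatalog(source)
            .targetCatalog(target)
            .deleteEntriesFromSourceCatalog(false)
            .build();
    migrator.getMatchingTableIdentifiers(null).forEach(migrator::registerTable);
    CatalogMigrationResult result = migrator.result();

Passing null lists every table in every namespace; registerTable never throws for a single bad table, it just records the failure in the result, which is what makes this loop safe for bulk migration.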
diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/AbstractTestCatalogMigrator.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/AbstractTestCatalogMigrator.java
new file mode 100644
index 00000000..c41b5253
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/AbstractTestCatalogMigrator.java
@@ -0,0 +1,345 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.IntStream;
+import nl.altindag.log.LogCaptor;
+import nl.altindag.log.model.LogEvent;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.polaris.iceberg.catalog.migrator.api.test.AbstractTest;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+public abstract class AbstractTestCatalogMigrator extends AbstractTest {
+
+  protected static final Namespace NS1 = Namespace.of("ns1");
+  protected static final Namespace NS2 = Namespace.of("ns2");
+  protected static final Namespace NS3 = Namespace.of("ns3");
+  protected static final Namespace NS1_NS2 = Namespace.of("ns1", "ns2");
+  protected static final Namespace NS1_NS3 = Namespace.of("ns1", "ns3");
+  protected static final Namespace NS1_NS2_NS3 = Namespace.of("ns1", "ns2", "ns3");
+
+  protected static final TableIdentifier TBL = TableIdentifier.parse("tblz");
+  protected static final TableIdentifier NS1_TBL = TableIdentifier.of(NS1, "tblz");
+  protected static final TableIdentifier NS2_TBL = TableIdentifier.of(NS2, "tblz");
+  protected static final TableIdentifier NS3_TBL = TableIdentifier.of(NS3, "tblz");
+  protected static final TableIdentifier NS1_NS2_TBL = TableIdentifier.of(NS1_NS2, "tblz");
+  protected static final TableIdentifier NS1_NS3_TBL = TableIdentifier.of(NS1_NS3, "tblz");
+  protected static final TableIdentifier NS1_NS2_NS3_TBL = TableIdentifier.of(NS1_NS2_NS3, "tblz");
+
+  @AfterAll
+  protected static void tearDown() throws Exception {
+    dropNamespaces();
+  }
+
+  @BeforeEach
+  protected void beforeEach() {
+    createTables();
+  }
+
+  @AfterEach
+  protected void afterEach() {
+    dropTables();
+  }
+
+  protected static void initializeSourceCatalog(
+      CatalogMigrationUtil.CatalogType catalogType, Map<String, String> additionalProp) {
+    initializeCatalog(true, catalogType, additionalProp);
+    createNamespacesForSourceCatalog();
+  }
+
+  protected static void initializeTargetCatalog(
+      CatalogMigrationUtil.CatalogType catalogType, Map<String, String> additionalProp) {
+    initializeCatalog(false, catalogType, additionalProp);
+    createNamespacesForTargetCatalog();
+  }
+
+  private static void initializeCatalog(
+      boolean isSourceCatalog,
+      CatalogMigrationUtil.CatalogType catalogType,
+      Map<String, String> additionalProp) {
+    Map<String, String> properties;
+    switch (catalogType) {
+      case HADOOP:
+        properties = hadoopCatalogProperties(isSourceCatalog);
+        break;
+      case NESSIE:
+        properties = nessieCatalogProperties(isSourceCatalog);
+        break;
+      case HIVE:
+        properties = hiveCatalogProperties(isSourceCatalog, additionalProp);
+        break;
+      default:
+        throw new UnsupportedOperationException(
+            String.format("Unsupported for catalog type: %s", catalogType));
+    }
+    Catalog catalog =
+        CatalogMigrationUtil.buildCatalog(
+            properties,
+            catalogType,
+            // name the catalogs uniformly, e.g. "sourceCatalog_HADOOP" / "targetCatalog_HIVE"
+            (isSourceCatalog ? "sourceCatalog" : "targetCatalog") + "_" + catalogType,
+            null,
+            null);
+    if (isSourceCatalog) {
+      sourceCatalog = catalog;
+    } else {
+      targetCatalog = catalog;
+    }
+  }
+
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testRegister(boolean deleteSourceTables) {
+    validateAssumptionForHadoopCatalogAsSource(deleteSourceTables);
+
+    CatalogMigrationResult result = registerAllTables(deleteSourceTables);
+
+    Assertions.assertThat(result.registeredTableIdentifiers())
+        .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2, BAR_TBL3, BAR_TBL4);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    Assertions.assertThat(targetCatalog.listTables(FOO))
+        .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2);
+    Assertions.assertThat(targetCatalog.listTables(BAR))
+        .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4);
+
+    if (deleteSourceTables) {
+      // tables should be deleted from the source catalog after migration
+      Assertions.assertThat(sourceCatalog.listTables(FOO)).isEmpty();
+      Assertions.assertThat(sourceCatalog.listTables(BAR)).isEmpty();
+    } else {
+      // tables should still be present in the source catalog.
+      Assertions.assertThat(sourceCatalog.listTables(FOO))
+          .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2);
+      Assertions.assertThat(sourceCatalog.listTables(BAR))
+          .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4);
+    }
+  }
+
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testRegisterSelectedTables(boolean deleteSourceTables) {
+    validateAssumptionForHadoopCatalogAsSource(deleteSourceTables);
+
+    // using the `--identifiers` option
+    CatalogMigrationResult result =
+        catalogMigratorWithDefaultArgs(deleteSourceTables).registerTable(BAR_TBL3).result();
+    Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(BAR_TBL3);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    Assertions.assertThat(targetCatalog.listTables(FOO)).isEmpty();
+    Assertions.assertThat(targetCatalog.listTables(BAR)).containsExactly(BAR_TBL3);
+
+    // using the `--identifiers-regex` option, which matches all the tables starting with "foo."
+    CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables);
+    catalogMigrator
+        .getMatchingTableIdentifiers("^foo\\..*")
+        .forEach(catalogMigrator::registerTable);
+    result = catalogMigrator.result();
+    Assertions.assertThat(result.registeredTableIdentifiers())
+        .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    Assertions.assertThat(targetCatalog.listTables(FOO))
+        .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2);
+    Assertions.assertThat(targetCatalog.listTables(BAR)).containsExactly(BAR_TBL3);
+  }
+
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testRegisterError(boolean deleteSourceTables) {
+    validateAssumptionForHadoopCatalogAsSource(deleteSourceTables);
+
+    // use an invalid namespace, which leads to NoSuchTableException
+    TableIdentifier identifier = TableIdentifier.parse("dummy.tbl3");
+    CatalogMigrationResult result =
+        catalogMigratorWithDefaultArgs(deleteSourceTables).registerTable(identifier).result();
+    Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).containsExactly(identifier);
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    // try to register the same table twice, which leads to AlreadyExistsException
+    result = catalogMigratorWithDefaultArgs(deleteSourceTables).registerTable(FOO_TBL2).result();
+    Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(FOO_TBL2);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    result = catalogMigratorWithDefaultArgs(deleteSourceTables).registerTable(FOO_TBL2).result();
+    Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).contains(FOO_TBL2);
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+  }
+
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testRegisterWithFewFailures(boolean deleteSourceTables) {
+    validateAssumptionForHadoopCatalogAsSource(deleteSourceTables);
+
+    // register only foo.tbl2
+    CatalogMigrationResult result =
+        catalogMigratorWithDefaultArgs(deleteSourceTables).registerTable(FOO_TBL2).result();
+    Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(FOO_TBL2);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    if (deleteSourceTables) {
+      // re-create the table with the same name in the source catalog, since it was deleted.
+      sourceCatalog.createTable(FOO_TBL2, schema);
+    }
+
+    // register all the tables from the source catalog again, so that `foo.tbl2` fails to register.
+    result = registerAllTables(deleteSourceTables);
+    Assertions.assertThat(result.registeredTableIdentifiers())
+        .containsExactlyInAnyOrder(FOO_TBL1, BAR_TBL3, BAR_TBL4);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).contains(FOO_TBL2);
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    Assertions.assertThat(targetCatalog.listTables(FOO))
+        .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2);
+    Assertions.assertThat(targetCatalog.listTables(BAR))
+        .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4);
+  }
+
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) {
+    validateAssumptionForHadoopCatalogAsSource(deleteSourceTables);
+
+    // create 240 more tables in addition to the 4 tables created in beforeEach()
+    IntStream.range(0, 240)
+        .forEach(val -> sourceCatalog.createTable(TableIdentifier.of(FOO, "tblx" + val), schema));
+
+    CatalogMigrationResult result;
+    result = registerAllTables(deleteSourceTables);
+
+    Assertions.assertThat(result.registeredTableIdentifiers()).hasSize(244);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    Assertions.assertThat(targetCatalog.listTables(FOO)).hasSize(242);
+    Assertions.assertThat(targetCatalog.listTables(BAR))
+        .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4);
+  }
+
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testListingTableIdentifiers(boolean deleteSourceTables) {
+    validateAssumptionForHadoopCatalogAsSource(deleteSourceTables);
+
+    CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables);
+
+    // should list all the tables from all the namespaces when the regex is null.
+    Set<TableIdentifier> matchingTableIdentifiers =
+        catalogMigrator.getMatchingTableIdentifiers(null);
+    Assertions.assertThat(matchingTableIdentifiers)
+        .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2, BAR_TBL3, BAR_TBL4);
+
+    // list the tables whose identifier starts with "foo."
+    matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers("^foo\\..*");
+    Assertions.assertThat(matchingTableIdentifiers).containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2);
+
+    // test a filter that doesn't match any table.
+    matchingTableIdentifiers = catalogMigrator.getMatchingTableIdentifiers("^dev\\..*");
+    Assertions.assertThat(matchingTableIdentifiers).isEmpty();
+  }
+
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testRegisterWithNewNamespace(boolean deleteSourceTables) {
+    validateAssumptionForHadoopCatalogAsSource(deleteSourceTables);
+
+    TableIdentifier tbl5 = TableIdentifier.of(DB1, "tbl5");
+    // namespace "db1" exists only in source catalog
+    sourceCatalog.createTable(tbl5, schema);
+
+    CatalogMigrationResult result =
+        catalogMigratorWithDefaultArgs(deleteSourceTables).registerTable(tbl5).result();
+
+    Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(tbl5);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    Assertions.assertThat(targetCatalog.listTables(DB1)).containsExactly(tbl5);
+  }
+
+  @ParameterizedTest
+  @CsvSource(value = {"false,false", "false,true", "true,false", "true,true"})
+  public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) {
+    validateAssumptionForHadoopCatalogAsSource(deleteSourceTables);
+
+    TableIdentifier identifier = TableIdentifier.parse("db.dummy_table");
+
+    ImmutableCatalogMigrator migrator =
+        ImmutableCatalogMigrator.builder()
+            .sourceCatalog(sourceCatalog)
+            .targetCatalog(targetCatalog)
+            .deleteEntriesFromSourceCatalog(deleteSourceTables)
+            .enableStacktrace(enableStacktrace)
+            .build();
+    try (LogCaptor logCaptor = LogCaptor.forClass(CatalogMigrator.class)) {
+      CatalogMigrationResult result = migrator.registerTable(identifier).result();
+      Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty();
+      Assertions.assertThat(result.failedToRegisterTableIdentifiers()).containsExactly(identifier);
+      Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+      Assertions.assertThat(logCaptor.getLogEvents()).hasSize(1);
+      LogEvent logEvent = logCaptor.getLogEvents().get(0);
+      if (enableStacktrace) {
+        Assertions.assertThat(logEvent.getFormattedMessage())
+            .isEqualTo("Unable to register the table db.dummy_table");
+        Assertions.assertThat(logEvent.getThrowable())
+            .isPresent()
+            .get()
+            .isInstanceOf(NoSuchTableException.class);
+      } else {
+        Assertions.assertThat(logEvent.getFormattedMessage())
+            .isEqualTo(
+                "Unable to register the table db.dummy_table : Table does not exist: db.dummy_table");
+        Assertions.assertThat(logEvent.getThrowable()).isEmpty();
+      }
+    }
+  }
+
+  protected CatalogMigrator catalogMigratorWithDefaultArgs(boolean deleteSourceTables) {
+    return ImmutableCatalogMigrator.builder()
+        .sourceCatalog(sourceCatalog)
+        .targetCatalog(targetCatalog)
+        .deleteEntriesFromSourceCatalog(deleteSourceTables)
+        .build();
+  }
+
+  private CatalogMigrationResult registerAllTables(boolean deleteSourceTables) {
+    CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(deleteSourceTables);
+    catalogMigrator.getMatchingTableIdentifiers(null).forEach(catalogMigrator::registerTable);
+    return catalogMigrator.result();
+  }
+}
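One subtlety exercised by the listing tests above: getMatchingTableIdentifiers applies the regex to TableIdentifier.toString(), i.e. the fully qualified dotted name, not the bare table name. A small illustrative check, with imports as in the files above (identifier values here are made up):

    // The pattern sees "foo.tbl1", so "^foo\\..*" matches while "^tbl.*" would not.
    Pattern pattern = Pattern.compile("^foo\\..*");
    TableIdentifier id = TableIdentifier.of(Namespace.of("foo"), "tbl1");
    boolean matches = pattern.matcher(id.toString()).matches(); // true

This is why the tests anchor their patterns at the namespace prefix ("^foo\\..*", "^dev\\..*") rather than matching table names alone.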
diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrationUtilTest.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrationUtilTest.java
new file mode 100644
index 00000000..c4ed478b
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigrationUtilTest.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import static org.apache.polaris.iceberg.catalog.migrator.api.test.AbstractTest.FOO_TBL1;
+
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Stream;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.nessie.NessieCatalog;
+import org.apache.iceberg.types.Types;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+public class CatalogMigrationUtilTest {
+
+  private static @TempDir Path logDir;
+
+  private static @TempDir Path tempDir;
+
+  @BeforeAll
+  protected static void initLogDir() {
+    System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString());
+  }
+
+  static Stream<String> blankOrNullStrings() {
+    return Stream.of("", " ", null);
+  }
+
+  @ParameterizedTest
+  @MethodSource("blankOrNullStrings")
+  public void testCustomCatalogWithoutImpl(String impl) {
+    Assertions.assertThatThrownBy(
+            () ->
+                CatalogMigrationUtil.buildCatalog(
+                    Collections.emptyMap(),
+                    CatalogMigrationUtil.CatalogType.CUSTOM,
+                    "catalogName",
+                    impl,
+                    Collections.emptyMap()))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining(
+            "Need to specify the fully qualified class name of the custom catalog impl");
+  }
+
+  @Test
+  public void testInvalidArgs() {
+    Assertions.assertThatThrownBy(
+            () -> CatalogMigrationUtil.buildCatalog(null, null, null, null, null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining("catalog properties is null");
+
+    Assertions.assertThatThrownBy(
+            () -> CatalogMigrationUtil.buildCatalog(Collections.emptyMap(), null, null, null, null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining("catalog type is null");
+
+    Assertions.assertThatThrownBy(
+            () ->
+                CatalogMigrationUtil.buildCatalog(
+                    Collections.emptyMap(),
+                    CatalogMigrationUtil.CatalogType.CUSTOM,
+                    "catalogName",
+                    "abc",
+                    null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining(
+            "Cannot initialize Catalog implementation abc: Cannot find constructor for interface");
+  }
+
+  @Test
+  public void testBuildHadoopCatalog() throws Exception {
+    Map<String, String> properties = new HashMap<>();
+    properties.put("warehouse", tempDir.toAbsolutePath().toString());
+    properties.put("type", "hadoop");
+
+    Map<String, String> conf = new HashMap<>();
+    conf.put("k1", "v1");
+
+    Catalog catalog =
+        CatalogMigrationUtil.buildCatalog(
+            properties, CatalogMigrationUtil.CatalogType.HADOOP, "catalogName", null, conf);
+
+    try {
+      Assertions.assertThat(catalog).isInstanceOf(HadoopCatalog.class);
+      Assertions.assertThat(catalog.name()).isEqualTo("catalogName");
+      Assertions.assertThat(((HadoopCatalog) catalog).getConf().get("k1")).isEqualTo("v1");
+      Schema schema =
+          new Schema(
+              Types.StructType.of(Types.NestedField.required(1, "id", Types.LongType.get()))
+                  .fields());
+      Table table = catalog.createTable(FOO_TBL1, schema);
+      Assertions.assertThat(table.location()).contains(tempDir.toAbsolutePath().toString());
+      catalog.dropTable(FOO_TBL1);
+    } finally {
+      if (catalog instanceof AutoCloseable) {
+        ((AutoCloseable) catalog).close();
+      }
+    }
+  }
+
+  @Test
+  public void testBuildNessieCatalog() throws Exception {
+    Map<String, String> properties = new HashMap<>();
+    properties.put("warehouse", tempDir.toAbsolutePath().toString());
+    properties.put("ref", "main");
+    properties.put("uri", "http://localhost:19120/api/v1");
+    properties.put("enable-api-compatibility-check", "false");
+
+    Catalog catalog =
+        CatalogMigrationUtil.buildCatalog(
+            properties, CatalogMigrationUtil.CatalogType.NESSIE, "catalogName", null, null);
+
+    try {
+      Assertions.assertThat(catalog).isInstanceOf(NessieCatalog.class);
+      Assertions.assertThat(catalog.name()).isEqualTo("catalogName");
+    } finally {
+      if (catalog instanceof AutoCloseable) {
+        ((AutoCloseable) catalog).close();
+      }
+    }
+  }
+
+  @Test
+  public void testBuildHiveCatalog() throws Exception {
+    Map<String, String> properties = new HashMap<>();
+    properties.put("warehouse", tempDir.toAbsolutePath().toString());
+    properties.put("type", "hive");
+    properties.put("uri", "thrift://localhost:9083");
+
+    Catalog catalog =
+        CatalogMigrationUtil.buildCatalog(
+            properties, CatalogMigrationUtil.CatalogType.HIVE, "catalogName", null, null);
+
+    try {
+      Assertions.assertThat(catalog).isInstanceOf(HiveCatalog.class);
+      Assertions.assertThat(catalog.name()).isEqualTo("catalogName");
+    } finally {
+      if (catalog instanceof AutoCloseable) {
+        ((AutoCloseable) catalog).close();
+      }
+    }
+  }
+}
diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigratorParamsTest.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigratorParamsTest.java
new file mode 100644
index 00000000..fc66d07e
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/CatalogMigratorParamsTest.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.apache.polaris.iceberg.catalog.migrator.api.test.AbstractTest;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class CatalogMigratorParamsTest extends AbstractTest {
+
+  @Test
+  public void testInvalidArgs() {
+    sourceCatalog =
+        CatalogUtil.loadCatalog(
+            HadoopCatalog.class.getName(),
+            "source",
+            hadoopCatalogProperties(true),
+            new Configuration());
+    targetCatalog =
+        CatalogUtil.loadCatalog(
+            HadoopCatalog.class.getName(),
+            "target",
+            hadoopCatalogProperties(true),
+            new Configuration());
+
+    Assertions.assertThatThrownBy(
+            () ->
+                ImmutableCatalogMigrator.builder()
+                    .sourceCatalog(sourceCatalog)
+                    .targetCatalog(targetCatalog)
+                    .deleteEntriesFromSourceCatalog(false)
+                    .build()
+                    .registerTable(null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining("Identifier is null");
+
+    Assertions.assertThatThrownBy(
+            () ->
+                ImmutableCatalogMigrator.builder()
+                    .sourceCatalog(sourceCatalog)
+                    .targetCatalog(null) // target-catalog is null
+                    .deleteEntriesFromSourceCatalog(true)
+                    .build())
+        .isInstanceOf(NullPointerException.class)
+        .hasMessageContaining("targetCatalog");
+
+    Assertions.assertThatThrownBy(
+            () ->
+                ImmutableCatalogMigrator.builder()
+                    .sourceCatalog(null) // source-catalog is null
+                    .targetCatalog(targetCatalog)
+                    .deleteEntriesFromSourceCatalog(true)
+                    .build())
+        .isInstanceOf(NullPointerException.class)
+        .hasMessageContaining("sourceCatalog");
+
+    // test source catalog as hadoop with `deleteEntriesFromSourceCatalog` as true.
+    Assertions.assertThatThrownBy(
+            () ->
+                ImmutableCatalogMigrator.builder()
+                    .sourceCatalog(sourceCatalog)
+                    .targetCatalog(targetCatalog)
+                    .deleteEntriesFromSourceCatalog(true)
+                    .build())
+        .isInstanceOf(UnsupportedOperationException.class)
+        .hasMessageContaining(
+            "Source catalog is a Hadoop catalog and it doesn't support deleting the table entries just from the catalog. "
+                + "Please configure `deleteEntriesFromSourceCatalog` as `false`");
+  }
+}
diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/CustomCatalogMigratorTest.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/CustomCatalogMigratorTest.java
new file mode 100644
index 00000000..81ac4d5d
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/CustomCatalogMigratorTest.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.apache.polaris.iceberg.catalog.migrator.api.test.AbstractTest;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+public class CustomCatalogMigratorTest extends AbstractTest {
+
+  protected static @TempDir Path warehouse1;
+  protected static @TempDir Path warehouse2;
+
+  @BeforeAll
+  protected static void setup() {
+    sourceCatalog = createCustomCatalog(warehouse1.toAbsolutePath().toString(), "sourceCatalog");
+    targetCatalog = createCustomCatalog(warehouse2.toAbsolutePath().toString(), "targetCatalog");
+  }
+
+  @BeforeEach
+  protected void beforeEach() {
+    createTables();
+  }
+
+  @AfterEach
+  protected void afterEach() {
+    dropTables();
+  }
+
+  @Test
+  public void testRegister() {
+    CatalogMigrator catalogMigrator =
+        ImmutableCatalogMigrator.builder()
+            .sourceCatalog(sourceCatalog)
+            .targetCatalog(targetCatalog)
+            .deleteEntriesFromSourceCatalog(false)
+            .build();
+    // should fail to register, as the catalog doesn't support the register-table operation.
+    catalogMigrator.getMatchingTableIdentifiers(null).forEach(catalogMigrator::registerTable);
+    CatalogMigrationResult result = catalogMigrator.result();
+    Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers())
+        .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2, BAR_TBL3, BAR_TBL4);
+  }
+
+  private static Catalog createCustomCatalog(String warehousePath, String name) {
+
+    class TestCatalog extends HadoopCatalog {
+      @Override
+      public Table registerTable(TableIdentifier identifier, String metadataFileLocation) {
+        throw new UnsupportedOperationException("This catalog doesn't support register table");
+      }
+    }
+
+    Map<String, String> properties = new HashMap<>();
+    properties.put("warehouse", warehousePath);
+    TestCatalog testCatalog = new TestCatalog();
+    testCatalog.setConf(new Configuration());
+    testCatalog.initialize(name, properties);
+    return testCatalog;
+  }
+}
diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/HadoopCatalogMigratorTest.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/HadoopCatalogMigratorTest.java
new file mode 100644
index 00000000..32429624
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/HadoopCatalogMigratorTest.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.SupportsNamespaces;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+public class HadoopCatalogMigratorTest extends AbstractTestCatalogMigrator {
+
+  @BeforeAll
+  protected static void setup() {
+    initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap());
+    initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap());
+  }
+
+  @Test
+  public void testRegisterWithNewNestedNamespaces() {
+    List<Namespace> namespaceList = Arrays.asList(NS1, NS2, NS3, NS1_NS2, NS1_NS3, NS1_NS2_NS3);
+
+    List<TableIdentifier> identifiers =
+        Arrays.asList(TBL, NS1_TBL, NS2_TBL, NS3_TBL, NS1_NS2_TBL, NS1_NS3_TBL, NS1_NS2_NS3_TBL);
+
+    namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::createNamespace);
+    identifiers.forEach(identifier -> sourceCatalog.createTable(identifier, schema));
+
+    CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(false);
+    Set<TableIdentifier> matchingTableIdentifiers =
+        catalogMigrator.getMatchingTableIdentifiers(null);
+    // HadoopCatalog supports implicit namespaces, so there is no notion of an empty namespace
+    // either. Hence, tables cannot be listed from the empty namespace; they can only be loaded
+    // by identifier.
+    Assertions.assertThat(matchingTableIdentifiers)
+        .containsAll(identifiers.subList(1, 7)); // without "tblz"
+    Assertions.assertThat(matchingTableIdentifiers).doesNotContain(identifiers.get(0));
+
+    matchingTableIdentifiers.forEach(catalogMigrator::registerTable);
+    CatalogMigrationResult result = catalogMigrator.result();
+    Assertions.assertThat(result.registeredTableIdentifiers())
+        .containsAll(identifiers.subList(1, 7)); // without "tblz"
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    // manually register the table from the empty namespace
+    result = catalogMigratorWithDefaultArgs(false).registerTable(TBL).result();
+    Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(TBL);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    Collections.reverse(namespaceList);
+    identifiers.forEach(sourceCatalog::dropTable);
+    namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::dropNamespace);
+    identifiers.forEach(targetCatalog::dropTable);
+    namespaceList.forEach(((SupportsNamespaces) targetCatalog)::dropNamespace);
+  }
+
+  @Test
+  public void testCreateAndListNamespaces() {
+    ImmutableCatalogMigrator catalogMigrator =
+        ImmutableCatalogMigrator.builder()
+            .sourceCatalog(sourceCatalog)
+            .targetCatalog(targetCatalog)
+            .deleteEntriesFromSourceCatalog(false)
+            .build();
+
+    List<Namespace> namespaceList =
+        Arrays.asList(NS_A, NS_A_B, NS_A_B_C, NS_A_B_C_D, NS_A_B_C_D_E, NS_A_C);
+    catalogMigrator.createNamespacesIfNotExistOnTargetCatalog(
+        namespaceList.get(4)); // try creating "a.b.c.d.e"
+    catalogMigrator.createNamespacesIfNotExistOnTargetCatalog(
+        namespaceList.get(5)); // try creating "a.c"
+    // should create all the levels of missing namespaces on the target catalog
+    Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces())
+        .contains(namespaceList.get(0))
+        .doesNotContainAnyElementsOf(namespaceList.subList(1, 6));
+    Assertions.assertThat(
+            ((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(0)))
+        .containsExactlyInAnyOrder(namespaceList.get(1), namespaceList.get(5));
+    Assertions.assertThat(
+            ((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(1)))
+        .containsExactly(namespaceList.get(2));
+    Assertions.assertThat(
+            ((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(2)))
+        .containsExactly(namespaceList.get(3));
+    Assertions.assertThat(
+            ((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(3)))
+        .containsExactly(namespaceList.get(4));
+    Assertions.assertThat(
+            ((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(4)))
+        .isEmpty();
+    Assertions.assertThat(
+            ((SupportsNamespaces) targetCatalog).listNamespaces(namespaceList.get(5)))
+        .isEmpty();
+
+    namespaceList.forEach(
+        namespace -> ((SupportsNamespaces) sourceCatalog).createNamespace(namespace));
+    Set<Namespace> listedNamespaces = new HashSet<>();
+    // collect all the namespaces from all levels
+    catalogMigrator.getAllNamespacesFromSourceCatalog(Namespace.empty(), listedNamespaces);
+    Assertions.assertThat(listedNamespaces).containsAll(namespaceList);
+
+    Collections.reverse(namespaceList);
+    namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::dropNamespace);
+    namespaceList.forEach(((SupportsNamespaces) targetCatalog)::dropNamespace);
+  }
+}
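The namespace assertions in the test above rely on createNamespacesIfNotExistOnTargetCatalog expanding a namespace level by level from the root, so requesting "a.b.c.d.e" creates "a", "a.b", "a.b.c", "a.b.c.d", and "a.b.c.d.e" in that order. A sketch of just that expansion in isolation, with imports as in CatalogMigrator above (the namespace names are illustrative):

    // Mirrors the level-by-level expansion inside CatalogMigrator.
    String[] levels = Namespace.of("a", "b", "c").levels();
    for (int i = 0; i < levels.length; i++) {
      // prints "a", then "a.b", then "a.b.c"
      System.out.println(Namespace.of(Arrays.copyOfRange(levels, 0, i + 1)));
    }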
diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHadoopToHiveCatalogMigrator.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHadoopToHiveCatalogMigrator.java
new file mode 100644
index 00000000..87708c23
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHadoopToHiveCatalogMigrator.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import java.util.Collections;
+import org.apache.iceberg.catalog.SupportsNamespaces;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.hive.HiveMetastoreExtension;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.RegisterExtension;
+
+public class ITHadoopToHiveCatalogMigrator extends AbstractTestCatalogMigrator {
+
+  @RegisterExtension
+  public static final HiveMetastoreExtension HIVE_METASTORE_EXTENSION =
+      HiveMetastoreExtension.builder().build();
+
+  @BeforeAll
+  protected static void setup() throws Exception {
+    initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap());
+    initializeTargetCatalog(
+        CatalogMigrationUtil.CatalogType.HIVE,
+        Collections.singletonMap(
+            "uri", HIVE_METASTORE_EXTENSION.hiveConf().get("hive.metastore.uris")));
+  }
+
+  @AfterAll
+  protected static void tearDown() throws Exception {
+    dropNamespaces();
+  }
+
+  @Test
+  public void testRegisterWithNewNestedNamespace() {
+    TableIdentifier tableIdentifier = TableIdentifier.of(NS_A_B_C, "tbl5");
+    // create namespace "a.b.c" only in the source catalog
+    ((SupportsNamespaces) sourceCatalog).createNamespace(NS_A_B_C);
+    sourceCatalog.createTable(tableIdentifier, schema);
+
+    CatalogMigrationResult result =
+        catalogMigratorWithDefaultArgs(false).registerTable(tableIdentifier).result();
+
+    // Hive catalog doesn't support multi-part namespaces. Hence, the table should fail to
+    // register.
+    Assertions.assertThat(result.registeredTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers())
+        .containsExactly(tableIdentifier);
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    sourceCatalog.dropTable(tableIdentifier);
+    ((SupportsNamespaces) sourceCatalog).dropNamespace(NS_A_B_C);
+  }
+}
diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHadoopToNessieCatalogMigrator.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHadoopToNessieCatalogMigrator.java
new file mode 100644
index 00000000..87e0034e
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHadoopToNessieCatalogMigrator.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.SupportsNamespaces;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+public class ITHadoopToNessieCatalogMigrator extends AbstractTestCatalogMigrator {
+
+  @BeforeAll
+  protected static void setup() throws Exception {
+    initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap());
+    initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap());
+  }
+
+  @AfterAll
+  protected static void tearDown() throws Exception {
+    dropNamespaces();
+  }
+
+  @Test
+  public void testRegisterWithNewNestedNamespaces() {
+    List<Namespace> namespaceList = Arrays.asList(NS1, NS2, NS3, NS1_NS2, NS1_NS3, NS1_NS2_NS3);
+    List<TableIdentifier> identifiers =
+        Arrays.asList(TBL, NS1_TBL, NS2_TBL, NS3_TBL, NS1_NS2_TBL, NS1_NS3_TBL, NS1_NS2_NS3_TBL);
+    namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::createNamespace);
+    identifiers.forEach(identifier -> sourceCatalog.createTable(identifier, schema));
+
+    CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(false);
+    Set<TableIdentifier> matchingTableIdentifiers =
+        catalogMigrator.getMatchingTableIdentifiers(null);
+    // HadoopCatalog supports implicit namespaces, so there is no notion of an empty namespace
+    // either. Hence, tables cannot be listed from the empty namespace; they can only be loaded
+    // by identifier.
+    Assertions.assertThat(matchingTableIdentifiers)
+        .containsAll(identifiers.subList(1, 7)); // without "tblz"
+    Assertions.assertThat(matchingTableIdentifiers).doesNotContain(identifiers.get(0));
+
+    matchingTableIdentifiers.forEach(catalogMigrator::registerTable);
+    CatalogMigrationResult result = catalogMigrator.result();
+    Assertions.assertThat(result.registeredTableIdentifiers())
+        .containsAll(identifiers.subList(1, 7)); // without "tblz"
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    // manually register the table from the empty namespace
+    result = catalogMigratorWithDefaultArgs(false).registerTable(TBL).result();
+    Assertions.assertThat(result.registeredTableIdentifiers()).containsExactly(TBL);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).isEmpty();
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+
+    Collections.reverse(namespaceList);
+    identifiers.forEach(sourceCatalog::dropTable);
+    namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::dropNamespace);
+    identifiers.forEach(targetCatalog::dropTable);
+    namespaceList.forEach(((SupportsNamespaces) targetCatalog)::dropNamespace);
+  }
+
+  @Test
+  public void testCreateMissingNamespaces() {
+    ImmutableCatalogMigrator catalogMigrator =
+        ImmutableCatalogMigrator.builder()
+            .sourceCatalog(sourceCatalog)
+            .targetCatalog(targetCatalog)
+            .deleteEntriesFromSourceCatalog(false)
+            .build();
+
+    List<Namespace> namespaceList =
+        Arrays.asList(NS_A, NS_A_B, NS_A_B_C, NS_A_B_C_D, NS_A_B_C_D_E, NS_A_C);
+    catalogMigrator.createNamespacesIfNotExistOnTargetCatalog(
+        namespaceList.get(4)); // try creating "a.b.c.d.e"
+    catalogMigrator.createNamespacesIfNotExistOnTargetCatalog(
+        namespaceList.get(5)); // try creating "a.c"
+
+    // should create all the levels of missing namespaces on the target catalog.
+    Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces()).contains(NS_A);
+    Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(NS_A))
+        .contains(NS_A_B, NS_A_C);
+    Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(NS_A_B))
+        .contains(NS_A_B_C);
+    Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(NS_A_B_C))
+        .contains(NS_A_B_C_D);
+    Assertions.assertThat(((SupportsNamespaces) targetCatalog).listNamespaces(NS_A_B_C_D))
+        .contains(NS_A_B_C_D_E);
+
+    namespaceList.forEach(
+        namespace -> ((SupportsNamespaces) sourceCatalog).createNamespace(namespace));
+    Set<Namespace> listedNamespaces = new HashSet<>();
+    // collect all the namespaces from all levels
+    catalogMigrator.getAllNamespacesFromSourceCatalog(Namespace.empty(), listedNamespaces);
+    Assertions.assertThat(listedNamespaces).containsAll(namespaceList);
+
+    Collections.reverse(namespaceList);
+    namespaceList.forEach(((SupportsNamespaces) targetCatalog)::dropNamespace);
+    namespaceList.forEach(((SupportsNamespaces) sourceCatalog)::dropNamespace);
+  }
+
+  @Test
+  public void testListingNamespacesFromNessie() {
+    Catalog nessie = targetCatalog;
+    Catalog hadoop = sourceCatalog;
+
+    ImmutableCatalogMigrator catalogMigrator =
+        ImmutableCatalogMigrator.builder()
+            .sourceCatalog(nessie)
+            .targetCatalog(hadoop)
+            .deleteEntriesFromSourceCatalog(false)
+            .build();
+
+    List<Namespace> namespaceList =
+        Arrays.asList(NS_A, NS_A_B, NS_A_B_C, NS_A_B_C_D, NS_A_B_C_D_E, NS_A_C);
+
+    namespaceList.forEach(namespace -> ((SupportsNamespaces) nessie).createNamespace(namespace));
+    Set<Namespace> listedNamespaces = new HashSet<>();
+    // collect all the namespaces from all levels
+    catalogMigrator.getAllNamespacesFromSourceCatalog(Namespace.empty(), listedNamespaces);
+    Assertions.assertThat(listedNamespaces).containsAll(namespaceList);
+
+    Collections.reverse(namespaceList);
+    namespaceList.forEach(((SupportsNamespaces) nessie)::dropNamespace);
+  }
+}
diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHiveToHadoopCatalogMigrator.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHiveToHadoopCatalogMigrator.java
new file mode 100644
index 00000000..7ad97960
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHiveToHadoopCatalogMigrator.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.polaris.iceberg.catalog.migrator.api; + +import java.util.Collections; +import org.apache.iceberg.hive.HiveMetastoreExtension; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class ITHiveToHadoopCatalogMigrator extends AbstractTestCatalogMigrator { + + @RegisterExtension + public static final HiveMetastoreExtension HIVE_METASTORE_EXTENSION = + HiveMetastoreExtension.builder().build(); + + @BeforeAll + protected static void setup() throws Exception { + initializeSourceCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HIVE_METASTORE_EXTENSION.hiveConf().get("hive.metastore.uris"))); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + } +} diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHiveToNessieCatalogMigrator.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHiveToNessieCatalogMigrator.java new file mode 100644 index 00000000..c7ea8430 --- /dev/null +++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITHiveToNessieCatalogMigrator.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.iceberg.catalog.migrator.api; + +import java.util.Collections; +import org.apache.iceberg.hive.HiveMetastoreExtension; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class ITHiveToNessieCatalogMigrator extends AbstractTestCatalogMigrator { + + @RegisterExtension + public static final HiveMetastoreExtension HIVE_METASTORE_EXTENSION = + HiveMetastoreExtension.builder().build(); + + @BeforeAll + protected static void setup() throws Exception { + initializeSourceCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HIVE_METASTORE_EXTENSION.hiveConf().get("hive.metastore.uris"))); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + } +} diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITNessieToHiveCatalogMigrator.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITNessieToHiveCatalogMigrator.java new file mode 100644 index 00000000..cc6e0201 --- /dev/null +++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/ITNessieToHiveCatalogMigrator.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import java.util.Collections;
+import java.util.Set;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.hive.HiveMetastoreExtension;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.RegisterExtension;
+
+public class ITNessieToHiveCatalogMigrator extends AbstractTestCatalogMigrator {
+
+  @RegisterExtension
+  public static final HiveMetastoreExtension HIVE_METASTORE_EXTENSION =
+      HiveMetastoreExtension.builder().build();
+
+  @BeforeAll
+  protected static void setup() throws Exception {
+    initializeSourceCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap());
+    initializeTargetCatalog(
+        CatalogMigrationUtil.CatalogType.HIVE,
+        Collections.singletonMap(
+            "uri", HIVE_METASTORE_EXTENSION.hiveConf().get("hive.metastore.uris")));
+  }
+
+  @AfterAll
+  protected static void tearDown() throws Exception {
+    dropNamespaces();
+  }
+
+  @Test
+  public void testRegisterWithDefaultNamespace() {
+    sourceCatalog.createTable(TBL, schema);
+
+    CatalogMigrator catalogMigrator = catalogMigratorWithDefaultArgs(false);
+    // should also include the table from the empty namespace
+    Set<TableIdentifier> matchingTableIdentifiers =
+        catalogMigrator.getMatchingTableIdentifiers(null);
+    Assertions.assertThat(matchingTableIdentifiers).contains(TBL);
+
+    matchingTableIdentifiers.forEach(catalogMigrator::registerTable);
+    CatalogMigrationResult result = catalogMigrator.result();
+    // Hive does not support an empty namespace (namespace with level = 0), so the register
+    // will fail.
+    Assertions.assertThat(result.registeredTableIdentifiers()).doesNotContain(TBL);
+    Assertions.assertThat(result.failedToRegisterTableIdentifiers()).containsExactly(TBL);
+    Assertions.assertThat(result.failedToDeleteTableIdentifiers()).isEmpty();
+  }
+}
diff --git a/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/UnsupportedNamespaceTest.java b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/UnsupportedNamespaceTest.java
new file mode 100644
index 00000000..66127213
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/java/org/apache/polaris/iceberg/catalog/migrator/api/UnsupportedNamespaceTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License. 
+ */
+package org.apache.polaris.iceberg.catalog.migrator.api;
+
+import java.nio.file.Path;
+import java.util.List;
+import org.apache.iceberg.BaseMetastoreCatalog;
+import org.apache.iceberg.TableOperations;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+public class UnsupportedNamespaceTest {
+
+  protected static @TempDir Path tempDir;
+
+  @BeforeAll
+  protected static void initLogDir() {
+    System.setProperty("catalog.migration.log.dir", tempDir.toAbsolutePath().toString());
+  }
+
+  @Test
+  public void testUnsupportedNamespace() {
+
+    class TestCatalog extends BaseMetastoreCatalog {
+      // doesn't support namespaces
+      @Override
+      protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
+        return null;
+      }
+
+      @Override
+      protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) {
+        return null;
+      }
+
+      @Override
+      public List<TableIdentifier> listTables(Namespace namespace) {
+        return null;
+      }
+
+      @Override
+      public boolean dropTable(TableIdentifier identifier, boolean purge) {
+        return false;
+      }
+
+      @Override
+      public void renameTable(TableIdentifier from, TableIdentifier to) {}
+    }
+
+    Catalog sourceCatalog = new TestCatalog();
+    Catalog targetCatalog = new TestCatalog();
+
+    Assertions.assertThatThrownBy(
+            () ->
+                ImmutableCatalogMigrator.builder()
+                    .sourceCatalog(sourceCatalog)
+                    .targetCatalog(targetCatalog)
+                    .deleteEntriesFromSourceCatalog(false)
+                    .build())
+        .isInstanceOf(UnsupportedOperationException.class)
+        .hasMessageContaining(
+            "target catalog TestCatalog{} doesn't implement SupportsNamespaces to create missing namespaces.");
+
+    Assertions.assertThatThrownBy(
+            () ->
+                ImmutableCatalogMigrator.builder()
+                    .sourceCatalog(sourceCatalog)
+                    .targetCatalog(new HadoopCatalog())
+                    .deleteEntriesFromSourceCatalog(false)
+                    .build())
+        .isInstanceOf(UnsupportedOperationException.class)
+        .hasMessageContaining(
+            "source catalog TestCatalog{} doesn't implement SupportsNamespaces to list all namespaces.");
+  }
+}
diff --git a/iceberg-catalog-migrator/api/src/test/resources/logback.xml b/iceberg-catalog-migrator/api/src/test/resources/logback.xml
new file mode 100644
index 00000000..769b6180
--- /dev/null
+++ b/iceberg-catalog-migrator/api/src/test/resources/logback.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<configuration>
+  <!-- file appender for the catalog_migration.log audit trail checked by the tests -->
+  <appender name="file-log" class="ch.qos.logback.core.FileAppender">
+    <file>${catalog.migration.log.dir}/catalog_migration.log</file>
+    <append>true</append>
+    <encoder>
+      <pattern>%date{ISO8601} [%thread] %-5level %logger{36} - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <root level="INFO">
+    <appender-ref ref="file-log"/>
+  </root>
+</configuration>
diff --git a/iceberg-catalog-migrator/cli/build.gradle.kts b/iceberg-catalog-migrator/cli/build.gradle.kts
new file mode 100644
index 00000000..b7534127
--- /dev/null
+++ b/iceberg-catalog-migrator/cli/build.gradle.kts
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { + `java-library` + `maven-publish` + signing + alias(libs.plugins.nessie.run) + `build-conventions` +} + +java.sourceCompatibility = JavaVersion.VERSION_21 + +applyShadowJar() + +dependencies { + implementation(project(":iceberg-catalog-migrator-api")) + implementation(libs.guava) + implementation(libs.slf4j) + runtimeOnly(libs.logback.classic) + implementation(libs.picocli) + implementation(libs.iceberg.spark.runtime) + implementation(libs.hadoop.aws) { exclude("com.amazonaws", "aws-java-sdk-bundle") } + // AWS dependencies based on https://iceberg.apache.org/docs/latest/aws/#enabling-aws-integration + runtimeOnly(libs.aws.sdk.apache.client) + runtimeOnly(libs.aws.sdk.auth) + runtimeOnly(libs.aws.sdk.glue) + runtimeOnly(libs.aws.sdk.s3) + runtimeOnly(libs.aws.sdk.dynamo) + runtimeOnly(libs.aws.sdk.kms) + runtimeOnly(libs.aws.sdk.lakeformation) + runtimeOnly(libs.aws.sdk.sts) + runtimeOnly(libs.aws.sdk.url.connection.client) + + // needed for Hive catalog + runtimeOnly("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hbase") + exclude("org.apache.logging.log4j") + exclude("co.cask.tephra") + exclude("com.google.code.findbugs", "jsr305") + exclude("org.eclipse.jetty.aggregate", "jetty-all") + exclude("org.eclipse.jetty.orbit", "javax.servlet") + exclude("org.apache.parquet", "parquet-hadoop-bundle") + exclude("com.tdunning", "json") + exclude("javax.transaction", "transaction-api") + exclude("com.zaxxer", "HikariCP") + } + runtimeOnly("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hive", "hive-llap-tez") + exclude("org.apache.logging.log4j") + exclude("com.google.protobuf", "protobuf-java") + exclude("org.apache.calcite") + exclude("org.apache.calcite.avatica") + exclude("com.google.code.findbugs", "jsr305") + } + runtimeOnly("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") + + testImplementation(platform(libs.junit.bom)) + testImplementation("org.junit.jupiter:junit-jupiter-params") + testImplementation("org.junit.jupiter:junit-jupiter-api") + testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine") + testRuntimeOnly("org.junit.platform:junit-platform-launcher") + testImplementation(libs.assertj) + testImplementation(libs.logcaptor) + + testImplementation(project(":iceberg-catalog-migrator-api-test")) + + // for integration tests + testImplementation( + "org.apache.iceberg:iceberg-hive-metastore:${libs.versions.iceberg.get()}:tests" + ) + // this junit4 dependency is needed for above Iceberg's TestHiveMetastore + testRuntimeOnly("junit:junit:4.13.2") + + 
testImplementation("org.apache.hive:hive-metastore:${libs.versions.hive.get()}") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hbase") + exclude("org.apache.logging.log4j") + exclude("co.cask.tephra") + exclude("com.google.code.findbugs", "jsr305") + exclude("org.eclipse.jetty.aggregate", "jetty-all") + exclude("org.eclipse.jetty.orbit", "javax.servlet") + exclude("org.apache.parquet", "parquet-hadoop-bundle") + exclude("com.tdunning", "json") + exclude("javax.transaction", "transaction-api") + exclude("com.zaxxer", "HikariCP") + } + testImplementation("org.apache.hive:hive-exec:${libs.versions.hive.get()}:core") { + // these are taken from iceberg repo configurations + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("org.pentaho") // missing dependency + exclude("org.apache.hive", "hive-llap-tez") + exclude("org.apache.logging.log4j") + exclude("com.google.protobuf", "protobuf-java") + exclude("org.apache.calcite") + exclude("org.apache.calcite.avatica") + exclude("com.google.code.findbugs", "jsr305") + } + testImplementation("org.apache.hadoop:hadoop-mapreduce-client-core:${libs.versions.hadoop.get()}") + + nessieQuarkusServer( + "org.projectnessie.nessie:nessie-quarkus:${libs.versions.nessie.get()}:runner" + ) +} + +nessieQuarkusApp { includeTask(tasks.named("intTest")) } + +tasks.named("test") { systemProperty("expectedCLIVersion", project.version) } + +val processResources = + tasks.named("processResources") { + inputs.property("projectVersion", project.version) + filter( + org.apache.tools.ant.filters.ReplaceTokens::class, + mapOf("tokens" to mapOf("projectVersion" to project.version)), + ) + } + +val mainClassName = "org.apache.polaris.iceberg.catalog.migrator.cli.CatalogMigrationCLI" + +val shadowJar = tasks.named("shadowJar") { isZip64 = true } + +shadowJar { manifest { attributes["Main-Class"] = mainClassName } } + +tasks.withType().configureEach { systemProperty("java.security.manager", "allow") } diff --git a/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/BaseRegisterCommand.java b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/BaseRegisterCommand.java new file mode 100644 index 00000000..cef41dea --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/BaseRegisterCommand.java @@ -0,0 +1,359 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.polaris.iceberg.catalog.migrator.cli;
+
+import com.google.common.base.Preconditions;
+import java.io.Console;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.stream.Collectors;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationResult;
+import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import picocli.CommandLine;
+
+public abstract class BaseRegisterCommand implements Callable<Integer> {
+
+  @CommandLine.ArgGroup(
+      exclusive = false,
+      multiplicity = "1",
+      heading = "Source catalog options: %n")
+  protected SourceCatalogOptions sourceCatalogOptions;
+
+  @CommandLine.ArgGroup(
+      exclusive = false,
+      multiplicity = "1",
+      heading = "Target catalog options: %n")
+  private TargetCatalogOptions targetCatalogOptions;
+
+  @CommandLine.ArgGroup(heading = "Identifier options: %n")
+  private IdentifierOptions identifierOptions;
+
+  @CommandLine.Option(
+      names = {"--output-dir"},
+      defaultValue = "",
+      description = {
+        "Optional local output directory path to write CLI output files like `failed_identifiers.txt`, "
+            + "`failed_to_delete_at_source.txt`, `dry_run_identifiers.txt`. "
+            + "If not specified, uses the present working directory.",
+        "Example: --output-dir /tmp/output/",
+        "         --output-dir $PWD/output_folder"
+      })
+  private Path outputDirPath;
+
+  @CommandLine.Option(
+      names = {"--dry-run"},
+      description =
+          "Optional configuration to simulate the registration without actually registering. Can learn about a list "
+              + "of tables that will be registered by running this.")
+  private boolean isDryRun;
+
+  @CommandLine.Option(
+      names = {"--disable-safety-prompts"},
+      description = "Optional configuration to disable safety prompts which needs console input.")
+  private boolean disablePrompts;
+
+  @CommandLine.Option(
+      names = {"--stacktrace"},
+      description =
+          "Optional configuration to enable capturing stacktrace in logs in case of failures.")
+  private boolean enableStackTrace;
+
+  private static final int BATCH_SIZE = 100;
+  public static final String FAILED_IDENTIFIERS_FILE = "failed_identifiers.txt";
+  public static final String FAILED_TO_DELETE_AT_SOURCE_FILE = "failed_to_delete_at_source.txt";
+  public static final String DRY_RUN_FILE = "dry_run_identifiers.txt";
+
+  private static final Logger consoleLog = LoggerFactory.getLogger("console-log");
+
+  public BaseRegisterCommand() {}
+
+  protected abstract CatalogMigrator catalogMigrator(
+      Catalog sourceCatalog, Catalog targetCatalog, boolean enableStackTrace);
+
+  protected abstract boolean canProceed(Catalog sourceCatalog);
+
+  protected abstract String operation();
+
+  protected abstract String operated();
+
+  protected abstract String operate();
+
+  @Override
+  public Integer call() {
+    Set<TableIdentifier> identifiers = Collections.emptySet();
+    String identifierRegEx = identifierOptions != null ? identifierOptions.identifiersRegEx : null;
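+    // NOTE: identifiers stays empty when only a regex (or nothing at all) was supplied;
+    // in that case the regex filter is applied below while listing tables from the source catalog.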
+
+    if (identifierOptions != null) {
+      identifiers = identifierOptions.processIdentifiersInput();
+    }
+    checkAndWarnAboutIdentifiers(identifiers, identifierRegEx);
+
+    validateOutputDir();
+
+    Catalog sourceCatalog = null;
+    Catalog targetCatalog = null;
+
+    try {
+      sourceCatalog = sourceCatalogOptions.build();
+      consoleLog.info("Configured source catalog: {}", sourceCatalog.name());
+
+      targetCatalog = targetCatalogOptions.build();
+      consoleLog.info("Configured target catalog: {}", targetCatalog.name());
+
+      if (!isDryRun && !disablePrompts && !canProceed(sourceCatalog)) {
+        return 1;
+      }
+
+      CatalogMigrator catalogMigrator =
+          catalogMigrator(sourceCatalog, targetCatalog, enableStackTrace);
+
+      if (identifiers.isEmpty()) {
+        consoleLog.info("Identifying tables for {} ...", operation());
+        identifiers = catalogMigrator.getMatchingTableIdentifiers(identifierRegEx);
+        if (identifiers.isEmpty()) {
+          consoleLog.warn(
+              "No tables were identified for {}. Please check `catalog_migration.log` file for more info.",
+              operation());
+          return 1;
+        }
+      }
+
+      if (isDryRun) {
+        consoleLog.info("Dry run is completed.");
+        handleDryRunResult(identifiers);
+        return 0;
+      }
+
+      consoleLog.info("Identified {} tables for {}.", identifiers.size(), operation());
+
+      consoleLog.info("Started {} ...", operation());
+
+      CatalogMigrationResult result;
+      try {
+        int processedIdentifiersCount = 0;
+        for (TableIdentifier identifier : identifiers) {
+          catalogMigrator.registerTable(identifier);
+          processedIdentifiersCount++;
+          if (processedIdentifiersCount % BATCH_SIZE == 0
+              || processedIdentifiersCount == identifiers.size()) {
+            consoleLog.info(
+                "Attempted {} for {} tables out of {} tables.",
+                operation(),
+                processedIdentifiersCount,
+                identifiers.size());
+          }
+        }
+      } finally {
+        consoleLog.info("Finished {} ...", operation());
+        result = catalogMigrator.result();
+        handleResults(result);
+      }
+
+      if (!result.failedToRegisterTableIdentifiers().isEmpty()
+          || !result.failedToDeleteTableIdentifiers().isEmpty()
+          || result.registeredTableIdentifiers().isEmpty()) {
+        return 1;
+      }
+
+      return 0;
+    } finally {
+      close(sourceCatalog);
+      close(targetCatalog);
+    }
+  }
+
+  private void close(Catalog catalog) {
+    if (catalog instanceof AutoCloseable) {
+      try {
+        ((AutoCloseable) catalog).close();
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
+  private void checkAndWarnAboutIdentifiers(
+      Set<TableIdentifier> identifiers, String identifierRegEx) {
+    if (identifiers.isEmpty()) {
+      if (identifierRegEx != null) {
+        consoleLog.warn(
+            "User has not specified the table identifiers."
+                + " Will be selecting all the tables from all the namespaces from the source catalog "
+                + "which matches the regex pattern:{}",
+            identifierRegEx);
+      } else {
+        consoleLog.warn(
+            "User has not specified the table identifiers."
+ + " Will be selecting all the tables from all the namespaces from the source catalog."); + } + } + } + + private void validateOutputDir() { + if (!Files.exists(outputDirPath)) { + try { + Files.createDirectories(outputDirPath); + } catch (IOException ex) { + throw new UncheckedIOException( + "Failed to create the output directory from the path specified in `--output-dir`", ex); + } + } + Preconditions.checkArgument( + Files.isWritable(outputDirPath), "Path specified in `--output-dir` is not writable"); + } + + private void handleResults(CatalogMigrationResult result) { + try { + writeToFile( + outputDirPath.resolve(FAILED_IDENTIFIERS_FILE), + result.failedToRegisterTableIdentifiers()); + writeToFile( + outputDirPath.resolve(FAILED_TO_DELETE_AT_SOURCE_FILE), + result.failedToDeleteTableIdentifiers()); + } finally { + printSummary(result); + printDetails(result); + } + } + + private void handleDryRunResult(Set identifiers) { + try { + writeToFile(outputDirPath.resolve(DRY_RUN_FILE), identifiers); + } finally { + printDryRunResult(identifiers); + } + } + + private void printSummary(CatalogMigrationResult result) { + consoleLog.info("Summary: "); + if (!result.registeredTableIdentifiers().isEmpty()) { + consoleLog.info( + "Successfully {} {} tables from {} catalog to {} catalog.", + operated(), + result.registeredTableIdentifiers().size(), + sourceCatalogOptions.type.name(), + targetCatalogOptions.type.name()); + } + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + consoleLog.error( + "Failed to {} {} tables from {} catalog to {} catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "Failed identifiers are written into `{}`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + operate(), + result.failedToRegisterTableIdentifiers().size(), + sourceCatalogOptions.type.name(), + targetCatalogOptions.type.name(), + FAILED_IDENTIFIERS_FILE); + } + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + consoleLog.error( + "Failed to delete {} tables from {} catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "{}Failed to delete identifiers are written into `{}`.", + result.failedToDeleteTableIdentifiers().size(), + sourceCatalogOptions.type.name(), + System.lineSeparator(), + FAILED_TO_DELETE_AT_SOURCE_FILE); + } + } + + private void printDetails(CatalogMigrationResult result) { + consoleLog.info("Details: "); + if (!result.registeredTableIdentifiers().isEmpty()) { + consoleLog.info( + "Successfully {} these tables:{}{}", + operated(), + System.lineSeparator(), + result.registeredTableIdentifiers()); + } + + if (!result.failedToRegisterTableIdentifiers().isEmpty()) { + consoleLog.error( + "Failed to {} these tables:{}{}", + operate(), + System.lineSeparator(), + result.failedToRegisterTableIdentifiers()); + } + + if (!result.failedToDeleteTableIdentifiers().isEmpty()) { + consoleLog.error( + "Failed to delete these tables from source catalog:{}{}", + System.lineSeparator(), + result.failedToDeleteTableIdentifiers()); + } + } + + private void printDryRunResult(Set result) { + consoleLog.info("Summary: "); + consoleLog.info( + "Identified {} tables for {} by dry-run. These identifiers are also written into {}. 
" + + "This file can be used with `--identifiers-from-file` option for an actual run.", + result.size(), + operation(), + DRY_RUN_FILE); + consoleLog.info( + "Details: {}Identified these tables for {} by dry-run:{}{}", + System.lineSeparator(), + operation(), + System.lineSeparator(), + result); + } + + private static void writeToFile(Path filePath, Collection identifiers) { + List identifiersString = + identifiers.stream().map(TableIdentifier::toString).collect(Collectors.toList()); + try { + Files.write(filePath, identifiersString); + } catch (IOException e) { + throw new UncheckedIOException("Failed to write the file:" + filePath, e); + } + } + + protected boolean proceed() { + Console console = System.console(); + while (true) { + consoleLog.info( + "Are you certain that you wish to proceed, after reading the above warnings? (yes/no):"); + String input = console.readLine(); + + if (input.equalsIgnoreCase("yes")) { + consoleLog.info("Continuing..."); + return true; + } else if (input.equalsIgnoreCase("no")) { + consoleLog.info("Aborting..."); + return false; + } else { + consoleLog.info("Invalid input. Please enter 'yes' or 'no'."); + } + } + } +} diff --git a/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/CLIVersionProvider.java b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/CLIVersionProvider.java new file mode 100644 index 00000000..3fecc2d4 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/CLIVersionProvider.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import java.io.InputStream; +import java.util.Properties; +import picocli.CommandLine.IVersionProvider; + +public class CLIVersionProvider implements IVersionProvider { + @Override + public String[] getVersion() throws Exception { + try (InputStream input = + CLIVersionProvider.class + .getResource("version.properties") + .openConnection() + .getInputStream()) { + Properties props = new Properties(); + props.load(input); + return new String[] {props.getProperty("cli.version")}; + } + } +} diff --git a/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/CatalogMigrationCLI.java b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/CatalogMigrationCLI.java new file mode 100644 index 00000000..6d4b0820 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/CatalogMigrationCLI.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +@CommandLine.Command( + name = "iceberg-catalog-migrator", + mixinStandardHelpOptions = true, + versionProvider = CLIVersionProvider.class, + subcommands = {MigrateCommand.class, RegisterCommand.class}) +public class CatalogMigrationCLI { + + public CatalogMigrationCLI() {} + + private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); + + public static void main(String... args) { + CommandLine commandLine = + new CommandLine(new CatalogMigrationCLI()) + .setExecutionExceptionHandler( + (ex, cmd, parseResult) -> { + if (enableStacktrace(args)) { + cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); + } else { + consoleLog.error( + "Error during CLI execution: {}. Please check `catalog_migration.log` file for more info.", + ex.getMessage()); + } + return 1; + }); + commandLine.setUsageHelpWidth(150); + int exitCode = commandLine.execute(args); + System.exit(exitCode); + } + + private static boolean enableStacktrace(String... args) { + for (String arg : args) { + if (arg.equalsIgnoreCase("--stacktrace")) { + return true; + } + } + return false; + } +} diff --git a/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/IdentifierOptions.java b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/IdentifierOptions.java new file mode 100644 index 00000000..7047935b --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/IdentifierOptions.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.polaris.iceberg.catalog.migrator.cli;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+import java.util.stream.Collectors;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import picocli.CommandLine;
+
+public class IdentifierOptions {
+
+  @CommandLine.Option(
+      names = {"--identifiers"},
+      split = ",",
+      description = {
+        "Optional selective set of identifiers to register. If not specified, all the tables will be registered. "
+            + "Use this when there are few identifiers that need to be registered. For a large number of identifiers, "
+            + "use the `--identifiers-from-file` or `--identifiers-regex` option.",
+        "Example: --identifiers foo.t1,bar.t2"
+      })
+  protected Set<String> identifiers = new HashSet<>();
+
+  @CommandLine.Option(
+      names = {"--identifiers-from-file"},
+      description = {
+        "Optional text file path that contains a set of table identifiers (one per line) to register. Should not be "
+            + "used with `--identifiers` or `--identifiers-regex` option.",
+        "Example: --identifiers-from-file /tmp/files/ids.txt"
+      })
+  protected String identifiersFromFile;
+
+  @CommandLine.Option(
+      names = {"--identifiers-regex"},
+      description = {
+        "Optional regular expression pattern used to register only the tables whose identifiers match this pattern. "
+            + "Should not be used with `--identifiers` or '--identifiers-from-file' option.",
+        "Example: --identifiers-regex ^foo\\..*"
+      })
+  protected String identifiersRegEx;
+
+  private static final Logger consoleLog = LoggerFactory.getLogger("console-log");
+
+  protected Set<TableIdentifier> processIdentifiersInput() {
+
+    if (!identifiers.isEmpty()) {
+      return identifiers.stream()
+          .map(TableIdentifier::parse)
+          .collect(Collectors.toCollection(LinkedHashSet::new));
+    } else if (identifiersFromFile != null) {
+      Preconditions.checkArgument(
+          Files.exists(Paths.get(identifiersFromFile)),
+          "File specified in `--identifiers-from-file` option does not exist");
+      try {
+        consoleLog.info("Collecting identifiers from the file {} ...", identifiersFromFile);
+        return Files.readAllLines(Paths.get(identifiersFromFile)).stream()
+            .map(String::trim)
+            .filter(string -> !string.isEmpty())
+            .map(TableIdentifier::parse)
+            .collect(Collectors.toCollection(LinkedHashSet::new));
+      } catch (IOException e) {
+        throw new UncheckedIOException(
+            String.format("Failed to read the file: %s", identifiersFromFile), e);
+      }
+    } else if (identifiersRegEx != null) {
+      Preconditions.checkArgument(
+          !identifiersRegEx.trim().isEmpty(), "--identifiers-regex should not be empty");
+      // check whether the pattern is compilable
+      try {
+        Pattern.compile(identifiersRegEx);
+      } catch (PatternSyntaxException ex) {
+        throw new IllegalArgumentException("--identifiers-regex pattern is not compilable", ex);
+      }
+    }
+    return Sets.newHashSet();
+  }
+}
diff --git a/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/MigrateCommand.java b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/MigrateCommand.java
new file mode 100644
index 00000000..5616e07c
--- /dev/null
+++ b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/MigrateCommand.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.cli;
+
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil;
+import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrator;
+import org.apache.polaris.iceberg.catalog.migrator.api.ImmutableCatalogMigrator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import picocli.CommandLine;
+
+@CommandLine.Command(
+    name = "migrate",
+    mixinStandardHelpOptions = true,
+    versionProvider = CLIVersionProvider.class,
+    // As both source and target catalogs have similar configurations, the documentation is
+    // easier to read if the target and source properties appear one after another instead of
+    // in sorted order.
+    sortOptions = false,
+    description =
+        "Bulk migrate the iceberg tables from source catalog to target catalog without data copy."
+            + " Table entries from source catalog will be deleted after the successful migration to the target "
+            + "catalog.")
+public class MigrateCommand extends BaseRegisterCommand {
+
+  private static final String newLine = System.lineSeparator();
+  private static final Logger consoleLog = LoggerFactory.getLogger("console-log");
+
+  @Override
+  protected CatalogMigrator catalogMigrator(
+      Catalog sourceCatalog, Catalog targetCatalog, boolean enableStackTrace) {
+
+    return ImmutableCatalogMigrator.builder()
+        .sourceCatalog(sourceCatalog)
+        .targetCatalog(targetCatalog)
+        .deleteEntriesFromSourceCatalog(true)
+        .enableStacktrace(enableStackTrace)
+        .build();
+  }
+
+  @Override
+  public Integer call() {
+    if (sourceCatalogOptions.type == CatalogMigrationUtil.CatalogType.HADOOP) {
+      consoleLog.error(
+          "Source catalog is a Hadoop catalog and it doesn't support deleting the table entries just from the catalog. "
+              + "Please use 'register' command instead.");
+      return 1;
+    }
+    return super.call();
+  }
+
+  @Override
+  protected boolean canProceed(Catalog sourceCatalog) {
+    consoleLog.warn(
+        "{}"
+            + "\ta) Executing catalog migration when the source catalog has some in-progress commits "
+            + "{}\tcan lead to a data loss as the in-progress commits will not be considered for migration. "
" + + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog.{}" + + "{}" + + "\tb) After the migration, successfully migrated tables will be deleted from the source catalog " + + "{}\tand can only be accessed from the target catalog.", + newLine, + newLine, + newLine, + newLine, + newLine, + newLine); + return proceed(); + } + + @Override + protected String operation() { + return "migration"; + } + + @Override + protected String operated() { + return "migrated"; + } + + @Override + protected String operate() { + return "migrate"; + } +} diff --git a/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/RegisterCommand.java b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/RegisterCommand.java new file mode 100644 index 00000000..c482217a --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/RegisterCommand.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import org.apache.iceberg.catalog.Catalog; +import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrator; +import org.apache.polaris.iceberg.catalog.migrator.api.ImmutableCatalogMigrator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +@CommandLine.Command( + name = "register", + mixinStandardHelpOptions = true, + versionProvider = CLIVersionProvider.class, + // As both source and target catalog has similar configurations, + // documentation is easy to read if the target and source property is one after another instead + // of sorted order. + sortOptions = false, + description = + "Bulk register the iceberg tables from source catalog to target catalog without data copy.") +public class RegisterCommand extends BaseRegisterCommand { + + private static final String newLine = System.lineSeparator(); + private static final Logger consoleLog = LoggerFactory.getLogger("console-log"); + + @Override + protected CatalogMigrator catalogMigrator( + Catalog sourceCatalog, Catalog targetCatalog, boolean enableStackTrace) { + return ImmutableCatalogMigrator.builder() + .sourceCatalog(sourceCatalog) + .targetCatalog(targetCatalog) + .deleteEntriesFromSourceCatalog(false) + .enableStacktrace(enableStackTrace) + .build(); + } + + @Override + protected boolean canProceed(Catalog sourceCatalog) { + consoleLog.warn( + "{}" + + "\ta) Executing catalog migration when the source catalog has some in-progress commits " + + "{}\tcan lead to a data loss as the in-progress commits will not be considered for migration. 
" + + "{}\tSo, while using this tool please make sure there are no in-progress commits for the source " + + "catalog.{}" + + "{}" + + "\tb) After the registration, successfully registered tables will be present in both source and target " + + "catalog. " + + "{}\tHaving the same metadata.json registered in more than one catalog can lead to missing updates, " + + "loss of data, and table corruption. " + + "{}\tUse `migrate` command to automatically delete the table from source catalog after " + + "migration.", + newLine, + newLine, + newLine, + newLine, + newLine, + newLine, + newLine); + return proceed(); + } + + @Override + protected String operation() { + return "registration"; + } + + @Override + protected String operated() { + return "registered"; + } + + @Override + protected String operate() { + return "register"; + } +} diff --git a/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/SourceCatalogOptions.java b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/SourceCatalogOptions.java new file mode 100644 index 00000000..31e2c248 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/SourceCatalogOptions.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import java.util.HashMap; +import java.util.Map; +import org.apache.iceberg.catalog.Catalog; +import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil; +import picocli.CommandLine; + +public class SourceCatalogOptions { + + @CommandLine.Option( + names = "--source-catalog-type", + required = true, + description = { + "Source catalog type. 
Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, "
+            + "NESSIE, REST].",
+        "Example: --source-catalog-type GLUE",
+        "         --source-catalog-type NESSIE"
+      })
+  protected CatalogMigrationUtil.CatalogType type;
+
+  @CommandLine.Option(
+      names = "--source-catalog-properties",
+      required = true,
+      split = ",",
+      description = {
+        "Iceberg catalog properties for source catalog (like uri, warehouse, etc).",
+        "Example: --source-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie"
+      })
+  private Map<String, String> properties;
+
+  @CommandLine.Option(
+      names = "--source-catalog-hadoop-conf",
+      split = ",",
+      description = {
+        "Optional source catalog Hadoop configurations required by the Iceberg catalog.",
+        "Example: --source-catalog-hadoop-conf key1=value1,key2=value2"
+      })
+  private final Map<String, String> hadoopConf = new HashMap<>();
+
+  @CommandLine.Option(
+      names = {"--source-custom-catalog-impl"},
+      description = {
+        "Optional fully qualified class name of the custom catalog implementation of the source catalog. Required "
+            + "when the catalog type is CUSTOM.",
+        "Example: --source-custom-catalog-impl org.apache.iceberg.AwesomeCatalog"
+      })
+  private String customCatalogImpl;
+
+  Catalog build() {
+    return CatalogMigrationUtil.buildCatalog(
+        properties, type, "SOURCE_CATALOG_" + type.name(), customCatalogImpl, hadoopConf);
+  }
+}
diff --git a/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/TargetCatalogOptions.java b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/TargetCatalogOptions.java
new file mode 100644
index 00000000..1ea9bc9d
--- /dev/null
+++ b/iceberg-catalog-migrator/cli/src/main/java/org/apache/polaris/iceberg/catalog/migrator/cli/TargetCatalogOptions.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.cli;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil;
+import picocli.CommandLine;
+
+public class TargetCatalogOptions {
+
+  @CommandLine.Option(
+      names = "--target-catalog-type",
+      required = true,
+      description = {
+        "Target catalog type. Can be one of these [CUSTOM, DYNAMODB, ECS, GLUE, HADOOP, HIVE, JDBC, "
+            + "NESSIE, REST].",
+        "Example: --target-catalog-type GLUE",
+        "         --target-catalog-type NESSIE"
+      })
+  protected CatalogMigrationUtil.CatalogType type;
+
+  @CommandLine.Option(
+      names = "--target-catalog-properties",
+      required = true,
+      split = ",",
+      description = {
+        "Iceberg catalog properties for target catalog (like uri, warehouse, etc).",
+        "Example: --target-catalog-properties uri=http://localhost:19120/api/v1,ref=main,warehouse=/tmp/warehouseNessie"
+      })
+  private Map<String, String> properties;
+
+  @CommandLine.Option(
+      names = "--target-catalog-hadoop-conf",
+      split = ",",
+      description = {
+        "Optional target catalog Hadoop configurations required by the Iceberg catalog.",
+        "Example: --target-catalog-hadoop-conf key1=value1,key2=value2"
+      })
+  private final Map<String, String> hadoopConf = new HashMap<>();
+
+  @CommandLine.Option(
+      names = {"--target-custom-catalog-impl"},
+      description = {
+        "Optional fully qualified class name of the custom catalog implementation of the target catalog. Required "
+            + "when the catalog type is CUSTOM.",
+        "Example: --target-custom-catalog-impl org.apache.iceberg.AwesomeCatalog"
+      })
+  private String customCatalogImpl;
+
+  Catalog build() {
+    return CatalogMigrationUtil.buildCatalog(
+        properties, type, "TARGET_CATALOG_" + type.name(), customCatalogImpl, hadoopConf);
+  }
+}
diff --git a/iceberg-catalog-migrator/cli/src/main/resources/logback.xml b/iceberg-catalog-migrator/cli/src/main/resources/logback.xml
new file mode 100644
index 00000000..b257baf3
--- /dev/null
+++ b/iceberg-catalog-migrator/cli/src/main/resources/logback.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<configuration>
+  <!-- full audit trail of the migration, referenced from the CLI messages -->
+  <appender name="file-log" class="ch.qos.logback.core.FileAppender">
+    <file>${catalog.migration.log.dir}/catalog_migration.log</file>
+    <append>true</append>
+    <encoder>
+      <pattern>%date{ISO8601} [%thread] %-5level %logger{36} - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <!-- user-facing console output, used by the "console-log" logger in the CLI classes -->
+  <appender name="console-log" class="ch.qos.logback.core.ConsoleAppender">
+    <withJansi>true</withJansi>
+    <encoder>
+      <pattern>%highlight(%-5level) - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <logger name="console-log" level="INFO">
+    <appender-ref ref="console-log"/>
+  </logger>
+
+  <root level="INFO">
+    <appender-ref ref="file-log"/>
+  </root>
+</configuration>
diff --git a/iceberg-catalog-migrator/cli/src/main/resources/org/apache/polaris/iceberg/catalog/migrator/cli/version.properties b/iceberg-catalog-migrator/cli/src/main/resources/org/apache/polaris/iceberg/catalog/migrator/cli/version.properties
new file mode 100644
index 00000000..45434cbe
--- /dev/null
+++ b/iceberg-catalog-migrator/cli/src/main/resources/org/apache/polaris/iceberg/catalog/migrator/cli/version.properties
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License. 
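+# The @projectVersion@ token below is replaced with the actual project version by the
+# Gradle processResources task (ReplaceTokens filter configured in cli/build.gradle.kts).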
+#
+cli.version=@projectVersion@
diff --git a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/AbstractCLIMigrationTest.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/AbstractCLIMigrationTest.java
new file mode 100644
index 00000000..57f85ac7
--- /dev/null
+++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/AbstractCLIMigrationTest.java
@@ -0,0 +1,496 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.iceberg.catalog.migrator.cli;
+
+import static org.apache.polaris.iceberg.catalog.migrator.cli.BaseRegisterCommand.DRY_RUN_FILE;
+import static org.apache.polaris.iceberg.catalog.migrator.cli.BaseRegisterCommand.FAILED_IDENTIFIERS_FILE;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import nl.altindag.log.LogCaptor;
+import nl.altindag.log.model.LogEvent;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil;
+import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrator;
+import org.apache.polaris.iceberg.catalog.migrator.api.test.AbstractTest;
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+public abstract class AbstractCLIMigrationTest extends AbstractTest {
+
+  protected static @TempDir Path outputDir;
+
+  protected static String sourceCatalogProperties;
+  protected static String targetCatalogProperties;
+
+  protected static String sourceCatalogType;
+  protected static String targetCatalogType;
+
+  protected static void initializeSourceCatalog(
+      CatalogMigrationUtil.CatalogType catalogType, Map<String, String> additionalProp) {
+    initializeCatalog(true, catalogType, additionalProp);
+    createNamespacesForSourceCatalog();
+  }
+
+  protected static void initializeTargetCatalog(
+      CatalogMigrationUtil.CatalogType catalogType, Map<String, String> additionalProp) {
+    initializeCatalog(false, catalogType, additionalProp);
+    createNamespacesForTargetCatalog();
+  }
+
+  private static void initializeCatalog(
+      boolean isSourceCatalog,
+      CatalogMigrationUtil.CatalogType catalogType,
+      Map<String, String> additionalProp) {
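+    // resolve the type-specific default properties for the catalog under test; the joined
+    // "key=value" string is what the tests later pass via --source/--target-catalog-properties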
+    Map<String, String> properties;
+    switch (catalogType) {
+      case HADOOP:
+        properties = hadoopCatalogProperties(isSourceCatalog);
+        break;
+      case NESSIE:
+        properties = nessieCatalogProperties(isSourceCatalog);
+        break;
+      case HIVE:
+        properties = hiveCatalogProperties(isSourceCatalog, additionalProp);
+        break;
+      default:
+        throw new UnsupportedOperationException(
+            String.format("Unsupported for catalog type: %s", catalogType));
+    }
+    Catalog catalog =
+        CatalogMigrationUtil.buildCatalog(
+            properties,
+            catalogType,
+            // parenthesized so both the source and target names get the catalog-type suffix
+            (isSourceCatalog ? "sourceCatalog" : "targetCatalog") + "_" + catalogType,
+            null,
+            null);
+    String propertiesStr = Joiner.on(",").withKeyValueSeparator("=").join(properties);
+    if (isSourceCatalog) {
+      sourceCatalog = catalog;
+      sourceCatalogProperties = propertiesStr;
+      sourceCatalogType = catalogType.name();
+    } else {
+      targetCatalog = catalog;
+      targetCatalogProperties = propertiesStr;
+      targetCatalogType = catalogType.name();
+    }
+  }
+
+  @AfterAll
+  protected static void tearDown() throws Exception {
+    dropNamespaces();
+  }
+
+  @BeforeEach
+  protected void beforeEach() {
+    createTables();
+  }
+
+  @AfterEach
+  protected void afterEach() {
+    // manually refreshing catalog due to missing refresh in Nessie catalog
+    // https://github.com/apache/iceberg/pull/6789
+    // create table will call refresh internally.
+    sourceCatalog.createTable(TableIdentifier.of(BAR, "tblx"), schema).refresh();
+    targetCatalog.createTable(TableIdentifier.of(BAR, "tblx"), schema).refresh();
+
+    dropTables();
+  }
+
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testRegister(boolean deleteSourceTables) throws Exception {
+    validateAssumptionForHadoopCatalogAsSource(deleteSourceTables);
+
+    String operation = deleteSourceTables ? "migration" : "registration";
+    String operated = deleteSourceTables ? "migrated" : "registered";
+
+    // register or migrate all the tables
+    RunCLI run = runCLI(deleteSourceTables, defaultArgs());
+
+    Assertions.assertThat(run.getExitCode()).isEqualTo(0);
+    Assertions.assertThat(run.getOut())
+        .contains(
+            "User has not specified the table identifiers. "
+                + "Will be selecting all the tables from all the namespaces from the source catalog.")
+        .contains(String.format("Identified 4 tables for %s.", operation))
+        .contains(
+            String.format(
+                "Summary: %nSuccessfully %s 4 tables from %s catalog to %s catalog.",
+                operated, sourceCatalogType, targetCatalogType))
+        .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated));
+
+    // manually refreshing catalog due to missing refresh in Nessie catalog
+    // https://github.com/apache/iceberg/pull/6789
+    targetCatalog.loadTable(FOO_TBL1).refresh();
+
+    Assertions.assertThat(targetCatalog.listTables(FOO))
+        .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2);
+    Assertions.assertThat(targetCatalog.listTables(BAR))
+        .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4);
+
+    // manually refreshing catalog due to missing refresh in Nessie catalog
+    // https://github.com/apache/iceberg/pull/6789
+    sourceCatalog.tableExists(FOO_TBL1);
+
+    if (deleteSourceTables) {
+      // table should be deleted after migration from source catalog
+      Assertions.assertThat(sourceCatalog.listTables(FOO)).isEmpty();
+      Assertions.assertThat(sourceCatalog.listTables(BAR)).isEmpty();
+    } else {
+      // tables should be present in the source catalog.
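+      // (plain registration leaves the source entries in place; only `migrate` deletes them)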
+ Assertions.assertThat(sourceCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(sourceCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); + } + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterSelectedTables(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + String operation = deleteSourceTables ? "migration" : "registration"; + String operated = deleteSourceTables ? "migrated" : "registered"; + + // using `--identifiers` option + List argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers", "bar.tbl3")); + RunCLI run = runCLI(deleteSourceTables, argsList); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .doesNotContain( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog.") + .contains(String.format("Identified 1 tables for %s.", operation)) + .contains( + String.format( + "Summary: %nSuccessfully %s 1 tables from %s catalog to %s catalog.", + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n[bar.tbl3]", operated)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + targetCatalog.loadTable(BAR_TBL3).refresh(); + + Assertions.assertThat(targetCatalog.listTables(FOO)).isEmpty(); + Assertions.assertThat(targetCatalog.listTables(BAR)).containsExactly(BAR_TBL3); + + Path identifierFile = outputDir.resolve("ids.txt"); + + // using `--identifiers-from-file` option + Files.write(identifierFile, Collections.singletonList("bar.tbl4")); + argsList = defaultArgs(); + argsList.addAll( + Arrays.asList("--identifiers-from-file", identifierFile.toAbsolutePath().toString())); + run = runCLI(deleteSourceTables, argsList); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .doesNotContain( + "User has not specified the table identifiers. " + + "Selecting all the tables from all the namespaces from the source catalog.") + .contains(String.format("Identified 1 tables for %s.", operation)) + .contains( + String.format( + "Summary: %nSuccessfully %s 1 tables from %s catalog to %s catalog.", + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + targetCatalog.loadTable(BAR_TBL3).refresh(); + + Assertions.assertThat(targetCatalog.listTables(FOO)).isEmpty(); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL4, BAR_TBL3); + Files.delete(identifierFile); + + // using `--identifiers-regex` option which matches all the tables starts with "foo." + argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers-regex", "^foo\\..*")); + run = runCLI(deleteSourceTables, argsList); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains( + "User has not specified the table identifiers. 
Will be selecting all the tables from all the namespaces " + + "from the source catalog which matches the regex pattern:^foo\\..*") + .contains(String.format("Identified 2 tables for %s.", operation)) + .contains( + String.format( + "Summary: %nSuccessfully %s 2 tables from %s catalog to %s catalog.", + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + targetCatalog.loadTable(BAR_TBL3).refresh(); + + Assertions.assertThat(targetCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterError(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + String operation = deleteSourceTables ? "migration" : "registration"; + String operate = deleteSourceTables ? "migrate" : "register"; + + // use invalid namespace which leads to NoSuchTableException + List argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers", "dummy.tbl3")); + RunCLI run = runCLI(deleteSourceTables, argsList); + + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)) + .contains( + String.format( + "Summary: %nFailed to %s 1 tables from %s catalog to %s catalog." + + " Please check the `catalog_migration.log`", + operate, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nFailed to %s these tables:%n[dummy.tbl3]", operate)); + + // try to register same table twice which leads to AlreadyExistsException + argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers", "foo.tbl2")); + runCLI(deleteSourceTables, argsList); + run = RunCLI.run(argsList.toArray(new String[0])); + + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)) + .contains( + String.format( + "Summary: %nFailed to %s 1 tables from %s catalog to %s catalog." + + " Please check the `catalog_migration.log`", + operate, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", operate)); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterWithFewFailures(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + String operation = deleteSourceTables ? "migration" : "registration"; + String operated = deleteSourceTables ? "migrated" : "registered"; + String operate = deleteSourceTables ? 
"migrate" : "register"; + + // register only foo.tbl2 + List argsList = defaultArgs(); + argsList.addAll(Arrays.asList("--identifiers", "foo.tbl2")); + RunCLI run = runCLI(deleteSourceTables, argsList); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 1 tables for %s.", operation)) + .contains( + String.format( + "Summary: %nSuccessfully %s 1 tables from %s catalog to %s catalog.", + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n[foo.tbl2]", operated)); + + if (deleteSourceTables) { + // create a table with the same name in source catalog which got deleted. + sourceCatalog.createTable(FOO_TBL2, schema); + } + + // register all the tables from source catalog again. So that registering `foo.tbl2` will fail. + run = runCLI(deleteSourceTables, defaultArgs()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 4 tables for %s.", operation)) + .contains( + String.format( + "Summary: %n" + + "Successfully %s 3 tables from %s catalog to %s catalog.%n" + + "Failed to %s 1 tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "Failed identifiers are written into `failed_identifiers.txt`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + operated, + sourceCatalogType, + targetCatalogType, + operate, + sourceCatalogType, + targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)) + .contains(String.format("Failed to %s these tables:%n[foo.tbl2]", operate)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + targetCatalog.loadTable(BAR_TBL3).refresh(); + + Assertions.assertThat(targetCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); + + Path failedIdentifiersFile = outputDir.resolve(FAILED_IDENTIFIERS_FILE); + + // retry the failed tables using `--identifiers-from-file` + argsList = defaultArgs(); + argsList.addAll( + Arrays.asList( + "--identifiers-from-file", failedIdentifiersFile.toAbsolutePath().toString())); + run = runCLI(deleteSourceTables, argsList); + + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "Summary: %n" + + "Failed to %s 1 tables from %s catalog to %s catalog. " + + "Please check the `catalog_migration.log` file for the failure reason. " + + "Failed identifiers are written into `failed_identifiers.txt`. " + + "Retry with that file using `--identifiers-from-file` option " + + "if the failure is because of network/connection timeouts.", + operate, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nFailed to %s these tables:%n[foo.tbl2]", operate)); + Assertions.assertThat(failedIdentifiersFile).exists(); + Assertions.assertThat(Files.readAllLines(failedIdentifiersFile)).containsExactly("foo.tbl2"); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterNoTables(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + // clean up the default tables present in the source catalog. 
+ dropTables(); + + RunCLI run = runCLI(deleteSourceTables, defaultArgs()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(1); + String operation = deleteSourceTables ? "migration" : "registration"; + Assertions.assertThat(run.getOut()) + .contains( + String.format( + "No tables were identified for %s. Please check `catalog_migration.log` file for more info.", + operation)); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testDryRun(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + List argsList = defaultArgs(); + argsList.add("--dry-run"); + RunCLI run = runCLI(deleteSourceTables, argsList); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + String operation = deleteSourceTables ? "migration" : "registration"; + // should not prompt for dry run + Assertions.assertThat(run.getOut()) + .doesNotContain( + "Are you certain that you wish to proceed, after reading the above warnings? (yes/no):") + .contains("Dry run is completed.") + .contains( + String.format( + "Summary: %n" + + "Identified 4 tables for %s by dry-run. " + + "These identifiers are also written into dry_run_identifiers.txt. " + + "This file can be used with `--identifiers-from-file` option for an actual run.", + operation)) + .contains( + String.format("Details: %nIdentified these tables for %s by dry-run:%n", operation)); + Path dryRunFile = outputDir.resolve(DRY_RUN_FILE); + Assertions.assertThat(dryRunFile).exists(); + Assertions.assertThat(Files.readAllLines(dryRunFile)) + .containsExactlyInAnyOrder("foo.tbl1", "foo.tbl2", "bar.tbl3", "bar.tbl4"); + } + + @ParameterizedTest + @CsvSource(value = {"false,false", "false,true", "true,false", "true,true"}) + public void testStacktrace(boolean deleteSourceTables, boolean enableStacktrace) + throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + try (LogCaptor logCaptor = LogCaptor.forClass(CatalogMigrator.class)) { + List argsList = defaultArgs(); + argsList.addAll( + Arrays.asList("--identifiers", "db.dummy_table", "--stacktrace=" + enableStacktrace)); + runCLI(deleteSourceTables, argsList); + + Assertions.assertThat(logCaptor.getLogEvents()).hasSize(1); + LogEvent logEvent = logCaptor.getLogEvents().get(0); + if (enableStacktrace) { + Assertions.assertThat(logEvent.getFormattedMessage()) + .isEqualTo("Unable to register the table db.dummy_table"); + Assertions.assertThat(logEvent.getThrowable()) + .isPresent() + .get() + .isInstanceOf(NoSuchTableException.class); + } else { + Assertions.assertThat(logEvent.getFormattedMessage()) + .isEqualTo( + "Unable to register the table db.dummy_table : Table does not exist: db.dummy_table"); + Assertions.assertThat(logEvent.getThrowable()).isEmpty(); + } + } + } + + protected static List defaultArgs() { + return Lists.newArrayList( + "--source-catalog-type", + sourceCatalogType, + "--source-catalog-properties", + sourceCatalogProperties, + "--target-catalog-type", + targetCatalogType, + "--target-catalog-properties", + targetCatalogProperties, + "--output-dir", + outputDir.toAbsolutePath().toString(), + "--disable-safety-prompts"); + } + + protected static RunCLI runCLI(boolean deleteSourceTables, List argsList) + throws Exception { + if (!deleteSourceTables) { + argsList.add(0, "register"); + } else { + argsList.add(0, "migrate"); + } + return RunCLI.run(argsList.toArray(new String[0])); + } +} diff --git 
a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/CLIOptionsTest.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/CLIOptionsTest.java new file mode 100644 index 00000000..0f63fbbc --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/CLIOptionsTest.java @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import com.google.common.collect.Lists; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.UUID; +import java.util.stream.Stream; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class CLIOptionsTest { + + protected static @TempDir Path logDir; + + @BeforeAll + protected static void initLogDir() { + System.setProperty("catalog.migration.log.dir", logDir.toAbsolutePath().toString()); + } + + private static Stream optionErrors() { + return Stream.of( + // no arguments + arguments( + Lists.newArrayList(), + "Error: Missing required argument(s): (--target-catalog-type= --target-catalog-properties=[,...] [--target-catalog-properties=[,...]]... [--target-catalog-hadoop-conf=[,...]]... 
[--target-custom-catalog-impl=])"), + // missing required arguments + arguments(Lists.newArrayList(""), "Unmatched argument at index 1: ''"), + // missing required arguments + arguments( + Lists.newArrayList( + "--source-catalog-properties", "properties1=ab", "--target-catalog-type", "NESSIE"), + "Error: Missing required argument(s): --source-catalog-type="), + // missing required arguments + arguments( + Lists.newArrayList("--source-catalog-type", "GLUE"), + "Error: Missing required argument(s): --source-catalog-properties="), + // missing required arguments + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "properties1=ab", + "--target-catalog-type", + "NESSIE"), + "Error: Missing required argument(s): --target-catalog-properties="), + // missing required arguments + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "properties1=ab", + "--target-catalog-properties", + "properties2=cd"), + "Error: Missing required argument(s): --target-catalog-type="), + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers", + "foo.tbl", + "--identifiers-from-file", + "file.txt", + "--identifiers-regex", + "^foo\\."), + "Error: --identifiers=, --identifiers-from-file=, --identifiers-regex= are mutually exclusive (specify only one)"), + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers", + "foo.tbl", + "--identifiers-from-file", + "file.txt"), + "Error: --identifiers=, --identifiers-from-file= are mutually exclusive (specify only one)"), + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers-regex", + "^foo\\.", + "--identifiers-from-file", + "file.txt"), + "Error: --identifiers-from-file=, --identifiers-regex= are mutually exclusive (specify only one)"), + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HADOOP", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HIVE", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers", + "foo.tbl", + "--identifiers-regex", + "^foo\\."), + "Error: --identifiers=, --identifiers-regex= are mutually exclusive " + + "(specify only one)")); + } + + @ParameterizedTest + @MethodSource("optionErrors") + public void testOptionErrorsForRegister(List args, String expectedMessage) + throws Exception { + executeAndValidateResults("register", args, expectedMessage, 2); + } + + @ParameterizedTest + @MethodSource("optionErrors") + public void testOptionErrorsForMigrate(List args, String expectedMessage) + throws Exception { + executeAndValidateResults("migrate", args, expectedMessage, 2); + } + + private static Stream invalidArgs() { + return Stream.of( + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HADOOP", + "--target-catalog-properties", + "k3=v3, k4=v4"), + "Error during CLI execution: Cannot initialize HadoopCatalog " + + "because warehousePath must not be null or empty"), + arguments( + 
Lists.newArrayList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HADOOP", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--identifiers-from-file", + "file.txt"), + "Error during CLI execution: File specified in `--identifiers-from-file` option does not exist"), + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HADOOP", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--output-dir", + "/path/to/file"), + "Error during CLI execution: Failed to create the output directory from the path specified in `--output-dir`"), + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HADOOP", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--output-dir", + readOnlyDirLocation()), + "Error during CLI execution: Path specified in `--output-dir` is not writable"), + // test with stacktrace + arguments( + Lists.newArrayList( + "--source-catalog-type", + "HIVE", + "--source-catalog-properties", + "k1=v1,k2=v2", + "--target-catalog-type", + "HADOOP", + "--target-catalog-properties", + "k3=v3, k4=v4", + "--output-dir", + readOnlyDirLocation(), + "--stacktrace"), + "java.lang.IllegalArgumentException: Path specified in `--output-dir` is not writable")); + } + + @ParameterizedTest + @MethodSource("invalidArgs") + public void testInvalidArgsForRegister(List args, String expectedMessage) + throws Exception { + executeAndValidateResults("register", args, expectedMessage, 1); + } + + @ParameterizedTest + @MethodSource("invalidArgs") + public void testInvalidArgsForMigrate(List args, String expectedMessage) + throws Exception { + executeAndValidateResults("migrate", args, expectedMessage, 1); + } + + @Test + public void version() throws Exception { + RunCLI run = RunCLI.runWithPrintWriter("--version"); + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()).startsWith(System.getProperty("expectedCLIVersion")); + } + + private static void executeAndValidateResults( + String command, List args, String expectedMessage, int expectedErrorCode) + throws Exception { + args.add(0, command); + RunCLI run = RunCLI.run(args); + + Assertions.assertThat(run.getExitCode()).isEqualTo(expectedErrorCode); + Assertions.assertThat(run.getErr()).contains(expectedMessage); + } + + private static String readOnlyDirLocation() { + Path readOnly = logDir.resolve(UUID.randomUUID().toString()); + try { + Files.createDirectory(readOnly); + } catch (IOException e) { + throw new RuntimeException(e); + } + Assertions.assertThat(readOnly.toFile().setWritable(false)).isTrue(); + + return readOnly.toAbsolutePath().toString(); + } +} diff --git a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/HadoopCLIMigrationTest.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/HadoopCLIMigrationTest.java new file mode 100644 index 00000000..0e6d6c42 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/HadoopCLIMigrationTest.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import java.util.Collections; +import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil; +import org.junit.jupiter.api.BeforeAll; + +public class HadoopCLIMigrationTest extends AbstractCLIMigrationTest { + + @BeforeAll + protected static void setup() { + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + } +} diff --git a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHadoopToHiveCLIMigrationTest.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHadoopToHiveCLIMigrationTest.java new file mode 100644 index 00000000..a7f4ee5c --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHadoopToHiveCLIMigrationTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import java.util.Collections; +import org.apache.iceberg.hive.HiveMetastoreExtension; +import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class ITHadoopToHiveCLIMigrationTest extends AbstractCLIMigrationTest { + + @RegisterExtension + public static final HiveMetastoreExtension HIVE_METASTORE_EXTENSION = + HiveMetastoreExtension.builder().build(); + + @BeforeAll + protected static void setup() throws Exception { + + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + initializeTargetCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HIVE_METASTORE_EXTENSION.hiveConf().get("hive.metastore.uris"))); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + } +} diff --git a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHadoopToNessieCLIMigrationTest.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHadoopToNessieCLIMigrationTest.java new file mode 100644 index 00000000..938df448 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHadoopToNessieCLIMigrationTest.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.IntStream; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +public class ITHadoopToNessieCLIMigrationTest extends AbstractCLIMigrationTest { + + @BeforeAll + protected static void setup() { + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); + } + + @Test + public void testRegisterLargeNumberOfTablesWithNestedNamespaces() throws Exception { + List namespaceList = + Arrays.asList(NS_A, NS_A_B, NS_A_B_C, NS_A_B_C_D, NS_A_B_C_D_E, NS_A_C); + + // additionally create 240 tables along with 4 tables created in beforeEach() + namespaceList.forEach( + namespace -> { + ((SupportsNamespaces) sourceCatalog).createNamespace(namespace); + IntStream.range(0, 40) + .forEach( + val -> + sourceCatalog.createTable( + TableIdentifier.of(namespace, "tblx" + val), schema)); + }); + + // register or migrate all the tables + RunCLI run = runCLI(false, defaultArgs()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains("Identified 244 tables for registration.") + .contains( + String.format( + "Summary: %nSuccessfully registered 244 tables from %s catalog to" + " %s catalog.", + sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully registered these tables:%n")) + // validate intermediate output + .contains("Attempted registration for 100 tables out of 244 tables.") + .contains("Attempted registration for 200 tables out of 244 tables.") + .contains("Attempted registration for 244 tables out of 244 tables."); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + targetCatalog.loadTable(BAR_TBL3).refresh(); + + Assertions.assertThat(targetCatalog.listTables(FOO)) + .containsExactlyInAnyOrder(FOO_TBL1, FOO_TBL2); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); + + Collections.reverse(namespaceList); + namespaceList.forEach( + namespace -> { + List identifiers = targetCatalog.listTables(namespace); + + // validate tables count in each namespace. 
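+          // (40 tables were created in each namespace at the start of this test)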
+ Assertions.assertThat(identifiers).hasSize(40); + + identifiers.forEach( + identifier -> { + targetCatalog.dropTable(identifier); + sourceCatalog.dropTable(identifier); + }); + ((SupportsNamespaces) sourceCatalog).dropNamespace(namespace); + ((SupportsNamespaces) targetCatalog).dropNamespace(namespace); + }); + } +} diff --git a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHiveToHadoopCLIMigrationTest.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHiveToHadoopCLIMigrationTest.java new file mode 100644 index 00000000..7a8a4197 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHiveToHadoopCLIMigrationTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import java.util.Collections; +import org.apache.iceberg.hive.HiveMetastoreExtension; +import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class ITHiveToHadoopCLIMigrationTest extends AbstractCLIMigrationTest { + + @RegisterExtension + public static final HiveMetastoreExtension HIVE_METASTORE_EXTENSION = + HiveMetastoreExtension.builder().build(); + + @BeforeAll + protected static void setup() throws Exception { + initializeSourceCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HIVE_METASTORE_EXTENSION.hiveConf().get("hive.metastore.uris"))); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.HADOOP, Collections.emptyMap()); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + } +} diff --git a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHiveToNessieCLIMigrationTest.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHiveToNessieCLIMigrationTest.java new file mode 100644 index 00000000..09876329 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITHiveToNessieCLIMigrationTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import java.util.Collections; +import java.util.stream.IntStream; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hive.HiveMetastoreExtension; +import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class ITHiveToNessieCLIMigrationTest extends AbstractCLIMigrationTest { + + @RegisterExtension + public static final HiveMetastoreExtension HIVE_METASTORE_EXTENSION = + HiveMetastoreExtension.builder().build(); + + @BeforeAll + protected static void setup() throws Exception { + initializeSourceCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HIVE_METASTORE_EXTENSION.hiveConf().get("hive.metastore.uris"))); + initializeTargetCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRegisterLargeNumberOfTables(boolean deleteSourceTables) throws Exception { + validateAssumptionForHadoopCatalogAsSource(deleteSourceTables); + + String operation = deleteSourceTables ? "migration" : "registration"; + String operated = deleteSourceTables ? 
"migrated" : "registered"; + + // additionally create 240 tables along with 4 tables created in beforeEach() + IntStream.range(0, 240) + .forEach(val -> sourceCatalog.createTable(TableIdentifier.of(FOO, "tblx" + val), schema)); + + // register or migrate all the tables + RunCLI run = runCLI(deleteSourceTables, defaultArgs()); + + Assertions.assertThat(run.getExitCode()).isEqualTo(0); + Assertions.assertThat(run.getOut()) + .contains(String.format("Identified 244 tables for %s.", operation)) + .contains( + String.format( + "Summary: %nSuccessfully %s 244 tables from %s catalog to" + " %s catalog.", + operated, sourceCatalogType, targetCatalogType)) + .contains(String.format("Details: %nSuccessfully %s these tables:%n", operated)) + // validate intermediate output + .contains(String.format("Attempted %s for 100 tables out of 244 tables.", operation)) + .contains(String.format("Attempted %s for 200 tables out of 244 tables.", operation)) + .contains(String.format("Attempted %s for 244 tables out of 244 tables.", operation)); + + // manually refreshing catalog due to missing refresh in Nessie catalog + // https://github.com/apache/iceberg/pull/6789 + targetCatalog.loadTable(BAR_TBL3).refresh(); + + Assertions.assertThat(targetCatalog.listTables(FOO)).hasSize(242); + Assertions.assertThat(targetCatalog.listTables(BAR)) + .containsExactlyInAnyOrder(BAR_TBL3, BAR_TBL4); + } +} diff --git a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITNessieToHiveCLIMigrationTest.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITNessieToHiveCLIMigrationTest.java new file mode 100644 index 00000000..d151aed4 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ITNessieToHiveCLIMigrationTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import java.util.Collections; +import org.apache.iceberg.hive.HiveMetastoreExtension; +import org.apache.polaris.iceberg.catalog.migrator.api.CatalogMigrationUtil; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class ITNessieToHiveCLIMigrationTest extends AbstractCLIMigrationTest { + + @RegisterExtension + public static final HiveMetastoreExtension HIVE_METASTORE_EXTENSION = + HiveMetastoreExtension.builder().build(); + + @BeforeAll + protected static void setup() throws Exception { + initializeSourceCatalog(CatalogMigrationUtil.CatalogType.NESSIE, Collections.emptyMap()); + initializeTargetCatalog( + CatalogMigrationUtil.CatalogType.HIVE, + Collections.singletonMap( + "uri", HIVE_METASTORE_EXTENSION.hiveConf().get("hive.metastore.uris"))); + } + + @AfterAll + protected static void tearDown() throws Exception { + dropNamespaces(); + } +} diff --git a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ProcessIdentifiersTest.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ProcessIdentifiersTest.java new file mode 100644 index 00000000..482cbfd1 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/ProcessIdentifiersTest.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import com.google.common.collect.Sets; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Set; +import org.apache.iceberg.catalog.TableIdentifier; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class ProcessIdentifiersTest { + + protected static @TempDir Path tempDir; + + @BeforeAll + protected static void initLogDir() { + System.setProperty("catalog.migration.log.dir", tempDir.toAbsolutePath().toString()); + } + + @Test + public void testIdentifiersSet() { + // test empty set + Assertions.assertThat(new IdentifierOptions().processIdentifiersInput()).isEmpty(); + + // test valid elements + IdentifierOptions identifierOptions = new IdentifierOptions(); + identifierOptions.identifiers = Sets.newHashSet("foo.abc", "bar.def"); + Assertions.assertThat(identifierOptions.processIdentifiersInput()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("foo.abc"), TableIdentifier.parse("bar.def")); + } + + @Test + public void testIdentifiersFromFile() throws Exception { + // valid file contents + Path identifierFile = tempDir.resolve("file_with_ids.txt"); + Files.write(identifierFile, Arrays.asList("db1.t1", "db2.t2", "db123.t5")); + IdentifierOptions options = new IdentifierOptions(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + Assertions.assertThat(options.processIdentifiersInput()) + .containsExactlyInAnyOrder( + TableIdentifier.parse("db1.t1"), + TableIdentifier.parse("db2.t2"), + TableIdentifier.parse("db123.t5")); + + // empty file + identifierFile = tempDir.resolve("ids1.txt"); + Files.createFile(identifierFile); + options = new IdentifierOptions(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + Assertions.assertThat(options.processIdentifiersInput()).isEmpty(); + + // file with some blanks contents + identifierFile = tempDir.resolve("ids2.txt"); + String[] lines = {"abc. def", " abc 123 ", "", "", " xyz%n123"}; + Files.writeString(identifierFile, String.join(System.lineSeparator(), lines)); + options = new IdentifierOptions(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + Set identifiers = options.processIdentifiersInput(); + Assertions.assertThat(identifiers) + .containsExactlyInAnyOrder( + TableIdentifier.parse("abc. 
def"), + TableIdentifier.parse("abc 123"), + TableIdentifier.parse("xyz%n123")); + + // with duplicate entries + identifierFile = tempDir.resolve("ids3.txt"); + String[] ids = {"abc.def", "xx.yy", "abc.def", "abc.def", "abc.def ", " xx.yy"}; + Files.writeString(identifierFile, String.join(System.lineSeparator(), ids)); + options = new IdentifierOptions(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + identifiers = options.processIdentifiersInput(); + Assertions.assertThat(identifiers) + .containsExactlyInAnyOrder( + TableIdentifier.parse("abc.def"), TableIdentifier.parse("xx.yy")); + } + + @Test + public void testIdentifiersFromFileInvalidInputs() throws Exception { + // file without permission to read + Path identifierFile = tempDir.resolve("non_readable_file.txt"); + Files.createFile(identifierFile); + Assertions.assertThat(identifierFile.toFile().setReadable(false)).isTrue(); + IdentifierOptions options = new IdentifierOptions(); + options.identifiersFromFile = identifierFile.toAbsolutePath().toString(); + Assertions.assertThatThrownBy(options::processIdentifiersInput) + .isInstanceOf(UncheckedIOException.class) + .hasMessageContaining("Failed to read the file: " + identifierFile); + Assertions.assertThat(identifierFile.toFile().setReadable(true)).isTrue(); + + // file doesn't exist + options.identifiersFromFile = "path/to/file"; + Assertions.assertThatThrownBy(options::processIdentifiersInput) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("File specified in `--identifiers-from-file` option does not exist"); + } + + @Test + public void testIdentifiersRegEx() { + // test valid regex + IdentifierOptions options = new IdentifierOptions(); + options.identifiersRegEx = "^foo\\..*"; + Assertions.assertThat(options.processIdentifiersInput()).isEmpty(); + + // test invalid regex + options.identifiersRegEx = "(23erf423!"; + Assertions.assertThatThrownBy(options::processIdentifiersInput) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("--identifiers-regex pattern is not compilable"); + + options = new IdentifierOptions(); + options.identifiersRegEx = " "; + Assertions.assertThatThrownBy(options::processIdentifiersInput) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("--identifiers-regex should not be empty"); + } +} diff --git a/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/RunCLI.java b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/RunCLI.java new file mode 100644 index 00000000..0e17c605 --- /dev/null +++ b/iceberg-catalog-migrator/cli/src/test/java/org/apache/polaris/iceberg/catalog/migrator/cli/RunCLI.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.iceberg.catalog.migrator.cli; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.Arrays; +import java.util.List; +import nl.altindag.log.LogCaptor; +import picocli.CommandLine; + +/** Helper class for tests. */ +public final class RunCLI { + + private final String[] args; + private final int exitCode; + private final String out; + private final String err; + + public RunCLI(int exitCode, String out, String err, String[] args) { + this.args = args; + this.exitCode = exitCode; + this.out = out; + this.err = err; + } + + public static RunCLI run(List args) throws Exception { + return run(args.toArray(new String[0])); + } + + public static RunCLI run(String... args) throws Exception { + try (LogCaptor logCaptor = LogCaptor.forName("console-log"); + StringWriter err = new StringWriter(); + PrintWriter errWriter = new PrintWriter(err)) { + int exitCode = runMain(null, errWriter, args); + String out = String.join(System.lineSeparator(), logCaptor.getLogs()); + return new RunCLI(exitCode, out, err.toString(), args); + } + } + + public static RunCLI runWithPrintWriter(String... args) throws Exception { + try (StringWriter out = new StringWriter(); + PrintWriter outWriter = new PrintWriter(out); + StringWriter err = new StringWriter(); + PrintWriter errWriter = new PrintWriter(err)) { + int exitCode = runMain(outWriter, errWriter, args); + return new RunCLI(exitCode, out.toString(), err.toString(), args); + } + } + + private static int runMain(PrintWriter out, PrintWriter err, String... arguments) { + CommandLine commandLine = + new CommandLine(new CatalogMigrationCLI()) + .setExecutionExceptionHandler( + (ex, cmd, parseResult) -> { + if (enableStacktrace(arguments)) { + cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); + } else { + cmd.getErr().println("Error during CLI execution: " + ex.getMessage()); + } + return cmd.getExitCodeExceptionMapper() != null + ? cmd.getExitCodeExceptionMapper().getExitCode(ex) + : cmd.getCommandSpec().exitCodeOnExecutionException(); + }); + if (null != out) { + commandLine = commandLine.setOut(out); + } + if (null != err) { + commandLine = commandLine.setErr(err); + } + try { + return commandLine.execute(arguments); + } finally { + commandLine.getErr().flush(); + } + } + + public int getExitCode() { + return exitCode; + } + + public String getOut() { + return out; + } + + public String getErr() { + return err; + } + + private static boolean enableStacktrace(String... args) { + for (String arg : args) { + if (arg.equalsIgnoreCase("--stacktrace")) { + return true; + } + } + return false; + } + + @Override + public String toString() { + return String.format( + "org.apache.polaris.catalogs.migrator.cli" + + ".RunCLI{args=%s%nexitCode=%d%n%nstdout:%n%s%n%nstderr:%n%s", + Arrays.toString(args), exitCode, out, err); + } +} diff --git a/ide-name.txt b/ide-name.txt new file mode 100644 index 00000000..beeca0c1 --- /dev/null +++ b/ide-name.txt @@ -0,0 +1 @@ +polaris-tools diff --git a/polaris-synchronizer/README.md b/polaris-synchronizer/README.md new file mode 100644 index 00000000..81e54a37 --- /dev/null +++ b/polaris-synchronizer/README.md @@ -0,0 +1,162 @@ +# Objective + +To provide users of [Apache Polaris (Incubating)](https://github.com/apache/polaris) a tool to be able to easily and efficiently +migrate their entities from one Polaris instance to another. 
+
+Polaris is a catalog of catalogs, so it can become cumbersome to migrate each and every catalog contained
+within a Polaris instance one by one. Additionally, while migrating the Iceberg entities of each catalog is achievable using the
+existing generic [iceberg-catalog-migrator](../iceberg-catalog-migrator/README.md), that tool will not migrate
+Polaris-specific entities such as principal roles, catalog roles, and grants.
+
+## Use Cases
+* **Migration:** A user may have an active Polaris deployment that they want to migrate to a managed cloud offering like
+  [Snowflake Open Catalog](https://www.snowflake.com/en/product/features/open-catalog/).
+* **Preventing Vendor Lock-In:** A user may currently have a managed Polaris offering and want the freedom to switch providers or to host Polaris themselves.
+* **Mirroring/Disaster Recovery:** Modern data solutions require redundancy to ensure there is no single point of
+  failure. The tool can be scheduled via cron to run periodic incremental syncs.
+
+The tool currently supports migrating the following Polaris Management entities:
+* Principal roles
+* Catalogs
+* Catalog roles
+* Assignment of catalog roles to principal roles
+* Grants
+
+> :warning: Polaris principals and their assignments to principal roles are not supported for migration
+> by this tool. Migrating client credentials stored in Polaris is neither possible nor secure. Polaris
+> principals must be migrated manually between Polaris instances.
+
+The tool currently supports migrating the following Iceberg entities:
+* Namespaces
+* Tables
+
+# Building the Tool from Source
+
+**Prerequisite:** Java must be installed on your machine to use this CLI tool (Java 21 is the minimum and recommended version).
+
+```
+./gradlew build     # build and run tests
+./gradlew assemble  # build without running tests
+```
+
+By default, the built JAR is placed in `cli/build/libs/`.
+
+# Migrating between Polaris Instances
+
+### Step 1: Create a principal with read-only access to catalog internals on the source Polaris instance.
+
+**This step only has to be completed once.**
+
+Polaris is built with a separation between access and metadata management permissions. The `service_admin`
+may have permissions to create access-related entities like principal roles, catalog roles, and grants, but may not necessarily
+possess the ability to view the Iceberg content of catalogs, like namespaces and tables. We need to create a superuser principal
+that has access to all entities on the source Polaris instance in order to migrate them.
+
+To do this, we can use the `create-omnipotent-principal` command to create a principal, a principal role,
+and a catalog role per catalog with the appropriate grants to read all entities on the source Polaris instance.
+
+**Example:** Create a **read-only** principal on the source Polaris instance, replacing it if it already exists,
+using 10 concurrent catalog setup threads (`--concurrency 10`):
+```
+java -jar polaris-synchronizer-cli.jar create-omnipotent-principal \
+--polaris-client-id root \
+--polaris-client-secret <client-secret> \
+--polaris-base-url http://localhost:8181 \
+--polaris-oauth2-server-uri http://localhost:8181/api/catalog/v1/oauth/tokens \
+--polaris-scope PRINCIPAL_ROLE:ALL \
+--replace \
+--concurrency 10
+```
+
+Upon finishing execution, the tool will output the principal name and client credentials for this
+principal.
+**Make sure to note these down, as they will be necessary for the migration step.**
+
+**Example Output:**
+```
+======================================================
+Omnipotent Principal Credentials:
+name = omnipotent-principal-XXXXX
+clientId = ff7s8f9asbX10
+clientSecret = <client-secret>
+======================================================
+```
+
+Additionally, at the end of execution the command will output a list of catalogs for which catalog setup failed.
+**These catalogs may experience failures during migration.**
+
+**Example Output:**
+```
+Encountered issues creating catalog roles for the following catalogs: [catalog-1, catalog-2]
+```
+
+### Step 2: Create a principal with read-write access to catalog internals on the target Polaris instance.
+
+**This step only has to be completed once.**
+
+The same `create-omnipotent-principal` command can also be used to create a **read-write** principal on the target
+Polaris instance so that the tool can create entities on the target.
+
+To create a read-write principal, we simply specify the `--write-access` option.
+
+**Example:** Create a read-write principal on the target Polaris instance, replacing it if it already exists,
+using 10 concurrent catalog setup threads and granting the principal write access to catalog internals:
+```
+java -jar polaris-synchronizer-cli.jar create-omnipotent-principal \
+--polaris-client-id root \
+--polaris-client-secret <client-secret> \
+--polaris-base-url http://localhost:8181 \
+--polaris-oauth2-server-uri http://localhost:8181/api/catalog/v1/oauth/tokens \
+--polaris-scope PRINCIPAL_ROLE:ALL \
+--replace \
+--concurrency 10 \
+--write-access
+```
+
+As in the previous step, the tool will output the principal name and client credentials. Again, these need to be noted
+down for the subsequent steps.
+
+**Example Output:**
+```
+======================================================
+Omnipotent Principal Credentials:
+name = omnipotent-principal-YYYYY
+clientId = 0af20a3a0037a40d
+clientSecret = <client-secret>
+======================================================
+```
+
+> :warning: `service_admin` is not guaranteed to have access-management-level grants on every catalog. This is usually
+> delegated to the `catalog_admin` role, which is automatically granted to whichever principal role was used to create
+> the catalog. This means that while the tool can detect such a catalog when run with `service_admin` level access,
+> it cannot create an omnipotent principal for it. To remedy this, create a catalog role with `CATALOG_MANAGE_ACCESS`
+> grants for the catalog, and assign it to the principal used to run this tool (presumably, a principal with the `service_admin`
+> principal role). Re-running `create-omnipotent-principal` should then be able to create the relevant entities for that catalog.
+
+### Step 3: Running the Migration/Synchronization
+
+The synchronization requires minimal configuration, can be run idempotently, and will attempt to copy over only the
+diff between the source and target Polaris instances. This is done with the `sync-polaris` command.
+
+**Example:** Run the synchronization between a source Polaris instance using an access token and a target Polaris instance
+using client credentials.
+
+```
+java -jar polaris-synchronizer-cli.jar sync-polaris \
+--source-base-url http://localhost:8182 \
+--source-access-token \
+--target-base-url http://localhost:8181 \
+--target-client-id root \
+--target-client-secret \
+--target-oauth2-server-uri http://localhost:8181/api/catalog/v1/oauth/tokens \
+--target-scope PRINCIPAL_ROLE:ALL \
+--source-omni-principal-name omnipotent-principal-XXXXX \
+--source-omni-client-id ff7s8f9asbX10 \
+--source-omni-client-secret \
+--target-omni-principal-name omnipotent-principal-YYYYY \
+--target-omni-client-id 0af20a3a0037a40d \
+--target-omni-client-secret
+```
+
+> :warning: The tool will not migrate the `service_admin` and `catalog_admin` roles or the omnipotent principals from the source,
+> nor will it remove or modify them on the target. This is because the tool itself runs with the permission
+> levels of these principals and roles, and we do not want to modify the tool's own permissions at runtime.
diff --git a/polaris-synchronizer/api/build.gradle.kts b/polaris-synchronizer/api/build.gradle.kts
new file mode 100644
index 00000000..39691307
--- /dev/null
+++ b/polaris-synchronizer/api/build.gradle.kts
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +plugins { + `java-library` + `maven-publish` + signing + `build-conventions` + alias(libs.plugins.openapi.generator.gradle.plugin) +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(21)) // Set the compilation JDK to 21 + } +} + +dependencies { + implementation(libs.openapi.generator) + implementation(libs.jakarta.annotation) + implementation(libs.iceberg.spark.runtime) + implementation(libs.slf4j) + + implementation(libs.hadoop.common) { + exclude("org.apache.avro", "avro") + exclude("org.slf4j", "slf4j-log4j12") + exclude("javax.servlet", "servlet-api") + exclude("com.google.code.gson", "gson") + exclude("commons-beanutils") + } + + testImplementation(platform(libs.junit.bom)) + testImplementation("org.junit.jupiter:junit-jupiter-params") + testImplementation("org.junit.jupiter:junit-jupiter-api") + testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine") + testRuntimeOnly("org.junit.platform:junit-platform-launcher") +} + +tasks.register( + "generatePolarisManagementClient" +) { + inputSpec.set("$projectDir/src/main/resources/polaris-management-service.yml") + generatorName.set("java") + outputDir.set("${layout.buildDirectory.get()}/generated") + apiPackage.set("org.apache.polaris.management.client") + modelPackage.set("org.apache.polaris.core.admin.model") + removeOperationIdPrefix.set(true) + + globalProperties.set( + mapOf( + "apis" to "", + "models" to "", + "supportingFiles" to "", + "apiDocs" to "false", + "modelTests" to "false", + ) + ) + + additionalProperties.set( + mapOf( + "apiNamePrefix" to "PolarisManagement", + "apiNameSuffix" to "Api", + "metricsPrefix" to "polaris.management", + ) + ) + + configOptions.set( + mapOf( + "library" to "native", + "sourceFolder" to "src/main/java", + "useJakartaEe" to "true", + "useBeanValidation" to "false", + "openApiNullable" to "false", + "useRuntimeException" to "true", + "supportUrlQuery" to "false", + ) + ) + + importMappings.set( + mapOf( + "AbstractOpenApiSchema" to "org.apache.polaris.core.admin.model.AbstractOpenApiSchema", + "AddGrantRequest" to "org.apache.polaris.core.admin.model.AddGrantRequest", + "AwsStorageConfigInfo" to "org.apache.polaris.core.admin.model.AwsStorageConfigInfo", + "AzureStorageConfigInfo" to "org.apache.polaris.core.admin.model.AzureStorageConfigInfo", + "Catalog" to "org.apache.polaris.core.admin.model.Catalog", + "CatalogGrant" to "org.apache.polaris.core.admin.model.CatalogGrant", + "CatalogPrivilege" to "org.apache.polaris.core.admin.model.CatalogPrivilege", + "CatalogProperties" to "org.apache.polaris.core.admin.model.CatalogProperties", + "CatalogRole" to "org.apache.polaris.core.admin.model.CatalogRole", + "CatalogRoles" to "org.apache.polaris.core.admin.model.CatalogRoles", + "Catalogs" to "org.apache.polaris.core.admin.model.Catalogs", + "CreateCatalogRequest" to "org.apache.polaris.core.admin.model.CreateCatalogRequest", + "CreateCatalogRoleRequest" to "org.apache.polaris.core.admin.model.CreateCatalogRoleRequest", + "CreatePrincipalRequest" to "org.apache.polaris.core.admin.model.CreatePrincipalRequest", + "CreatePrincipalRoleRequest" to + "org.apache.polaris.core.admin.model.CreatePrincipalRoleRequest", + "ExternalCatalog" to "org.apache.polaris.core.admin.model.ExternalCatalog", + "FileStorageConfigInfo" to "org.apache.polaris.core.admin.model.FileStorageConfigInfo", + "GcpStorageConfigInfo" to "org.apache.polaris.core.admin.model.GcpStorageConfigInfo", + "GrantCatalogRoleRequest" to "org.apache.polaris.core.admin.model.GrantCatalogRoleRequest", + 
"GrantPrincipalRoleRequest" to + "org.apache.polaris.core.admin.model.GrantPrincipalRoleRequest", + "GrantResource" to "org.apache.polaris.core.admin.model.GrantResource", + "GrantResources" to "org.apache.polaris.core.admin.model.GrantResources", + "NamespaceGrant" to "org.apache.polaris.core.admin.model.NamespaceGrant", + "NamespacePrivilege" to "org.apache.polaris.core.admin.model.NamespacePrivilege", + "PolarisCatalog" to "org.apache.polaris.core.admin.model.PolarisCatalog", + "Principal" to "org.apache.polaris.core.admin.model.Principal", + "PrincipalRole" to "org.apache.polaris.core.admin.model.PrincipalRole", + "PrincipalRoles" to "org.apache.polaris.core.admin.model.PrincipalRoles", + "PrincipalWithCredentials" to "org.apache.polaris.core.admin.model.PrincipalWithCredentials", + "PrincipalWithCredentialsCredentials" to + "org.apache.polaris.core.admin.model.PrincipalWithCredentialsCredentials", + "Principals" to "org.apache.polaris.core.admin.model.Principals", + "RevokeGrantRequest" to "org.apache.polaris.core.admin.model.RevokeGrantRequest", + "StorageConfigInfo" to "org.apache.polaris.core.admin.model.StorageConfigInfo", + "TableGrant" to "org.apache.polaris.core.admin.model.TableGrant", + "TablePrivilege" to "org.apache.polaris.core.admin.model.TablePrivilege", + "UpdateCatalogRequest" to "org.apache.polaris.core.admin.model.UpdateCatalogRequest", + "UpdateCatalogRoleRequest" to "org.apache.polaris.core.admin.model.UpdateCatalogRoleRequest", + "UpdatePrincipalRequest" to "org.apache.polaris.core.admin.model.UpdatePrincipalRequest", + "UpdatePrincipalRoleRequest" to + "org.apache.polaris.core.admin.model.UpdatePrincipalRoleRequest", + "ViewGrant" to "org.apache.polaris.core.admin.model.ViewGrant", + "ViewPrivilege" to "org.apache.polaris.core.admin.model.ViewPrivilege", + ) + ) +} + +tasks.named("compileJava") { dependsOn("generatePolarisManagementClient") } + +sourceSets.main { java.srcDir("${layout.buildDirectory.get()}/generated/src/main/java") } diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisService.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisService.java new file mode 100644 index 00000000..18ff23c3 --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisService.java @@ -0,0 +1,371 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.tools.sync.polaris; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.http.HttpStatus; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.core.admin.model.AddGrantRequest; +import org.apache.polaris.core.admin.model.Catalog; +import org.apache.polaris.core.admin.model.CatalogRole; +import org.apache.polaris.core.admin.model.CreateCatalogRequest; +import org.apache.polaris.core.admin.model.CreateCatalogRoleRequest; +import org.apache.polaris.core.admin.model.CreatePrincipalRequest; +import org.apache.polaris.core.admin.model.CreatePrincipalRoleRequest; +import org.apache.polaris.core.admin.model.GrantCatalogRoleRequest; +import org.apache.polaris.core.admin.model.GrantPrincipalRoleRequest; +import org.apache.polaris.core.admin.model.GrantResource; +import org.apache.polaris.core.admin.model.Principal; +import org.apache.polaris.core.admin.model.PrincipalRole; +import org.apache.polaris.core.admin.model.PrincipalWithCredentials; +import org.apache.polaris.core.admin.model.RevokeGrantRequest; +import org.apache.polaris.management.ApiException; +import org.apache.polaris.management.client.PolarisManagementDefaultApi; +import org.apache.polaris.tools.sync.polaris.catalog.PolarisCatalog; + +/** + * Service class that wraps Polaris HTTP client and performs recursive operations like drops on + * overwrites. + */ +public class PolarisService { + + private final PolarisManagementDefaultApi api; + + private final Map catalogProperties; + + public PolarisService(PolarisManagementDefaultApi api, Map catalogProperties) { + this.api = api; + this.catalogProperties = catalogProperties; + } + + public List listPrincipals() { + return this.api.listPrincipals().getPrincipals(); + } + + public Principal getPrincipal(String principalName) { + return this.api.getPrincipal(principalName); + } + + public boolean principalExists(String principalName) { + try { + getPrincipal(principalName); + return true; + } catch (ApiException apiException) { + if (apiException.getCode() == HttpStatus.SC_NOT_FOUND) { + return false; + } + throw apiException; + } + } + + public PrincipalWithCredentials createPrincipal(Principal principal, boolean overwrite) { + if (overwrite) { + removePrincipal(principal.getName()); + } + + CreatePrincipalRequest request = new CreatePrincipalRequest().principal(principal); + return this.api.createPrincipal(request); + } + + public void removePrincipal(String principalName) { + this.api.deletePrincipal(principalName); + } + + public void assignPrincipalRole(String principalName, String principalRoleName) { + GrantPrincipalRoleRequest request = + new GrantPrincipalRoleRequest().principalRole(new PrincipalRole().name(principalRoleName)); + this.api.assignPrincipalRole(principalName, request); + } + + public void createPrincipalRole(PrincipalRole principalRole, boolean overwrite) { + if (overwrite) { + removePrincipalRole(principalRole.getName()); + } + CreatePrincipalRoleRequest request = + new CreatePrincipalRoleRequest().principalRole(principalRole); + this.api.createPrincipalRole(request); + } + + public List listPrincipalRolesAssignedForPrincipal(String principalName) { + return this.api.listPrincipalRolesAssigned(principalName).getRoles(); + } + + public List listPrincipalRoles() { + return 
this.api.listPrincipalRoles().getRoles(); + } + + public List listAssigneePrincipalRolesForCatalogRole( + String catalogName, String catalogRoleName) { + return this.api + .listAssigneePrincipalRolesForCatalogRole(catalogName, catalogRoleName) + .getRoles(); + } + + public void assignCatalogRoleToPrincipalRole( + String principalRoleName, String catalogName, String catalogRoleName) { + GrantCatalogRoleRequest request = + new GrantCatalogRoleRequest().catalogRole(new CatalogRole().name(catalogRoleName)); + this.api.assignCatalogRoleToPrincipalRole(principalRoleName, catalogName, request); + } + + public void removeCatalogRoleFromPrincipalRole( + String principalRoleName, String catalogName, String catalogRoleName) { + this.api.revokeCatalogRoleFromPrincipalRole(principalRoleName, catalogName, catalogRoleName); + } + + public PrincipalRole getPrincipalRole(String principalRoleName) { + return this.api.getPrincipalRole(principalRoleName); + } + + public boolean principalRoleExists(String principalRoleName) { + try { + getPrincipalRole(principalRoleName); + return true; + } catch (ApiException apiException) { + if (apiException.getCode() == HttpStatus.SC_NOT_FOUND) { + return false; + } + throw apiException; + } + } + + public void removePrincipalRole(String principalRoleName) { + this.api.deletePrincipalRole(principalRoleName); + } + + public List listCatalogs() { + return this.api.listCatalogs().getCatalogs(); + } + + public void createCatalog(Catalog catalog) { + CreateCatalogRequest request = new CreateCatalogRequest().catalog(catalog); + this.api.createCatalog(request); + } + + /** + * Performs a cascading drop on the catalog before recreating. + * + * @param catalog + * @param omnipotentPrincipal necessary to initialize an Iceberg catalog to drop catalog internals + */ + public void overwriteCatalog(Catalog catalog, PrincipalWithCredentials omnipotentPrincipal) { + removeCatalogCascade(catalog.getName(), omnipotentPrincipal); + createCatalog(catalog); + } + + /** + * Recursively discover all namespaces contained within an Iceberg catalog. + * + * @param catalog + * @return a list of all the namespaces in the catalog + */ + private List discoverAllNamespaces(org.apache.iceberg.catalog.Catalog catalog) { + List namespaces = new ArrayList<>(); + namespaces.add(Namespace.empty()); + + if (catalog instanceof SupportsNamespaces namespaceCatalog) { + namespaces.addAll(discoverContainedNamespaces(namespaceCatalog, Namespace.empty())); + } + + return namespaces; + } + + /** + * Discover all child namespaces of a given namespace. + * + * @param namespaceCatalog a catalog that supports nested namespaces + * @param namespace the namespace to look under + * @return a list of all child namespaces + */ + private List discoverContainedNamespaces( + SupportsNamespaces namespaceCatalog, Namespace namespace) { + List immediateChildren = namespaceCatalog.listNamespaces(namespace); + + List namespaces = new ArrayList<>(); + + for (Namespace ns : immediateChildren) { + namespaces.add(ns); + + // discover children of child namespace + namespaces.addAll(discoverContainedNamespaces(namespaceCatalog, ns)); + } + + return namespaces; + } + + /** + * Perform a cascading drop of a catalog. Removes all namespaces, tables, catalog-roles first. 
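+   * Tables are dropped first, then namespaces (children before parents, since non-empty
+   * namespaces cannot be dropped), then catalog-roles, and finally the catalog itself.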
+ * + * @param catalogName + * @param omnipotentPrincipal + */ + public void removeCatalogCascade( + String catalogName, PrincipalWithCredentials omnipotentPrincipal) { + org.apache.iceberg.catalog.Catalog icebergCatalog = + initializeCatalog(catalogName, omnipotentPrincipal); + + // find all namespaces in the catalog + List namespaces = discoverAllNamespaces(icebergCatalog); + + List tables = new ArrayList<>(); + + // find all tables in the catalog + for (Namespace ns : namespaces) { + if (!ns.isEmpty()) { + tables.addAll(icebergCatalog.listTables(ns)); + } + } + + // drop every table in the catalog + for (TableIdentifier table : tables) { + icebergCatalog.dropTable(table); + } + + // drop every namespace in the catalog, note that because we discovered the namespaces + // parent-first, we should reverse over the namespaces to ensure that we drop child namespaces + // before we drop parent namespaces, as we cannot drop nonempty namespaces + for (Namespace ns : namespaces.reversed()) { + // NOTE: this is checking if the namespace is not the empty namespace, not if it is empty + // in the sense of containing no tables/namespaces + if (!ns.isEmpty() && icebergCatalog instanceof SupportsNamespaces namespaceCatalog) { + namespaceCatalog.dropNamespace(ns); + } + } + + List catalogRoles = listCatalogRoles(catalogName); + + // remove catalog roles under catalog + for (CatalogRole catalogRole : catalogRoles) { + if (catalogRole.getName().equals("catalog_admin")) continue; + + removeCatalogRole(catalogName, catalogRole.getName()); + } + + this.api.deleteCatalog(catalogName); + } + + public List listCatalogRoles(String catalogName) { + return this.api.listCatalogRoles(catalogName).getRoles(); + } + + public CatalogRole getCatalogRole(String catalogName, String catalogRoleName) { + return this.api.getCatalogRole(catalogName, catalogRoleName); + } + + public boolean catalogRoleExists(String catalogName, String catalogRoleName) { + try { + getCatalogRole(catalogName, catalogRoleName); + return true; + } catch (ApiException apiException) { + if (apiException.getCode() == HttpStatus.SC_NOT_FOUND) { + return false; + } + throw apiException; + } + } + + public void assignCatalogRole( + String principalRoleName, String catalogName, String catalogRoleName) { + GrantCatalogRoleRequest request = + new GrantCatalogRoleRequest().catalogRole(new CatalogRole().name(catalogRoleName)); + this.api.assignCatalogRoleToPrincipalRole(principalRoleName, catalogName, request); + } + + public void createCatalogRole(String catalogName, CatalogRole catalogRole, boolean overwrite) { + if (overwrite) { + removeCatalogRole(catalogName, catalogRole.getName()); + } + + CreateCatalogRoleRequest request = new CreateCatalogRoleRequest().catalogRole(catalogRole); + this.api.createCatalogRole(catalogName, request); + } + + public void removeCatalogRole(String catalogName, String catalogRoleName) { + this.api.deleteCatalogRole(catalogName, catalogRoleName); + } + + public List listGrants(String catalogName, String catalogRoleName) { + return this.api.listGrantsForCatalogRole(catalogName, catalogRoleName).getGrants(); + } + + public void addGrant(String catalogName, String catalogRoleName, GrantResource grant) { + AddGrantRequest addGrantRequest = new AddGrantRequest().grant(grant); + this.api.addGrantToCatalogRole(catalogName, catalogRoleName, addGrantRequest); + } + + public void revokeGrant(String catalogName, String catalogRoleName, GrantResource grant) { + RevokeGrantRequest revokeGrantRequest = new RevokeGrantRequest().grant(grant); 
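+    // the third argument is the API's cascade option; false revokes only this exact grant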
+ this.api.revokeGrantFromCatalogRole(catalogName, catalogRoleName, false, revokeGrantRequest); + } + + public org.apache.iceberg.catalog.Catalog initializeCatalog( + String catalogName, PrincipalWithCredentials migratorPrincipal) { + Map currentCatalogProperties = new HashMap<>(catalogProperties); + currentCatalogProperties.put("warehouse", catalogName); + + String clientId = migratorPrincipal.getCredentials().getClientId(); + String clientSecret = migratorPrincipal.getCredentials().getClientSecret(); + currentCatalogProperties.putIfAbsent( + "credential", String.format("%s:%s", clientId, clientSecret)); + currentCatalogProperties.putIfAbsent("scope", "PRINCIPAL_ROLE:ALL"); + + return CatalogUtil.loadCatalog( + PolarisCatalog.class.getName(), "SOURCE_CATALOG_REST", currentCatalogProperties, null); + } + + /** + * Perform cascading drop of a namespace. + * + * @param icebergCatalog the iceberg catalog to use + * @param namespace the namespace to drop + */ + public void dropNamespaceCascade( + org.apache.iceberg.catalog.Catalog icebergCatalog, Namespace namespace) { + if (icebergCatalog instanceof SupportsNamespaces namespaceCatalog) { + List namespaces = discoverContainedNamespaces(namespaceCatalog, namespace); + + List tables = new ArrayList<>(); + + for (Namespace ns : namespaces) { + tables.addAll(icebergCatalog.listTables(ns)); + } + + tables.addAll(icebergCatalog.listTables(namespace)); + + for (TableIdentifier table : tables) { + icebergCatalog.dropTable(table); + } + + // go over in reverse order of namespaces since we discover namespaces + // in the parent -> child order, so we need to drop all children + // before we can drop the parent + for (Namespace ns : namespaces.reversed()) { + namespaceCatalog.dropNamespace(ns); + } + + namespaceCatalog.dropNamespace(namespace); + } + } +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisServiceFactory.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisServiceFactory.java new file mode 100644 index 00000000..2dacd37a --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisServiceFactory.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import org.apache.http.HttpHeaders; +import org.apache.polaris.management.ApiClient; +import org.apache.polaris.management.client.PolarisManagementDefaultApi; +import org.apache.polaris.tools.sync.polaris.http.OAuth2Util; + +/** Used to initialize a {@link PolarisService}. 
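+ * Supports creating the service either from OAuth2 client credentials or from a pre-fetched
+ * access token. A minimal usage sketch (values are illustrative, mirroring the README examples):
+ *
+ * <pre>{@code
+ * PolarisService source =
+ *     PolarisServiceFactory.newPolarisService(
+ *         "http://localhost:8181",                             // baseUrl
+ *         "http://localhost:8181/api/catalog/v1/oauth/tokens", // oauth2ServerUri
+ *         "root",                                              // clientId
+ *         "<client-secret>",                                   // placeholder secret
+ *         "PRINCIPAL_ROLE:ALL");                               // scope
+ * }</pre>
+ *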
*/ +public class PolarisServiceFactory { + + private static void validatePolarisInstanceProperties( + String baseUrl, + String accessToken, + String oauth2ServerUri, + String clientId, + String clientSecret, + String scope) { + if (baseUrl == null) { + throw new IllegalArgumentException("baseUrl is required but was not provided"); + } + + if (accessToken != null) { + return; + } + + final String oauthErrorMessage = + "Either the accessToken property must be provided, or all of oauth2ServerUri, clientId, clientSecret, scope"; + + if (oauth2ServerUri == null || clientId == null || clientSecret == null || scope == null) { + throw new IllegalArgumentException(oauthErrorMessage); + } + } + + public static PolarisService newPolarisService( + String baseUrl, String oauth2ServerUri, String clientId, String clientSecret, String scope) + throws IOException { + validatePolarisInstanceProperties( + baseUrl, null, oauth2ServerUri, clientId, clientSecret, scope); + + String accessToken = OAuth2Util.fetchToken(oauth2ServerUri, clientId, clientSecret, scope); + + return newPolarisService(baseUrl, accessToken); + } + + public static PolarisService newPolarisService(String baseUrl, String accessToken) { + validatePolarisInstanceProperties(baseUrl, accessToken, null, null, null, null); + + ApiClient client = new ApiClient(); + client.updateBaseUri(baseUrl + "/api/management/v1"); + + // TODO: Add token refresh + client.setRequestInterceptor( + requestBuilder -> { + requestBuilder.header(HttpHeaders.AUTHORIZATION, "Bearer " + accessToken); + }); + + Map catalogProperties = new HashMap<>(); + catalogProperties.putIfAbsent("uri", baseUrl + "/api/catalog"); + + PolarisManagementDefaultApi polarisClient = new PolarisManagementDefaultApi(client); + return new PolarisService(polarisClient, catalogProperties); + } +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisSynchronizer.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisSynchronizer.java new file mode 100644 index 00000000..90b4174c --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisSynchronizer.java @@ -0,0 +1,1135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.tools.sync.polaris; + +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.BaseTable; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.core.admin.model.Catalog; +import org.apache.polaris.core.admin.model.CatalogRole; +import org.apache.polaris.core.admin.model.GrantResource; +import org.apache.polaris.core.admin.model.PrincipalRole; +import org.apache.polaris.core.admin.model.PrincipalWithCredentials; +import org.apache.polaris.tools.sync.polaris.access.AccessControlService; +import org.apache.polaris.tools.sync.polaris.catalog.BaseTableWithETag; +import org.apache.polaris.tools.sync.polaris.catalog.ETagService; +import org.apache.polaris.tools.sync.polaris.catalog.NotModifiedException; +import org.apache.polaris.tools.sync.polaris.catalog.PolarisCatalog; +import org.apache.polaris.tools.sync.polaris.planning.SynchronizationPlanner; +import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Encapsulates idempotent and failure-safe logic to perform Polaris entity syncs. Performs logging + * with configurability and all actions related to the generated sync plans. + */ +public class PolarisSynchronizer { + + private final Logger clientLogger; + + private final SynchronizationPlanner syncPlanner; + + private final PolarisService source; + + private final PolarisService target; + + private final PrincipalWithCredentials sourceOmnipotentPrincipal; + + private final PrincipalWithCredentials targetOmnipotentPrincipal; + + private final PrincipalRole sourceOmnipotentPrincipalRole; + + private final PrincipalRole targetOmnipotentPrincipalRole; + + private final AccessControlService sourceAccessControlService; + + private final AccessControlService targetAccessControlService; + + private final ETagService etagService; + + public PolarisSynchronizer( + Logger clientLogger, + SynchronizationPlanner synchronizationPlanner, + PrincipalWithCredentials sourceOmnipotentPrincipal, + PrincipalWithCredentials targetOmnipotentPrincipal, + PolarisService source, + PolarisService target, + ETagService etagService) { + this.clientLogger = + clientLogger == null ? LoggerFactory.getLogger(PolarisSynchronizer.class) : clientLogger; + this.syncPlanner = synchronizationPlanner; + this.sourceOmnipotentPrincipal = sourceOmnipotentPrincipal; + this.targetOmnipotentPrincipal = targetOmnipotentPrincipal; + this.source = source; + this.target = target; + this.sourceAccessControlService = new AccessControlService(source); + this.targetAccessControlService = new AccessControlService(target); + + this.sourceOmnipotentPrincipalRole = + sourceAccessControlService.getOmnipotentPrincipalRoleForPrincipal( + sourceOmnipotentPrincipal.getPrincipal().getName()); + this.targetOmnipotentPrincipalRole = + targetAccessControlService.getOmnipotentPrincipalRoleForPrincipal( + targetOmnipotentPrincipal.getPrincipal().getName()); + this.etagService = etagService; + } + + /** + * Calculates the total number of sync tasks to complete. 
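+   * The total counts planned creations, overwrites, and removals; skipped and unmodified
+   * entities are excluded.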
+   *
+   * @param plan the plan to scan for changes
+   * @return the number of syncs to perform
+   */
+  private int totalSyncsToComplete(SynchronizationPlan<?> plan) {
+    return plan.entitiesToCreate().size()
+        + plan.entitiesToOverwrite().size()
+        + plan.entitiesToRemove().size();
+  }
+
+  /** Sync principal roles from source to target. */
+  public void syncPrincipalRoles() {
+    List<PrincipalRole> principalRolesSource;
+
+    try {
+      principalRolesSource = source.listPrincipalRoles();
+      clientLogger.info("Listed {} principal-roles from source.", principalRolesSource.size());
+    } catch (Exception e) {
+      clientLogger.error("Failed to list principal-roles from source.", e);
+      return;
+    }
+
+    List<PrincipalRole> principalRolesTarget;
+
+    try {
+      principalRolesTarget = target.listPrincipalRoles();
+      clientLogger.info("Listed {} principal-roles from target.", principalRolesTarget.size());
+    } catch (Exception e) {
+      clientLogger.error("Failed to list principal-roles from target.", e);
+      return;
+    }
+
+    SynchronizationPlan<PrincipalRole> principalRoleSyncPlan =
+        syncPlanner.planPrincipalRoleSync(principalRolesSource, principalRolesTarget);
+
+    principalRoleSyncPlan
+        .entitiesToSkip()
+        .forEach(
+            principalRole ->
+                clientLogger.info("Skipping principal-role {}.", principalRole.getName()));
+
+    principalRoleSyncPlan
+        .entitiesNotModified()
+        .forEach(
+            principalRole ->
+                clientLogger.info(
+                    "No change detected for principal-role {}, skipping.",
+                    principalRole.getName()));
+
+    int syncsCompleted = 0;
+    final int totalSyncsToComplete = totalSyncsToComplete(principalRoleSyncPlan);
+
+    for (PrincipalRole principalRole : principalRoleSyncPlan.entitiesToCreate()) {
+      try {
+        target.createPrincipalRole(principalRole, false);
+        clientLogger.info(
+            "Created principal-role {} on target. - {}/{}",
+            principalRole.getName(),
+            ++syncsCompleted,
+            totalSyncsToComplete);
+      } catch (Exception e) {
+        clientLogger.error(
+            "Failed to create principal-role {} on target. - {}/{}",
+            principalRole.getName(),
+            ++syncsCompleted,
+            totalSyncsToComplete,
+            e);
+      }
+    }
+
+    for (PrincipalRole principalRole : principalRoleSyncPlan.entitiesToOverwrite()) {
+      try {
+        target.createPrincipalRole(principalRole, true);
+        clientLogger.info(
+            "Overwrote principal-role {} on target. - {}/{}",
+            principalRole.getName(),
+            ++syncsCompleted,
+            totalSyncsToComplete);
+      } catch (Exception e) {
+        clientLogger.error(
+            "Failed to overwrite principal-role {} on target. - {}/{}",
+            principalRole.getName(),
+            ++syncsCompleted,
+            totalSyncsToComplete,
+            e);
+      }
+    }
+
+    for (PrincipalRole principalRole : principalRoleSyncPlan.entitiesToRemove()) {
+      try {
+        target.removePrincipalRole(principalRole.getName());
+        clientLogger.info(
+            "Removed principal-role {} on target. - {}/{}",
+            principalRole.getName(),
+            ++syncsCompleted,
+            totalSyncsToComplete);
+      } catch (Exception e) {
+        clientLogger.error(
+            "Failed to remove principal-role {} on target. - {}/{}",
+            principalRole.getName(),
+            ++syncsCompleted,
+            totalSyncsToComplete,
+            e);
+      }
+    }
+  }
+
+  /**
+   * Sync assignments of principal roles to a catalog role.
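+   * Assignments present only on the source are created on the target, and assignments present
+   * only on the target are revoked.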
+ * + * @param catalogName the catalog that the catalog role is in + * @param catalogRoleName the name of the catalog role + */ + public void syncAssigneePrincipalRolesForCatalogRole(String catalogName, String catalogRoleName) { + List principalRolesSource; + + try { + principalRolesSource = + source.listAssigneePrincipalRolesForCatalogRole(catalogName, catalogRoleName); + clientLogger.info( + "Listed {} assignee principal-roles for catalog-role {} in catalog {} from source.", + principalRolesSource.size(), + catalogRoleName, + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list assignee principal-roles for catalog-role {} in catalog {} from source.", + catalogRoleName, + catalogName, + e); + return; + } + + List principalRolesTarget; + + try { + principalRolesTarget = + target.listAssigneePrincipalRolesForCatalogRole(catalogName, catalogRoleName); + clientLogger.info( + "Listed {} assignee principal-roles for catalog-role {} in catalog {} from target.", + principalRolesTarget.size(), + catalogRoleName, + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list assignee principal-roles for catalog-role {} in catalog {} from target.", + catalogRoleName, + catalogName, + e); + return; + } + + SynchronizationPlan assignedPrincipalRoleSyncPlan = + syncPlanner.planAssignPrincipalRolesToCatalogRolesSync( + catalogName, catalogRoleName, principalRolesSource, principalRolesTarget); + + assignedPrincipalRoleSyncPlan + .entitiesToSkip() + .forEach( + principalRole -> + clientLogger.info( + "Skipping assignment of principal-role {} to catalog-role {} in catalog {}.", + principalRole.getName(), + catalogRoleName, + catalogName)); + + assignedPrincipalRoleSyncPlan + .entitiesNotModified() + .forEach( + principalRole -> + clientLogger.info( + "Principal-role {} is already assigned to catalog-role {} in catalog {}. Skipping.", + principalRole.getName(), + catalogRoleName, + catalogName)); + + int syncsCompleted = 0; + int totalSyncsToComplete = totalSyncsToComplete(assignedPrincipalRoleSyncPlan); + + for (PrincipalRole principalRole : assignedPrincipalRoleSyncPlan.entitiesToCreate()) { + try { + target.assignCatalogRoleToPrincipalRole( + principalRole.getName(), catalogName, catalogRoleName); + clientLogger.info( + "Assigned principal-role {} to catalog-role {} in catalog {}. - {}/{}", + principalRole.getName(), + catalogRoleName, + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to assign principal-role {} to catalog-role {} in catalog {}. - {}/{}", + principalRole.getName(), + catalogRoleName, + catalogName, + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (PrincipalRole principalRole : assignedPrincipalRoleSyncPlan.entitiesToOverwrite()) { + try { + target.assignCatalogRoleToPrincipalRole( + principalRole.getName(), catalogName, catalogRoleName); + clientLogger.info( + "Assigned principal-role {} to catalog-role {} in catalog {}. - {}/{}", + principalRole.getName(), + catalogRoleName, + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to assign principal-role {} to catalog-role {} in catalog {}. 
- {}/{}", + principalRole.getName(), + catalogRoleName, + catalogName, + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (PrincipalRole principalRole : assignedPrincipalRoleSyncPlan.entitiesToRemove()) { + try { + target.removeCatalogRoleFromPrincipalRole( + principalRole.getName(), catalogName, catalogRoleName); + clientLogger.info( + "Revoked principal-role {} from catalog-role {} in catalog {}. - {}/{}", + principalRole.getName(), + catalogRoleName, + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to revoke principal-role {} from catalog-role {} in catalog {}. - {}/{}", + principalRole.getName(), + catalogRoleName, + catalogName, + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + } + + /** Sync catalogs across the source and target polaris instance. */ + public void syncCatalogs() { + List catalogsSource; + + try { + catalogsSource = source.listCatalogs(); + clientLogger.info("Listed {} catalogs from source.", catalogsSource.size()); + } catch (Exception e) { + clientLogger.error("Failed to list catalogs from source.", e); + return; + } + + List catalogsTarget; + + try { + catalogsTarget = target.listCatalogs(); + clientLogger.info("Listed {} catalogs from target.", catalogsTarget.size()); + } catch (Exception e) { + clientLogger.error("Failed to list catalogs from target.", e); + return; + } + + SynchronizationPlan catalogSyncPlan = + syncPlanner.planCatalogSync(catalogsSource, catalogsTarget); + + catalogSyncPlan + .entitiesToSkip() + .forEach(catalog -> clientLogger.info("Skipping catalog {}.", catalog.getName())); + + catalogSyncPlan + .entitiesToSkipAndSkipChildren() + .forEach( + catalog -> + clientLogger.info( + "Skipping catalog {} and all child entities.", catalog.getName())); + + catalogSyncPlan + .entitiesNotModified() + .forEach( + catalog -> + clientLogger.info( + "No change detected in catalog {}. Skipping.", catalog.getName())); + + int syncsCompleted = 0; + int totalSyncsToComplete = totalSyncsToComplete(catalogSyncPlan); + + for (Catalog catalog : catalogSyncPlan.entitiesToCreate()) { + try { + target.createCatalog(catalog); + clientLogger.info( + "Created catalog {}. - {}/{}", + catalog.getName(), + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to create catalog {}. - {}/{}", + catalog.getName(), + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (Catalog catalog : catalogSyncPlan.entitiesToOverwrite()) { + try { + setupOmnipotentCatalogRoleIfNotExistsTarget(catalog.getName()); + target.overwriteCatalog(catalog, targetOmnipotentPrincipal); + clientLogger.info( + "Overwrote catalog {}. - {}/{}", + catalog.getName(), + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to overwrite catalog {}. - {}/{}", + catalog.getName(), + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (Catalog catalog : catalogSyncPlan.entitiesToRemove()) { + try { + setupOmnipotentCatalogRoleIfNotExistsTarget(catalog.getName()); + target.removeCatalogCascade(catalog.getName(), targetOmnipotentPrincipal); + clientLogger.info( + "Removed catalog {}. - {}/{}", + catalog.getName(), + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to remove catalog {}. 
- {}/{}", + catalog.getName(), + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (Catalog catalog : catalogSyncPlan.entitiesToSyncChildren()) { + syncCatalogRoles(catalog.getName()); + + org.apache.iceberg.catalog.Catalog sourceIcebergCatalog; + + try { + sourceIcebergCatalog = initializeIcebergCatalogSource(catalog.getName()); + clientLogger.info( + "Initialized Iceberg REST catalog for Polaris catalog {} on source.", + catalog.getName()); + } catch (Exception e) { + clientLogger.error( + "Failed to initialize Iceberg REST catalog for Polaris catalog {} on source.", + catalog.getName(), + e); + continue; + } + + org.apache.iceberg.catalog.Catalog targetIcebergCatalog; + + try { + targetIcebergCatalog = initializeIcebergCatalogTarget(catalog.getName()); + clientLogger.info( + "Initialized Iceberg REST catalog for Polaris catalog {} on target.", + catalog.getName()); + } catch (Exception e) { + clientLogger.error( + "Failed to initialize Iceberg REST catalog for Polaris catalog {} on target.", + catalog.getName(), + e); + continue; + } + + syncNamespaces( + catalog.getName(), Namespace.empty(), sourceIcebergCatalog, targetIcebergCatalog); + } + } + + /** + * Sync catalog roles across the source and polaris instance for a catalog. + * + * @param catalogName the catalog to sync roles for + */ + public void syncCatalogRoles(String catalogName) { + List catalogRolesSource; + + try { + catalogRolesSource = source.listCatalogRoles(catalogName); + clientLogger.info( + "Listed {} catalog-roles for catalog {} from source.", + catalogRolesSource.size(), + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list catalog-roles for catalog {} from source.", catalogName, e); + return; + } + + List catalogRolesTarget; + + try { + catalogRolesTarget = target.listCatalogRoles(catalogName); + clientLogger.info( + "Listed {} catalog-roles for catalog {} from target.", + catalogRolesTarget.size(), + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list catalog-roles for catalog {} from target.", catalogName, e); + return; + } + + SynchronizationPlan catalogRoleSyncPlan = + syncPlanner.planCatalogRoleSync(catalogName, catalogRolesSource, catalogRolesTarget); + + catalogRoleSyncPlan + .entitiesToSkip() + .forEach( + catalogRole -> + clientLogger.info( + "Skipping catalog-role {} in catalog {}.", catalogRole.getName(), catalogName)); + + catalogRoleSyncPlan + .entitiesToSkipAndSkipChildren() + .forEach( + catalogRole -> + clientLogger.info( + "Skipping catalog-role {} in catalog {} and all child entities.", + catalogRole.getName(), + catalogName)); + + catalogRoleSyncPlan + .entitiesNotModified() + .forEach( + catalogRole -> + clientLogger.info( + "No change detected in catalog-role {} in catalog {}. Skipping.", + catalogRole.getName(), + catalogName)); + + int syncsCompleted = 0; + int totalSyncsToComplete = totalSyncsToComplete(catalogRoleSyncPlan); + + for (CatalogRole catalogRole : catalogRoleSyncPlan.entitiesToCreate()) { + try { + target.createCatalogRole(catalogName, catalogRole, false); + clientLogger.info( + "Created catalog-role {} for catalog {}. - {}/{}", + catalogRole.getName(), + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to create catalog-role {} for catalog {}. 
- {}/{}", + catalogRole.getName(), + catalogName, + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (CatalogRole catalogRole : catalogRoleSyncPlan.entitiesToOverwrite()) { + try { + target.createCatalogRole(catalogName, catalogRole, true); + clientLogger.info( + "Overwrote catalog-role {} for catalog {}. - {}/{}", + catalogRole.getName(), + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to overwrite catalog-role {} for catalog {}. - {}/{}", + catalogRole.getName(), + catalogName, + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (CatalogRole catalogRole : catalogRoleSyncPlan.entitiesToRemove()) { + try { + target.removeCatalogRole(catalogName, catalogRole.getName()); + clientLogger.info( + "Removed catalog-role {} for catalog {}. - {}/{}", + catalogRole.getName(), + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to remove catalog-role {} for catalog {}. - {}/{}", + catalogRole.getName(), + catalogName, + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (CatalogRole catalogRole : catalogRoleSyncPlan.entitiesToSyncChildren()) { + syncAssigneePrincipalRolesForCatalogRole(catalogName, catalogRole.getName()); + syncGrants(catalogName, catalogRole.getName()); + } + } + + /** + * Sync grants for a catalog role across the source and the target. + * + * @param catalogName + * @param catalogRoleName + */ + private void syncGrants(String catalogName, String catalogRoleName) { + List grantsSource; + + try { + grantsSource = source.listGrants(catalogName, catalogRoleName); + clientLogger.info( + "Listed {} grants for catalog-role {} in catalog {} from source.", + grantsSource.size(), + catalogRoleName, + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list grants for catalog-role {} in catalog {} from source.", + catalogRoleName, + catalogName, + e); + return; + } + + List grantsTarget; + + try { + grantsTarget = target.listGrants(catalogName, catalogRoleName); + clientLogger.info( + "Listed {} grants for catalog-role {} in catalog {} from target.", + grantsTarget.size(), + catalogRoleName, + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list grants for catalog-role {} in catalog {} from target.", + catalogRoleName, + catalogName, + e); + return; + } + + SynchronizationPlan grantSyncPlan = + syncPlanner.planGrantSync(catalogName, catalogRoleName, grantsSource, grantsTarget); + + grantSyncPlan + .entitiesToSkip() + .forEach( + grant -> + clientLogger.info( + "Skipping addition of grant {} to catalog-role {} in catalog {}.", + grant.getType(), + catalogRoleName, + catalogName)); + + grantSyncPlan + .entitiesNotModified() + .forEach( + grant -> + clientLogger.info( + "Grant {} was already added to catalog-role {} in catalog {}. Skipping.", + grant.getType(), + catalogRoleName, + catalogName)); + + int syncsCompleted = 0; + int totalSyncsToComplete = totalSyncsToComplete(grantSyncPlan); + + for (GrantResource grant : grantSyncPlan.entitiesToCreate()) { + try { + target.addGrant(catalogName, catalogRoleName, grant); + clientLogger.info( + "Added grant {} to catalog-role {} for catalog {}. - {}/{}", + grant.getType(), + catalogRoleName, + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to add grant {} to catalog-role {} for catalog {}. 
- {}/{}",
+            grant.getType(),
+            catalogRoleName,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete,
+            e);
+      }
+    }
+
+    for (GrantResource grant : grantSyncPlan.entitiesToOverwrite()) {
+      try {
+        target.addGrant(catalogName, catalogRoleName, grant);
+        clientLogger.info(
+            "Added grant {} to catalog-role {} for catalog {}. - {}/{}",
+            grant.getType(),
+            catalogRoleName,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete);
+      } catch (Exception e) {
+        clientLogger.error(
+            "Failed to add grant {} to catalog-role {} for catalog {}. - {}/{}",
+            grant.getType(),
+            catalogRoleName,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete,
+            e);
+      }
+    }
+
+    for (GrantResource grant : grantSyncPlan.entitiesToRemove()) {
+      try {
+        target.revokeGrant(catalogName, catalogRoleName, grant);
+        clientLogger.info(
+            "Revoked grant {} from catalog-role {} for catalog {}. - {}/{}",
+            grant.getType(),
+            catalogRoleName,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete);
+      } catch (Exception e) {
+        clientLogger.error(
+            "Failed to revoke grant {} from catalog-role {} for catalog {}. - {}/{}",
+            grant.getType(),
+            catalogRoleName,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete,
+            e);
+      }
+    }
+  }
+
+  /**
+   * Set up an omnipotent catalog role for the provided catalog on the target Polaris instance,
+   * if one does not already exist.
+   *
+   * @param catalogName
+   */
+  private void setupOmnipotentCatalogRoleIfNotExistsTarget(String catalogName) {
+    if (!this.targetAccessControlService.omnipotentCatalogRoleExists(catalogName)) {
+      clientLogger.info(
+          "No omnipotent catalog-role exists for catalog {} on target. Going to set one up.",
+          catalogName);
+
+      targetAccessControlService.setupOmnipotentRoleForCatalog(
+          catalogName, targetOmnipotentPrincipalRole, false, true);
+
+      clientLogger.info("Setup omnipotent catalog-role for catalog {} on target.", catalogName);
+    }
+  }
+
+  /**
+   * Set up an omnipotent catalog role for the provided catalog on the source Polaris instance,
+   * if one does not already exist.
+   *
+   * @param catalogName
+   */
+  private void setupOmnipotentCatalogRoleIfNotExistsSource(String catalogName) {
+    if (!this.sourceAccessControlService.omnipotentCatalogRoleExists(catalogName)) {
+      clientLogger.info(
+          "No omnipotent catalog-role exists for catalog {} on source. Going to set one up.",
+          catalogName);
+
+      sourceAccessControlService.setupOmnipotentRoleForCatalog(
+          catalogName, sourceOmnipotentPrincipalRole, false, false);
+
+      clientLogger.info("Setup omnipotent catalog-role for catalog {} on source.", catalogName);
+    }
+  }
+
+  public org.apache.iceberg.catalog.Catalog initializeIcebergCatalogSource(String catalogName) {
+    setupOmnipotentCatalogRoleIfNotExistsSource(catalogName);
+    return source.initializeCatalog(catalogName, sourceOmnipotentPrincipal);
+  }
+
+  public org.apache.iceberg.catalog.Catalog initializeIcebergCatalogTarget(String catalogName) {
+    setupOmnipotentCatalogRoleIfNotExistsTarget(catalogName);
+    return target.initializeCatalog(catalogName, targetOmnipotentPrincipal);
+  }
+
+  /**
+   * Sync namespaces contained within a parent namespace.
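+   * Recurses into child namespaces and syncs the tables of every namespace whose children are
+   * planned for synchronization.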
+ * + * @param catalogName + * @param parentNamespace + * @param sourceIcebergCatalog + * @param targetIcebergCatalog + */ + public void syncNamespaces( + String catalogName, + Namespace parentNamespace, + org.apache.iceberg.catalog.Catalog sourceIcebergCatalog, + org.apache.iceberg.catalog.Catalog targetIcebergCatalog) { + // no namespaces to sync if catalog does not implement SupportsNamespaces + if (sourceIcebergCatalog instanceof SupportsNamespaces sourceNamespaceCatalog + && targetIcebergCatalog instanceof SupportsNamespaces targetNamespaceCatalog) { + List namespacesSource; + + try { + namespacesSource = sourceNamespaceCatalog.listNamespaces(parentNamespace); + clientLogger.info( + "Listed {} namespaces in namespace {} for catalog {} from source.", + namespacesSource.size(), + parentNamespace, + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list namespaces in namespace {} for catalog {} from source.", + parentNamespace, + catalogName, + e); + return; + } + + List namespacesTarget; + + try { + namespacesTarget = targetNamespaceCatalog.listNamespaces(parentNamespace); + clientLogger.info( + "Listed {} namespaces in namespace {} for catalog {} from target.", + namespacesTarget.size(), + parentNamespace, + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list namespaces in namespace {} for catalog {} from target.", + parentNamespace, + catalogName, + e); + return; + } + + SynchronizationPlan namespaceSynchronizationPlan = + syncPlanner.planNamespaceSync( + catalogName, parentNamespace, namespacesSource, namespacesTarget); + + int syncsCompleted = 0; + int totalSyncsToComplete = totalSyncsToComplete(namespaceSynchronizationPlan); + + namespaceSynchronizationPlan + .entitiesNotModified() + .forEach( + namespace -> + clientLogger.info( + "No change detected for namespace {} in namespace {} for catalog {}, skipping.", + namespace, + parentNamespace, + catalogName)); + + for (Namespace namespace : namespaceSynchronizationPlan.entitiesToCreate()) { + try { + targetNamespaceCatalog.createNamespace(namespace); + clientLogger.info( + "Created namespace {} in namespace {} for catalog {} - {}/{}", + namespace, + parentNamespace, + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to create namespace {} in namespace {} for catalog {} - {}/{}", + namespace, + parentNamespace, + catalogName, + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (Namespace namespace : namespaceSynchronizationPlan.entitiesToOverwrite()) { + try { + Map sourceNamespaceMetadata = + sourceNamespaceCatalog.loadNamespaceMetadata(namespace); + Map targetNamespaceMetadata = + targetNamespaceCatalog.loadNamespaceMetadata(namespace); + + if (sourceNamespaceMetadata.equals(targetNamespaceMetadata)) { + clientLogger.info( + "Namespace metadata for namespace {} in namespace {} for catalog {} was not modified, skipping. 
- {}/{}", + namespace, + parentNamespace, + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + continue; + } + + target.dropNamespaceCascade(targetIcebergCatalog, namespace); + targetNamespaceCatalog.createNamespace(namespace, sourceNamespaceMetadata); + + clientLogger.info( + "Overwrote namespace {} in namespace {} for catalog {} - {}/{}", + namespace, + parentNamespace, + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to overwrite namespace {} in namespace {} for catalog {} - {}/{}", + namespace, + parentNamespace, + catalogName, + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (Namespace namespace : namespaceSynchronizationPlan.entitiesToRemove()) { + try { + target.dropNamespaceCascade(targetIcebergCatalog, namespace); + clientLogger.info( + "Removed namespace {} in namespace {} for catalog {} - {}/{}", + namespace, + parentNamespace, + catalogName, + ++syncsCompleted, + totalSyncsToComplete); + } catch (Exception e) { + clientLogger.error( + "Failed to remove namespace {} in namespace {} for catalog {} - {}/{}", + namespace, + parentNamespace, + catalogName, + ++syncsCompleted, + totalSyncsToComplete, + e); + } + } + + for (Namespace namespace : namespaceSynchronizationPlan.entitiesToSyncChildren()) { + syncTables(catalogName, namespace, sourceIcebergCatalog, targetIcebergCatalog); + syncNamespaces(catalogName, namespace, sourceIcebergCatalog, targetIcebergCatalog); + } + } + } + + /** + * Sync tables contained within a namespace. + * + * @param catalogName + * @param namespace + * @param sourceIcebergCatalog + * @param targetIcebergCatalog + */ + public void syncTables( + String catalogName, + Namespace namespace, + org.apache.iceberg.catalog.Catalog sourceIcebergCatalog, + org.apache.iceberg.catalog.Catalog targetIcebergCatalog) { + Set sourceTables; + + try { + sourceTables = new HashSet<>(sourceIcebergCatalog.listTables(namespace)); + clientLogger.info( + "Listed {} tables in namespace {} for catalog {} on source.", + sourceTables.size(), + namespace, + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list tables in namespace {} for catalog {} on source.", + namespace, + catalogName, + e); + return; + } + + Set targetTables; + + try { + targetTables = new HashSet<>(targetIcebergCatalog.listTables(namespace)); + clientLogger.info( + "Listed {} tables in namespace {} for catalog {} on target.", + targetTables.size(), + namespace, + catalogName); + } catch (Exception e) { + clientLogger.error( + "Failed to list tables in namespace {} for catalog {} on target.", + namespace, + catalogName, + e); + return; + } + + SynchronizationPlan tableSyncPlan = + syncPlanner.planTableSync(catalogName, namespace, sourceTables, targetTables); + + tableSyncPlan + .entitiesToSkip() + .forEach( + tableId -> + clientLogger.info( + "Skipping table {} in namespace {} in catalog {}.", + tableId, + namespace, + catalogName)); + + int syncsCompleted = 0; + int totalSyncsToComplete = totalSyncsToComplete(tableSyncPlan); + + for (TableIdentifier tableId : tableSyncPlan.entitiesToCreate()) { + try { + Table table = sourceIcebergCatalog.loadTable(tableId); + + if (table instanceof BaseTable baseTable) { + targetIcebergCatalog.registerTable( + tableId, baseTable.operations().current().metadataFileLocation()); + } else { + throw new IllegalStateException("Cannot register table that does not extend BaseTable."); + } + + if (table instanceof BaseTableWithETag tableWithETag) { + 
etagService.storeETag(catalogName, tableId, tableWithETag.etag());
+        }
+
+        clientLogger.info(
+            "Registered table {} in namespace {} in catalog {}. - {}/{}",
+            tableId,
+            namespace,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete);
+      } catch (Exception e) {
+        clientLogger.error(
+            "Failed to register table {} in namespace {} in catalog {}. - {}/{}",
+            tableId,
+            namespace,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete,
+            e);
+      }
+    }
+
+    for (TableIdentifier tableId : tableSyncPlan.entitiesToOverwrite()) {
+      try {
+        Table table;
+
+        if (sourceIcebergCatalog instanceof PolarisCatalog polarisCatalog) {
+          String etag = etagService.getETag(catalogName, tableId);
+          table = polarisCatalog.loadTable(tableId, etag);
+        } else {
+          table = sourceIcebergCatalog.loadTable(tableId);
+        }
+
+        if (table instanceof BaseTable baseTable) {
+          targetIcebergCatalog.dropTable(tableId, /* purge */ false);
+          targetIcebergCatalog.registerTable(
+              tableId, baseTable.operations().current().metadataFileLocation());
+        } else {
+          throw new IllegalStateException("Cannot register table that does not extend BaseTable.");
+        }
+
+        if (table instanceof BaseTableWithETag tableWithETag) {
+          etagService.storeETag(catalogName, tableId, tableWithETag.etag());
+        }
+
+        clientLogger.info(
+            "Dropped and re-registered table {} in namespace {} in catalog {}. - {}/{}",
+            tableId,
+            namespace,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete);
+      } catch (NotModifiedException e) {
+        clientLogger.info(
+            "Table {} in namespace {} in catalog {} was not modified, not overwriting in target catalog. - {}/{}",
+            tableId,
+            namespace,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete);
+      } catch (Exception e) {
+        clientLogger.error(
+            "Failed to drop and re-register table {} in namespace {} in catalog {}. - {}/{}",
+            tableId,
+            namespace,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete,
+            e);
+      }
+    }
+
+    for (TableIdentifier table : tableSyncPlan.entitiesToRemove()) {
+      try {
+        targetIcebergCatalog.dropTable(table, /* purge */ false);
+        clientLogger.info(
+            "Dropped table {} in namespace {} in catalog {}. - {}/{}",
+            table,
+            namespace,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete);
+      } catch (Exception e) {
+        clientLogger.error(
+            "Failed to drop table {} in namespace {} in catalog {}. - {}/{}",
+            table,
+            namespace,
+            catalogName,
+            ++syncsCompleted,
+            totalSyncsToComplete,
+            e);
+      }
+    }
+  }
+}
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/access/AccessControlConstants.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/access/AccessControlConstants.java
new file mode 100644
index 00000000..58be6a95
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/access/AccessControlConstants.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris.access; + +public class AccessControlConstants { + + public static final String OMNIPOTENCE_PROPERTY = "IS_OMNIPOTENT_PRINCIPAL"; + + protected static final String OMNIPOTENT_PRINCIPAL_NAME_PREFIX = "omnipotent-principal-"; +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/access/AccessControlService.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/access/AccessControlService.java new file mode 100644 index 00000000..c0b4fe6e --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/access/AccessControlService.java @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris.access; + +import static org.apache.polaris.core.admin.model.CatalogPrivilege.CATALOG_MANAGE_METADATA; +import static org.apache.polaris.core.admin.model.CatalogPrivilege.CATALOG_READ_PROPERTIES; +import static org.apache.polaris.core.admin.model.CatalogPrivilege.NAMESPACE_LIST; +import static org.apache.polaris.core.admin.model.CatalogPrivilege.NAMESPACE_READ_PROPERTIES; +import static org.apache.polaris.core.admin.model.CatalogPrivilege.TABLE_LIST; +import static org.apache.polaris.core.admin.model.CatalogPrivilege.TABLE_READ_PROPERTIES; +import static org.apache.polaris.core.admin.model.CatalogPrivilege.VIEW_LIST; +import static org.apache.polaris.core.admin.model.CatalogPrivilege.VIEW_READ_PROPERTIES; +import static org.apache.polaris.tools.sync.polaris.access.AccessControlConstants.OMNIPOTENCE_PROPERTY; +import static org.apache.polaris.tools.sync.polaris.access.AccessControlConstants.OMNIPOTENT_PRINCIPAL_NAME_PREFIX; + +import java.util.List; +import java.util.NoSuchElementException; +import org.apache.polaris.core.admin.model.CatalogGrant; +import org.apache.polaris.core.admin.model.CatalogRole; +import org.apache.polaris.core.admin.model.GrantResource; +import org.apache.polaris.core.admin.model.Principal; +import org.apache.polaris.core.admin.model.PrincipalRole; +import org.apache.polaris.core.admin.model.PrincipalWithCredentials; +import org.apache.polaris.tools.sync.polaris.PolarisService; + +/** + * Service class to facilitate the access control needs of the synchronization. 
This involves
+ * setting up principals, principal roles, catalog roles, and grants so that the tool can
+ * introspect catalog internals such as catalog roles, tables, and grants.
+ */
+public class AccessControlService {
+
+  private final PolarisService polaris;
+
+  public AccessControlService(PolarisService polaris) {
+    this.polaris = polaris;
+  }
+
+  /**
+   * Creates or replaces the existing omnipotent principal on the provided Polaris instance.
+   *
+   * @param replace if true, an existing omnipotent principal will be dropped and recreated
+   * @return the principal and credentials for the omnipotent principal
+   */
+  public PrincipalWithCredentials createOmnipotentPrincipal(boolean replace) {
+    List<Principal> principals = polaris.listPrincipals();
+
+    Principal omnipotentPrincipalPrototype =
+        new Principal()
+            .name(OMNIPOTENT_PRINCIPAL_NAME_PREFIX + System.currentTimeMillis())
+            .putPropertiesItem(
+                OMNIPOTENCE_PROPERTY, ""); // this property identifies the omnipotent principal
+
+    for (Principal principal : principals) {
+      if (principal.getProperties() != null
+          && principal.getProperties().containsKey(OMNIPOTENCE_PROPERTY)) {
+        if (replace) {
+          // drop existing omnipotent principal in preparation for replacement
+          polaris.removePrincipal(principal.getName());
+        } else {
+          // we cannot create another omnipotent principal and cannot replace the existing, fail
+          throw new IllegalStateException(
+              "Not permitted to replace existing omnipotent principal, but omnipotent "
+                  + "principal with property "
+                  + OMNIPOTENCE_PROPERTY
+                  + " already exists");
+        }
+      }
+    }
+
+    // existing principal with identifying property does not exist, create a new one
+    return polaris.createPrincipal(omnipotentPrincipalPrototype, false);
+  }
+
+  /**
+   * Retrieves the omnipotent principal role for the provided principalName.
+   *
+   * @param principalName the principal name to search for roles with
+   * @return the principal role for the provided principal, if it exists
+   */
+  public PrincipalRole getOmnipotentPrincipalRoleForPrincipal(String principalName) {
+    List<PrincipalRole> principalRolesAssigned =
+        polaris.listPrincipalRolesAssignedForPrincipal(principalName);
+
+    return principalRolesAssigned.stream()
+        .filter(
+            principalRole ->
+                principalRole.getProperties() != null
+                    && principalRole.getProperties().containsKey(OMNIPOTENCE_PROPERTY))
+        .findFirst()
+        .orElseThrow(
+            () ->
+                new NoSuchElementException(
+                    "No omnipotent principal role with property "
+                        + OMNIPOTENCE_PROPERTY
+                        + " exists for principal "
+                        + principalName));
+  }
+
+  /**
+   * Creates a principal role for the omnipotent principal and assigns it to the provided
+   * omnipotent principal.
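+   *
+   * <p>Typical setup sequence (illustrative sketch; variable names and flag values are examples):
+   *
+   * <pre>{@code
+   * PrincipalWithCredentials principal = accessControlService.createOmnipotentPrincipal(true);
+   * PrincipalRole role = accessControlService.createAndAssignPrincipalRole(principal, true);
+   * accessControlService.setupOmnipotentRoleForCatalog("my_catalog", role, true, false);
+   * }</pre>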
+   *
+   * @param omnipotentPrincipal the principal to create and assign the role for
+   * @param replace if true, drops existing omnipotent principal roles if they exist before
+   *     creating the new one
+   * @return the principal role for the omnipotent principal
+   */
+  public PrincipalRole createAndAssignPrincipalRole(
+      PrincipalWithCredentials omnipotentPrincipal, boolean replace) {
+    List<PrincipalRole> principalRoles = polaris.listPrincipalRoles();
+
+    PrincipalRole omnipotentPrincipalRole =
+        new PrincipalRole()
+            .name(omnipotentPrincipal.getPrincipal().getName())
+            .putPropertiesItem(OMNIPOTENCE_PROPERTY, "");
+
+    for (PrincipalRole principalRole : principalRoles) {
+      if (principalRole.getProperties() != null
+          && principalRole.getProperties().containsKey(OMNIPOTENCE_PROPERTY)) {
+        // replace existing principal role if exists
+        if (replace) {
+          polaris.removePrincipalRole(principalRole.getName());
+        } else {
+          throw new IllegalStateException(
+              "Not permitted to replace existing omnipotent principal role, but omnipotent "
+                  + "principal role with property "
+                  + OMNIPOTENCE_PROPERTY
+                  + " already exists");
+        }
+      }
+    }
+
+    polaris.createPrincipalRole(omnipotentPrincipalRole, false);
+    polaris.assignPrincipalRole(
+        omnipotentPrincipal.getPrincipal().getName(), omnipotentPrincipalRole.getName());
+    return omnipotentPrincipalRole;
+  }
+
+  /**
+   * Creates an omnipotent catalog role for a catalog and assigns it to the provided omnipotent
+   * principal role.
+   *
+   * @param catalogName the catalog to create the catalog role for
+   * @param omnipotentPrincipalRole the omnipotent principal role to assign the created catalog
+   *     role to
+   * @param replace if true, drops and recreates the existing omnipotent catalog role
+   * @return the created omnipotent catalog role
+   */
+  public CatalogRole createAndAssignCatalogRole(
+      String catalogName, PrincipalRole omnipotentPrincipalRole, boolean replace) {
+    List<CatalogRole> catalogRoles = polaris.listCatalogRoles(catalogName);
+
+    for (CatalogRole catalogRole : catalogRoles) {
+      if (catalogRole.getProperties() != null
+          && catalogRole.getProperties().containsKey(OMNIPOTENCE_PROPERTY)) {
+        if (replace) {
+          polaris.removeCatalogRole(catalogName, catalogRole.getName());
+        } else {
+          throw new IllegalStateException(
+              "Not permitted to replace existing omnipotent catalog role for catalog "
+                  + catalogName
+                  + ", but omnipotent catalog role with property "
+                  + OMNIPOTENCE_PROPERTY
+                  + " already exists");
+        }
+      }
+    }
+
+    CatalogRole omnipotentCatalogRole =
+        new CatalogRole()
+            .name(omnipotentPrincipalRole.getName())
+            .putPropertiesItem(OMNIPOTENCE_PROPERTY, "");
+
+    polaris.createCatalogRole(catalogName, omnipotentCatalogRole, false /* overwrite */);
+    polaris.assignCatalogRole(
+        omnipotentPrincipalRole.getName(), catalogName, omnipotentCatalogRole.getName());
+    return omnipotentCatalogRole;
+  }
+
+  /**
+   * Adds grants for the desired privilege level to the omnipotent catalog role.
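+   *
+   * <p>For example (illustrative), {@code addGrantsToCatalogRole("my_catalog", roleName, false)}
+   * adds the read and list grants, while passing {@code true} adds only
+   * {@code CATALOG_MANAGE_METADATA}.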
+   *
+   * @param catalogName the catalog to identify the role in
+   * @param catalogRoleName the name of the catalog role to assign the grants to
+   * @param withWriteAccess if the catalog role should be given write access to the catalog
+   *     internals
+   * @return the grants that were added to the catalog role
+   */
+  public List<CatalogGrant> addGrantsToCatalogRole(
+      String catalogName, String catalogRoleName, boolean withWriteAccess) {
+    if (withWriteAccess) {
+      // write access only requires CATALOG_MANAGE_METADATA
+      CatalogGrant catalogManageMetadata =
+          new CatalogGrant()
+              .type(GrantResource.TypeEnum.CATALOG)
+              .privilege(CATALOG_MANAGE_METADATA);
+
+      polaris.addGrant(catalogName, catalogRoleName, catalogManageMetadata);
+      return List.of(catalogManageMetadata);
+    } else {
+      // read access requires reading properties and listing entities for each entity type
+      CatalogGrant catalogReadProperties =
+          new CatalogGrant()
+              .type(GrantResource.TypeEnum.CATALOG)
+              .privilege(CATALOG_READ_PROPERTIES);
+
+      CatalogGrant namespaceReadProperties =
+          new CatalogGrant()
+              .type(GrantResource.TypeEnum.CATALOG)
+              .privilege(NAMESPACE_READ_PROPERTIES);
+
+      CatalogGrant namespaceList =
+          new CatalogGrant().type(GrantResource.TypeEnum.CATALOG).privilege(NAMESPACE_LIST);
+
+      CatalogGrant tableReadProperties =
+          new CatalogGrant().type(GrantResource.TypeEnum.CATALOG).privilege(TABLE_READ_PROPERTIES);
+
+      CatalogGrant tableList =
+          new CatalogGrant().type(GrantResource.TypeEnum.CATALOG).privilege(TABLE_LIST);
+
+      CatalogGrant viewReadProperties =
+          new CatalogGrant().type(GrantResource.TypeEnum.CATALOG).privilege(VIEW_READ_PROPERTIES);
+
+      CatalogGrant viewList =
+          new CatalogGrant().type(GrantResource.TypeEnum.CATALOG).privilege(VIEW_LIST);
+
+      polaris.addGrant(catalogName, catalogRoleName, catalogReadProperties);
+      polaris.addGrant(catalogName, catalogRoleName, namespaceReadProperties);
+      polaris.addGrant(catalogName, catalogRoleName, namespaceList);
+      polaris.addGrant(catalogName, catalogRoleName, tableReadProperties);
+      polaris.addGrant(catalogName, catalogRoleName, tableList);
+      polaris.addGrant(catalogName, catalogRoleName, viewReadProperties);
+      polaris.addGrant(catalogName, catalogRoleName, viewList);
+      // return every grant that was added, list grants included, to match the documented contract
+      return List.of(
+          catalogReadProperties,
+          namespaceReadProperties,
+          namespaceList,
+          tableReadProperties,
+          tableList,
+          viewReadProperties,
+          viewList);
+    }
+  }
+
+  /**
+   * Determines if an omnipotent catalog role already exists for this catalog.
+   *
+   * @param catalogName the catalog to search in
+   * @return true if exists, false otherwise
+   */
+  public boolean omnipotentCatalogRoleExists(String catalogName) {
+    List<CatalogRole> catalogRoles = polaris.listCatalogRoles(catalogName);
+
+    return catalogRoles.stream()
+        .anyMatch(
+            catalogRole ->
+                catalogRole.getProperties() != null
+                    && catalogRole.getProperties().containsKey(OMNIPOTENCE_PROPERTY));
+  }
+
+  /**
+   * Creates a catalog role for the catalog, assigns it to the provided principal role, and assigns
+   * grants with the appropriate privilege level.
+ * + * @param catalogName the catalog to create the role for + * @param omnipotentPrincipalRole the principal role to assign the catalog role to + * @param replace if true, drops the existing catalog role if it exists + * @param withWriteAccess gives write access to the catalog role + */ + public void setupOmnipotentRoleForCatalog( + String catalogName, + PrincipalRole omnipotentPrincipalRole, + boolean replace, + boolean withWriteAccess) { + CatalogRole omniPotentCatalogRole = + createAndAssignCatalogRole(catalogName, omnipotentPrincipalRole, replace); + addGrantsToCatalogRole(catalogName, omniPotentCatalogRole.getName(), withWriteAccess); + } +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/BaseTableWithETag.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/BaseTableWithETag.java new file mode 100644 index 00000000..1f28a031 --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/BaseTableWithETag.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris.catalog; + +import org.apache.iceberg.BaseTable; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.metrics.MetricsReporter; + +/** Wrapper around {@link BaseTable} that contains the latest ETag for the table. */ +public class BaseTableWithETag extends BaseTable { + + private final String etag; + + public BaseTableWithETag(TableOperations ops, String name, String etag) { + super(ops, name); + this.etag = etag; + } + + public BaseTableWithETag( + TableOperations ops, String name, MetricsReporter reporter, String etag) { + super(ops, name, reporter); + this.etag = etag; + } + + public String etag() { + return etag; + } +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/ETagService.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/ETagService.java new file mode 100644 index 00000000..b06e4281 --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/ETagService.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.catalog;
+
+import org.apache.iceberg.catalog.TableIdentifier;
+
+/**
+ * Generic interface to provide and store ETags for tables within catalogs. This allows the storage
+ * of the ETag to be completely independent from the tool.
+ */
+public interface ETagService {
+
+  /**
+   * Retrieves the ETag for the table.
+   *
+   * @param catalogName the catalog the table is in
+   * @param tableIdentifier the table identifier
+   * @return the ETag for the last known metadata of the table
+   */
+  String getETag(String catalogName, TableIdentifier tableIdentifier);
+
+  /**
+   * After table loading, stores the fetched ETag.
+   *
+   * @param catalogName the catalog the table is in
+   * @param tableIdentifier the table identifier
+   * @param etag the ETag that was provided by the Iceberg REST API
+   */
+  void storeETag(String catalogName, TableIdentifier tableIdentifier, String etag);
+}
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/MetadataWrapperTableOperations.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/MetadataWrapperTableOperations.java
new file mode 100644
index 00000000..a5331554
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/MetadataWrapperTableOperations.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.catalog;
+
+import java.util.NoSuchElementException;
+import org.apache.iceberg.TableMetadata;
+import org.apache.iceberg.TableOperations;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.LocationProvider;
+
+/**
+ * Wrapper table operations class that simply exposes a provided table metadata. Used to build
+ * a {@link org.apache.iceberg.BaseTable} without having to expose a full-fledged operations class.
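+ *
+ * <p>Illustrative usage (variable and table names are examples):
+ *
+ * <pre>{@code
+ * TableOperations ops = new MetadataWrapperTableOperations(loadTableResponse.tableMetadata());
+ * Table table = new BaseTable(ops, "my_catalog.ns.tbl");
+ * String location = table.operations().current().metadataFileLocation();
+ * }</pre>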
+ */ +public class MetadataWrapperTableOperations implements TableOperations { + + private final TableMetadata tableMetadata; + + public MetadataWrapperTableOperations(TableMetadata tableMetadata) { + this.tableMetadata = tableMetadata; + } + + @Override + public TableMetadata current() { + return this.tableMetadata; + } + + @Override + public TableMetadata refresh() { + return this.tableMetadata; + } + + @Override + public void commit(TableMetadata tableMetadata, TableMetadata tableMetadata1) { + throw new UnsupportedOperationException("Cannot perform commit."); + } + + @Override + public FileIO io() { + throw new NoSuchElementException("Does not possess file io."); + } + + @Override + public String metadataFileLocation(String s) { + return this.tableMetadata.metadataFileLocation(); + } + + @Override + public LocationProvider locationProvider() { + throw new NoSuchElementException("Does not possess location provider."); + } +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/NoOpETagService.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/NoOpETagService.java new file mode 100644 index 00000000..04f3bd0c --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/NoOpETagService.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris.catalog; + +import org.apache.iceberg.catalog.TableIdentifier; + +/** Implementation that returns nothing and stores no ETags. */ +public class NoOpETagService implements ETagService { + + @Override + public String getETag(String catalogName, TableIdentifier tableIdentifier) { + return null; + } + + @Override + public void storeETag(String catalogName, TableIdentifier tableIdentifier, String etag) {} +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/NotModifiedException.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/NotModifiedException.java new file mode 100644 index 00000000..6903c642 --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/NotModifiedException.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris.catalog; + +import org.apache.iceberg.catalog.TableIdentifier; + +public class NotModifiedException extends RuntimeException { + + public NotModifiedException(TableIdentifier tableIdentifier) { + super("Table " + tableIdentifier + " was not modified."); + } + + public NotModifiedException(String message) { + super(message); + } + + public NotModifiedException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/PolarisCatalog.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/PolarisCatalog.java new file mode 100644 index 00000000..553d7253 --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/catalog/PolarisCatalog.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris.catalog; + +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.Closeable; +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.Map; +import org.apache.http.HttpHeaders; +import org.apache.http.HttpStatus; +import org.apache.iceberg.BaseTable; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.catalog.ViewCatalog; +import org.apache.iceberg.hadoop.Configurable; +import org.apache.iceberg.rest.RESTCatalog; +import org.apache.iceberg.rest.ResourcePaths; +import org.apache.iceberg.rest.responses.LoadTableResponse; +import org.apache.iceberg.rest.responses.LoadTableResponseParser; +import org.apache.polaris.tools.sync.polaris.http.OAuth2Util; + +/** + * Overrides loadTable default implementation to issue a custom loadTable request to the Polaris + * Iceberg REST Api and build the table metadata. This is necessary since the existing {@link + * RESTCatalog} does not provide a way to capture response headers to retrieve the ETag on a + * loadTable request. 
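+ *
+ * <p>Illustrative usage (property values are placeholders, and {@code lastKnownETag} may be null):
+ *
+ * <pre>{@code
+ * PolarisCatalog catalog = new PolarisCatalog();
+ * catalog.initialize(
+ *     "source",
+ *     new HashMap<>(
+ *         Map.of(
+ *             "uri", "https://polaris.example.com/api/catalog",
+ *             "warehouse", "my_catalog",
+ *             "credential", "client-id:client-secret",
+ *             "scope", "PRINCIPAL_ROLE:ALL")));
+ * try {
+ *   Table table = catalog.loadTable(TableIdentifier.of("ns", "tbl"), lastKnownETag);
+ * } catch (NotModifiedException e) {
+ *   // the table has not changed since lastKnownETag was captured
+ * }
+ * }</pre>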
+ */
+public class PolarisCatalog extends RESTCatalog
+    implements Catalog, ViewCatalog, SupportsNamespaces, Configurable, Closeable {
+
+  private String name = null;
+
+  private Map<String, String> properties = null;
+
+  private String accessToken = null;
+
+  private HttpClient httpClient = null;
+
+  private ObjectMapper objectMapper = null;
+
+  private ResourcePaths resourcePaths = null;
+
+  public PolarisCatalog() {
+    super();
+  }
+
+  @Override
+  public void initialize(String name, Map<String, String> props) {
+    this.name = name;
+    this.properties = props;
+
+    if (resourcePaths == null) {
+      this.properties.put("prefix", props.get("warehouse"));
+      resourcePaths = ResourcePaths.forCatalogProperties(this.properties);
+    }
+
+    if (accessToken == null || httpClient == null || this.objectMapper == null) {
+      String oauth2ServerUri = props.get("uri") + "/v1/oauth/tokens";
+      String credential = props.get("credential");
+
+      String clientId = credential.split(":")[0];
+      String clientSecret = credential.split(":")[1];
+
+      String scope = props.get("scope");
+
+      // TODO: Add token refresh
+      try {
+        this.accessToken = OAuth2Util.fetchToken(oauth2ServerUri, clientId, clientSecret, scope);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+
+      this.httpClient = HttpClient.newBuilder().build();
+      this.objectMapper = new ObjectMapper();
+    }
+    super.initialize(name, props);
+  }
+
+  @Override
+  public Table loadTable(TableIdentifier ident) {
+    return loadTable(ident, null);
+  }
+
+  /**
+   * Perform a loadTable with a specified ETag in the If-None-Match header. TODO: Remove this once
+   * ETag is officially supported in Iceberg
+   *
+   * @param ident the identifier of the table
+   * @param etag the etag
+   * @return a {@link BaseTable} if no ETag was found in the response headers. A {@link
+   *     BaseTableWithETag} if an ETag was included in the response headers.
+   * @throws NotModifiedException if the Iceberg REST catalog responded with 304 NOT MODIFIED
+   */
+  public Table loadTable(TableIdentifier ident, String etag) {
+    String catalogName = this.properties.get("warehouse");
+
+    String tablePath =
+        String.format("%s/%s", this.properties.get("uri"), resourcePaths.table(ident));
+
+    HttpRequest.Builder requestBuilder =
+        HttpRequest.newBuilder()
+            .uri(URI.create(tablePath))
+            .header(HttpHeaders.AUTHORIZATION, "Bearer " + accessToken)
+            .GET();
+
+    // specify last known etag in if-none-match header
+    if (etag != null) {
+      requestBuilder.header(HttpHeaders.IF_NONE_MATCH, etag);
+    }
+
+    HttpRequest request = requestBuilder.build();
+
+    HttpResponse<String> response;
+
+    try {
+      response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString());
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+
+    // api responded with 304 not modified, throw from here to signal
+    if (response.statusCode() == HttpStatus.SC_NOT_MODIFIED) {
+      throw new NotModifiedException(ident);
+    }
+
+    String body = response.body();
+
+    String newETag = null;
+
+    // if etag header is present in response, store new provided etag
+    if (response.headers().firstValue(HttpHeaders.ETAG).isPresent()) {
+      newETag = response.headers().firstValue(HttpHeaders.ETAG).get();
+    }
+
+    // build custom base table with metadata so that tool can retrieve the
+    // location and register it on the target side
+    LoadTableResponse loadTableResponse = LoadTableResponseParser.fromJson(body);
+    MetadataWrapperTableOperations ops =
+        new MetadataWrapperTableOperations(loadTableResponse.tableMetadata());
+
+    if (newETag != null) {
+      return new BaseTableWithETag(ops, CatalogUtil.fullTableName(catalogName, ident), newETag);
+    }
+
+    return new BaseTable(ops, CatalogUtil.fullTableName(catalogName, ident));
+  }
+}
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/http/HttpUtil.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/http/HttpUtil.java
new file mode 100644
index 00000000..50c1428d
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/http/HttpUtil.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.http;
+
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/** Encapsulates handy HTTP utility methods. */
+public class HttpUtil {
+
+  /**
+   * Turn a {@link Map} into an x-www-form-urlencoded compatible String form body.
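+   *
+   * <p>For example (illustrative), {@code Map.of("scope", "PRINCIPAL_ROLE:ALL")} becomes
+   * {@code "scope=PRINCIPAL_ROLE%3AALL"}; entries are URL-encoded and joined with {@code &}.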
+   */
+  public static String constructFormEncodedString(Map<String, String> parameters) {
+    return parameters.entrySet().stream()
+        .map(
+            entry ->
+                URLEncoder.encode(entry.getKey(), StandardCharsets.UTF_8)
+                    + "="
+                    + URLEncoder.encode(entry.getValue(), StandardCharsets.UTF_8))
+        .collect(Collectors.joining("&"));
+  }
+}
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/http/OAuth2Util.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/http/OAuth2Util.java
new file mode 100644
index 00000000..98ada786
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/http/OAuth2Util.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.http;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.IOException;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import org.apache.http.HttpHeaders;
+import org.apache.http.entity.ContentType;
+
+/**
+ * Utility class to manage the OAuth2 flow for a Polaris instance.
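+ *
+ * <p>Illustrative usage (endpoint and credentials are placeholders):
+ *
+ * <pre>{@code
+ * String token =
+ *     OAuth2Util.fetchToken(
+ *         "https://polaris.example.com/api/catalog/v1/oauth/tokens",
+ *         "client-id",
+ *         "client-secret",
+ *         "PRINCIPAL_ROLE:ALL");
+ * }</pre>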
+ */
+public class OAuth2Util {
+
+  private static final HttpClient httpClient = HttpClient.newHttpClient();
+
+  private static final ObjectMapper objectMapper = new ObjectMapper();
+
+  public static String fetchToken(
+      String oauth2ServerUri, String clientId, String clientSecret, String scope)
+      throws IOException {
+
+    Map<String, String> formBody =
+        Map.of(
+            "grant_type", "client_credentials",
+            "scope", scope,
+            "client_id", clientId,
+            "client_secret", clientSecret);
+
+    String formBodyAsString = HttpUtil.constructFormEncodedString(formBody);
+
+    HttpRequest request =
+        HttpRequest.newBuilder()
+            .uri(URI.create(oauth2ServerUri))
+            .header(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_FORM_URLENCODED.getMimeType())
+            .POST(HttpRequest.BodyPublishers.ofString(formBodyAsString))
+            .build();
+
+    try {
+      HttpResponse<String> response =
+          httpClient.send(request, HttpResponse.BodyHandlers.ofString());
+      Map<String, String> responseBody =
+          objectMapper.readValue(response.body(), new TypeReference<>() {});
+
+      String accessToken = responseBody.getOrDefault("access_token", null);
+
+      if (accessToken != null) {
+        return accessToken;
+      }
+
+      throw new NoSuchElementException(
+          "No field 'access_token' found in response from oauth2-server-uri.");
+    } catch (Exception e) {
+      throw new RuntimeException("Could not fetch access token", e);
+    }
+  }
+}
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/AccessControlAwarePlanner.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/AccessControlAwarePlanner.java
new file mode 100644
index 00000000..0c6f4b10
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/AccessControlAwarePlanner.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.planning;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.polaris.core.admin.model.CatalogRole;
+import org.apache.polaris.core.admin.model.PrincipalRole;
+import org.apache.polaris.tools.sync.polaris.access.AccessControlConstants;
+import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan;
+
+/**
+ * Planner that filters out access control entities that should not be modified during the sync.
+ * This includes the omnipotent roles and principals that we do not want to copy between the two
+ * instances, as well as modifications to service_admin or catalog_admin that may disrupt
+ * manage_access permissions.
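+ *
+ * <p>Illustrative composition with another planner from this module:
+ *
+ * <pre>{@code
+ * SynchronizationPlanner planner =
+ *     new AccessControlAwarePlanner(new SourceParitySynchronizationPlanner());
+ * }</pre>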
+ */ +public class AccessControlAwarePlanner extends DelegatedPlanner implements SynchronizationPlanner { + + public AccessControlAwarePlanner(SynchronizationPlanner delegate) { + super(delegate); + } + + @Override + public SynchronizationPlan planPrincipalRoleSync( + List principalRolesOnSource, List principalRolesOnTarget) { + List skippedRoles = new ArrayList<>(); + List filteredRolesSource = new ArrayList<>(); + List filteredRolesTarget = new ArrayList<>(); + + for (PrincipalRole role : principalRolesOnSource) { + // filter out omnipotent principal role + if (role.getProperties() != null + && role.getProperties().containsKey(AccessControlConstants.OMNIPOTENCE_PROPERTY)) { + skippedRoles.add(role); + continue; + } + + // filter out service_admin + if (role.getName().equals("service_admin")) { + skippedRoles.add(role); + continue; + } + + filteredRolesSource.add(role); + } + + for (PrincipalRole role : principalRolesOnTarget) { + // filter out omnipotent principal role + if (role.getProperties() != null + && role.getProperties().containsKey(AccessControlConstants.OMNIPOTENCE_PROPERTY)) { + skippedRoles.add(role); + continue; + } + + // filter out service admin + if (role.getName().equals("service_admin")) { + skippedRoles.add(role); + continue; + } + + filteredRolesTarget.add(role); + } + + SynchronizationPlan delegatedPlan = + this.delegate.planPrincipalRoleSync(filteredRolesSource, filteredRolesTarget); + + for (PrincipalRole role : skippedRoles) { + delegatedPlan.skipEntity(role); + } + + return delegatedPlan; + } + + @Override + public SynchronizationPlan planCatalogRoleSync( + String catalogName, + List catalogRolesOnSource, + List catalogRolesOnTarget) { + List skippedRoles = new ArrayList<>(); + List filteredRolesSource = new ArrayList<>(); + List filteredRolesTarget = new ArrayList<>(); + + for (CatalogRole role : catalogRolesOnSource) { + // filter out omnipotent catalog role + if (role.getProperties() != null + && role.getProperties().containsKey(AccessControlConstants.OMNIPOTENCE_PROPERTY)) { + skippedRoles.add(role); + continue; + } + + // filter out catalog admin + if (role.getName().equals("catalog_admin")) { + skippedRoles.add(role); + continue; + } + + filteredRolesSource.add(role); + } + + for (CatalogRole role : catalogRolesOnTarget) { + // filter out omnipotent catalog role + if (role.getProperties() != null + && role.getProperties().containsKey(AccessControlConstants.OMNIPOTENCE_PROPERTY)) { + skippedRoles.add(role); + continue; + } + + // filter out catalog admin + if (role.getName().equals("catalog_admin")) { + skippedRoles.add(role); + continue; + } + + filteredRolesTarget.add(role); + } + + SynchronizationPlan delegatedPlan = + this.delegate.planCatalogRoleSync(catalogName, filteredRolesSource, filteredRolesTarget); + + for (CatalogRole role : skippedRoles) { + delegatedPlan.skipEntityAndSkipChildren(role); + } + + return delegatedPlan; + } + + @Override + public SynchronizationPlan planAssignPrincipalRolesToCatalogRolesSync( + String catalogName, + String catalogRoleName, + List assignedPrincipalRolesOnSource, + List assignedPrincipalRolesOnTarget) { + List skippedRoles = new ArrayList<>(); + List filteredRolesSource = new ArrayList<>(); + List filteredRolesTarget = new ArrayList<>(); + + for (PrincipalRole role : assignedPrincipalRolesOnSource) { + // filter out assignment to omnipotent catalog role + if (role.getProperties() != null + && role.getProperties().containsKey(AccessControlConstants.OMNIPOTENCE_PROPERTY)) { + skippedRoles.add(role); + continue; + } + 
+
+      // filter out assignment to service admin
+      if (role.getName().equals("service_admin")) {
+        skippedRoles.add(role);
+        continue;
+      }
+
+      filteredRolesSource.add(role);
+    }
+
+    for (PrincipalRole role : assignedPrincipalRolesOnTarget) {
+      // filter out assignment to omnipotent principal role
+      if (role.getProperties() != null
+          && role.getProperties().containsKey(AccessControlConstants.OMNIPOTENCE_PROPERTY)) {
+        skippedRoles.add(role);
+        continue;
+      }
+
+      // filter out assignment to service admin
+      if (role.getName().equals("service_admin")) {
+        skippedRoles.add(role);
+        continue;
+      }
+
+      filteredRolesTarget.add(role);
+    }
+
+    SynchronizationPlan<PrincipalRole> delegatedPlan =
+        this.delegate.planAssignPrincipalRolesToCatalogRolesSync(
+            catalogName, catalogRoleName, filteredRolesSource, filteredRolesTarget);
+
+    for (PrincipalRole role : skippedRoles) {
+      delegatedPlan.skipEntity(role);
+    }
+
+    return delegatedPlan;
+  }
+}
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/DelegatedPlanner.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/DelegatedPlanner.java
new file mode 100644
index 00000000..e48b5323
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/DelegatedPlanner.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.planning;
+
+import java.util.List;
+import java.util.Set;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.polaris.core.admin.model.Catalog;
+import org.apache.polaris.core.admin.model.CatalogRole;
+import org.apache.polaris.core.admin.model.GrantResource;
+import org.apache.polaris.core.admin.model.PrincipalRole;
+import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan;
+
+/**
+ * Extend this to delegate planning to another planner, overriding only the methods for the
+ * functionality needed.
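+ *
+ * <p>Illustrative subclass (sketch only; {@code basePlanner} is a placeholder):
+ *
+ * <pre>{@code
+ * SynchronizationPlanner tableOnlyTweaks =
+ *     new DelegatedPlanner(basePlanner) {
+ *       public SynchronizationPlan<TableIdentifier> planTableSync(
+ *           String catalogName,
+ *           Namespace namespace,
+ *           Set<TableIdentifier> tablesOnSource,
+ *           Set<TableIdentifier> tablesOnTarget) {
+ *         // adjust or filter the delegated plan here
+ *         return super.planTableSync(catalogName, namespace, tablesOnSource, tablesOnTarget);
+ *       }
+ *     };
+ * }</pre>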
+ */ +public abstract class DelegatedPlanner implements SynchronizationPlanner { + + protected final SynchronizationPlanner delegate; + + public DelegatedPlanner(SynchronizationPlanner delegate) { + this.delegate = delegate; + } + + @Override + public SynchronizationPlan planPrincipalRoleSync( + List principalRolesOnSource, List principalRolesOnTarget) { + return delegate.planPrincipalRoleSync(principalRolesOnSource, principalRolesOnTarget); + } + + @Override + public SynchronizationPlan planCatalogSync( + List catalogsOnSource, List catalogsOnTarget) { + return delegate.planCatalogSync(catalogsOnSource, catalogsOnTarget); + } + + @Override + public SynchronizationPlan planCatalogRoleSync( + String catalogName, + List catalogRolesOnSource, + List catalogRolesOnTarget) { + return delegate.planCatalogRoleSync(catalogName, catalogRolesOnSource, catalogRolesOnTarget); + } + + @Override + public SynchronizationPlan planGrantSync( + String catalogName, + String catalogRoleName, + List grantsOnSource, + List grantsOnTarget) { + return delegate.planGrantSync(catalogName, catalogRoleName, grantsOnSource, grantsOnTarget); + } + + @Override + public SynchronizationPlan planAssignPrincipalRolesToCatalogRolesSync( + String catalogName, + String catalogRoleName, + List assignedPrincipalRolesOnSource, + List assignedPrincipalRolesOnTarget) { + return delegate.planAssignPrincipalRolesToCatalogRolesSync( + catalogName, + catalogRoleName, + assignedPrincipalRolesOnSource, + assignedPrincipalRolesOnTarget); + } + + @Override + public SynchronizationPlan planNamespaceSync( + String catalogName, + Namespace namespace, + List namespacesOnSource, + List namespacesOnTarget) { + return delegate.planNamespaceSync( + catalogName, namespace, namespacesOnSource, namespacesOnTarget); + } + + @Override + public SynchronizationPlan planTableSync( + String catalogName, + Namespace namespace, + Set tablesOnSource, + Set tablesOnTarget) { + return delegate.planTableSync(catalogName, namespace, tablesOnSource, tablesOnTarget); + } +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/ModificationAwarePlanner.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/ModificationAwarePlanner.java new file mode 100644 index 00000000..67a38a21 --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/ModificationAwarePlanner.java @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.tools.sync.polaris.planning; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.core.admin.model.Catalog; +import org.apache.polaris.core.admin.model.CatalogRole; +import org.apache.polaris.core.admin.model.GrantResource; +import org.apache.polaris.core.admin.model.PrincipalRole; +import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan; + +/** Planner that checks for modifications and plans to skip entities that have not been modified. */ +public class ModificationAwarePlanner implements SynchronizationPlanner { + + private static final String CREATE_TIMESTAMP = "createTimestamp"; + + private static final String LAST_UPDATE_TIMESTAMP = "lastUpdateTimestamp"; + + private static final String ENTITY_VERSION = "entityVersion"; + + private static final List DEFAULT_KEYS_TO_IGNORE = + List.of(CREATE_TIMESTAMP, LAST_UPDATE_TIMESTAMP, ENTITY_VERSION); + + private static final List CATALOG_KEYS_TO_IGNORE = + List.of( + // defaults + CREATE_TIMESTAMP, + LAST_UPDATE_TIMESTAMP, + ENTITY_VERSION, + + // For certain storageConfigInfo fields, depending on the credentials Polaris was set up + // with + // to access the storage, some fields will always be different across the source and the + // target. + // For example, for S3 my source and target Polaris instances may be set up with different + // AWS users, + // each of which assumes the same role to access the storage + + // S3 + "storageConfigInfo.userArn", + + // AZURE + "storageConfigInfo.consentUrl", + "storageConfigInfo.multiTenantAppName", + + // GCP + "storageConfigInfo.gcsServiceAccount"); + + private final SynchronizationPlanner delegate; + + private final ObjectMapper objectMapper; + + public ModificationAwarePlanner(SynchronizationPlanner delegate) { + this.objectMapper = new ObjectMapper(); + this.delegate = delegate; + } + + /** + * Removes keys from the provided map. + * + * @param map the map to remove the keys from + * @param keysToRemove a list of keys, nested keys should be separated by '.' eg. "key1.key2" + * @return the map with the keys removed + */ + private Map removeKeys(Map map, List keysToRemove) { + Map cleaned = + objectMapper.convertValue(map, new TypeReference>() {}); + + for (String key : keysToRemove) { + // splits key into first part and rest, eg. 
key1.key2.key3 becomes [key1, key2.key3] + String[] separateFirst = key.split("\\.", 2); + String primary = separateFirst[0]; + + if (separateFirst.length > 1) { + // if there are more nested keys, we want to recursively search the sub map if it exists + Object valueForPrimary = cleaned.get(primary); // get object for primary key if it exists + + if (valueForPrimary == null) { + continue; + } + + try { + Map subMap = + objectMapper.convertValue(valueForPrimary, new TypeReference<>() {}); + Map cleanedSubMap = + removeKeys(subMap, List.of(separateFirst[1])); // remove nested keys from submap + cleaned.put(primary, cleanedSubMap); // replace sub-map with key removed + } catch (IllegalArgumentException e) { + // do nothing because that means the key does not exist, no need to remove it + } + } else { + cleaned.remove(primary); // just remove the key if we have no more nesting + } + } + + return cleaned; + } + + /** + * Compares two objects to see if they are the same. + * + * @param o1 + * @param o2 + * @param keysToIgnore list of keys to ignore in the comparison + * @return true if they are the same, false otherwise + */ + private boolean areSame(Object o1, Object o2, List keysToIgnore) { + Map o1AsMap = objectMapper.convertValue(o1, new TypeReference<>() {}); + Map o2AsMap = objectMapper.convertValue(o2, new TypeReference<>() {}); + o1AsMap = removeKeys(o1AsMap, keysToIgnore); + o2AsMap = removeKeys(o2AsMap, keysToIgnore); + return o1AsMap.equals(o2AsMap); + } + + private boolean areSame(Object o1, Object o2) { + return areSame(o1, o2, DEFAULT_KEYS_TO_IGNORE); + } + + @Override + public SynchronizationPlan planPrincipalRoleSync( + List principalRolesOnSource, List principalRolesOnTarget) { + Map sourceRolesByName = new HashMap<>(); + Map targetRolesByName = new HashMap<>(); + + List notModifiedPrincipalRoles = new ArrayList<>(); + + principalRolesOnSource.forEach(role -> sourceRolesByName.put(role.getName(), role)); + principalRolesOnTarget.forEach(role -> targetRolesByName.put(role.getName(), role)); + + for (PrincipalRole sourceRole : principalRolesOnSource) { + if (targetRolesByName.containsKey(sourceRole.getName())) { + PrincipalRole targetRole = targetRolesByName.get(sourceRole.getName()); + + if (areSame(sourceRole, targetRole)) { + targetRolesByName.remove(targetRole.getName()); + sourceRolesByName.remove(sourceRole.getName()); + notModifiedPrincipalRoles.add(sourceRole); + } + } + } + + SynchronizationPlan delegatedPlan = + delegate.planPrincipalRoleSync( + sourceRolesByName.values().stream().toList(), + targetRolesByName.values().stream().toList()); + + for (PrincipalRole principalRole : notModifiedPrincipalRoles) { + delegatedPlan.skipEntityNotModified(principalRole); + } + + return delegatedPlan; + } + + private boolean areSame(Catalog source, Catalog target) { + return areSame(source, target, CATALOG_KEYS_TO_IGNORE) + // because of the way the jackson serialization works, any class that extends HashMap is + // serialized + // with just the fields in the map. 
Unfortunately, CatalogProperties extends HashMap so we + // must + // manually compare the fields in the catalog properties and cannot automatically + // deserialize them + // as a map + && Objects.equals(source.getProperties(), target.getProperties()); + } + + @Override + public SynchronizationPlan planCatalogSync( + List catalogsOnSource, List catalogsOnTarget) { + Map sourceCatalogsByName = new HashMap<>(); + Map targetCatalogsByName = new HashMap<>(); + + List notModifiedCatalogs = new ArrayList<>(); + + catalogsOnSource.forEach(catalog -> sourceCatalogsByName.put(catalog.getName(), catalog)); + catalogsOnTarget.forEach(catalog -> targetCatalogsByName.put(catalog.getName(), catalog)); + + for (Catalog sourceCatalog : catalogsOnSource) { + if (targetCatalogsByName.containsKey(sourceCatalog.getName())) { + Catalog targetCatalog = targetCatalogsByName.get(sourceCatalog.getName()); + + if (areSame(sourceCatalog, targetCatalog)) { + targetCatalogsByName.remove(targetCatalog.getName()); + sourceCatalogsByName.remove(sourceCatalog.getName()); + notModifiedCatalogs.add(sourceCatalog); + } + } + } + + SynchronizationPlan delegatedPlan = + delegate.planCatalogSync( + sourceCatalogsByName.values().stream().toList(), + targetCatalogsByName.values().stream().toList()); + + for (Catalog catalog : notModifiedCatalogs) { + delegatedPlan.skipEntityNotModified(catalog); + } + + return delegatedPlan; + } + + @Override + public SynchronizationPlan planCatalogRoleSync( + String catalogName, + List catalogRolesOnSource, + List catalogRolesOnTarget) { + Map sourceCatalogRolesByName = new HashMap<>(); + Map targetCatalogRolesByName = new HashMap<>(); + + List notModifiedCatalogRoles = new ArrayList<>(); + + catalogRolesOnSource.forEach(role -> sourceCatalogRolesByName.put(role.getName(), role)); + catalogRolesOnTarget.forEach(role -> targetCatalogRolesByName.put(role.getName(), role)); + + for (CatalogRole sourceCatalogRole : catalogRolesOnSource) { + if (targetCatalogRolesByName.containsKey(sourceCatalogRole.getName())) { + CatalogRole targetCatalogRole = targetCatalogRolesByName.get(sourceCatalogRole.getName()); + + if (areSame(sourceCatalogRole, targetCatalogRole)) { + targetCatalogRolesByName.remove(targetCatalogRole.getName()); + sourceCatalogRolesByName.remove(sourceCatalogRole.getName()); + notModifiedCatalogRoles.add(sourceCatalogRole); + } + } + } + + SynchronizationPlan delegatedPlan = + delegate.planCatalogRoleSync( + catalogName, + sourceCatalogRolesByName.values().stream().toList(), + targetCatalogRolesByName.values().stream().toList()); + + for (CatalogRole catalogRole : notModifiedCatalogRoles) { + delegatedPlan.skipEntityNotModified(catalogRole); + } + + return delegatedPlan; + } + + @Override + public SynchronizationPlan planGrantSync( + String catalogName, + String catalogRoleName, + List grantsOnSource, + List grantsOnTarget) { + Set sourceGrants = new HashSet<>(grantsOnSource); + Set targetGrants = new HashSet<>(grantsOnTarget); + + List notModifiedGrants = new ArrayList<>(); + + for (GrantResource grantResource : grantsOnSource) { + if (targetGrants.contains(grantResource)) { + sourceGrants.remove(grantResource); + targetGrants.remove(grantResource); + notModifiedGrants.add(grantResource); + } + } + + SynchronizationPlan delegatedPlan = + delegate.planGrantSync( + catalogName, + catalogRoleName, + sourceGrants.stream().toList(), + targetGrants.stream().toList()); + + for (GrantResource grant : notModifiedGrants) { + delegatedPlan.skipEntityNotModified(grant); + } + + return delegatedPlan; 
+ } + + @Override + public SynchronizationPlan planAssignPrincipalRolesToCatalogRolesSync( + String catalogName, + String catalogRoleName, + List assignedPrincipalRolesOnSource, + List assignedPrincipalRolesOnTarget) { + return delegate.planAssignPrincipalRolesToCatalogRolesSync( + catalogName, + catalogRoleName, + assignedPrincipalRolesOnSource, + assignedPrincipalRolesOnTarget); + } + + @Override + public SynchronizationPlan planNamespaceSync( + String catalogName, + Namespace namespace, + List namespacesOnSource, + List namespacesOnTarget) { + return delegate.planNamespaceSync( + catalogName, namespace, namespacesOnSource, namespacesOnTarget); + } + + @Override + public SynchronizationPlan planTableSync( + String catalogName, + Namespace namespace, + Set tablesOnSource, + Set tablesOnTarget) { + return delegate.planTableSync(catalogName, namespace, tablesOnSource, tablesOnTarget); + } +} diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/NoOpSyncPlanner.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/NoOpSyncPlanner.java new file mode 100644 index 00000000..241febd0 --- /dev/null +++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/NoOpSyncPlanner.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/NoOpSyncPlanner.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/NoOpSyncPlanner.java
new file mode 100644
index 00000000..241febd0
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/NoOpSyncPlanner.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.planning;
+
+import java.util.List;
+import java.util.Set;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.polaris.core.admin.model.Catalog;
+import org.apache.polaris.core.admin.model.CatalogRole;
+import org.apache.polaris.core.admin.model.GrantResource;
+import org.apache.polaris.core.admin.model.PrincipalRole;
+import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan;
+
+/** Planner that plans no work for any entity type. Useful as an inert delegate. */
+public class NoOpSyncPlanner implements SynchronizationPlanner {
+
+  @Override
+  public SynchronizationPlan<PrincipalRole> planPrincipalRoleSync(
+      List<PrincipalRole> principalRolesOnSource, List<PrincipalRole> principalRolesOnTarget) {
+    return new SynchronizationPlan<>();
+  }
+
+  @Override
+  public SynchronizationPlan<Catalog> planCatalogSync(
+      List<Catalog> catalogsOnSource, List<Catalog> catalogsOnTarget) {
+    return new SynchronizationPlan<>();
+  }
+
+  @Override
+  public SynchronizationPlan<CatalogRole> planCatalogRoleSync(
+      String catalogName,
+      List<CatalogRole> catalogRolesOnSource,
+      List<CatalogRole> catalogRolesOnTarget) {
+    return new SynchronizationPlan<>();
+  }
+
+  @Override
+  public SynchronizationPlan<GrantResource> planGrantSync(
+      String catalogName,
+      String catalogRoleName,
+      List<GrantResource> grantsOnSource,
+      List<GrantResource> grantsOnTarget) {
+    return new SynchronizationPlan<>();
+  }
+
+  @Override
+  public SynchronizationPlan<PrincipalRole> planAssignPrincipalRolesToCatalogRolesSync(
+      String catalogName,
+      String catalogRoleName,
+      List<PrincipalRole> assignedPrincipalRolesOnSource,
+      List<PrincipalRole> assignedPrincipalRolesOnTarget) {
+    return new SynchronizationPlan<>();
+  }
+
+  @Override
+  public SynchronizationPlan<Namespace> planNamespaceSync(
+      String catalogName,
+      Namespace namespace,
+      List<Namespace> namespacesOnSource,
+      List<Namespace> namespacesOnTarget) {
+    // return an empty plan, consistent with the other methods (returning null here
+    // would break callers that iterate over the plan)
+    return new SynchronizationPlan<>();
+  }
+
+  @Override
+  public SynchronizationPlan<TableIdentifier> planTableSync(
+      String catalogName,
+      Namespace namespace,
+      Set<TableIdentifier> tablesOnSource,
+      Set<TableIdentifier> tablesOnTarget) {
+    return new SynchronizationPlan<>();
+  }
+}
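Because every method returns an empty plan, the no-op planner works as an inert terminal delegate when exercising a decorator in isolation, which is exactly how the test classes later in this patch use it. A small illustrative sketch of that invariant:

```java
import java.util.List;
import org.apache.polaris.core.admin.model.Catalog;
import org.apache.polaris.tools.sync.polaris.planning.NoOpSyncPlanner;
import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan;

class NoOpSketch {
  public static void main(String[] args) {
    SynchronizationPlan<Catalog> plan =
        new NoOpSyncPlanner().planCatalogSync(List.of(new Catalog()), List.of());

    // Regardless of what exists on either side, nothing is planned.
    System.out.println(plan.entitiesToCreate().isEmpty());    // true
    System.out.println(plan.entitiesToOverwrite().isEmpty()); // true
    System.out.println(plan.entitiesToRemove().isEmpty());    // true
  }
}
```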
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/SourceParitySynchronizationPlanner.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/SourceParitySynchronizationPlanner.java
new file mode 100644
index 00000000..622bfe25
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/SourceParitySynchronizationPlanner.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.planning;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.polaris.core.admin.model.Catalog;
+import org.apache.polaris.core.admin.model.CatalogRole;
+import org.apache.polaris.core.admin.model.GrantResource;
+import org.apache.polaris.core.admin.model.PrincipalRole;
+import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan;
+
+/**
+ * Sync planner that attempts to create total parity between the source and target Polaris
+ * instances. This involves creating new entities, overwriting entities that exist on both source
+ * and target, and removing entities that exist only on the target.
+ */
+public class SourceParitySynchronizationPlanner implements SynchronizationPlanner {
+
+  @Override
+  public SynchronizationPlan<PrincipalRole> planPrincipalRoleSync(
+      List<PrincipalRole> principalRolesOnSource, List<PrincipalRole> principalRolesOnTarget) {
+    Set<String> sourcePrincipalRoleNames =
+        principalRolesOnSource.stream().map(PrincipalRole::getName).collect(Collectors.toSet());
+    Set<String> targetPrincipalRoleNames =
+        principalRolesOnTarget.stream().map(PrincipalRole::getName).collect(Collectors.toSet());
+
+    SynchronizationPlan<PrincipalRole> plan = new SynchronizationPlan<>();
+
+    for (PrincipalRole principalRole : principalRolesOnSource) {
+      if (targetPrincipalRoleNames.contains(principalRole.getName())) {
+        // overwrite roles that exist on both
+        plan.overwriteEntity(principalRole);
+      } else {
+        // create roles on target that only exist on source
+        plan.createEntity(principalRole);
+      }
+    }
+
+    // remove roles that are not on the source
+    for (PrincipalRole principalRole : principalRolesOnTarget) {
+      if (!sourcePrincipalRoleNames.contains(principalRole.getName())) {
+        plan.removeEntity(principalRole);
+      }
+    }
+
+    return plan;
+  }
+
+  @Override
+  public SynchronizationPlan<Catalog> planCatalogSync(
+      List<Catalog> catalogsOnSource, List<Catalog> catalogsOnTarget) {
+    Set<String> sourceCatalogNames =
+        catalogsOnSource.stream().map(Catalog::getName).collect(Collectors.toSet());
+    Set<String> targetCatalogNames =
+        catalogsOnTarget.stream().map(Catalog::getName).collect(Collectors.toSet());
+
+    SynchronizationPlan<Catalog> plan = new SynchronizationPlan<>();
+
+    for (Catalog catalog : catalogsOnSource) {
+      if (targetCatalogNames.contains(catalog.getName())) {
+        // overwrite catalogs on target that exist on both
+        plan.overwriteEntity(catalog);
+      } else {
+        // create catalogs on target that exist only on source
+        plan.createEntity(catalog);
+      }
+    }
+
+    // remove catalogs that are only on target
+    for (Catalog catalog : catalogsOnTarget) {
+      if (!sourceCatalogNames.contains(catalog.getName())) {
+        plan.removeEntity(catalog);
+      }
+    }
+
+    return plan;
+  }
+
+  @Override
+  public SynchronizationPlan<CatalogRole> planCatalogRoleSync(
+      String catalogName,
+      List<CatalogRole> catalogRolesOnSource,
+      List<CatalogRole> catalogRolesOnTarget) {
+    Set<String> sourceCatalogRoleNames =
+        catalogRolesOnSource.stream().map(CatalogRole::getName).collect(Collectors.toSet());
+    Set<String> targetCatalogRoleNames =
+        catalogRolesOnTarget.stream().map(CatalogRole::getName).collect(Collectors.toSet());
+
+    SynchronizationPlan<CatalogRole> plan = new SynchronizationPlan<>();
+
+    for (CatalogRole catalogRole : catalogRolesOnSource) {
+      if (targetCatalogRoleNames.contains(catalogRole.getName())) {
+        // overwrite catalog roles that exist on both
+        plan.overwriteEntity(catalogRole);
+      } else {
+        // create catalog roles on target that are only on source
+        plan.createEntity(catalogRole);
+      }
+    }
+
+    // remove catalog roles that exist only on the target
+    for (CatalogRole catalogRole : catalogRolesOnTarget) {
+      if (!sourceCatalogRoleNames.contains(catalogRole.getName())) {
+        plan.removeEntity(catalogRole);
+      }
+    }
+
+    return plan;
+  }
+
+  @Override
+  public SynchronizationPlan<GrantResource> planGrantSync(
+      String catalogName,
+      String catalogRoleName,
+      List<GrantResource> grantsOnSource,
+      List<GrantResource> grantsOnTarget) {
+    Set<GrantResource> grantsSourceSet = Set.copyOf(grantsOnSource);
+    Set<GrantResource> grantsTargetSet = Set.copyOf(grantsOnTarget);
+
+    SynchronizationPlan<GrantResource> plan = new SynchronizationPlan<>();
+
+    // special case: there is no concept of overwriting a grant; a grant either
+    // exists or it does not, so just create the missing ones
+    for (GrantResource grant : grantsOnSource) {
+      if (!grantsTargetSet.contains(grant)) {
+        plan.createEntity(grant);
+      }
+    }
+
+    // remove grants that are not on the source
+    for (GrantResource grant : grantsOnTarget) {
+      if (!grantsSourceSet.contains(grant)) {
+        plan.removeEntity(grant);
+      }
+    }
+
+    return plan;
+  }
+
+  @Override
+  public SynchronizationPlan<PrincipalRole> planAssignPrincipalRolesToCatalogRolesSync(
+      String catalogName,
+      String catalogRoleName,
+      List<PrincipalRole> assignedPrincipalRolesOnSource,
+      List<PrincipalRole> assignedPrincipalRolesOnTarget) {
+    Set<String> sourcePrincipalRoleNames =
+        assignedPrincipalRolesOnSource.stream()
+            .map(PrincipalRole::getName)
+            .collect(Collectors.toSet());
+    Set<String> targetPrincipalRoleNames =
+        assignedPrincipalRolesOnTarget.stream()
+            .map(PrincipalRole::getName)
+            .collect(Collectors.toSet());
+
+    SynchronizationPlan<PrincipalRole> plan = new SynchronizationPlan<>();
+
+    // special case: there is no concept of overwriting an assignment of a principal
+    // role to a catalog role; it either exists or it doesn't, and it cannot change
+    for (PrincipalRole principalRole : assignedPrincipalRolesOnSource) {
+      if (!targetPrincipalRoleNames.contains(principalRole.getName())) {
+        plan.createEntity(principalRole);
+      }
+    }
+
+    // revoke principal roles that do not exist on the source
+    for (PrincipalRole principalRole : assignedPrincipalRolesOnTarget) {
+      if (!sourcePrincipalRoleNames.contains(principalRole.getName())) {
+        plan.removeEntity(principalRole);
+      }
+    }
+
+    return plan;
+  }
+
+  @Override
+  public SynchronizationPlan<Namespace> planNamespaceSync(
+      String catalogName,
+      Namespace namespace,
+      List<Namespace> namespacesOnSource,
+      List<Namespace> namespacesOnTarget) {
+    SynchronizationPlan<Namespace> plan = new SynchronizationPlan<>();
+
+    for (Namespace ns : namespacesOnSource) {
+      if (namespacesOnTarget.contains(ns)) {
+        // overwrite the entity on the target with the entity on the source
+        plan.overwriteEntity(ns);
+      } else {
+        // if the namespace is not on the target, plan to create it
+        plan.createEntity(ns);
+      }
+    }
+
+    for (Namespace ns : namespacesOnTarget) {
+      if (!namespacesOnSource.contains(ns)) {
+        // remove namespaces that exist on the target but not on the source
+        plan.removeEntity(ns);
+      }
+    }
+
+    return plan;
+  }
+
+  @Override
+  public SynchronizationPlan<TableIdentifier> planTableSync(
+      String catalogName,
+      Namespace namespace,
+      Set<TableIdentifier> tablesOnSource,
+      Set<TableIdentifier> tablesOnTarget) {
+    SynchronizationPlan<TableIdentifier> plan = new SynchronizationPlan<>();
+
+    for (TableIdentifier tableIdentifier : tablesOnSource) {
+      if (tablesOnTarget.contains(tableIdentifier)) {
+        // overwrite tables that exist on both
+        plan.overwriteEntity(tableIdentifier);
+      } else {
+        // create tables that exist on the source but not on the target
+        plan.createEntity(tableIdentifier);
+      }
+    }
+
+    // remove tables that exist only on the target
+    for (TableIdentifier tableIdentifier : tablesOnTarget) {
+      if (!tablesOnSource.contains(tableIdentifier)) {
+        plan.removeEntity(tableIdentifier);
+      }
+    }
+
+    return plan;
+  }
+}
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/SynchronizationPlanner.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/SynchronizationPlanner.java
new file mode 100644
index 00000000..78edf9df
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/SynchronizationPlanner.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.planning;
+
+import java.util.List;
+import java.util.Set;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.polaris.core.admin.model.Catalog;
+import org.apache.polaris.core.admin.model.CatalogRole;
+import org.apache.polaris.core.admin.model.GrantResource;
+import org.apache.polaris.core.admin.model.PrincipalRole;
+import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan;
+
+/**
+ * Generic interface for generating synchronization plans for each entity type, based on which
+ * entities exist on the source and on the target.
+ */
+public interface SynchronizationPlanner {
+
+  SynchronizationPlan<PrincipalRole> planPrincipalRoleSync(
+      List<PrincipalRole> principalRolesOnSource, List<PrincipalRole> principalRolesOnTarget);
+
+  SynchronizationPlan<Catalog> planCatalogSync(
+      List<Catalog> catalogsOnSource, List<Catalog> catalogsOnTarget);
+
+  SynchronizationPlan<CatalogRole> planCatalogRoleSync(
+      String catalogName,
+      List<CatalogRole> catalogRolesOnSource,
+      List<CatalogRole> catalogRolesOnTarget);
+
+  SynchronizationPlan<GrantResource> planGrantSync(
+      String catalogName,
+      String catalogRoleName,
+      List<GrantResource> grantsOnSource,
+      List<GrantResource> grantsOnTarget);
+
+  SynchronizationPlan<PrincipalRole> planAssignPrincipalRolesToCatalogRolesSync(
+      String catalogName,
+      String catalogRoleName,
+      List<PrincipalRole> assignedPrincipalRolesOnSource,
+      List<PrincipalRole> assignedPrincipalRolesOnTarget);
+
+  SynchronizationPlan<Namespace> planNamespaceSync(
+      String catalogName,
+      Namespace namespace,
+      List<Namespace> namespacesOnSource,
+      List<Namespace> namespacesOnTarget);
+
+  SynchronizationPlan<TableIdentifier> planTableSync(
+      String catalogName,
+      Namespace namespace,
+      Set<TableIdentifier> tablesOnSource,
+      Set<TableIdentifier> tablesOnTarget);
+}
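Any policy that fits this contract can be slotted into the sync pipeline. As an illustrative sketch (not part of this patch), a planner that only ever creates principal roles missing from the target, and plans nothing else, could extend the no-op planner defined above:

```java
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.polaris.core.admin.model.PrincipalRole;
import org.apache.polaris.tools.sync.polaris.planning.NoOpSyncPlanner;
import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan;

/** Illustrative planner: create missing principal roles, never overwrite or remove. */
class CreateOnlyPrincipalRolePlanner extends NoOpSyncPlanner {

  @Override
  public SynchronizationPlan<PrincipalRole> planPrincipalRoleSync(
      List<PrincipalRole> principalRolesOnSource, List<PrincipalRole> principalRolesOnTarget) {
    Set<String> namesOnTarget =
        principalRolesOnTarget.stream().map(PrincipalRole::getName).collect(Collectors.toSet());

    SynchronizationPlan<PrincipalRole> plan = new SynchronizationPlan<>();
    for (PrincipalRole role : principalRolesOnSource) {
      if (!namesOnTarget.contains(role.getName())) {
        plan.createEntity(role); // additive only: existing target roles are left untouched
      }
    }
    return plan;
  }
}
```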
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/plan/PlannedAction.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/plan/PlannedAction.java
new file mode 100644
index 00000000..c5b5e500
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/plan/PlannedAction.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.planning.plan;
+
+/** The action that a synchronization plan schedules for an entity on the target. */
+public enum PlannedAction {
+
+  /** For entities that are being freshly created on the target. */
+  CREATE,
+
+  /** For entities that have to be dropped and recreated on the target. */
+  OVERWRITE,
+
+  /** For entities that need to be dropped from the target. */
+  REMOVE,
+
+  /**
+   * For entities that should be skipped. Note that their child entities will still be synced. For
+   * example, we may skip a catalog role, but its grants and assignments to principal roles will
+   * still be synced.
+   */
+  SKIP,
+
+  /**
+   * For entities that should be skipped because no modification was detected. As with {@link
+   * #SKIP}, their child entities will still be synced.
+   */
+  SKIP_NOT_MODIFIED,
+
+  /**
+   * For entities that should be skipped along with their child entities. Used in cases where we
+   * don't want to touch an entire entity tree. For example, we may not want to edit the catalog
+   * roles assigned to the service_admin.
+   */
+  SKIP_AND_SKIP_CHILDREN
+}
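A consumer of a plan typically dispatches on these actions; the three skip variants fall through because skipping requires no work on the entity itself, while child handling is decided separately. The sketch below shows the shape of that loop; `applyToTarget` and `removeFromTarget` are hypothetical stand-ins for whatever an executor actually does:

```java
import org.apache.polaris.core.admin.model.Catalog;
import org.apache.polaris.tools.sync.polaris.planning.plan.PlannedAction;
import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan;

class PlanExecutorSketch {

  void execute(SynchronizationPlan<Catalog> plan) {
    for (PlannedAction action : PlannedAction.values()) {
      for (Catalog catalog : plan.entitiesForAction(action)) {
        switch (action) {
          case CREATE, OVERWRITE -> applyToTarget(catalog);
          case REMOVE -> removeFromTarget(catalog);
          case SKIP, SKIP_NOT_MODIFIED, SKIP_AND_SKIP_CHILDREN -> {
            // nothing to do for the entity itself; whether children are visited
            // is determined via entitiesToSyncChildren()
          }
        }
      }
    }
  }

  private void applyToTarget(Catalog catalog) {} // hypothetical helper

  private void removeFromTarget(Catalog catalog) {} // hypothetical helper
}
```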
diff --git a/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/plan/SynchronizationPlan.java b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/plan/SynchronizationPlan.java
new file mode 100644
index 00000000..2153fc91
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/java/org/apache/polaris/tools/sync/polaris/planning/plan/SynchronizationPlan.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.planning.plan;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Data structure that holds the state of all the planned modifications that should be made on the
+ * target.
+ *
+ * @param <T> the entity type that the plan is for
+ */
+public class SynchronizationPlan<T> {
+
+  private final Map<PlannedAction, List<T>> entitiesForAction;
+
+  public SynchronizationPlan() {
+    this.entitiesForAction = new HashMap<>();
+
+    for (PlannedAction action : PlannedAction.values()) {
+      this.entitiesForAction.put(action, new ArrayList<>());
+    }
+  }
+
+  public List<T> entitiesForAction(PlannedAction action) {
+    return entitiesForAction.get(action);
+  }
+
+  public List<T> entitiesToCreate() {
+    return entitiesForAction(PlannedAction.CREATE);
+  }
+
+  public List<T> entitiesToOverwrite() {
+    return entitiesForAction(PlannedAction.OVERWRITE);
+  }
+
+  public List<T> entitiesToRemove() {
+    return entitiesForAction(PlannedAction.REMOVE);
+  }
+
+  public List<T> entitiesToSkip() {
+    return entitiesForAction(PlannedAction.SKIP);
+  }
+
+  public List<T> entitiesNotModified() {
+    return entitiesForAction(PlannedAction.SKIP_NOT_MODIFIED);
+  }
+
+  public List<T> entitiesToSkipAndSkipChildren() {
+    return entitiesForAction(PlannedAction.SKIP_AND_SKIP_CHILDREN);
+  }
+
+  public List<T> entitiesToSyncChildren() {
+    List<T> entities = new ArrayList<>();
+
+    for (PlannedAction action : PlannedAction.values()) {
+      if (action != PlannedAction.SKIP_AND_SKIP_CHILDREN && action != PlannedAction.REMOVE) {
+        entities.addAll(entitiesForAction(action));
+      }
+    }
+
+    return entities;
+  }
+
+  public void actOnEntity(PlannedAction action, T entity) {
+    this.entitiesForAction.get(action).add(entity);
+  }
+
+  public void createEntity(T entity) {
+    this.actOnEntity(PlannedAction.CREATE, entity);
+  }
+
+  public void overwriteEntity(T entity) {
+    this.actOnEntity(PlannedAction.OVERWRITE, entity);
+  }
+
+  public void removeEntity(T entity) {
+    this.actOnEntity(PlannedAction.REMOVE, entity);
+  }
+
+  public void skipEntity(T entity) {
+    this.actOnEntity(PlannedAction.SKIP, entity);
+  }
+
+  public void skipEntityNotModified(T entity) {
+    this.actOnEntity(PlannedAction.SKIP_NOT_MODIFIED, entity);
+  }
+
+  public void skipEntityAndSkipChildren(T entity) {
+    this.actOnEntity(PlannedAction.SKIP_AND_SKIP_CHILDREN, entity);
+  }
+}
diff --git a/polaris-synchronizer/api/src/main/resources/polaris-management-service.yml b/polaris-synchronizer/api/src/main/resources/polaris-management-service.yml
new file mode 100644
index 00000000..ecf374c7
--- /dev/null
+++ b/polaris-synchronizer/api/src/main/resources/polaris-management-service.yml
@@ -0,0 +1,1432 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
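The SynchronizationPlan container above is essentially a map from action to an entity list. One detail worth noting: entitiesToSyncChildren() excludes REMOVE and SKIP_AND_SKIP_CHILDREN, so the subtrees of deleted or fenced-off entities are never descended into. A small illustrative run:

```java
import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan;

class PlanUsageSketch {
  public static void main(String[] args) {
    SynchronizationPlan<String> plan = new SynchronizationPlan<>();
    plan.createEntity("ns_new");
    plan.skipEntityNotModified("ns_unchanged");
    plan.removeEntity("ns_dropped");
    plan.skipEntityAndSkipChildren("ns_fenced");

    System.out.println(plan.entitiesToCreate()); // [ns_new]
    // Children are visited for created, overwritten, and skipped entities,
    // but not for removed or skip-and-skip-children entities:
    System.out.println(plan.entitiesToSyncChildren()); // [ns_new, ns_unchanged]
  }
}
```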
+ + +openapi: 3.0.3 +info: + title: Polaris Management Service + version: 0.0.1 + description: + Defines the management APIs for using Polaris to create and manage Iceberg catalogs and their principals +servers: + - url: "{scheme}://{host}/api/management/v1" + description: Server URL when the port can be inferred from the scheme + variables: + scheme: + description: The scheme of the URI, either http or https. + default: https + host: + description: The host address for the specified server + default: localhost +# All routes are currently configured using an Authorization header. +security: + - OAuth2: [] + +paths: + /catalogs: + get: + operationId: listCatalogs + description: List all catalogs in this polaris service + responses: + 200: + description: List of catalogs in the polaris service + content: + application/json: + schema: + $ref: "#/components/schemas/Catalogs" + 403: + description: "The caller does not have permission to list catalog details" + post: + operationId: createCatalog + description: Add a new Catalog + requestBody: + description: The Catalog to create + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateCatalogRequest" + responses: + 201: + description: "Successful response" + 403: + description: "The caller does not have permission to create a catalog" + 404: + description: "The catalog does not exist" + 409: + description: "A catalog with the specified name already exists" + + /catalogs/{catalogName}: + parameters: + - name: catalogName + in: path + description: The name of the catalog + required: true + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + get: + operationId: getCatalog + description: Get the details of a catalog + responses: + 200: + description: The catalog details + content: + application/json: + schema: + $ref: "#/components/schemas/Catalog" + 403: + description: "The caller does not have permission to read catalog details" + 404: + description: "The catalog does not exist" + + put: + operationId: updateCatalog + description: Update an existing catalog + requestBody: + description: The catalog details to use in the update + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/UpdateCatalogRequest" + responses: + 200: + description: The catalog details + content: + application/json: + schema: + $ref: "#/components/schemas/Catalog" + 403: + description: "The caller does not have permission to update catalog details" + 404: + description: "The catalog does not exist" + 409: + description: "The entity version doesn't match the currentEntityVersion; retry after fetching latest version" + + delete: + operationId: deleteCatalog + description: Delete an existing catalog. The catalog must be empty. 
+ responses: + 204: + description: "Success, no content" + 403: + description: "The caller does not have permission to delete a catalog" + 404: + description: "The catalog does not exist" + + /principals: + get: + operationId: listPrincipals + description: List the principals for the current catalog + responses: + 200: + description: List of principals for this catalog + content: + application/json: + schema: + $ref: "#/components/schemas/Principals" + 403: + description: "The caller does not have permission to list catalog admins" + 404: + description: "The catalog does not exist" + + post: + operationId: createPrincipal + description: Create a principal + requestBody: + description: The principal to create + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreatePrincipalRequest" + responses: + 201: + description: "Successful response" + content: + application/json: + schema: + $ref: "#/components/schemas/PrincipalWithCredentials" + 403: + description: "The caller does not have permission to add a principal" + + /principals/{principalName}: + parameters: + - name: principalName + in: path + description: The principal name + required: true + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + get: + operationId: getPrincipal + description: Get the principal details + responses: + 200: + description: The requested principal + content: + application/json: + schema: + $ref: "#/components/schemas/Principal" + 403: + description: "The caller does not have permission to get principal details" + 404: + description: "The catalog or principal does not exist" + + put: + operationId: updatePrincipal + description: Update an existing principal + requestBody: + description: The principal details to use in the update + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/UpdatePrincipalRequest" + responses: + 200: + description: The updated principal + content: + application/json: + schema: + $ref: "#/components/schemas/Principal" + 403: + description: "The caller does not have permission to update principal details" + 404: + description: "The principal does not exist" + 409: + description: "The entity version doesn't match the currentEntityVersion; retry after fetching latest version" + + delete: + operationId: deletePrincipal + description: Remove a principal from polaris + responses: + 204: + description: "Success, no content" + 403: + description: "The caller does not have permission to delete a principal" + 404: + description: "The principal does not exist" + + /principals/{principalName}/rotate: + parameters: + - name: principalName + in: path + description: The user name + required: true + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + post: + operationId: rotateCredentials + description: Rotate a principal's credentials. The new credentials will be returned in the response. This is the only + API, aside from createPrincipal, that returns the user's credentials. This API is *not* idempotent. 
responses:
+        200:
+          description: The principal details along with the newly rotated credentials
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/PrincipalWithCredentials"
+        403:
+          description: "The caller does not have permission to rotate credentials"
+        404:
+          description: "The principal does not exist"
+
+  /principals/{principalName}/principal-roles:
+    parameters:
+      - name: principalName
+        in: path
+        description: The name of the target principal
+        required: true
+        schema:
+          type: string
+          minLength: 1
+          maxLength: 256
+          pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$'
+    get:
+      operationId: listPrincipalRolesAssigned
+      description: List the roles assigned to the principal
+      responses:
+        200:
+          description: List of roles assigned to this principal
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/PrincipalRoles"
+        403:
+          description: "The caller does not have permission to list roles"
+        404:
+          description: "The principal or catalog does not exist"
+
+    put:
+      operationId: assignPrincipalRole
+      description: Add a role to the principal
+      requestBody:
+        description: The principal role to assign
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/GrantPrincipalRoleRequest"
+      responses:
+        201:
+          description: "Successful response"
+        403:
+          description: "The caller does not have permission to assign a role to the principal"
+        404:
+          description: "The catalog, the principal, or the role does not exist"
+
+  /principals/{principalName}/principal-roles/{principalRoleName}:
+    parameters:
+      - name: principalName
+        in: path
+        description: The name of the target principal
+        required: true
+        schema:
+          type: string
+          minLength: 1
+          maxLength: 256
+          pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$'
+      - name: principalRoleName
+        in: path
+        description: The name of the role
+        required: true
+        schema:
+          type: string
+          minLength: 1
+          maxLength: 256
+          pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$'
+    delete:
+      operationId: revokePrincipalRole
+      description: Remove a role from a catalog principal
+      responses:
+        204:
+          description: "Success, no content"
+        403:
+          description: "The caller does not have permission to remove a role from the principal"
+        404:
+          description: "The catalog or principal does not exist"
+
+  /principal-roles:
+    get:
+      operationId: listPrincipalRoles
+      description: List the principal roles
+      responses:
+        200:
+          description: List of principal roles
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/PrincipalRoles"
+        403:
+          description: "The caller does not have permission to list principal roles"
+        404:
+          description: "The catalog does not exist"
+
+    post:
+      operationId: createPrincipalRole
+      description: Create a principal role
+      requestBody:
+        description: The principal role to create
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/CreatePrincipalRoleRequest"
+      responses:
+        201:
+          description: "Successful response"
+        403:
+          description: "The caller does not have permission to add a principal role"
+
+  /principal-roles/{principalRoleName}:
+    parameters:
+      - name: principalRoleName
+        in: path
+        description: The principal role name
+        required: true
+        schema:
+          type: string
+          minLength: 1
+          maxLength: 256
+          pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$'
+    get:
+      operationId: getPrincipalRole
+      description: Get the principal role details
+      responses:
+        200:
+          description: The requested principal role
+          content:
+            application/json:
schema:
+                $ref: "#/components/schemas/PrincipalRole"
+        403:
+          description: "The caller does not have permission to get principal role details"
+        404:
+          description: "The principal role does not exist"
+
+    put:
+      operationId: updatePrincipalRole
+      description: Update an existing principal role
+      requestBody:
+        description: The principal role details to use in the update
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/UpdatePrincipalRoleRequest"
+      responses:
+        200:
+          description: The updated principal role
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/PrincipalRole"
+        403:
+          description: "The caller does not have permission to update principal role details"
+        404:
+          description: "The principal role does not exist"
+        409:
+          description: "The entity version doesn't match the currentEntityVersion; retry after fetching latest version"
+
+    delete:
+      operationId: deletePrincipalRole
+      description: Remove a principal role from Polaris
+      responses:
+        204:
+          description: "Success, no content"
+        403:
+          description: "The caller does not have permission to delete a principal role"
+        404:
+          description: "The principal role does not exist"
+
+  /principal-roles/{principalRoleName}/principals:
+    parameters:
+      - name: principalRoleName
+        in: path
+        description: The principal role name
+        required: true
+        schema:
+          type: string
+          minLength: 1
+          maxLength: 256
+          pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$'
+    get:
+      operationId: listAssigneePrincipalsForPrincipalRole
+      description: List the Principals to whom the target principal role has been assigned
+      responses:
+        200:
+          description: List the Principals to whom the target principal role has been assigned
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Principals"
+        403:
+          description: "The caller does not have permission to list principals"
+        404:
+          description: "The principal role does not exist"
+
+  /principal-roles/{principalRoleName}/catalog-roles/{catalogName}:
+    parameters:
+      - name: principalRoleName
+        in: path
+        description: The principal role name
+        required: true
+        schema:
+          type: string
+          minLength: 1
+          maxLength: 256
+          pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$'
+      - name: catalogName
+        in: path
+        required: true
+        description: The name of the catalog where the catalog roles reside
+        schema:
+          type: string
+          minLength: 1
+          maxLength: 256
+          pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$'
+    get:
+      operationId: listCatalogRolesForPrincipalRole
+      description: Get the catalog roles mapped to the principal role
+      responses:
+        200:
+          description: The list of catalog roles mapped to the principal role
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/CatalogRoles"
+        403:
+          description: "The caller does not have permission to list catalog roles"
+        404:
+          description: "The principal role does not exist"
+
+    put:
+      operationId: assignCatalogRoleToPrincipalRole
+      description: Assign a catalog role to a principal role
+      requestBody:
+        description: The catalog role to assign
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/GrantCatalogRoleRequest"
+      responses:
+        201:
+          description: "Successful response"
+        403:
+          description: "The caller does not have permission to assign a catalog role"
+
+  /principal-roles/{principalRoleName}/catalog-roles/{catalogName}/{catalogRoleName}:
+    parameters:
+      - name: principalRoleName
+        in: path
+        description: The principal role name
+        required: true
+        schema:
type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + - name: catalogName + in: path + description: The name of the catalog that contains the role to revoke + required: true + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + - name: catalogRoleName + in: path + description: The name of the catalog role that should be revoked + required: true + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + delete: + operationId: revokeCatalogRoleFromPrincipalRole + description: Remove a catalog role from a principal role + responses: + 204: + description: "Success, no content" + 403: + description: "The caller does not have permission to revoke a catalog role" + 404: + description: "The principal role does not exist" + + /catalogs/{catalogName}/catalog-roles: + parameters: + - name: catalogName + in: path + description: The catalog for which we are reading/updating roles + required: true + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + get: + operationId: listCatalogRoles + description: List existing roles in the catalog + responses: + 200: + description: The list of roles that exist in this catalog + content: + application/json: + schema: + $ref: "#/components/schemas/CatalogRoles" + post: + operationId: createCatalogRole + description: Create a new role in the catalog + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/CreateCatalogRoleRequest" + responses: + 201: + description: "Successful response" + 403: + description: "The principal is not authorized to create roles" + 404: + description: "The catalog does not exist" + + /catalogs/{catalogName}/catalog-roles/{catalogRoleName}: + parameters: + - name: catalogName + in: path + description: The catalog for which we are retrieving roles + required: true + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + - name: catalogRoleName + in: path + description: The name of the role + required: true + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + get: + operationId: getCatalogRole + description: Get the details of an existing role + responses: + 200: + description: The specified role details + content: + application/json: + schema: + $ref: "#/components/schemas/CatalogRole" + 403: + description: "The principal is not authorized to read role data" + 404: + description: "The catalog or the role does not exist" + + put: + operationId: updateCatalogRole + description: Update an existing role in the catalog + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/UpdateCatalogRoleRequest" + responses: + 200: + description: The specified role details + content: + application/json: + schema: + $ref: "#/components/schemas/CatalogRole" + 403: + description: "The principal is not authorized to update roles" + 404: + description: "The catalog or the role does not exist" + 409: + description: "The entity version doesn't match the currentEntityVersion; retry after fetching latest version" + + delete: + operationId: deleteCatalogRole + description: Delete an existing role from the catalog. 
All associated grants will also be deleted + responses: + 204: + description: "Success, no content" + 403: + description: "The principal is not authorized to delete roles" + 404: + description: "The catalog or the role does not exist" + + /catalogs/{catalogName}/catalog-roles/{catalogRoleName}/principal-roles: + parameters: + - name: catalogName + in: path + required: true + description: The name of the catalog where the catalog role resides + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + - name: catalogRoleName + in: path + required: true + description: The name of the catalog role + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + get: + operationId: listAssigneePrincipalRolesForCatalogRole + description: List the PrincipalRoles to which the target catalog role has been assigned + responses: + 200: + description: List the PrincipalRoles to which the target catalog role has been assigned + content: + application/json: + schema: + $ref: "#/components/schemas/PrincipalRoles" + 403: + description: "The caller does not have permission to list principal roles" + 404: + description: "The catalog or catalog role does not exist" + + /catalogs/{catalogName}/catalog-roles/{catalogRoleName}/grants: + parameters: + - name: catalogName + in: path + required: true + description: The name of the catalog where the role will receive the grant + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + - name: catalogRoleName + in: path + required: true + description: The name of the role receiving the grant (must exist) + schema: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + get: + operationId: listGrantsForCatalogRole + description: List the grants the catalog role holds + responses: + 200: + description: List of all grants given to the role in this catalog + content: + application/json: + schema: + $ref: "#/components/schemas/GrantResources" + put: + operationId: addGrantToCatalogRole + description: Add a new grant to the catalog role + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/AddGrantRequest" + responses: + 201: + description: "Successful response" + 403: + description: "The principal is not authorized to create grants" + 404: + description: "The catalog or the role does not exist" + post: + operationId: revokeGrantFromCatalogRole + description: + Delete a specific grant from the role. This may be a subset or a superset of the grants the role has. In case of + a subset, the role will retain the grants not specified. If the `cascade` parameter is true, grant revocation + will have a cascading effect - that is, if a principal has specific grants on a subresource, and grants are revoked + on a parent resource, the grants present on the subresource will be revoked as well. By default, this behavior + is disabled and grant revocation only affects the specified resource. + parameters: + - name: cascade + in: query + schema: + type: boolean + default: false + description: If true, the grant revocation cascades to all subresources. 
+ requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/RevokeGrantRequest" + responses: + 201: + description: "Successful response" + 403: + description: "The principal is not authorized to create grants" + 404: + description: "The catalog or the role does not exist" + +components: + securitySchemes: + OAuth2: + type: oauth2 + description: Uses OAuth 2 with client credentials flow + flows: + implicit: + authorizationUrl: "{scheme}://{host}/api/v1/oauth/tokens" + scopes: {} + + schemas: + Catalogs: + type: object + description: A list of Catalog objects + properties: + catalogs: + type: array + items: + $ref: "#/components/schemas/Catalog" + required: + - catalogs + + CreateCatalogRequest: + type: object + description: Request to create a new catalog + properties: + catalog: + $ref: "#/components/schemas/Catalog" + required: + - catalog + + Catalog: + type: object + description: A catalog object. A catalog may be internal or external. External catalogs are managed entirely by + an external catalog interface. Third party catalogs may be other Iceberg REST implementations or other services + with their own proprietary APIs + properties: + type: + type: string + enum: + - INTERNAL + - EXTERNAL + description: the type of catalog - internal or external + default: INTERNAL + name: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + description: The name of the catalog + properties: + type: object + properties: + default-base-location: + type: string + additionalProperties: + type: string + required: + - default-base-location + createTimestamp: + type: integer + format: "int64" + description: The creation time represented as unix epoch timestamp in milliseconds + lastUpdateTimestamp: + type: integer + format: "int64" + description: The last update time represented as unix epoch timestamp in milliseconds + entityVersion: + type: integer + description: The version of the catalog object used to determine if the catalog metadata has changed + storageConfigInfo: + $ref: "#/components/schemas/StorageConfigInfo" + required: + - name + - type + - storageConfigInfo + - properties + discriminator: + propertyName: type + mapping: + INTERNAL: "#/components/schemas/PolarisCatalog" + EXTERNAL: "#/components/schemas/ExternalCatalog" + + + PolarisCatalog: + type: object + allOf: + - $ref: "#/components/schemas/Catalog" + description: The base catalog type - this contains all the fields necessary to construct an INTERNAL catalog + + ExternalCatalog: + description: An externally managed catalog + type: object + allOf: + - $ref: "#/components/schemas/Catalog" + - type: object + properties: + connectionConfigInfo: + $ref: "#/components/schemas/ConnectionConfigInfo" + + ConnectionConfigInfo: + type: object + description: A connection configuration representing a remote catalog service. IMPORTANT - Specifying a + ConnectionConfigInfo in an ExternalCatalog is currently an experimental API and is subject to change. 
+ properties: + connectionType: + type: string + enum: + - ICEBERG_REST + description: The type of remote catalog service represented by this connection + uri: + type: string + description: URI to the remote catalog service + authenticationParameters: + $ref: "#/components/schemas/AuthenticationParameters" + required: + - connectionType + discriminator: + propertyName: connectionType + mapping: + ICEBERG_REST: "#/components/schemas/IcebergRestConnectionConfigInfo" + + IcebergRestConnectionConfigInfo: + type: object + description: Configuration necessary for connecting to an Iceberg REST Catalog + allOf: + - $ref: '#/components/schemas/ConnectionConfigInfo' + properties: + remoteCatalogName: + type: string + description: The name of a remote catalog instance within the remote catalog service; in some older systems + this is specified as the 'warehouse' when multiple logical catalogs are served under the same base + uri, and often translates into a 'prefix' added to all REST resource paths + + AuthenticationParameters: + type: object + description: Authentication-specific information for a REST connection + properties: + authenticationType: + type: string + enum: + - OAUTH + - BEARER + description: The type of authentication to use when connecting to the remote rest service + required: + - authenticationType + discriminator: + propertyName: authenticationType + mapping: + OAUTH: "#/components/schemas/OAuthClientCredentialsParameters" + BEARER: "#/components/schemas/BearerAuthenticationParameters" + + OAuthClientCredentialsParameters: + type: object + description: OAuth authentication based on client_id/client_secret + allOf: + - $ref: '#/components/schemas/AuthenticationParameters' + properties: + tokenUri: + type: string + description: Token server URI + clientId: + type: string + description: oauth client id + clientSecret: + type: string + format: password + description: oauth client secret (input-only) + scopes: + type: array + items: + type: string + description: oauth scopes to specify when exchanging for a short-lived access token + + BearerAuthenticationParameters: + type: object + description: Bearer authentication directly embedded in request auth headers + allOf: + - $ref: '#/components/schemas/AuthenticationParameters' + properties: + bearerToken: + type: string + format: password + description: Bearer token (input-only) + + StorageConfigInfo: + type: object + description: A storage configuration used by catalogs + properties: + storageType: + type: string + enum: + - S3 + - GCS + - AZURE + - FILE + description: The cloud provider type this storage is built on. 
FILE is supported for testing purposes only + allowedLocations: + type: array + items: + type: string + example: "For AWS [s3://bucketname/prefix/], for AZURE [abfss://container@storageaccount.blob.core.windows.net/prefix/], for GCP [gs://bucketname/prefix/]" + required: + - storageType + discriminator: + propertyName: storageType + mapping: + S3: "#/components/schemas/AwsStorageConfigInfo" + AZURE: "#/components/schemas/AzureStorageConfigInfo" + GCS: "#/components/schemas/GcpStorageConfigInfo" + FILE: "#/components/schemas/FileStorageConfigInfo" + + AwsStorageConfigInfo: + type: object + description: aws storage configuration info + allOf: + - $ref: '#/components/schemas/StorageConfigInfo' + properties: + roleArn: + type: string + description: the aws role arn that grants privileges on the S3 buckets + example: "arn:aws:iam::123456789001:principal/abc1-b-self1234" + externalId: + type: string + description: an optional external id used to establish a trust relationship with AWS in the trust policy + userArn: + type: string + description: the aws user arn used to assume the aws role + example: "arn:aws:iam::123456789001:user/abc1-b-self1234" + region: + type: string + description: the aws region where data is stored + example: "us-east-2" + required: + - roleArn + + AzureStorageConfigInfo: + type: object + description: azure storage configuration info + allOf: + - $ref: '#/components/schemas/StorageConfigInfo' + properties: + tenantId: + type: string + description: the tenant id that the storage accounts belong to + multiTenantAppName: + type: string + description: the name of the azure client application + consentUrl: + type: string + description: URL to the Azure permissions request page + required: + - tenantId + + GcpStorageConfigInfo: + type: object + description: gcp storage configuration info + allOf: + - $ref: '#/components/schemas/StorageConfigInfo' + properties: + gcsServiceAccount: + type: string + description: a Google cloud storage service account + + FileStorageConfigInfo: + type: object + description: file storage configuration info + allOf: + - $ref: '#/components/schemas/StorageConfigInfo' + + UpdateCatalogRequest: + description: Updates to apply to a Catalog. Any fields which are required in the Catalog + will remain unaltered if omitted from the contents of this Update request. + type: object + properties: + currentEntityVersion: + type: integer + description: The version of the object onto which this update is applied; if the object changed, the update will fail and the caller should retry after fetching the latest version. + properties: + type: object + additionalProperties: + type: string + storageConfigInfo: + $ref: "#/components/schemas/StorageConfigInfo" + + Principals: + description: A list of Principals + type: object + properties: + principals: + type: array + items: + $ref: "#/components/schemas/Principal" + required: + - principals + + PrincipalWithCredentials: + description: A user with its client id and secret. 
This type is returned when a new principal is created or when its + credentials are rotated + type: object + properties: + principal: + $ref: "#/components/schemas/Principal" + credentials: + type: object + properties: + clientId: + type: string + clientSecret: + type: string + format: password + required: + - principal + - credentials + + CreatePrincipalRequest: + type: object + properties: + principal: + $ref: '#/components/schemas/Principal' + credentialRotationRequired: + type: boolean + description: If true, the initial credentials can only be used to call rotateCredentials + + Principal: + description: A Polaris principal. + type: object + properties: + name: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + clientId: + type: string + description: The output-only OAuth clientId associated with this principal if applicable + properties: + type: object + additionalProperties: + type: string + createTimestamp: + type: integer + format: "int64" + lastUpdateTimestamp: + type: integer + format: "int64" + entityVersion: + type: integer + description: The version of the principal object used to determine if the principal metadata has changed + required: + - name + + UpdatePrincipalRequest: + description: Updates to apply to a Principal + type: object + properties: + currentEntityVersion: + type: integer + description: The version of the object onto which this update is applied; if the object changed, the update will fail and the caller should retry after fetching the latest version. + properties: + type: object + additionalProperties: + type: string + required: + - currentEntityVersion + - properties + + PrincipalRoles: + type: object + properties: + roles: + type: array + items: + $ref: "#/components/schemas/PrincipalRole" + required: + - roles + + GrantPrincipalRoleRequest: + type: object + properties: + principalRole: + $ref: '#/components/schemas/PrincipalRole' + + CreatePrincipalRoleRequest: + type: object + properties: + principalRole: + $ref: '#/components/schemas/PrincipalRole' + + PrincipalRole: + type: object + properties: + name: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + description: The name of the role + properties: + type: object + additionalProperties: + type: string + createTimestamp: + type: integer + format: "int64" + lastUpdateTimestamp: + type: integer + format: "int64" + entityVersion: + type: integer + description: The version of the principal role object used to determine if the principal role metadata has changed + required: + - name + + UpdatePrincipalRoleRequest: + description: Updates to apply to a Principal Role + type: object + properties: + currentEntityVersion: + type: integer + description: The version of the object onto which this update is applied; if the object changed, the update will fail and the caller should retry after fetching the latest version. 
+ properties: + type: object + additionalProperties: + type: string + required: + - currentEntityVersion + - properties + + CatalogRoles: + type: object + properties: + roles: + type: array + items: + $ref: "#/components/schemas/CatalogRole" + description: The list of catalog roles + required: + - roles + + GrantCatalogRoleRequest: + type: object + properties: + catalogRole: + $ref: '#/components/schemas/CatalogRole' + + CreateCatalogRoleRequest: + type: object + properties: + catalogRole: + $ref: '#/components/schemas/CatalogRole' + + CatalogRole: + type: object + properties: + name: + type: string + minLength: 1 + maxLength: 256 + pattern: '^(?!\s*[s|S][y|Y][s|S][t|T][e|E][m|M]\$).*$' + description: The name of the role + properties: + type: object + additionalProperties: + type: string + createTimestamp: + type: integer + format: "int64" + lastUpdateTimestamp: + type: integer + format: "int64" + entityVersion: + type: integer + description: The version of the catalog role object used to determine if the catalog role metadata has changed + required: + - name + + UpdateCatalogRoleRequest: + description: Updates to apply to a Catalog Role + type: object + properties: + currentEntityVersion: + type: integer + description: The version of the object onto which this update is applied; if the object changed, the update will fail and the caller should retry after fetching the latest version. + properties: + type: object + additionalProperties: + type: string + required: + - currentEntityVersion + - properties + + ViewPrivilege: + type: string + enum: + - CATALOG_MANAGE_ACCESS + - VIEW_DROP + - VIEW_LIST + - VIEW_READ_PROPERTIES + - VIEW_WRITE_PROPERTIES + - VIEW_FULL_METADATA + + TablePrivilege: + type: string + enum: + - CATALOG_MANAGE_ACCESS + - TABLE_DROP + - TABLE_LIST + - TABLE_READ_PROPERTIES + - TABLE_WRITE_PROPERTIES + - TABLE_READ_DATA + - TABLE_WRITE_DATA + - TABLE_FULL_METADATA + + NamespacePrivilege: + type: string + enum: + - CATALOG_MANAGE_ACCESS + - CATALOG_MANAGE_CONTENT + - CATALOG_MANAGE_METADATA + - NAMESPACE_CREATE + - TABLE_CREATE + - VIEW_CREATE + - NAMESPACE_DROP + - TABLE_DROP + - VIEW_DROP + - NAMESPACE_LIST + - TABLE_LIST + - VIEW_LIST + - NAMESPACE_READ_PROPERTIES + - TABLE_READ_PROPERTIES + - VIEW_READ_PROPERTIES + - NAMESPACE_WRITE_PROPERTIES + - TABLE_WRITE_PROPERTIES + - VIEW_WRITE_PROPERTIES + - TABLE_READ_DATA + - TABLE_WRITE_DATA + - NAMESPACE_FULL_METADATA + - TABLE_FULL_METADATA + - VIEW_FULL_METADATA + + CatalogPrivilege: + type: string + enum: + - CATALOG_MANAGE_ACCESS + - CATALOG_MANAGE_CONTENT + - CATALOG_MANAGE_METADATA + - CATALOG_READ_PROPERTIES + - CATALOG_WRITE_PROPERTIES + - NAMESPACE_CREATE + - TABLE_CREATE + - VIEW_CREATE + - NAMESPACE_DROP + - TABLE_DROP + - VIEW_DROP + - NAMESPACE_LIST + - TABLE_LIST + - VIEW_LIST + - NAMESPACE_READ_PROPERTIES + - TABLE_READ_PROPERTIES + - VIEW_READ_PROPERTIES + - NAMESPACE_WRITE_PROPERTIES + - TABLE_WRITE_PROPERTIES + - VIEW_WRITE_PROPERTIES + - TABLE_READ_DATA + - TABLE_WRITE_DATA + - NAMESPACE_FULL_METADATA + - TABLE_FULL_METADATA + - VIEW_FULL_METADATA + + AddGrantRequest: + type: object + properties: + grant: + $ref: '#/components/schemas/GrantResource' + + RevokeGrantRequest: + type: object + properties: + grant: + $ref: '#/components/schemas/GrantResource' + + ViewGrant: + allOf: + - $ref: '#/components/schemas/GrantResource' + - type: object + properties: + namespace: + type: array + items: + type: string + viewName: + type: string + minLength: 1 + maxLength: 256 + privilege: + $ref: 
'#/components/schemas/ViewPrivilege' + required: + - namespace + - viewName + - privilege + + TableGrant: + allOf: + - $ref: '#/components/schemas/GrantResource' + - type: object + properties: + namespace: + type: array + items: + type: string + tableName: + type: string + minLength: 1 + maxLength: 256 + privilege: + $ref: '#/components/schemas/TablePrivilege' + required: + - namespace + - tableName + - privilege + + NamespaceGrant: + allOf: + - $ref: '#/components/schemas/GrantResource' + - type: object + properties: + namespace: + type: array + items: + type: string + privilege: + $ref: '#/components/schemas/NamespacePrivilege' + required: + - namespace + - privilege + + + CatalogGrant: + allOf: + - $ref: '#/components/schemas/GrantResource' + - type: object + properties: + privilege: + $ref: '#/components/schemas/CatalogPrivilege' + required: + - privilege + + GrantResource: + type: object + discriminator: + propertyName: type + mapping: + catalog: '#/components/schemas/CatalogGrant' + namespace: '#/components/schemas/NamespaceGrant' + table: '#/components/schemas/TableGrant' + view: '#/components/schemas/ViewGrant' + properties: + type: + type: string + enum: + - catalog + - namespace + - table + - view + required: + - type + + GrantResources: + type: object + properties: + grants: + type: array + items: + $ref: "#/components/schemas/GrantResource" + required: + - grants \ No newline at end of file diff --git a/polaris-synchronizer/api/src/test/java/org/apache/polaris/tools/sync/polaris/AccessControlAwarePlannerTest.java b/polaris-synchronizer/api/src/test/java/org/apache/polaris/tools/sync/polaris/AccessControlAwarePlannerTest.java new file mode 100644 index 00000000..f40e6b1d --- /dev/null +++ b/polaris-synchronizer/api/src/test/java/org/apache/polaris/tools/sync/polaris/AccessControlAwarePlannerTest.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.tools.sync.polaris; + +import java.util.List; +import org.apache.polaris.core.admin.model.CatalogRole; +import org.apache.polaris.core.admin.model.PrincipalRole; +import org.apache.polaris.tools.sync.polaris.access.AccessControlConstants; +import org.apache.polaris.tools.sync.polaris.planning.AccessControlAwarePlanner; +import org.apache.polaris.tools.sync.polaris.planning.NoOpSyncPlanner; +import org.apache.polaris.tools.sync.polaris.planning.SynchronizationPlanner; +import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class AccessControlAwarePlannerTest { + + private static final PrincipalRole omnipotentPrincipalRoleSource = + new PrincipalRole() + .name("omnipotent-principal-XXXXX") + .putPropertiesItem(AccessControlConstants.OMNIPOTENCE_PROPERTY, ""); + + private static final PrincipalRole omnipotentPrincipalRoleTarget = + new PrincipalRole() + .name("omnipotent-principal-YYYYY") + .putPropertiesItem(AccessControlConstants.OMNIPOTENCE_PROPERTY, ""); + + @Test + public void filtersOmnipotentPrincipalRoles() { + SynchronizationPlanner accessControlAwarePlanner = + new AccessControlAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + accessControlAwarePlanner.planPrincipalRoleSync( + List.of(omnipotentPrincipalRoleSource), List.of(omnipotentPrincipalRoleTarget)); + + Assertions.assertTrue(plan.entitiesToSkip().contains(omnipotentPrincipalRoleSource)); + Assertions.assertTrue(plan.entitiesToSkip().contains(omnipotentPrincipalRoleTarget)); + } + + private static final PrincipalRole serviceAdminSource = new PrincipalRole().name("service_admin"); + + private static final PrincipalRole serviceAdminTarget = new PrincipalRole().name("service_admin"); + + @Test + public void filtersServiceAdmin() { + SynchronizationPlanner accessControlAwarePlanner = + new AccessControlAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + accessControlAwarePlanner.planPrincipalRoleSync( + List.of(serviceAdminSource), List.of(serviceAdminTarget)); + + Assertions.assertTrue(plan.entitiesToSkip().contains(serviceAdminSource)); + Assertions.assertTrue(plan.entitiesToSkip().contains(serviceAdminTarget)); + } + + private static final CatalogRole omnipotentCatalogRoleSource = + new CatalogRole() + .name("omnipotent-principal-XXXXX") + .putPropertiesItem(AccessControlConstants.OMNIPOTENCE_PROPERTY, ""); + + private static final CatalogRole omnipotentCatalogRoleTarget = + new CatalogRole() + .name("omnipotent-principal-YYYYY") + .putPropertiesItem(AccessControlConstants.OMNIPOTENCE_PROPERTY, ""); + + @Test + public void filtersOmnipotentCatalogRolesAndChildren() { + SynchronizationPlanner accessControlAwarePlanner = + new AccessControlAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + accessControlAwarePlanner.planCatalogRoleSync( + "catalogName", + List.of(omnipotentCatalogRoleSource), + List.of(omnipotentCatalogRoleTarget)); + + Assertions.assertTrue( + plan.entitiesToSkipAndSkipChildren().contains(omnipotentCatalogRoleSource)); + Assertions.assertTrue( + plan.entitiesToSkipAndSkipChildren().contains(omnipotentCatalogRoleTarget)); + } + + private static final CatalogRole catalogAdminSource = new CatalogRole().name("catalog_admin"); + + private static final CatalogRole catalogAdminTarget = new CatalogRole().name("catalog_admin"); + + @Test + public void filtersCatalogAdminAndChildren() { + SynchronizationPlanner 
accessControlAwarePlanner = + new AccessControlAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + accessControlAwarePlanner.planCatalogRoleSync( + "catalogName", List.of(catalogAdminSource), List.of(catalogAdminTarget)); + + Assertions.assertTrue(plan.entitiesToSkipAndSkipChildren().contains(catalogAdminSource)); + Assertions.assertTrue(plan.entitiesToSkipAndSkipChildren().contains(catalogAdminTarget)); + } + + @Test + public void filtersOutAssignmentOfOmnipotentPrincipalRoles() { + SynchronizationPlanner accessControlAwarePlanner = + new AccessControlAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + accessControlAwarePlanner.planAssignPrincipalRolesToCatalogRolesSync( + "catalogName", + "catalogRoleName", + List.of(omnipotentPrincipalRoleSource), + List.of(omnipotentPrincipalRoleTarget)); + + Assertions.assertTrue(plan.entitiesToSkip().contains(omnipotentPrincipalRoleSource)); + Assertions.assertTrue(plan.entitiesToSkip().contains(omnipotentPrincipalRoleTarget)); + } + + @Test + public void filtersOutAssignmentOfServiceAdmin() { + SynchronizationPlanner accessControlAwarePlanner = + new AccessControlAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + accessControlAwarePlanner.planAssignPrincipalRolesToCatalogRolesSync( + "catalogName", + "catalogRoleName", + List.of(serviceAdminSource), + List.of(serviceAdminTarget)); + + Assertions.assertTrue(plan.entitiesToSkip().contains(serviceAdminSource)); + Assertions.assertTrue(plan.entitiesToSkip().contains(serviceAdminTarget)); + } +} diff --git a/polaris-synchronizer/api/src/test/java/org/apache/polaris/tools/sync/polaris/ModificationAwarePlannerTest.java b/polaris-synchronizer/api/src/test/java/org/apache/polaris/tools/sync/polaris/ModificationAwarePlannerTest.java new file mode 100644 index 00000000..5d875017 --- /dev/null +++ b/polaris-synchronizer/api/src/test/java/org/apache/polaris/tools/sync/polaris/ModificationAwarePlannerTest.java @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
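The tests above treat AccessControlAwarePlanner purely as a decorator: it diverts the omnipotent and admin roles into the skip buckets and delegates everything else. A minimal sketch of that pattern follows, written against a simplified single-method planner interface; the real SynchronizationPlanner API and the omnipotence property key are assumptions inferred from the test code.

// Illustrative decorator, not the actual AccessControlAwarePlanner.
import java.util.List;
import org.apache.polaris.core.admin.model.PrincipalRole;

interface SimplePlanner {
  List<PrincipalRole> planPrincipalRoleSync(List<PrincipalRole> source, List<PrincipalRole> target);
}

class OmnipotenceFilteringPlanner implements SimplePlanner {
  private static final String OMNIPOTENCE_PROPERTY = "omnipotence"; // hypothetical key
  private final SimplePlanner delegate;

  OmnipotenceFilteringPlanner(SimplePlanner delegate) {
    this.delegate = delegate;
  }

  private static boolean isOmnipotent(PrincipalRole role) {
    return role.getProperties() != null
        && role.getProperties().containsKey(OMNIPOTENCE_PROPERTY);
  }

  @Override
  public List<PrincipalRole> planPrincipalRoleSync(
      List<PrincipalRole> source, List<PrincipalRole> target) {
    // Marked roles never reach the delegate; a real implementation would also
    // record them so they surface in entitiesToSkip().
    return delegate.planPrincipalRoleSync(
        source.stream().filter(r -> !isOmnipotent(r)).toList(),
        target.stream().filter(r -> !isOmnipotent(r)).toList());
  }
}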
+ */ +package org.apache.polaris.tools.sync.polaris; + +import java.util.List; +import org.apache.polaris.core.admin.model.AwsStorageConfigInfo; +import org.apache.polaris.core.admin.model.AzureStorageConfigInfo; +import org.apache.polaris.core.admin.model.Catalog; +import org.apache.polaris.core.admin.model.CatalogProperties; +import org.apache.polaris.core.admin.model.CatalogRole; +import org.apache.polaris.core.admin.model.ExternalCatalog; +import org.apache.polaris.core.admin.model.GcpStorageConfigInfo; +import org.apache.polaris.core.admin.model.GrantResource; +import org.apache.polaris.core.admin.model.PolarisCatalog; +import org.apache.polaris.core.admin.model.PrincipalRole; +import org.apache.polaris.core.admin.model.StorageConfigInfo; +import org.apache.polaris.tools.sync.polaris.planning.ModificationAwarePlanner; +import org.apache.polaris.tools.sync.polaris.planning.NoOpSyncPlanner; +import org.apache.polaris.tools.sync.polaris.planning.SynchronizationPlanner; +import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class ModificationAwarePlannerTest { + + private static final PrincipalRole principalRole = new PrincipalRole().name("principal-role"); + + private static final PrincipalRole modifiedPrincipalRole = + new PrincipalRole().name("principal-role").putPropertiesItem("newproperty", "newvalue"); + + @Test + public void testPrincipalRoleNotModified() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planPrincipalRoleSync(List.of(principalRole), List.of(principalRole)); + + Assertions.assertTrue(plan.entitiesNotModified().contains(principalRole)); + } + + @Test + public void testPrincipalRoleModified() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planPrincipalRoleSync( + List.of(principalRole), List.of(modifiedPrincipalRole)); + + Assertions.assertFalse(plan.entitiesNotModified().contains(principalRole)); + } + + private static final CatalogRole catalogRole = new CatalogRole().name("catalog-role"); + + private static final CatalogRole modifiedCatalogRole = + new CatalogRole().name("catalog-role").putPropertiesItem("newproperty", "newvalue"); + + @Test + public void testCatalogRoleNotModified() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planCatalogRoleSync( + "catalog", List.of(catalogRole), List.of(catalogRole)); + + Assertions.assertTrue(plan.entitiesNotModified().contains(catalogRole)); + } + + @Test + public void testCatalogRoleModified() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planCatalogRoleSync( + "catalog", List.of(catalogRole), List.of(modifiedCatalogRole)); + + Assertions.assertFalse(plan.entitiesNotModified().contains(catalogRole)); + } + + private static final GrantResource grant = + new GrantResource().type(GrantResource.TypeEnum.CATALOG); + + @Test + public void testGrantNotRevoked() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planGrantSync("catalog", "catalogRole", List.of(grant), List.of(grant)); + 
+    Assertions.assertTrue(plan.entitiesNotModified().contains(grant));
+  }
+
+  private static final Catalog catalog =
+      new PolarisCatalog()
+          .name("catalog")
+          .type(Catalog.TypeEnum.INTERNAL)
+          .properties(new CatalogProperties())
+          .storageConfigInfo(
+              new AwsStorageConfigInfo()
+                  .storageType(StorageConfigInfo.StorageTypeEnum.S3)
+                  .roleArn("roleArn")
+                  .userArn("userArn")
+                  .externalId("externalId")
+                  .region("region"));
+
+  private static final Catalog catalogWithTypeChange =
+      new ExternalCatalog()
+          .name("catalog")
+          .type(Catalog.TypeEnum.EXTERNAL) // changed type
+          .properties(new CatalogProperties())
+          .storageConfigInfo(
+              new AwsStorageConfigInfo()
+                  .storageType(StorageConfigInfo.StorageTypeEnum.S3)
+                  .roleArn("roleArn")
+                  .userArn("userArn")
+                  .externalId("externalId")
+                  .region("region"));
+
+  private static final Catalog catalogWithStorageConfigInfoChange =
+      new PolarisCatalog()
+          .name("catalog")
+          .type(Catalog.TypeEnum.EXTERNAL) // changed type
+          .properties(new CatalogProperties())
+          .storageConfigInfo(
+              new AzureStorageConfigInfo()
+                  .storageType(StorageConfigInfo.StorageTypeEnum.AZURE)
+                  .consentUrl("consentUrl")
+                  .tenantId("tenantId")
+                  .multiTenantAppName("multiTenantAppName"));
+
+  private static final Catalog catalogWithOnlyUserArnChange =
+      new PolarisCatalog()
+          .name("catalog")
+          .type(Catalog.TypeEnum.INTERNAL)
+          .properties(new CatalogProperties())
+          .storageConfigInfo(
+              new AwsStorageConfigInfo()
+                  .storageType(StorageConfigInfo.StorageTypeEnum.S3)
+                  .roleArn("roleArn")
+                  .userArn("userArnChanged") // only user arn changed
+                  .externalId("externalId")
+                  .region("region"));
+
+  private static final Catalog catalogWithPropertyChange =
+      new PolarisCatalog()
+          .name("catalog")
+          .type(Catalog.TypeEnum.INTERNAL)
+          .properties(new CatalogProperties().putAdditionalProperty("newproperty", "newvalue"))
+          .storageConfigInfo(
+              new AwsStorageConfigInfo()
+                  .storageType(StorageConfigInfo.StorageTypeEnum.S3)
+                  .roleArn("roleArn")
+                  .userArn("userArn")
+                  .externalId("externalId")
+                  .region("region"));
+
+  private static final Catalog azureCatalog =
+      new PolarisCatalog()
+          .name("catalog")
+          .type(Catalog.TypeEnum.INTERNAL)
+          .properties(new CatalogProperties())
+          .storageConfigInfo(
+              new AzureStorageConfigInfo()
+                  .storageType(StorageConfigInfo.StorageTypeEnum.AZURE)
+                  .consentUrl("consentUrl")
+                  .multiTenantAppName("multiTenantAppName")
+                  .tenantId("tenantId"));
+
+  private static final Catalog azureCatalogConsentUrlAndTenantAppNameChange =
+      new PolarisCatalog()
+          .name("catalog")
+          .type(Catalog.TypeEnum.INTERNAL)
+          .properties(new CatalogProperties())
+          .storageConfigInfo(
+              new AzureStorageConfigInfo()
+                  .storageType(StorageConfigInfo.StorageTypeEnum.AZURE)
+                  .consentUrl("consentUrlChanged")
+                  .multiTenantAppName("multiTenantAppNameChanged")
+                  .tenantId("tenantId"));
+
+  private static final Catalog gcpCatalog =
+      new PolarisCatalog()
+          .name("catalog")
+          .type(Catalog.TypeEnum.INTERNAL)
+          .properties(new CatalogProperties())
+          .storageConfigInfo(
+              new GcpStorageConfigInfo()
+                  .storageType(StorageConfigInfo.StorageTypeEnum.GCS)
+                  .gcsServiceAccount("gcsServiceAccount"));
+
+  private static final Catalog gcpCatalogGcsServiceAccountChange =
+      new PolarisCatalog()
+          .name("catalog")
+          .type(Catalog.TypeEnum.INTERNAL)
+          .properties(new CatalogProperties())
+          .storageConfigInfo(
+              new GcpStorageConfigInfo()
+                  .storageType(StorageConfigInfo.StorageTypeEnum.GCS)
+                  .gcsServiceAccount("gcsServiceAccountChanged"));
+
+  @Test
+  public void testCatalogNotModified()
{ + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planCatalogSync(List.of(catalog), List.of(catalog)); + + Assertions.assertTrue(plan.entitiesNotModified().contains(catalog)); + } + + @Test + public void testCatalogTypeModified() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planCatalogSync(List.of(catalogWithTypeChange), List.of(catalog)); + + Assertions.assertFalse(plan.entitiesNotModified().contains(catalogWithTypeChange)); + } + + @Test + public void testCatalogStorageConfigInfoModified() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planCatalogSync( + List.of(catalogWithStorageConfigInfoChange), List.of(catalog)); + + Assertions.assertFalse(plan.entitiesNotModified().contains(catalogWithStorageConfigInfoChange)); + } + + @Test + public void testCatalogPropertiesModified() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planCatalogSync(List.of(catalogWithPropertyChange), List.of(catalog)); + + Assertions.assertFalse(plan.entitiesNotModified().contains(catalogWithPropertyChange)); + } + + @Test + public void testOnlyUserArnModifiedForAws() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planCatalogSync( + List.of(catalogWithOnlyUserArnChange), List.of(catalog)); + + Assertions.assertTrue(plan.entitiesNotModified().contains(catalogWithOnlyUserArnChange)); + } + + @Test + public void testOnlyConsentUrlAndTenantAppNameChangeAzure() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planCatalogSync( + List.of(azureCatalogConsentUrlAndTenantAppNameChange), List.of(azureCatalog)); + + Assertions.assertTrue( + plan.entitiesNotModified().contains(azureCatalogConsentUrlAndTenantAppNameChange)); + } + + @Test + public void testOnlyGcsServiceAccountChangeGCP() { + SynchronizationPlanner modificationPlanner = + new ModificationAwarePlanner(new NoOpSyncPlanner()); + + SynchronizationPlan plan = + modificationPlanner.planCatalogSync( + List.of(gcpCatalogGcsServiceAccountChange), List.of(gcpCatalog)); + + Assertions.assertTrue(plan.entitiesNotModified().contains(gcpCatalogGcsServiceAccountChange)); + } +} diff --git a/polaris-synchronizer/api/src/test/java/org/apache/polaris/tools/sync/polaris/SourceParitySynchronizationPlannerTest.java b/polaris-synchronizer/api/src/test/java/org/apache/polaris/tools/sync/polaris/SourceParitySynchronizationPlannerTest.java new file mode 100644 index 00000000..c9a8480f --- /dev/null +++ b/polaris-synchronizer/api/src/test/java/org/apache/polaris/tools/sync/polaris/SourceParitySynchronizationPlannerTest.java @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
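The last three tests above pin down the comparison rule for catalogs: fields the target Polaris instance manages itself (the AWS user ARN, the Azure consent URL and multi-tenant app name, the GCS service account) must not count as modifications. A sketch of one way to implement that rule, assuming only the builder-style setters and equals() visible in the generated model; this is not the actual ModificationAwarePlanner logic:

// Blank out the target-managed storage fields on both sides, then compare.
import java.util.Objects;
import org.apache.polaris.core.admin.model.AwsStorageConfigInfo;
import org.apache.polaris.core.admin.model.AzureStorageConfigInfo;
import org.apache.polaris.core.admin.model.Catalog;
import org.apache.polaris.core.admin.model.GcpStorageConfigInfo;
import org.apache.polaris.core.admin.model.StorageConfigInfo;

final class CatalogDiff {
  static boolean isModified(Catalog source, Catalog target) {
    return !Objects.equals(normalize(source), normalize(target));
  }

  // NOTE: mutates its argument for brevity; a real implementation should copy first.
  private static Catalog normalize(Catalog catalog) {
    StorageConfigInfo storage = catalog.getStorageConfigInfo();
    if (storage instanceof AwsStorageConfigInfo aws) {
      aws.userArn(null);
    } else if (storage instanceof AzureStorageConfigInfo azure) {
      azure.consentUrl(null).multiTenantAppName(null);
    } else if (storage instanceof GcpStorageConfigInfo gcp) {
      gcp.gcsServiceAccount(null);
    }
    return catalog;
  }
}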
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris; + +import java.util.List; +import java.util.Set; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.core.admin.model.Catalog; +import org.apache.polaris.core.admin.model.CatalogRole; +import org.apache.polaris.core.admin.model.GrantResource; +import org.apache.polaris.core.admin.model.PrincipalRole; +import org.apache.polaris.tools.sync.polaris.planning.SourceParitySynchronizationPlanner; +import org.apache.polaris.tools.sync.polaris.planning.plan.SynchronizationPlan; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SourceParitySynchronizationPlannerTest { + + private static final Catalog CATALOG_1 = new Catalog().name("catalog-1"); + + private static final Catalog CATALOG_2 = new Catalog().name("catalog-2"); + + private static final Catalog CATALOG_3 = new Catalog().name("catalog-3"); + + @Test + public void testCreatesNewCatalogOverwritesOldCatalogRemovesDroppedCatalog() { + SourceParitySynchronizationPlanner planner = new SourceParitySynchronizationPlanner(); + + SynchronizationPlan plan = + planner.planCatalogSync(List.of(CATALOG_1, CATALOG_2), List.of(CATALOG_2, CATALOG_3)); + + Assertions.assertTrue(plan.entitiesToCreate().contains(CATALOG_1)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(CATALOG_1)); + Assertions.assertFalse(plan.entitiesToRemove().contains(CATALOG_1)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(CATALOG_2)); + Assertions.assertTrue(plan.entitiesToOverwrite().contains(CATALOG_2)); + Assertions.assertFalse(plan.entitiesToRemove().contains(CATALOG_2)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(CATALOG_3)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(CATALOG_3)); + Assertions.assertTrue(plan.entitiesToRemove().contains(CATALOG_3)); + } + + private static final PrincipalRole PRINCIPAL_ROLE_1 = + new PrincipalRole().name("principal-role-1"); + + private static final PrincipalRole PRINCIPAL_ROLE_2 = + new PrincipalRole().name("principal-role-2"); + + private static final PrincipalRole PRINCIPAL_ROLE_3 = + new PrincipalRole().name("principal-role-3"); + + @Test + public void testCreatesNewPrincipalRoleOverwritesOldPrincipalRoleRemovesDroppedPrincipalRole() { + SourceParitySynchronizationPlanner planner = new SourceParitySynchronizationPlanner(); + + SynchronizationPlan plan = + planner.planPrincipalRoleSync( + List.of(PRINCIPAL_ROLE_1, PRINCIPAL_ROLE_2), + List.of(PRINCIPAL_ROLE_2, PRINCIPAL_ROLE_3)); + + Assertions.assertTrue(plan.entitiesToCreate().contains(PRINCIPAL_ROLE_1)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(PRINCIPAL_ROLE_1)); + Assertions.assertFalse(plan.entitiesToRemove().contains(PRINCIPAL_ROLE_1)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(PRINCIPAL_ROLE_2)); + Assertions.assertTrue(plan.entitiesToOverwrite().contains(PRINCIPAL_ROLE_2)); + Assertions.assertFalse(plan.entitiesToRemove().contains(PRINCIPAL_ROLE_2)); + + 
Assertions.assertFalse(plan.entitiesToCreate().contains(PRINCIPAL_ROLE_3)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(PRINCIPAL_ROLE_3)); + Assertions.assertTrue(plan.entitiesToRemove().contains(PRINCIPAL_ROLE_3)); + } + + private static final CatalogRole CATALOG_ROLE_1 = new CatalogRole().name("catalog-role-1"); + + private static final CatalogRole CATALOG_ROLE_2 = new CatalogRole().name("catalog-role-2"); + + private static final CatalogRole CATALOG_ROLE_3 = new CatalogRole().name("catalog-role-3"); + + @Test + public void testCreatesNewCatalogRoleOverwritesOldCatalogRoleRemovesDroppedCatalogRole() { + SourceParitySynchronizationPlanner planner = new SourceParitySynchronizationPlanner(); + + SynchronizationPlan plan = + planner.planCatalogRoleSync( + "catalog", + List.of(CATALOG_ROLE_1, CATALOG_ROLE_2), + List.of(CATALOG_ROLE_2, CATALOG_ROLE_3)); + + Assertions.assertTrue(plan.entitiesToCreate().contains(CATALOG_ROLE_1)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(CATALOG_ROLE_1)); + Assertions.assertFalse(plan.entitiesToRemove().contains(CATALOG_ROLE_1)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(CATALOG_ROLE_2)); + Assertions.assertTrue(plan.entitiesToOverwrite().contains(CATALOG_ROLE_2)); + Assertions.assertFalse(plan.entitiesToRemove().contains(CATALOG_ROLE_2)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(CATALOG_ROLE_3)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(CATALOG_ROLE_3)); + Assertions.assertTrue(plan.entitiesToRemove().contains(CATALOG_ROLE_3)); + } + + private static final GrantResource GRANT_1 = + new GrantResource().type(GrantResource.TypeEnum.CATALOG); + + private static final GrantResource GRANT_2 = + new GrantResource().type(GrantResource.TypeEnum.NAMESPACE); + + private static final GrantResource GRANT_3 = + new GrantResource().type(GrantResource.TypeEnum.TABLE); + + @Test + public void testCreatesNewGrantResourceRemovesDroppedGrantResource() { + SourceParitySynchronizationPlanner planner = new SourceParitySynchronizationPlanner(); + + SynchronizationPlan plan = + planner.planGrantSync( + "catalog", "catalogRole", List.of(GRANT_1, GRANT_2), List.of(GRANT_2, GRANT_3)); + + Assertions.assertTrue(plan.entitiesToCreate().contains(GRANT_1)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(GRANT_1)); + Assertions.assertFalse(plan.entitiesToRemove().contains(GRANT_1)); + + // special case: no concept of overwriting a grant + Assertions.assertFalse(plan.entitiesToCreate().contains(GRANT_2)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(GRANT_2)); + Assertions.assertFalse(plan.entitiesToRemove().contains(GRANT_2)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(GRANT_3)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(GRANT_3)); + Assertions.assertTrue(plan.entitiesToRemove().contains(GRANT_3)); + } + + private static final PrincipalRole ASSIGNED_PRINCIPAL_ROLE_1 = + new PrincipalRole().name("principal-role-1"); + + private static final PrincipalRole ASSIGNED_PRINCIPAL_ROLE_2 = + new PrincipalRole().name("principal-role-2"); + + private static final PrincipalRole ASSIGNED_PRINCIPAL_ROLE_3 = + new PrincipalRole().name("principal-role-3"); + + @Test + public void testAssignsNewPrincipalRoleRevokesDroppedPrincipalRole() { + SourceParitySynchronizationPlanner planner = new SourceParitySynchronizationPlanner(); + + SynchronizationPlan plan = + planner.planAssignPrincipalRolesToCatalogRolesSync( + "catalog", + "catalogRole", + 
List.of(ASSIGNED_PRINCIPAL_ROLE_1, ASSIGNED_PRINCIPAL_ROLE_2), + List.of(ASSIGNED_PRINCIPAL_ROLE_2, ASSIGNED_PRINCIPAL_ROLE_3)); + + Assertions.assertTrue(plan.entitiesToCreate().contains(ASSIGNED_PRINCIPAL_ROLE_1)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(ASSIGNED_PRINCIPAL_ROLE_1)); + Assertions.assertFalse(plan.entitiesToRemove().contains(ASSIGNED_PRINCIPAL_ROLE_1)); + + // special case: no concept of overwriting the assignment of a principal role + Assertions.assertFalse(plan.entitiesToCreate().contains(ASSIGNED_PRINCIPAL_ROLE_2)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(ASSIGNED_PRINCIPAL_ROLE_2)); + Assertions.assertFalse(plan.entitiesToRemove().contains(ASSIGNED_PRINCIPAL_ROLE_2)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(ASSIGNED_PRINCIPAL_ROLE_3)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(ASSIGNED_PRINCIPAL_ROLE_3)); + Assertions.assertTrue(plan.entitiesToRemove().contains(ASSIGNED_PRINCIPAL_ROLE_3)); + } + + private static final Namespace NS_1 = Namespace.of("ns1"); + + private static final Namespace NS_2 = Namespace.of("ns2"); + + private static final Namespace NS_3 = Namespace.of("ns3"); + + @Test + public void testCreatesNewNamespaceOverwritesOldNamespaceDropsDroppedNamespace() { + SourceParitySynchronizationPlanner planner = new SourceParitySynchronizationPlanner(); + SynchronizationPlan plan = + planner.planNamespaceSync( + "catalog", Namespace.empty(), List.of(NS_1, NS_2), List.of(NS_2, NS_3)); + + Assertions.assertTrue(plan.entitiesToCreate().contains(NS_1)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(NS_1)); + Assertions.assertFalse(plan.entitiesToRemove().contains(NS_1)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(NS_2)); + Assertions.assertTrue(plan.entitiesToOverwrite().contains(NS_2)); + Assertions.assertFalse(plan.entitiesToRemove().contains(NS_2)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(NS_3)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(NS_3)); + Assertions.assertTrue(plan.entitiesToRemove().contains(NS_3)); + } + + private static final TableIdentifier TABLE_1 = TableIdentifier.of("ns", "table1"); + + private static final TableIdentifier TABLE_2 = TableIdentifier.of("ns", "table2"); + + private static final TableIdentifier TABLE_3 = TableIdentifier.of("ns", "table3"); + + @Test + public void + testCreatesNewTableIdentifierOverwritesOldTableIdentifierRevokesDroppedTableIdentifier() { + SourceParitySynchronizationPlanner planner = new SourceParitySynchronizationPlanner(); + + SynchronizationPlan plan = + planner.planTableSync( + "catalog", Namespace.empty(), Set.of(TABLE_1, TABLE_2), Set.of(TABLE_2, TABLE_3)); + + Assertions.assertTrue(plan.entitiesToCreate().contains(TABLE_1)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(TABLE_1)); + Assertions.assertFalse(plan.entitiesToRemove().contains(TABLE_1)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(TABLE_2)); + Assertions.assertTrue(plan.entitiesToOverwrite().contains(TABLE_2)); + Assertions.assertFalse(plan.entitiesToRemove().contains(TABLE_2)); + + Assertions.assertFalse(plan.entitiesToCreate().contains(TABLE_3)); + Assertions.assertFalse(plan.entitiesToOverwrite().contains(TABLE_3)); + Assertions.assertTrue(plan.entitiesToRemove().contains(TABLE_3)); + } +} diff --git a/polaris-synchronizer/cli/build.gradle.kts b/polaris-synchronizer/cli/build.gradle.kts new file mode 100644 index 00000000..9a276fca --- /dev/null +++ 
b/polaris-synchronizer/cli/build.gradle.kts
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
+
+plugins {
+  `java-library`
+  `maven-publish`
+  signing
+  `build-conventions`
+}
+
+applyShadowJar()
+
+dependencies {
+  implementation(project(":polaris-synchronizer-api"))
+
+  implementation(libs.picocli)
+  implementation(libs.slf4j)
+  implementation(libs.iceberg.spark.runtime)
+  implementation(libs.apache.commons.csv)
+  runtimeOnly(libs.logback.classic)
+
+  testImplementation(platform(libs.junit.bom))
+  testImplementation("org.junit.jupiter:junit-jupiter-params")
+  testImplementation("org.junit.jupiter:junit-jupiter-api")
+  testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine")
+  testRuntimeOnly("org.junit.platform:junit-platform-launcher")
+}
+
+val mainClassName = "org.apache.polaris.tools.sync.polaris.PolarisSynchronizerCLI"
+
+val shadowJar = tasks.named<ShadowJar>("shadowJar") { isZip64 = true }
+
+shadowJar { manifest { attributes["Main-Class"] = mainClassName } }
diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/CreateOmnipotentPrincipalCommand.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/CreateOmnipotentPrincipalCommand.java
new file mode 100644
index 00000000..596dc4e7
--- /dev/null
+++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/CreateOmnipotentPrincipalCommand.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
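With the Shadow plugin applied and Main-Class set to PolarisSynchronizerCLI, the CLI module above builds a self-contained jar. The exact task path and jar name below are assumptions (Shadow's default "-all" classifier, a project named polaris-synchronizer-cli in the settings file); adjust to the actual project layout:

./gradlew :polaris-synchronizer-cli:shadowJar
java -jar polaris-synchronizer/cli/build/libs/polaris-synchronizer-cli-<version>-all.jar --help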
+ */
+package org.apache.polaris.tools.sync.polaris;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Queue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.polaris.core.admin.model.Catalog;
+import org.apache.polaris.core.admin.model.PrincipalRole;
+import org.apache.polaris.core.admin.model.PrincipalWithCredentials;
+import org.apache.polaris.tools.sync.polaris.access.AccessControlService;
+import org.apache.polaris.tools.sync.polaris.options.PolarisOptions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import picocli.CommandLine;
+
+@CommandLine.Command(
+    name = "create-omnipotent-principal",
+    mixinStandardHelpOptions = true,
+    sortOptions = false,
+    description =
+        "Creates a principal, associated principal role, and associated catalog role for each catalog "
+            + "with appropriate access permissions.")
+public class CreateOmnipotentPrincipalCommand implements Callable<Integer> {
+
+  private final Logger consoleLog = LoggerFactory.getLogger("console-log");
+
+  @CommandLine.ArgGroup(exclusive = false, multiplicity = "1", heading = "Polaris options: %n")
+  private PolarisOptions options;
+
+  @CommandLine.Option(
+      names = {"--replace"},
+      description = {
+        "Optional flag to enable overwriting the existing omnipotent principal and associated entity if it exists."
+      })
+  private boolean replace;
+
+  @CommandLine.Option(
+      names = {"--write-access"},
+      description = {
+        "Optional flag to create the principal with write access to every catalog. This is required if "
+            + "the Polaris instance is the target of a sync."
+      })
+  private boolean withWriteAccess;
+
+  @CommandLine.Option(
+      names = {"--concurrency"},
+      defaultValue = "1",
+      description = {
+        "Optional flag to specify the number of concurrent threads to use to set up catalog roles."
+      })
+  private int concurrency;
+
+  @Override
+  public Integer call() throws Exception {
+    PolarisService polaris = options.buildService();
+    AccessControlService accessControlService = new AccessControlService(polaris);
+
+    PrincipalWithCredentials principalWithCredentials;
+
+    try {
+      principalWithCredentials = accessControlService.createOmnipotentPrincipal(replace);
+    } catch (Exception e) {
+      consoleLog.error("Failed to create omnipotent principal.", e);
+      return 1;
+    }
+
+    consoleLog.info(
+        "Created omnipotent principal {}.", principalWithCredentials.getPrincipal().getName());
+
+    PrincipalRole principalRole;
+
+    try {
+      principalRole =
+          accessControlService.createAndAssignPrincipalRole(principalWithCredentials, replace);
+    } catch (Exception e) {
+      consoleLog.error("Failed to create omnipotent principal role and assign it to principal.", e);
+      return 1;
+    }
+
+    consoleLog.info(
+        "Created omnipotent principal role {} and assigned it to omnipotent principal {}.",
+        principalRole.getName(),
+        principalWithCredentials.getPrincipal().getName());
+
+    List<Catalog> catalogs = polaris.listCatalogs();
+
+    consoleLog.info("Identified {} catalogs to create catalog roles for.", catalogs.size());
+
+    final String permissionLevel = withWriteAccess ?
"write" : "readonly"; + + AtomicInteger completedCatalogSetups = new AtomicInteger(0); + + Queue failedCatalogs = new ConcurrentLinkedQueue<>(); + + ExecutorService executor = Executors.newFixedThreadPool(concurrency); + + List> futures = new ArrayList<>(); + + for (Catalog catalog : catalogs) { + CompletableFuture future = + CompletableFuture.runAsync( + () -> { + try { + accessControlService.setupOmnipotentRoleForCatalog( + catalog.getName(), principalRole, replace, withWriteAccess); + } catch (Exception e) { + failedCatalogs.add(catalog); + consoleLog.error( + "Failed to setup omnipotent catalog role for catalog {} with {} access. - {}/{}", + catalog.getName(), + permissionLevel, + completedCatalogSetups.getAndIncrement(), + catalogs.size(), + e); + } + + consoleLog.info( + "Finished omnipotent principal setup for catalog {} with {} access. - {}/{}", + catalog.getName(), + permissionLevel, + completedCatalogSetups.incrementAndGet(), + catalogs.size()); + }, + executor); + + futures.add(future); + } + + futures.forEach(CompletableFuture::join); + + consoleLog.info( + "Encountered issues creating catalog roles for the following catalogs: {}", + failedCatalogs.stream().map(Catalog::getName).toList()); + + consoleLog.info( + "\n======================================================\n" + + "Omnipotent Principal Credentials:\n" + + "\tname = {}\n" + + "\tclientId = {}\n" + + "\tclientSecret = {}\n" + + "======================================================", + principalWithCredentials.getPrincipal().getName(), + principalWithCredentials.getCredentials().getClientId(), + principalWithCredentials.getCredentials().getClientSecret()); + + return 0; + } +} diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/CsvETagService.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/CsvETagService.java new file mode 100644 index 00000000..e391214c --- /dev/null +++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/CsvETagService.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.BufferedWriter; +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.CSVRecord; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.tools.sync.polaris.catalog.ETagService; + +/** Implementation that stores/loads ETags to/from a CSV file. 
+ */
+public class CsvETagService implements ETagService, Closeable {
+
+  private static final String CATALOG_HEADER = "Catalog";
+
+  private static final String TABLE_ID_HEADER = "TableIdentifier";
+
+  private static final String ETAG_HEADER = "ETag";
+
+  private static final String[] HEADERS = {CATALOG_HEADER, TABLE_ID_HEADER, ETAG_HEADER};
+
+  private final File file;
+
+  private final Map<String, Map<TableIdentifier, String>> tablesByCatalogName;
+
+  public CsvETagService(File file) throws IOException {
+    this.tablesByCatalogName = new HashMap<>();
+    this.file = file;
+
+    if (file.exists()) {
+      CSVFormat readerCSVFormat =
+          CSVFormat.DEFAULT.builder().setHeader(HEADERS).setSkipHeaderRecord(true).get();
+
+      CSVParser parser =
+          CSVParser.parse(Files.newBufferedReader(file.toPath(), UTF_8), readerCSVFormat);
+
+      for (CSVRecord record : parser.getRecords()) {
+        this.tablesByCatalogName.putIfAbsent(record.get(CATALOG_HEADER), new HashMap<>());
+
+        TableIdentifier tableId = TableIdentifier.parse(record.get(TABLE_ID_HEADER));
+
+        this.tablesByCatalogName
+            .get(record.get(CATALOG_HEADER))
+            .put(tableId, record.get(ETAG_HEADER));
+      }
+
+      parser.close();
+    }
+  }
+
+  @Override
+  public String getETag(String catalogName, TableIdentifier tableIdentifier) {
+    if (tablesByCatalogName.get(catalogName) != null) {
+      return tablesByCatalogName
+          .get(catalogName)
+          .get(tableIdentifier); // will return null anyway if table id not available
+    }
+    return null;
+  }
+
+  @Override
+  public void storeETag(String catalogName, TableIdentifier tableIdentifier, String etag) {
+    this.tablesByCatalogName.putIfAbsent(catalogName, new HashMap<>());
+    this.tablesByCatalogName.get(catalogName).put(tableIdentifier, etag);
+  }
+
+  @Override
+  public void close() throws IOException {
+    // newBufferedWriter truncates an existing file, so stale rows are discarded here
+    BufferedWriter writer = Files.newBufferedWriter(file.toPath(), UTF_8);
+
+    CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setHeader(HEADERS).get();
+
+    CSVPrinter printer = new CSVPrinter(writer, csvFormat);
+
+    // write etags to file
+    tablesByCatalogName.forEach(
+        (catalogName, etagsByTable) -> {
+          etagsByTable.forEach(
+              (tableIdentifier, etag) -> {
+                try {
+                  printer.printRecord(catalogName, tableIdentifier.toString(), etag);
+                } catch (IOException e) {
+                  throw new RuntimeException(e);
+                }
+              });
+        });
+
+    printer.flush();
+    printer.close();
+  }
+}
diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisSynchronizerCLI.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisSynchronizerCLI.java
new file mode 100644
index 00000000..e561e9ea
--- /dev/null
+++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/PolarisSynchronizerCLI.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
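A short usage sketch for the service above (same package assumed). The CSV carries a Catalog,TableIdentifier,ETag header row plus one row per table, and close() rewrites the whole file from the in-memory map, so try-with-resources is enough to persist updates:

import java.io.File;
import java.io.IOException;
import org.apache.iceberg.catalog.TableIdentifier;

class ETagExample {
  public static void main(String[] args) throws IOException {
    try (CsvETagService etags = new CsvETagService(new File("etags.csv"))) {
      TableIdentifier table = TableIdentifier.of("ns", "events");
      etags.storeETag("prod-catalog", table, "abc123");
      System.out.println(etags.getETag("prod-catalog", table)); // abc123
    } // close() flushes every stored ETag back to etags.csv
  }
}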
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris; + +import picocli.CommandLine; + +@CommandLine.Command( + name = "polaris-synchronizer", + mixinStandardHelpOptions = true, + subcommands = {SyncPolarisCommand.class, CreateOmnipotentPrincipalCommand.class}) +public class PolarisSynchronizerCLI { + + public PolarisSynchronizerCLI() {} + + public static void main(String... args) { + CommandLine commandLine = + new CommandLine(new PolarisSynchronizerCLI()) + .setExecutionExceptionHandler( + (ex, cmd, parseResult) -> { + cmd.getErr().println(cmd.getColorScheme().richStackTraceString(ex)); + return 1; + }); + commandLine.setUsageHelpWidth(150); + int exitCode = commandLine.execute(args); + System.exit(exitCode); + } +} diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/SyncPolarisCommand.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/SyncPolarisCommand.java new file mode 100644 index 00000000..eda18c58 --- /dev/null +++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/SyncPolarisCommand.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.concurrent.Callable; +import org.apache.polaris.core.admin.model.PrincipalWithCredentials; +import org.apache.polaris.tools.sync.polaris.catalog.ETagService; +import org.apache.polaris.tools.sync.polaris.catalog.NoOpETagService; +import org.apache.polaris.tools.sync.polaris.options.SourceOmniPotentPrincipalOptions; +import org.apache.polaris.tools.sync.polaris.options.SourcePolarisOptions; +import org.apache.polaris.tools.sync.polaris.options.TargetOmnipotentPrincipal; +import org.apache.polaris.tools.sync.polaris.options.TargetPolarisOptions; +import org.apache.polaris.tools.sync.polaris.planning.AccessControlAwarePlanner; +import org.apache.polaris.tools.sync.polaris.planning.ModificationAwarePlanner; +import org.apache.polaris.tools.sync.polaris.planning.SourceParitySynchronizationPlanner; +import org.apache.polaris.tools.sync.polaris.planning.SynchronizationPlanner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +@CommandLine.Command( + name = "sync-polaris", + mixinStandardHelpOptions = true, + sortOptions = false, + description = + "Idempotent synchronization of one Polaris instance to another. 
Entities will not be removed from the source Polaris instance.")
+public class SyncPolarisCommand implements Callable<Integer> {
+
+  private final Logger consoleLog = LoggerFactory.getLogger("console-log");
+
+  @CommandLine.ArgGroup(
+      exclusive = false,
+      multiplicity = "1",
+      heading = "Source Polaris options: %n")
+  private SourcePolarisOptions sourcePolarisOptions;
+
+  @CommandLine.ArgGroup(
+      exclusive = false,
+      multiplicity = "1",
+      heading = "Target Polaris options: %n")
+  private TargetPolarisOptions targetPolarisOptions;
+
+  @CommandLine.ArgGroup(
+      exclusive = false,
+      multiplicity = "1",
+      heading = "Source Polaris Omnipotent Principal Options: %n")
+  private SourceOmniPotentPrincipalOptions sourceOmniPotentPrincipalOptions;
+
+  @CommandLine.ArgGroup(
+      exclusive = false,
+      multiplicity = "1",
+      heading = "Target Polaris Omnipotent Principal Options: %n")
+  private TargetOmnipotentPrincipal targetOmniPotentPrincipalOptions;
+
+  @CommandLine.Option(
+      names = {"--etag-file"},
+      description = "The file path of the file to retrieve and store table ETags from.")
+  private String etagFilePath;
+
+  @Override
+  public Integer call() throws Exception {
+    SynchronizationPlanner sourceParityPlanner = new SourceParitySynchronizationPlanner();
+    SynchronizationPlanner modificationAwareSourceParityPlanner =
+        new ModificationAwarePlanner(sourceParityPlanner);
+    SynchronizationPlanner accessControlAwarePlanner =
+        new AccessControlAwarePlanner(modificationAwareSourceParityPlanner);
+
+    PolarisService source = sourcePolarisOptions.buildService();
+    PolarisService target = targetPolarisOptions.buildService();
+
+    PrincipalWithCredentials sourceOmnipotentPrincipal =
+        sourceOmniPotentPrincipalOptions.buildPrincipalWithCredentials();
+    PrincipalWithCredentials targetOmniPotentPrincipal =
+        targetOmniPotentPrincipalOptions.buildPrincipalWithCredentials();
+
+    ETagService etagService;
+
+    if (etagFilePath != null) {
+      File etagFile = new File(etagFilePath);
+      etagService = new CsvETagService(etagFile);
+    } else {
+      etagService = new NoOpETagService();
+    }
+
+    Runtime.getRuntime()
+        .addShutdownHook(
+            new Thread(
+                () -> {
+                  if (etagService instanceof Closeable closableETagService) {
+                    try {
+                      closableETagService.close();
+                    } catch (IOException e) {
+                      throw new RuntimeException(e);
+                    }
+                  }
+                }));
+
+    PolarisSynchronizer synchronizer =
+        new PolarisSynchronizer(
+            consoleLog,
+            accessControlAwarePlanner,
+            sourceOmnipotentPrincipal,
+            targetOmniPotentPrincipal,
+            source,
+            target,
+            etagService);
+
+    synchronizer.syncPrincipalRoles();
+    synchronizer.syncCatalogs();
+
+    return 0;
+  }
+}
diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/BaseOmnipotentPrincipalOptions.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/BaseOmnipotentPrincipalOptions.java
new file mode 100644
index 00000000..c0107c16
--- /dev/null
+++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/BaseOmnipotentPrincipalOptions.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
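A representative sync-polaris invocation. Every flag name is derived from the options classes that follow (--source-/--target- prefixes plus base-url, client-id, client-secret, omni-principal-name, omni-client-id, omni-client-secret); all values are placeholders:

polaris-synchronizer sync-polaris \
  --source-base-url http://localhost:8181/polaris \
  --source-client-id <source-id> --source-client-secret <source-secret> \
  --source-omni-principal-name <source-omni-name> \
  --source-omni-client-id <source-omni-id> --source-omni-client-secret <source-omni-secret> \
  --target-base-url http://localhost:8182/polaris \
  --target-client-id <target-id> --target-client-secret <target-secret> \
  --target-omni-principal-name <target-omni-name> \
  --target-omni-client-id <target-omni-id> --target-omni-client-secret <target-omni-secret> \
  --etag-file etags.csv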
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris.options; + +import org.apache.polaris.core.admin.model.Principal; +import org.apache.polaris.core.admin.model.PrincipalWithCredentials; +import org.apache.polaris.core.admin.model.PrincipalWithCredentialsCredentials; + +public abstract class BaseOmnipotentPrincipalOptions { + + protected static final String PRINCIPAL_NAME = "omni-principal-name"; + + protected static final String CLIENT_ID = "omni-client-id"; + + protected static final String CLIENT_SECRET = "omni-client-secret"; + + protected String principalName; + + protected String clientId; + + protected String clientSecret; + + public abstract void setPrincipalName(String principalName); + + public abstract void setClientId(String clientId); + + public abstract void setClientSecret(String clientSecret); + + public PrincipalWithCredentials buildPrincipalWithCredentials() { + return new PrincipalWithCredentials() + .principal(new Principal().name(principalName)) + .credentials( + new PrincipalWithCredentialsCredentials() + .clientId(clientId) + .clientSecret(clientSecret)); + } +} diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/BasePolarisOptions.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/BasePolarisOptions.java new file mode 100644 index 00000000..118489f7 --- /dev/null +++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/BasePolarisOptions.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.tools.sync.polaris.options; + +import java.io.IOException; +import org.apache.polaris.tools.sync.polaris.PolarisService; +import org.apache.polaris.tools.sync.polaris.PolarisServiceFactory; + +public abstract class BasePolarisOptions { + + protected static final String BASE_URL = "base-url"; + + protected static final String CLIENT_ID = "client-id"; + + protected static final String CLIENT_SECRET = "client-secret"; + + protected static final String SCOPE = "scope"; + + protected static final String OAUTH2_SERVER_URI = "oauth2-server-uri"; + + protected static final String ACCESS_TOKEN = "access-token"; + + protected String baseUrl; + + protected String oauth2ServerUri; + + protected String clientId; + + protected String clientSecret; + + protected String scope; + + protected String accessToken; + + public abstract String getServiceName(); + + public abstract void setBaseUrl(String baseUrl); + + public abstract void setOauth2ServerUri(String oauth2ServerUri); + + public abstract void setClientId(String clientId); + + public abstract void setClientSecret(String clientSecret); + + public abstract void setScope(String scope); + + public abstract void setAccessToken(String accessToken); + + public PolarisService buildService() throws IOException { + if (accessToken != null) { + return PolarisServiceFactory.newPolarisService(baseUrl, accessToken); + } + return PolarisServiceFactory.newPolarisService( + baseUrl, oauth2ServerUri, clientId, clientSecret, scope); + } +} diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/PolarisOptions.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/PolarisOptions.java new file mode 100644 index 00000000..6a10e4db --- /dev/null +++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/PolarisOptions.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris.options; + +import picocli.CommandLine; + +public class PolarisOptions extends BasePolarisOptions { + + @Override + public String getServiceName() { + return "polaris"; + } + + @CommandLine.Option( + names = "--polaris-" + BASE_URL, + required = true, + description = "The base url of the Polaris instance. Example: http://localhost:8181/polaris.") + @Override + public void setBaseUrl(String baseUrl) { + this.baseUrl = baseUrl; + } + + @CommandLine.Option( + names = "--polaris-" + OAUTH2_SERVER_URI, + description = { + "(Note: required if access-token not provided) the oauth2-server-uri to authenticate against to " + + "obtain an access token for the Polaris instance." 
+      })
+  @Override
+  public void setOauth2ServerUri(String oauth2ServerUri) {
+    this.oauth2ServerUri = oauth2ServerUri;
+  }
+
+  @CommandLine.Option(
+      names = "--polaris-" + CLIENT_ID,
+      description = {
+        "(Note: required if access-token not provided) The client id for the principal the tool will assume"
+            + " to carry out the copy. This principal must have SERVICE_MANAGE_ACCESS level privileges."
+      })
+  @Override
+  public void setClientId(String clientId) {
+    this.clientId = clientId;
+  }
+
+  @CommandLine.Option(
+      names = "--polaris-" + CLIENT_SECRET,
+      description = {
+        "(Note: required if access-token not provided) The client secret for the principal the tool will assume"
+            + " to carry out the copy. This principal must have SERVICE_MANAGE_ACCESS level privileges."
+      })
+  @Override
+  public void setClientSecret(String clientSecret) {
+    this.clientSecret = clientSecret;
+  }
+
+  @CommandLine.Option(
+      names = "--polaris-" + SCOPE,
+      description = {
+        "(Note: required if access-token not provided) The scope for the principal the tool will assume"
+            + " to carry out the copy. This principal must have SERVICE_MANAGE_ACCESS level privileges."
+      })
+  @Override
+  public void setScope(String scope) {
+    this.scope = scope;
+  }
+
+  @CommandLine.Option(
+      names = "--polaris-" + ACCESS_TOKEN,
+      description = "The access token to authenticate to the Polaris instance.")
+  @Override
+  public void setAccessToken(String accessToken) {
+    this.accessToken = accessToken;
+  }
+}
diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/SourceOmniPotentPrincipalOptions.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/SourceOmniPotentPrincipalOptions.java
new file mode 100644
index 00000000..90b7b46e
--- /dev/null
+++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/SourceOmniPotentPrincipalOptions.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
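PolarisOptions above gives create-omnipotent-principal two authentication paths, mirroring BasePolarisOptions.buildService(): a pre-issued token via --polaris-access-token, or OAuth2 client credentials via --polaris-client-id/--polaris-client-secret (optionally with --polaris-oauth2-server-uri and --polaris-scope). An illustrative client-credentials invocation; PRINCIPAL_ROLE:ALL is a typical Polaris scope value, not something this change mandates:

polaris-synchronizer create-omnipotent-principal \
  --polaris-base-url http://localhost:8181/polaris \
  --polaris-client-id <id> --polaris-client-secret <secret> \
  --polaris-scope PRINCIPAL_ROLE:ALL \
  --write-access --concurrency 4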
+ */ +package org.apache.polaris.tools.sync.polaris.options; + +import picocli.CommandLine; + +public class SourceOmniPotentPrincipalOptions extends BaseOmnipotentPrincipalOptions { + + @CommandLine.Option( + names = "--source-" + PRINCIPAL_NAME, + required = true, + description = "The principal name of the source omnipotent principal.") + @Override + public void setPrincipalName(String principalName) { + this.principalName = principalName; + } + + @CommandLine.Option( + names = "--source-" + CLIENT_ID, + required = true, + description = "The client id of the source omnipotent principal.") + @Override + public void setClientId(String clientId) { + this.clientId = clientId; + } + + @CommandLine.Option( + names = "--source-" + CLIENT_SECRET, + required = true, + description = "The client secret of the source omnipotent principal.") + @Override + public void setClientSecret(String clientSecret) { + this.clientSecret = clientSecret; + } +} diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/SourcePolarisOptions.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/SourcePolarisOptions.java new file mode 100644 index 00000000..7a811bc7 --- /dev/null +++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/SourcePolarisOptions.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.tools.sync.polaris.options; + +import picocli.CommandLine; + +public class SourcePolarisOptions extends BasePolarisOptions { + + @Override + public String getServiceName() { + return "source"; + } + + @CommandLine.Option( + names = "--source-" + BASE_URL, + required = true, + description = "The base url of the Polaris instance. Example: http://localhost:8181/polaris.") + @Override + public void setBaseUrl(String baseUrl) { + this.baseUrl = baseUrl; + } + + @CommandLine.Option( + names = "--source-" + OAUTH2_SERVER_URI, + description = { + "(Note: required if access-token not provided) the oauth2-server-uri to authenticate against to " + + "obtain an access token for the Polaris instance." + }) + @Override + public void setOauth2ServerUri(String oauth2ServerUri) { + this.oauth2ServerUri = oauth2ServerUri; + } + + @CommandLine.Option( + names = "--source-" + CLIENT_ID, + description = { + "(Note: required if access-token not provided) The client id for the principal the tool will assume" + + " to carry out the copy. This principal must have SERVICE_MANAGE_ACCESS level privileges." 
+      })
+  @Override
+  public void setClientId(String clientId) {
+    this.clientId = clientId;
+  }
+
+  @CommandLine.Option(
+      names = "--source-" + CLIENT_SECRET,
+      description = {
+        "(Note: required if access-token not provided) The client secret for the principal the tool will assume"
+            + " to carry out the copy. This principal must have SERVICE_MANAGE_ACCESS level privileges."
+      })
+  @Override
+  public void setClientSecret(String clientSecret) {
+    this.clientSecret = clientSecret;
+  }
+
+  @CommandLine.Option(
+      names = "--source-" + SCOPE,
+      description = {
+        "(Note: required if access-token not provided) The scope to request for the principal the tool will"
+            + " assume to carry out the copy. This principal must have SERVICE_MANAGE_ACCESS level privileges."
+      })
+  @Override
+  public void setScope(String scope) {
+    this.scope = scope;
+  }
+
+  @CommandLine.Option(
+      names = "--source-" + ACCESS_TOKEN,
+      description = "The access token to authenticate to the Polaris instance.")
+  @Override
+  public void setAccessToken(String accessToken) {
+    this.accessToken = accessToken;
+  }
+}
diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/TargetOmnipotentPrincipal.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/TargetOmnipotentPrincipal.java
new file mode 100644
index 00000000..ac03b616
--- /dev/null
+++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/TargetOmnipotentPrincipal.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.options;
+
+import picocli.CommandLine;
+
+public class TargetOmnipotentPrincipal extends BaseOmnipotentPrincipalOptions {
+
+  @CommandLine.Option(
+      names = "--target-" + PRINCIPAL_NAME,
+      required = true,
+      description = "The principal name of the target omnipotent principal.")
+  @Override
+  public void setPrincipalName(String principalName) {
+    this.principalName = principalName;
+  }
+
+  @CommandLine.Option(
+      names = "--target-" + CLIENT_ID,
+      required = true,
+      description = "The client id of the target omnipotent principal.")
+  @Override
+  public void setClientId(String clientId) {
+    this.clientId = clientId;
+  }
+
+  @CommandLine.Option(
+      names = "--target-" + CLIENT_SECRET,
+      required = true,
+      description = "The client secret of the target omnipotent principal.")
+  @Override
+  public void setClientSecret(String clientSecret) {
+    this.clientSecret = clientSecret;
+  }
+}
diff --git a/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/TargetPolarisOptions.java b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/TargetPolarisOptions.java
new file mode 100644
index 00000000..6d6b2042
--- /dev/null
+++ b/polaris-synchronizer/cli/src/main/java/org/apache/polaris/tools/sync/polaris/options/TargetPolarisOptions.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.tools.sync.polaris.options;
+
+import picocli.CommandLine;
+
+public class TargetPolarisOptions extends BasePolarisOptions {
+
+  @Override
+  public String getServiceName() {
+    return "target";
+  }
+
+  @CommandLine.Option(
+      names = "--target-" + BASE_URL,
+      required = true,
+      description = "The base url of the Polaris instance. Example: http://localhost:8181/polaris.")
+  @Override
+  public void setBaseUrl(String baseUrl) {
+    this.baseUrl = baseUrl;
+  }
+
+  @CommandLine.Option(
+      names = "--target-" + OAUTH2_SERVER_URI,
+      description = {
+        "(Note: required if access-token not provided) The oauth2-server-uri to authenticate against to"
+            + " obtain an access token for the Polaris instance."
+      })
+  @Override
+  public void setOauth2ServerUri(String oauth2ServerUri) {
+    this.oauth2ServerUri = oauth2ServerUri;
+  }
+
+  @CommandLine.Option(
+      names = "--target-" + CLIENT_ID,
+      description = {
+        "(Note: required if access-token not provided) The client id for the principal the tool will assume"
+            + " to carry out the copy. This principal must have SERVICE_MANAGE_ACCESS level privileges."
+      })
+  @Override
+  public void setClientId(String clientId) {
+    this.clientId = clientId;
+  }
+
+  @CommandLine.Option(
+      names = "--target-" + CLIENT_SECRET,
+      description = {
+        "(Note: required if access-token not provided) The client secret for the principal the tool will assume"
+            + " to carry out the copy. This principal must have SERVICE_MANAGE_ACCESS level privileges."
+      })
+  @Override
+  public void setClientSecret(String clientSecret) {
+    this.clientSecret = clientSecret;
+  }
+
+  @CommandLine.Option(
+      names = "--target-" + SCOPE,
+      description = {
+        "(Note: required if access-token not provided) The scope to request for the principal the tool will"
+            + " assume to carry out the copy. This principal must have SERVICE_MANAGE_ACCESS level privileges."
+      })
+  @Override
+  public void setScope(String scope) {
+    this.scope = scope;
+  }
+
+  @CommandLine.Option(
+      names = "--target-" + ACCESS_TOKEN,
+      description = "The access token to authenticate to the Polaris instance.")
+  @Override
+  public void setAccessToken(String accessToken) {
+    this.accessToken = accessToken;
+  }
+}
diff --git a/polaris-synchronizer/cli/src/main/resources/logback.xml b/polaris-synchronizer/cli/src/main/resources/logback.xml
new file mode 100644
index 00000000..663e03f3
--- /dev/null
+++ b/polaris-synchronizer/cli/src/main/resources/logback.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<configuration>
+  <appender name="console" class="ch.qos.logback.core.ConsoleAppender">
+    <withJansi>true</withJansi>
+    <encoder>
+      <pattern>%highlight(%-5level) - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <root level="INFO">
+    <appender-ref ref="console" />
+  </root>
+</configuration>
diff --git a/settings.gradle.kts b/settings.gradle.kts
new file mode 100644
index 00000000..3a8309da
--- /dev/null
+++ b/settings.gradle.kts
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+val baseVersion = file("version.txt").readText().trim()
+
+rootProject.name = "polaris-tools"
+
+gradle.beforeProject {
+  group = "org.apache.polaris.tools"
+  version = baseVersion
+  // Note: inside beforeProject, `name` is the full project name as registered via
+  // include(), e.g. "iceberg-catalog-migrator-api", so the branches must match those.
+  description =
+    when (name) {
+      "iceberg-catalog-migrator-api" -> "Iceberg catalog migrator - api implementation"
+      "iceberg-catalog-migrator-api-test" -> "Iceberg catalog migrator - common test implementation"
+      "iceberg-catalog-migrator-cli" -> "Iceberg catalog migrator - CLI implementation"
+      else -> name
+    }
+}
+
+fun catalogMigratorProject(name: String) {
+  include("iceberg-catalog-migrator-$name")
+  project(":iceberg-catalog-migrator-$name").projectDir = file("iceberg-catalog-migrator/$name")
+}
+
+catalogMigratorProject("api")
+
+catalogMigratorProject("api-test")
+
+catalogMigratorProject("cli")
+
+fun polarisSynchronizerProject(name: String) {
+  include("polaris-synchronizer-$name")
+  project(":polaris-synchronizer-$name").projectDir = file("polaris-synchronizer/$name")
+}
+
+polarisSynchronizerProject("api")
+
+polarisSynchronizerProject("cli")
+
+include("bom")
+
+project(":bom").projectDir = file("bom")
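Note for reviewers: the option classes above only declare picocli @CommandLine.Option setters; they become CLI flags once mixed into a command. The following is a minimal sketch of that wiring. The class name SyncPolarisCommand and the command name "sync-polaris" are illustrative assumptions for this sketch, not files in this diff; only the option classes and the standard picocli APIs (@CommandLine.Command, @CommandLine.Mixin, CommandLine.execute) are taken as given.

// Hypothetical entry point, for illustration only -- not part of this change.
// picocli discovers the @CommandLine.Option-annotated setters declared in the
// mixed-in option classes and registers their --source-*/--target-* flags.
package org.apache.polaris.tools.sync.polaris;

import org.apache.polaris.tools.sync.polaris.options.SourcePolarisOptions;
import org.apache.polaris.tools.sync.polaris.options.TargetPolarisOptions;
import picocli.CommandLine;

@CommandLine.Command(
    name = "sync-polaris",
    description = "Copies entities from a source Polaris instance to a target Polaris instance.")
public class SyncPolarisCommand implements Runnable {

  // Each mixin contributes its options to this command's usage and parsing.
  @CommandLine.Mixin private SourcePolarisOptions sourceOptions = new SourcePolarisOptions();

  @CommandLine.Mixin private TargetPolarisOptions targetOptions = new TargetPolarisOptions();

  @Override
  public void run() {
    // The actual synchronization logic lives elsewhere in the synchronizer; omitted here.
  }

  public static void main(String[] args) {
    // Example invocation (flags come from the option classes in this diff):
    //   sync-polaris --source-base-url http://localhost:8181/polaris \
    //                --target-base-url http://localhost:8182/polaris ...
    System.exit(new CommandLine(new SyncPolarisCommand()).execute(args));
  }
}

Using @CommandLine.Mixin (rather than inheritance) lets a single command combine the source and target option sets even though both extend the same base classes.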