From 29deea4398d2ccbaf81a682fa31440c96a2848c3 Mon Sep 17 00:00:00 2001 From: Mini Yu Date: Mon, 20 Oct 2025 12:00:29 +0800 Subject: [PATCH 01/43] [#8833] feat(catalogs): Support the basic framework for the generic lakehouse catalog. (#8842) ### What changes were proposed in this pull request? Introduce a basic framework for generic lakehouse catalog. ### Why are the changes needed? To better manage more lakehouse system. Fix: #8833 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? It's just framework, and I just tested it locally. Co-authored-by: Jerry Shao --- build.gradle.kts | 3 +- .../build.gradle.kts | 125 +++++++++++++++ .../lakehouse/GenericLakehouseCatalog.java | 80 ++++++++++ .../GenericLakehouseCatalogCapability.java | 50 ++++++ .../GenericLakehouseCatalogOperations.java | 142 ++++++++++++++++++ ...ricLakehouseCatalogPropertiesMetadata.java | 36 +++++ ...ericLakehouseSchemaPropertiesMetadata.java | 37 +++++ ...nericLakehouseTablePropertiesMetadata.java | 38 +++++ .../org.apache.gravitino.CatalogProvider | 19 +++ .../src/main/resources/generic-lakehouse.conf | 17 +++ settings.gradle.kts | 1 + 11 files changed, 547 insertions(+), 1 deletion(-) create mode 100644 catalogs/catalog-generic-lakehouse/build.gradle.kts create mode 100644 catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalog.java create mode 100644 catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogCapability.java create mode 100644 catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java create mode 100644 catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java create mode 100644 
catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java create mode 100644 catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java create mode 100644 catalogs/catalog-generic-lakehouse/src/main/resources/META-INF/services/org.apache.gravitino.CatalogProvider create mode 100644 catalogs/catalog-generic-lakehouse/src/main/resources/generic-lakehouse.conf diff --git a/build.gradle.kts b/build.gradle.kts index 8ad42871680..0a98b6779b1 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -949,7 +949,8 @@ tasks { ":catalogs:catalog-lakehouse-hudi:copyLibAndConfig", ":catalogs:catalog-lakehouse-iceberg:copyLibAndConfig", ":catalogs:catalog-lakehouse-paimon:copyLibAndConfig", - ":catalogs:catalog-model:copyLibAndConfig" + ":catalogs:catalog-model:copyLibAndConfig", + ":catalogs:catalog-generic-lakehouse:copyLibAndConfig" ) } diff --git a/catalogs/catalog-generic-lakehouse/build.gradle.kts b/catalogs/catalog-generic-lakehouse/build.gradle.kts new file mode 100644 index 00000000000..c3ad842ac38 --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/build.gradle.kts @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +description = "catalog-generic-lakehouse" + +plugins { + `maven-publish` + id("java") + id("idea") +} + +dependencies { + implementation(project(":api")) { + exclude("*") + } + implementation(project(":catalogs:catalog-common")) + implementation(project(":common")) { + exclude("*") + } + implementation(project(":core")) { + exclude("*") + } + + implementation(libs.bundles.log4j) + implementation(libs.cglib) + implementation(libs.commons.collections4) + implementation(libs.commons.io) + implementation(libs.commons.lang3) + implementation(libs.guava) + + annotationProcessor(libs.lombok) + + compileOnly(libs.lombok) + + testImplementation(project(":catalogs:catalog-jdbc-common", "testArtifacts")) + testImplementation(project(":clients:client-java")) + testImplementation(project(":integration-test-common", "testArtifacts")) + testImplementation(project(":server")) + testImplementation(project(":server-common")) + + testImplementation(libs.junit.jupiter.api) + testImplementation(libs.junit.jupiter.params) + testImplementation(libs.mockito.core) + testImplementation(libs.mysql.driver) + testImplementation(libs.postgresql.driver) + testImplementation(libs.slf4j.api) + testImplementation(libs.testcontainers) + testImplementation(libs.testcontainers.mysql) + testImplementation(libs.testcontainers.postgresql) + + testRuntimeOnly(libs.junit.jupiter.engine) +} + +tasks { + val runtimeJars by registering(Copy::class) { + from(configurations.runtimeClasspath) + into("build/libs") + } + + jar { + finalizedBy("runtimeJars") + } + + val copyCatalogLibs by registering(Copy::class) { + dependsOn("jar", "runtimeJars") + from("build/libs") { + exclude("guava-*.jar") + exclude("log4j-*.jar") + exclude("slf4j-*.jar") + } + into("$rootDir/distribution/package/catalogs/generic-lakehouse/libs") + } + + val copyCatalogConfig by registering(Copy::class) { + 
from("src/main/resources") + into("$rootDir/distribution/package/catalogs/generic-lakehouse/conf") + + rename { original -> + if (original.endsWith(".template")) { + original.replace(".template", "") + } else { + original + } + } + + exclude { details -> + details.file.isDirectory() + } + + fileMode = 0b111101101 + } + + register("copyLibAndConfig", Copy::class) { + dependsOn(copyCatalogLibs, copyCatalogConfig) + } +} + +tasks.test { + val skipITs = project.hasProperty("skipITs") + if (skipITs) { + // Exclude integration tests + exclude("**/integration/test/**") + } else { + dependsOn(tasks.jar) + } +} + +tasks.getByName("generateMetadataFileForMavenJavaPublication") { + dependsOn("runtimeJars") +} diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalog.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalog.java new file mode 100644 index 00000000000..68072f55baa --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalog.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.catalog.lakehouse; + +import java.util.Map; +import org.apache.gravitino.connector.BaseCatalog; +import org.apache.gravitino.connector.CatalogOperations; +import org.apache.gravitino.connector.PropertiesMetadata; +import org.apache.gravitino.connector.capability.Capability; + +/** Implementation of a generic lakehouse catalog in Apache Gravitino. */ +public class GenericLakehouseCatalog extends BaseCatalog { + + static final GenericLakehouseCatalogPropertiesMetadata CATALOG_PROPERTIES_METADATA = + new GenericLakehouseCatalogPropertiesMetadata(); + + static final GenericLakehouseSchemaPropertiesMetadata SCHEMA_PROPERTIES_METADATA = + new GenericLakehouseSchemaPropertiesMetadata(); + + static final GenericLakehouseTablePropertiesMetadata TABLE_PROPERTIES_METADATA = + new GenericLakehouseTablePropertiesMetadata(); + + /** + * Returns the short name of the generic lakehouse catalog. + * + * @return The short name of the catalog. + */ + @Override + public String shortName() { + return "generic-lakehouse"; + } + + /** + * Creates a new instance of {@link GenericLakehouseCatalogOperations} with the provided + * configuration. + * + * @param config The configuration map for the generic catalog operations. + * @return A new instance of {@link GenericLakehouseCatalogOperations}. 
+ */ + @Override + protected CatalogOperations newOps(Map config) { + return new GenericLakehouseCatalogOperations(); + } + + @Override + public Capability newCapability() { + return new GenericLakehouseCatalogCapability(); + } + + @Override + public PropertiesMetadata catalogPropertiesMetadata() throws UnsupportedOperationException { + return CATALOG_PROPERTIES_METADATA; + } + + @Override + public PropertiesMetadata schemaPropertiesMetadata() throws UnsupportedOperationException { + return SCHEMA_PROPERTIES_METADATA; + } + + @Override + public PropertiesMetadata tablePropertiesMetadata() throws UnsupportedOperationException { + return TABLE_PROPERTIES_METADATA; + } +} diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogCapability.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogCapability.java new file mode 100644 index 00000000000..08015f7fce1 --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogCapability.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.catalog.lakehouse; + +import org.apache.gravitino.connector.capability.Capability; +import org.apache.gravitino.connector.capability.CapabilityResult; + +public class GenericLakehouseCatalogCapability implements Capability { + + @Override + public CapabilityResult columnNotNull() { + throw new UnsupportedOperationException( + "Not implemented yet: GenericLakehouseCatalogCapability.columnNotNull"); + } + + @Override + public CapabilityResult columnDefaultValue() { + throw new UnsupportedOperationException( + "Not implemented yet: GenericLakehouseCatalogCapability.columnDefaultValue"); + } + + @Override + public CapabilityResult caseSensitiveOnName(Scope scope) { + switch (scope) { + case SCHEMA: + case TABLE: + case COLUMN: + throw new UnsupportedOperationException( + "Not implemented yet: GenericLakehouseCatalogCapability.caseSensitiveOnName"); + default: + return CapabilityResult.SUPPORTED; + } + } +} diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java new file mode 100644 index 00000000000..64743488a01 --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse; + +import java.util.Map; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Namespace; +import org.apache.gravitino.connector.CatalogInfo; +import org.apache.gravitino.connector.CatalogOperations; +import org.apache.gravitino.connector.HasPropertyMetadata; +import org.apache.gravitino.connector.SupportsSchemas; +import org.apache.gravitino.exceptions.NoSuchCatalogException; +import org.apache.gravitino.exceptions.NoSuchSchemaException; +import org.apache.gravitino.exceptions.NoSuchTableException; +import org.apache.gravitino.exceptions.NonEmptySchemaException; +import org.apache.gravitino.exceptions.SchemaAlreadyExistsException; +import org.apache.gravitino.exceptions.TableAlreadyExistsException; +import org.apache.gravitino.rel.Column; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.rel.TableCatalog; +import org.apache.gravitino.rel.TableChange; +import org.apache.gravitino.rel.expressions.distributions.Distribution; +import org.apache.gravitino.rel.expressions.sorts.SortOrder; +import org.apache.gravitino.rel.expressions.transforms.Transform; +import org.apache.gravitino.rel.indexes.Index; + +/** Operations for interacting with a generic lakehouse catalog in Apache Gravitino. */ +public class GenericLakehouseCatalogOperations + implements CatalogOperations, SupportsSchemas, TableCatalog { + + /** + * Initializes the generic lakehouse catalog operations with the provided configuration. 
+ * + * @param conf The configuration map for the generic catalog operations. + * @param info The catalog info associated with this operation instance. + * @param propertiesMetadata The properties metadata of generic lakehouse catalog. + * @throws RuntimeException if initialization fails. + */ + @Override + public void initialize( + Map conf, CatalogInfo info, HasPropertyMetadata propertiesMetadata) + throws RuntimeException { + // TODO: Implement initialization logic + } + + @Override + public void close() {} + + @Override + public void testConnection( + NameIdentifier catalogIdent, + org.apache.gravitino.Catalog.Type type, + String provider, + String comment, + Map properties) + throws Exception { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public org.apache.gravitino.NameIdentifier[] listSchemas(org.apache.gravitino.Namespace namespace) + throws NoSuchCatalogException { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public org.apache.gravitino.Schema createSchema( + org.apache.gravitino.NameIdentifier ident, String comment, Map properties) + throws NoSuchCatalogException, SchemaAlreadyExistsException { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public org.apache.gravitino.Schema loadSchema(org.apache.gravitino.NameIdentifier ident) + throws NoSuchSchemaException { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public org.apache.gravitino.Schema alterSchema( + org.apache.gravitino.NameIdentifier ident, org.apache.gravitino.SchemaChange... 
changes) + throws NoSuchSchemaException { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public boolean dropSchema(org.apache.gravitino.NameIdentifier ident, boolean cascade) + throws NonEmptySchemaException { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaException { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public Table loadTable(NameIdentifier ident) throws NoSuchTableException { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public Table createTable( + NameIdentifier ident, + Column[] columns, + String comment, + Map properties, + Transform[] partitions, + Distribution distribution, + SortOrder[] sortOrders, + Index[] indexes) + throws NoSuchSchemaException, TableAlreadyExistsException { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public Table alterTable(NameIdentifier ident, TableChange... changes) + throws NoSuchTableException, IllegalArgumentException { + throw new UnsupportedOperationException("Not implemented yet."); + } + + @Override + public boolean dropTable(NameIdentifier ident) { + throw new UnsupportedOperationException("Not implemented yet."); + } +} diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java new file mode 100644 index 00000000000..18543bd0a3c --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.catalog.lakehouse; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata; +import org.apache.gravitino.connector.PropertyEntry; + +public class GenericLakehouseCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata { + + private static final Map> GENERIC_LAKEHOUSE_CATALOG_PROPERTY_ENTRIES = + ImmutableMap.>builder().build(); + + @Override + protected Map> specificPropertyEntries() { + return GENERIC_LAKEHOUSE_CATALOG_PROPERTY_ENTRIES; + } +} diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java new file mode 100644 index 00000000000..05da8443cd9 --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.gravitino.connector.BasePropertiesMetadata; +import org.apache.gravitino.connector.PropertyEntry; + +public class GenericLakehouseSchemaPropertiesMetadata extends BasePropertiesMetadata { + private static final Map> propertiesMetadata; + + static { + propertiesMetadata = ImmutableMap.of(); + } + + @Override + protected Map> specificPropertyEntries() { + return propertiesMetadata; + } +} diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java new file mode 100644 index 00000000000..362b10dbe4a --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.gravitino.connector.BasePropertiesMetadata; +import org.apache.gravitino.connector.PropertyEntry; + +public class GenericLakehouseTablePropertiesMetadata extends BasePropertiesMetadata { + + private static final Map> propertiesMetadata; + + static { + propertiesMetadata = ImmutableMap.of(); + } + + @Override + protected Map> specificPropertyEntries() { + return propertiesMetadata; + } +} diff --git a/catalogs/catalog-generic-lakehouse/src/main/resources/META-INF/services/org.apache.gravitino.CatalogProvider b/catalogs/catalog-generic-lakehouse/src/main/resources/META-INF/services/org.apache.gravitino.CatalogProvider new file mode 100644 index 00000000000..927e28b4fd2 --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/resources/META-INF/services/org.apache.gravitino.CatalogProvider @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +org.apache.gravitino.catalog.lakehouse.GenericLakehouseCatalog diff --git a/catalogs/catalog-generic-lakehouse/src/main/resources/generic-lakehouse.conf b/catalogs/catalog-generic-lakehouse/src/main/resources/generic-lakehouse.conf new file mode 100644 index 00000000000..f2a4c807f4a --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/resources/generic-lakehouse.conf @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
\ No newline at end of file diff --git a/settings.gradle.kts b/settings.gradle.kts index 21245ecf8bb..5355fe7bc5f 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -51,6 +51,7 @@ include( "clients:client-python", "clients:cli" ) +include("catalogs:catalog-generic-lakehouse") if (gradle.startParameter.projectProperties["enableFuse"]?.toBoolean() == true) { include("clients:filesystem-fuse") } else { From eb8ace8ae69e8bdda4bce207f09ab221bf29f48c Mon Sep 17 00:00:00 2001 From: Beinan Date: Mon, 20 Oct 2025 23:41:59 -0700 Subject: [PATCH 02/43] feat(lance): add Lance REST server packaging (#8825) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? - add the new lance-common and lance-rest-server modules that bootstrap a minimal Lance catalog REST facade backed by in-memory state - introduce REST resources (LanceCatalogOperations, LanceMetadataOperations) plus a GravitinoLanceRESTServer entry point and auxiliary service wiring - package assets for the Lance REST server (config + startup script) and hook the module into Gradle distribution/standalone tarball tasks with checksums - extend build dependencies (SLF4J, Dropwizard metrics, Prometheus bridge) so the service runs using existing Gravitino telemetry components - surface scaffolding build.gradle.kts under lance/ to disable blanket task execution and ensure module resolution ### Why are the changes needed? - the Lance integration needs a thin REST façade to iterate on metadata APIs before the backend is available; this scaffolding mirrors the Iceberg REST flow - distribution packaging must include the new service so it can be assembled, tested, and deployed alongside other Gravitino auxiliaries Fix: #N/A ### Does this PR introduce any user-facing change? - new Lance REST server tarball and config/script templates shipped with the distribution - new REST endpoints under /lance/v1/... 
exposing catalog metadata (namespaces, tables) - no breaking changes to existing services ### How was this patch tested? - ./gradlew :lance:lance-rest-server:build - started the service locally with java -cp lance/lance-rest-server/build/libs/* ...GravitinoLanceRESTServer /tmp/gravitino-lance-rest-server.conf - curl http://127.0.0.1:9101/lance/v1/catalog - curl http://127.0.0.1:9101/lance/v1/catalog/namespaces - curl "http://127.0.0.1:9101/lance/v1/metadata/table?namespace=default&name=sample_table" - curl -X POST http://127.0.0.1:9101/lance/v1/catalog/namespaces -H 'Content-Type: application/json' -d '{"namespace":"demo"}' - curl -X DELETE http://127.0.0.1:9101/lance/v1/catalog/namespaces/demo --------- Co-authored-by: mchades Co-authored-by: Mini Yu Co-authored-by: Jerry Shao --- bin/gravitino-lance-rest-server.sh.template | 206 ++++++++++ build.gradle.kts | 84 ++++- .../gravitino-lance-rest-server.conf.template | 45 +++ .../gravitino/cache/EntityCacheWeigher.java | 26 +- .../gravitino/cache/TestCacheConfig.java | 127 ++++++- lance/build.gradle.kts | 22 ++ lance/lance-common/build.gradle.kts | 43 +++ .../lance/common/config/LanceConfig.java | 65 ++++ .../lance/common/ops/LanceCatalogService.java | 352 ++++++++++++++++++ lance/lance-rest-server/build.gradle.kts | 90 +++++ .../gravitino/lance/LanceRESTService.java | 97 +++++ .../server/GravitinoLanceRESTServer.java | 107 ++++++ .../rest/LanceListNamespacesResponse.java | 63 ++++ .../service/rest/LanceListTablesResponse.java | 63 ++++ .../rest/LanceNamespaceOperations.java | 92 +++++ .../authorization/MetadataFilterHelper.java | 32 +- .../AuthorizationExpressionEvaluator.java | 53 ++- .../TestAuthorizationExpressionEvaluator.java | 17 +- settings.gradle.kts | 2 + 19 files changed, 1560 insertions(+), 26 deletions(-) create mode 100644 bin/gravitino-lance-rest-server.sh.template create mode 100644 conf/gravitino-lance-rest-server.conf.template create mode 100644 lance/build.gradle.kts create mode 100644 
lance/lance-common/build.gradle.kts create mode 100644 lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java create mode 100644 lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceCatalogService.java create mode 100644 lance/lance-rest-server/build.gradle.kts create mode 100644 lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java create mode 100644 lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/server/GravitinoLanceRESTServer.java create mode 100644 lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListNamespacesResponse.java create mode 100644 lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListTablesResponse.java create mode 100644 lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java diff --git a/bin/gravitino-lance-rest-server.sh.template b/bin/gravitino-lance-rest-server.sh.template new file mode 100644 index 00000000000..17f098903e7 --- /dev/null +++ b/bin/gravitino-lance-rest-server.sh.template @@ -0,0 +1,206 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#set -ex +USAGE="-e Usage: bin/gravitino-lance-rest-server.sh [--config ]\n\t + {start|run|stop|restart|status}" + +if [[ "$1" == "--config" ]]; then + shift + conf_dir="$1" + if [[ ! -d "${conf_dir}" ]]; then + echo "ERROR : ${conf_dir} is not a directory" + echo ${USAGE} + exit 1 + else + export GRAVITINO_CONF_DIR="${conf_dir}" + fi + shift +fi + +bin="$(dirname "${BASH_SOURCE-$0}")" +bin="$(cd "${bin}">/dev/null; pwd)" + +. "${bin}/common.sh" + +check_java_version + +function check_process_status() { + local pid=$(found_lance_rest_server_pid) + + if [[ -z "${pid}" ]]; then + echo "GravitinoLanceRESTServer is not running" + else + printArt + echo "GravitinoLanceRESTServer is running[PID:$pid]" + fi +} + +function found_lance_rest_server_pid() { + process_name='GravitinoLanceRESTServer'; + RUNNING_PIDS=$(ps x | grep ${process_name} | grep -v grep | awk '{print $1}'); + + if [[ -z "${RUNNING_PIDS}" ]]; then + return + fi + + if ! kill -0 ${RUNNING_PIDS} > /dev/null 2>&1; then + echo "GravitinoLanceRESTServer running but process is dead" + fi + + echo "${RUNNING_PIDS}" +} + +function wait_for_lance_rest_server_to_die() { + timeout=10 + timeoutTime=$(date "+%s") + let "timeoutTime+=$timeout" + currentTime=$(date "+%s") + forceKill=1 + + while [[ $currentTime -lt $timeoutTime ]]; do + local pid=$(found_lance_rest_server_pid) + if [[ -z "${pid}" ]]; then + forceKill=0 + break + fi + + kill ${pid} > /dev/null 2> /dev/null + if kill -0 ${pid} > /dev/null 2>&1; then + sleep 3 + else + forceKill=0 + break + fi + currentTime=$(date "+%s") + done + + if [[ $forceKill -ne 0 ]]; then + kill -9 ${pid} > /dev/null 2> /dev/null + fi +} + +function start() { + local pid=$(found_lance_rest_server_pid) + + if [[ ! -z "${pid}" ]]; then + if kill -0 ${pid} >/dev/null 2>&1; then + echo "GravitinoLanceRESTServer is already running" + return 0; + fi + fi + + if [[ ! 
-d "${GRAVITINO_LOG_DIR}" ]]; then + echo "Log dir doesn't exist, create ${GRAVITINO_LOG_DIR}" + mkdir -p "${GRAVITINO_LOG_DIR}" + fi + + nohup ${JAVA_RUNNER} ${JAVA_OPTS} ${GRAVITINO_DEBUG_OPTS} -cp ${GRAVITINO_CLASSPATH} ${GRAVITINO_SERVER_NAME} >> "${GRAVITINO_OUTFILE}" 2>&1 & + + pid=$! + if [[ -z "${pid}" ]]; then + echo "GravitinoLanceRESTServer start error!" + return 1; + fi + + sleep 2 + check_process_status +} + +function run() { + ${JAVA_RUNNER} ${JAVA_OPTS} ${GRAVITINO_DEBUG_OPTS} -cp ${GRAVITINO_CLASSPATH} ${GRAVITINO_SERVER_NAME} +} + +function stop() { + local pid + + pid=$(found_lance_rest_server_pid) + + if [[ -z "${pid}" ]]; then + echo "GravitinoLanceRESTServer is not running" + else + wait_for_lance_rest_server_to_die + echo "GravitinoLanceRESTServer stop" + fi +} + +HOSTNAME=$(hostname) +GRAVITINO_OUTFILE="${GRAVITINO_LOG_DIR}/gravitino-lance-rest-server.out" +GRAVITINO_SERVER_NAME=org.apache.gravitino.lance.server.GravitinoLanceRESTServer +GRAVITINO_SIMPLE_SERVER_NAME=gravitino-lance-rest-server + +JAVA_OPTS+=" -Dfile.encoding=UTF-8" +JAVA_OPTS+=" -Dlog4j2.configurationFile=file://${GRAVITINO_CONF_DIR}/log4j2.properties" +JAVA_OPTS+=" -Dgravitino.log.path=${GRAVITINO_LOG_DIR} ${GRAVITINO_MEM}" +JAVA_OPTS+=" -Dgravitino.server.name=${GRAVITINO_SIMPLE_SERVER_NAME}" +if [ "$JVM_VERSION" -eq 17 ]; then + JAVA_OPTS+=" -XX:+IgnoreUnrecognizedVMOptions" + JAVA_OPTS+=" --add-opens java.base/java.io=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.lang.invoke=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.lang.reflect=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.lang=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.math=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.net=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.nio=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.text=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.time=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens 
java.base/java.util.concurrent.atomic=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.util.concurrent=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.util.regex=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.util=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/jdk.internal.ref=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/jdk.internal.reflect=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.sql/java.sql=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.util.calendar=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.nio.ch=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.nio.cs=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.security.action=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.util.calendar=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.security.jgss/sun.security.krb5=ALL-UNNAMED" +fi + +#JAVA_OPTS+=" -Djava.security.krb5.conf=/etc/krb5.conf" + +if [ -d "${GRAVITINO_HOME}/lance-rest-server/libs" ]; then + addJarInDir "${GRAVITINO_HOME}/lance-rest-server/libs" + addDirToClasspath "${GRAVITINO_HOME}/lance-rest-server/conf" +else + addJarInDir "${GRAVITINO_HOME}/libs" +fi + +case "${1}" in + start) + start + ;; + run) + run + ;; + stop) + stop + ;; + restart) + stop + start + ;; + status) + check_process_status + ;; + *) + echo ${USAGE} +esac diff --git a/build.gradle.kts b/build.gradle.kts index 0a98b6779b1..ee419f23bd4 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -683,6 +683,7 @@ tasks { "copyCliLib", ":authorizations:copyLibAndConfig", ":iceberg:iceberg-rest-server:copyLibAndConfigs", + ":lance:lance-rest-server:copyLibAndConfigs", ":web:web:build" ) @@ -772,6 +773,51 @@ tasks { } } + val compileLanceRESTServer by registering { + dependsOn("lance:lance-rest-server:copyLibAndConfigsToStandalonePackage") + group = "gravitino distribution" + outputs.dir(projectDir.dir("distribution/${rootProject.name}-lance-rest-server")) + doLast { + copy { + from(projectDir.dir("conf")) { + include( + 
"${rootProject.name}-lance-rest-server.conf.template", + "${rootProject.name}-env.sh.template", + "log4j2.properties.template" + ) + into("${rootProject.name}-lance-rest-server/conf") + } + from(projectDir.dir("bin")) { + include("common.sh.template", "${rootProject.name}-lance-rest-server.sh.template") + into("${rootProject.name}-lance-rest-server/bin") + } + into(outputDir) + rename { fileName -> + fileName.replace(".template", "") + } + eachFile { + if (name == "gravitino-env.sh") { + filter { line -> + line.replace("GRAVITINO_VERSION_PLACEHOLDER", "$version") + } + } + } + fileMode = 0b111101101 + } + + copy { + from(projectDir.dir("licenses")) { into("${rootProject.name}-lance-rest-server/licenses") } + from(projectDir.file("LICENSE.rest")) { into("${rootProject.name}-lance-rest-server") } + from(projectDir.file("NOTICE.rest")) { into("${rootProject.name}-lance-rest-server") } + from(projectDir.file("README.md")) { into("${rootProject.name}-lance-rest-server") } + into(outputDir) + rename { fileName -> + fileName.replace(".rest", "") + } + } + } + } + val compileTrinoConnector by registering { dependsOn("trino-connector:trino-connector:copyLibs") group = "gravitino distribution" @@ -791,7 +837,7 @@ tasks { } val assembleDistribution by registering(Tar::class) { - dependsOn("assembleTrinoConnector", "assembleIcebergRESTServer") + dependsOn("assembleTrinoConnector", "assembleIcebergRESTServer", "assembleLanceRESTServer") group = "gravitino distribution" finalizedBy("checksumDistribution") into("${rootProject.name}-$version-bin") @@ -823,6 +869,17 @@ tasks { destinationDirectory.set(projectDir.dir("distribution")) } + val assembleLanceRESTServer by registering(Tar::class) { + dependsOn("compileLanceRESTServer") + group = "gravitino distribution" + finalizedBy("checksumLanceRESTServerDistribution") + into("${rootProject.name}-lance-rest-server-$version-bin") + from(compileLanceRESTServer.map { it.outputs.files.single() }) + compression = Compression.GZIP + 
archiveFileName.set("${rootProject.name}-lance-rest-server-$version-bin.tar.gz") + destinationDirectory.set(projectDir.dir("distribution")) + } + register("checksumIcebergRESTServerDistribution") { group = "gravitino distribution" dependsOn(assembleIcebergRESTServer) @@ -839,9 +896,30 @@ tasks { } } + register("checksumLanceRESTServerDistribution") { + group = "gravitino distribution" + dependsOn(assembleLanceRESTServer) + val archiveFile = assembleLanceRESTServer.flatMap { it.archiveFile } + val checksumFile = archiveFile.map { archive -> + archive.asFile.let { it.resolveSibling("${it.name}.sha256") } + } + inputs.file(archiveFile) + outputs.file(checksumFile) + doLast { + checksumFile.get().writeText( + serviceOf().sha256(archiveFile.get().asFile).toString() + ) + } + } + register("checksumDistribution") { group = "gravitino distribution" - dependsOn(assembleDistribution, "checksumTrinoConnector", "checksumIcebergRESTServerDistribution") + dependsOn( + assembleDistribution, + "checksumTrinoConnector", + "checksumIcebergRESTServerDistribution", + "checksumLanceRESTServerDistribution" + ) val archiveFile = assembleDistribution.flatMap { it.archiveFile } val checksumFile = archiveFile.map { archive -> archive.asFile.let { it.resolveSibling("${it.name}.sha256") } @@ -885,6 +963,7 @@ tasks { !it.name.startsWith("filesystem") && !it.name.startsWith("flink") && !it.name.startsWith("iceberg") && + !it.name.startsWith("lance") && !it.name.startsWith("spark") && it.name != "hadoop-common" && it.name != "hive-metastore-common" && @@ -916,6 +995,7 @@ tasks { !it.name.startsWith("filesystem") && !it.name.startsWith("flink") && !it.name.startsWith("iceberg") && + !it.name.startsWith("lance") && !it.name.startsWith("integration-test") && !it.name.startsWith("spark") && !it.name.startsWith("trino-connector") && diff --git a/conf/gravitino-lance-rest-server.conf.template b/conf/gravitino-lance-rest-server.conf.template new file mode 100644 index 00000000000..32609bffcaa --- 
/dev/null +++ b/conf/gravitino-lance-rest-server.conf.template @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# THE CONFIGURATION FOR Lance REST SERVER +gravitino.lance-rest.shutdown.timeout = 3000 + +# THE CONFIGURATION FOR Lance REST WEB SERVER +# The host name of the built-in web server +gravitino.lance-rest.host = 0.0.0.0 +# The http port number of the built-in web server +gravitino.lance-rest.httpPort = 9101 +# The min thread size of the built-in web server +gravitino.lance-rest.minThreads = 24 +# The max thread size of the built-in web server +gravitino.lance-rest.maxThreads = 200 +# The stop timeout of the built-in web server +gravitino.lance-rest.stopTimeout = 30000 +# The timeout of idle connections +gravitino.lance-rest.idleTimeout = 30000 +# The executor thread pool work queue size of the built-in web server +gravitino.lance-rest.threadPoolWorkQueueSize = 100 +# The request header size of the built-in web server +gravitino.lance-rest.requestHeaderSize = 131072 +# The response header size of the built-in web server +gravitino.lance-rest.responseHeaderSize = 131072 + +# THE CONFIGURATION FOR Lance CATALOG +# The logical Lance catalog served by this REST endpoint +gravitino.lance-rest.catalog-name 
= default diff --git a/core/src/main/java/org/apache/gravitino/cache/EntityCacheWeigher.java b/core/src/main/java/org/apache/gravitino/cache/EntityCacheWeigher.java index 768f60eb647..edc3ca6b9b4 100644 --- a/core/src/main/java/org/apache/gravitino/cache/EntityCacheWeigher.java +++ b/core/src/main/java/org/apache/gravitino/cache/EntityCacheWeigher.java @@ -40,24 +40,36 @@ * or manually cleared. *
  • Catalog: 0, which means that it will never be evicted from the cache unless timeout occurs * or manually cleared. - *
  • Schema: 10 - *
  • Other: 100 + *
  • Schema: 500 + *
  • Tag: 100 + *
  • Policy: 100 + *
  • Other: 200 * */ public class EntityCacheWeigher implements Weigher> { - public static final int METALAKE_WEIGHT = 0; + public static final int METALAKE_WEIGHT = 0; // 0 means never evict public static final int CATALOG_WEIGHT = 0; - public static final int SCHEMA_WEIGHT = 10; - public static final int OTHER_WEIGHT = 100; + public static final int SCHEMA_WEIGHT = 500; // higher weight means it will less likely be evicted + public static final int OTHER_WEIGHT = 200; + public static final int TAG_WEIGHT = 100; + public static final int POLICY_WEIGHT = 100; private static final Logger LOG = LoggerFactory.getLogger(EntityCacheWeigher.class.getName()); private static final EntityCacheWeigher INSTANCE = new EntityCacheWeigher(); private static final Map ENTITY_WEIGHTS = ImmutableMap.of( Entity.EntityType.METALAKE, METALAKE_WEIGHT, Entity.EntityType.CATALOG, CATALOG_WEIGHT, - Entity.EntityType.SCHEMA, SCHEMA_WEIGHT); + Entity.EntityType.SCHEMA, SCHEMA_WEIGHT, + Entity.EntityType.TAG, TAG_WEIGHT, + Entity.EntityType.POLICY, POLICY_WEIGHT); private static final long MAX_WEIGHT = - 2 * (METALAKE_WEIGHT * 10 + CATALOG_WEIGHT * (10 * 200) + SCHEMA_WEIGHT * (10 * 200 * 1000)); + 2 + * (METALAKE_WEIGHT * 10 + + CATALOG_WEIGHT * 100 + + SCHEMA_WEIGHT * 1000 + + OTHER_WEIGHT * 10000 + + TAG_WEIGHT * 10000 + + POLICY_WEIGHT * 10000); @VisibleForTesting protected EntityCacheWeigher() {} diff --git a/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java b/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java index 55c62a1ae92..3944d9a282b 100644 --- a/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java +++ b/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java @@ -21,19 +21,25 @@ import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; +import com.google.common.collect.ImmutableMap; import java.time.Duration; import java.util.List; +import java.util.stream.IntStream; import 
org.apache.gravitino.Catalog; import org.apache.gravitino.Config; import org.apache.gravitino.Configs; import org.apache.gravitino.Entity; import org.apache.gravitino.NameIdentifier; import org.apache.gravitino.Namespace; +import org.apache.gravitino.file.Fileset; import org.apache.gravitino.meta.AuditInfo; import org.apache.gravitino.meta.BaseMetalake; import org.apache.gravitino.meta.CatalogEntity; +import org.apache.gravitino.meta.FilesetEntity; import org.apache.gravitino.meta.SchemaEntity; import org.apache.gravitino.meta.SchemaVersion; +import org.apache.gravitino.meta.TagEntity; +import org.apache.gravitino.utils.NameIdentifierUtil; import org.awaitility.Awaitility; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -47,14 +53,127 @@ void testDefaultCacheConfig() { Assertions.assertTrue(config.get(Configs.CACHE_WEIGHER_ENABLED)); Assertions.assertEquals(10_000, config.get(Configs.CACHE_MAX_ENTRIES)); Assertions.assertEquals(3_600_000L, config.get(Configs.CACHE_EXPIRATION_TIME)); - Assertions.assertEquals(40_000_000L, EntityCacheWeigher.getMaxWeight()); + Assertions.assertEquals(9_000_000L, EntityCacheWeigher.getMaxWeight()); Assertions.assertEquals("caffeine", config.get(Configs.CACHE_IMPLEMENTATION)); } + @Test + void testPolicyAndTagCacheWeigher() throws InterruptedException { + Caffeine builder = Caffeine.newBuilder(); + builder.maximumWeight(2000); + builder.weigher(EntityCacheWeigher.getInstance()); + Cache> cache = builder.build(); + + BaseMetalake baseMetalake = + BaseMetalake.builder() + .withName("metalake1") + .withId(1L) + .withVersion(SchemaVersion.V_0_1) + .withAuditInfo(AuditInfo.EMPTY) + .build(); + cache.put( + EntityCacheRelationKey.of(NameIdentifier.of("metalake1"), Entity.EntityType.METALAKE), + List.of(baseMetalake)); + CatalogEntity catalogEntity = + CatalogEntity.builder() + .withNamespace(Namespace.of("metalake1")) + .withName("catalog1") + .withProvider("provider") + .withAuditInfo(AuditInfo.EMPTY) + 
.withId(100L) + .withType(Catalog.Type.RELATIONAL) + .build(); + cache.put( + EntityCacheRelationKey.of( + NameIdentifier.of(new String[] {"metalake1", "catalog1"}), Entity.EntityType.CATALOG), + List.of(catalogEntity)); + + SchemaEntity schemaEntity = + SchemaEntity.builder() + .withNamespace(Namespace.of("metalake1", "catalog1")) + .withName("schema1") + .withAuditInfo(AuditInfo.EMPTY) + .withId(1000L) + .build(); + cache.put( + EntityCacheRelationKey.of( + NameIdentifier.of(new String[] {"metalake1", "catalog1", "schema1"}), + Entity.EntityType.SCHEMA), + List.of(schemaEntity)); + + for (int i = 0; i < 5; i++) { + String filesetName = "fileset" + i; + FilesetEntity fileset = + FilesetEntity.builder() + .withNamespace(Namespace.of("metalake1", "catalog1", "schema1")) + .withName(filesetName) + .withAuditInfo(AuditInfo.EMPTY) + .withStorageLocations(ImmutableMap.of("default", "s3://bucket/path")) + .withId((long) (i + 1) * 10_000) + .withFilesetType(Fileset.Type.MANAGED) + .build(); + cache.put( + EntityCacheRelationKey.of( + NameIdentifier.of(new String[] {"metalake1", "catalog1", "schema1", filesetName}), + Entity.EntityType.FILESET), + List.of(fileset)); + } + + for (int i = 0; i < 10; i++) { + String tagName = "tag" + i; + NameIdentifier tagNameIdent = NameIdentifierUtil.ofTag("metalake", tagName); + TagEntity tagEntity = + TagEntity.builder() + .withNamespace(tagNameIdent.namespace()) + .withName(tagName) + .withAuditInfo(AuditInfo.EMPTY) + .withId((long) (i + 1) * 100_000) + .build(); + cache.put(EntityCacheRelationKey.of(tagNameIdent, Entity.EntityType.TAG), List.of(tagEntity)); + } + + // The weight of the cache has exceeded 2000, some entities will be evicted if we continue to + // add fileset entities. 
+ for (int i = 5; i < 15; i++) { + String filesetName = "fileset" + i; + FilesetEntity fileset = + FilesetEntity.builder() + .withNamespace(Namespace.of("metalake1", "catalog1", "schema1")) + .withName(filesetName) + .withAuditInfo(AuditInfo.EMPTY) + .withStorageLocations(ImmutableMap.of("default", "s3://bucket/path")) + .withId((long) (i + 1) * 10_000) + .withFilesetType(Fileset.Type.MANAGED) + .build(); + cache.put( + EntityCacheRelationKey.of( + NameIdentifier.of(new String[] {"metalake1", "catalog1", "schema1", filesetName}), + Entity.EntityType.FILESET), + List.of(fileset)); + } + + Thread.sleep(1000); + + // There should be no tag entities left in the cache: inserting the extra fileset entities + // pushes the total weight over the 2000 limit, so the older tag entries are evicted to make room. + Awaitility.await() + .atMost(Duration.ofSeconds(5)) + .pollInterval(Duration.ofMillis(10)) + .until( + () -> + IntStream.of(0, 1, 2, 3) + .mapToObj(i -> NameIdentifierUtil.ofTag("metalake", "tag" + i)) + .allMatch( + tagNameIdent -> + cache.getIfPresent( + EntityCacheRelationKey.of(tagNameIdent, Entity.EntityType.TAG)) + == null)); + } + @Test void testCaffeineCacheWithWeight() throws Exception { Caffeine builder = Caffeine.newBuilder(); - builder.maximumWeight(500); + builder.maximumWeight(5000); builder.weigher(EntityCacheWeigher.getInstance()); Cache> cache = builder.build(); @@ -121,11 +240,11 @@ void testCaffeineCacheWithWeight() throws Exception { NameIdentifier.of("metalake1.catalog" + i), Entity.EntityType.CATALOG))); } - // Only some of the 100 schemas are still in the cache, to be exact, 500 / 10 = 50 schemas. + // Only some of the 100 schemas are still in the cache, to be exact, 5000 / 500 = 10 schemas.
Awaitility.await() .atMost(Duration.ofSeconds(5)) .pollInterval(Duration.ofMillis(10)) - .until(() -> cache.asMap().size() == 10 + 3 + 500 / 10); + .until(() -> cache.asMap().size() == 10 + 3 + 5000 / 500); } @Test diff --git a/lance/build.gradle.kts b/lance/build.gradle.kts new file mode 100644 index 00000000000..fa6eb7d5ef9 --- /dev/null +++ b/lance/build.gradle.kts @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +tasks.all { + enabled = false +} diff --git a/lance/lance-common/build.gradle.kts b/lance/lance-common/build.gradle.kts new file mode 100644 index 00000000000..5048d274f66 --- /dev/null +++ b/lance/lance-common/build.gradle.kts @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +description = "lance-common" + +plugins { + `maven-publish` + id("java") + id("idea") +} + +dependencies { + implementation(project(":api")) + implementation(project(":catalogs:catalog-common")) + implementation(project(":common")) { + exclude("*") + } + implementation(project(":core")) { + exclude("*") + } + + implementation(libs.guava) + implementation(libs.commons.lang3) + implementation(libs.slf4j.api) + + testImplementation(libs.junit.jupiter.api) + testRuntimeOnly(libs.junit.jupiter.engine) +} diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java new file mode 100644 index 00000000000..f2d7e748cf8 --- /dev/null +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.common.config; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.gravitino.Config; +import org.apache.gravitino.OverwriteDefaultConfig; +import org.apache.gravitino.config.ConfigBuilder; +import org.apache.gravitino.config.ConfigConstants; +import org.apache.gravitino.config.ConfigEntry; + +/** Base Lance REST configuration. */ +public class LanceConfig extends Config implements OverwriteDefaultConfig { + + public static final String LANCE_CONFIG_PREFIX = "gravitino.lance-rest."; + + public static final int DEFAULT_LANCE_REST_SERVICE_HTTP_PORT = 9101; + public static final int DEFAULT_LANCE_REST_SERVICE_HTTPS_PORT = 9533; + + public static final ConfigEntry CATALOG_NAME = + new ConfigBuilder(LANCE_CONFIG_PREFIX + "catalog-name") + .doc("Logical Lance catalog served by the REST endpoint") + .version(ConfigConstants.VERSION_0_1_0) + .stringConf() + .createWithDefault("default"); + + public LanceConfig(Map properties) { + super(false); + loadFromMap(properties, key -> true); + } + + public LanceConfig() { + super(false); + } + + public String getCatalogName() { + return get(CATALOG_NAME); + } + + @Override + public Map getOverwriteDefaultConfig() { + return ImmutableMap.of( + ConfigConstants.WEBSERVER_HTTP_PORT, + String.valueOf(DEFAULT_LANCE_REST_SERVICE_HTTP_PORT), + ConfigConstants.WEBSERVER_HTTPS_PORT, + String.valueOf(DEFAULT_LANCE_REST_SERVICE_HTTPS_PORT)); + } +} diff --git 
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceCatalogService.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceCatalogService.java new file mode 100644 index 00000000000..67dd4c2d226 --- /dev/null +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceCatalogService.java @@ -0,0 +1,352 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.common.ops; + +import com.google.common.collect.ImmutableMap; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.lance.common.config.LanceConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Thin placeholder that will later bridge Lance catalog metadata into Gravitino. + * + *

    The current implementation keeps an in-memory catalog view so the REST surface mirrors the + * Iceberg catalog experience while the Lance integration is built out for real. + */ +public class LanceCatalogService implements AutoCloseable { + + private static final Logger LOG = LoggerFactory.getLogger(LanceCatalogService.class); + + private final LanceConfig config; + private final ConcurrentMap namespaces; + + public LanceCatalogService(LanceConfig config) { + this.config = config; + this.namespaces = new ConcurrentHashMap<>(); + seedSampleMetadata(); + } + + public String catalogName() { + return config.getCatalogName(); + } + + public boolean namespaceExists(String namespace) { + return namespaces.containsKey(namespace); + } + + public Map> listNamespaces() { + Map> result = new ConcurrentHashMap<>(); + namespaces.forEach( + (name, state) -> + result.put( + name, Collections.unmodifiableMap(new ConcurrentHashMap<>(state.properties)))); + return Map.copyOf(result); + } + + public List listNamespaceNames() { + return namespaces.keySet().stream() + .sorted(Comparator.naturalOrder()) + .collect(Collectors.toUnmodifiableList()); + } + + public NamespaceListingResult listChildNamespaces( + String parentId, String delimiter, String pageToken, Integer limit) { + String normalizedParent = StringUtils.trimToEmpty(parentId); + String effectiveDelimiter = StringUtils.isBlank(delimiter) ? "$" : delimiter; + + List sortedNamespaces = listNamespaceNames(); + List filtered = filterChildren(sortedNamespaces, normalizedParent, effectiveDelimiter); + + int startingOffset = parsePageToken(pageToken, filtered.size()); + int pageLimit = limit == null ? filtered.size() : validatePositiveLimit(limit, filtered.size()); + int endIndex = Math.min(filtered.size(), startingOffset + pageLimit); + + List page = filtered.subList(startingOffset, endIndex); + String nextToken = endIndex < filtered.size() ? 
String.valueOf(endIndex) : null; + return new NamespaceListingResult(normalizedParent, effectiveDelimiter, page, nextToken); + } + + public boolean createNamespace(String namespace) { + if (StringUtils.isBlank(namespace)) { + throw new IllegalArgumentException("Namespace must be non-empty"); + } + NamespaceState state = new NamespaceState(Collections.emptyMap()); + NamespaceState existing = namespaces.putIfAbsent(namespace, state); + if (existing == null) { + LOG.info("Created Lance namespace {}", namespace); + return true; + } + return false; + } + + public boolean dropNamespace(String namespace) { + NamespaceState state = namespaces.get(namespace); + if (state == null) { + return false; + } + if (!state.tables.isEmpty()) { + LOG.info("Refusing to drop Lance namespace {} because it still owns tables", namespace); + return false; + } + boolean removed = namespaces.remove(namespace, state); + if (removed) { + LOG.info("Dropped Lance namespace {}", namespace); + } + return removed; + } + + public List listTables(String namespace) { + NamespaceState state = namespaces.get(namespace); + if (state == null) { + throw new IllegalArgumentException("Unknown namespace: " + namespace); + } + return state.tables.keySet().stream() + .sorted(Comparator.naturalOrder()) + .collect(Collectors.toUnmodifiableList()); + } + + public Optional> loadTable(String namespace, String table) { + NamespaceState state = namespaces.get(namespace); + if (state == null) { + return Optional.empty(); + } + LanceTableEntry tableEntry = state.tables.get(table); + if (tableEntry == null) { + return Optional.empty(); + } + return Optional.of(tableEntry.describe()); + } + + public TableListingResult listTables( + String namespaceId, String delimiter, String pageToken, Integer limit) { + String normalizedNamespace = StringUtils.trimToEmpty(namespaceId); + if (StringUtils.isBlank(normalizedNamespace)) { + throw new IllegalArgumentException("Namespace id must be provided"); + } + + String effectiveDelimiter 
= StringUtils.isBlank(delimiter) ? "$" : delimiter; + + NamespaceState state = namespaces.get(normalizedNamespace); + if (state == null) { + throw new NoSuchElementException("Unknown namespace: " + normalizedNamespace); + } + + List sortedTables = + state.tables.keySet().stream() + .sorted(Comparator.naturalOrder()) + .collect(Collectors.toList()); + + int startingOffset = parsePageToken(pageToken, sortedTables.size()); + int pageLimit = + limit == null ? sortedTables.size() : validatePositiveLimit(limit, sortedTables.size()); + int endIndex = Math.min(sortedTables.size(), startingOffset + pageLimit); + + List page = sortedTables.subList(startingOffset, endIndex); + String nextToken = endIndex < sortedTables.size() ? String.valueOf(endIndex) : null; + + return new TableListingResult(normalizedNamespace, effectiveDelimiter, page, nextToken); + } + + @Override + public void close() { + namespaces.clear(); + } + + private void seedSampleMetadata() { + NamespaceState defaultNamespace = + namespaces.computeIfAbsent("default", key -> new NamespaceState(Collections.emptyMap())); + defaultNamespace.tables.put( + "sample_table", + new LanceTableEntry( + "sample_table", + "default", + ImmutableMap.of( + "format", "lance", + "uri", "file:///tmp/sample_table.lance", + "summary", "Placeholder Lance table metadata"))); + } + + private static final class NamespaceState { + private final Map properties; + private final ConcurrentMap tables; + + NamespaceState(Map properties) { + this.properties = new ConcurrentHashMap<>(properties); + this.tables = new ConcurrentHashMap<>(); + } + } + + private static final class LanceTableEntry { + private final String name; + private final String namespace; + private final Map metadata; + + LanceTableEntry(String name, String namespace, Map metadata) { + this.name = name; + this.namespace = namespace; + this.metadata = new ConcurrentHashMap<>(metadata); + } + + Map describe() { + Map result = new ConcurrentHashMap<>(metadata); + 
result.put("name", name); + result.put("namespace", namespace); + return Collections.unmodifiableMap(result); + } + } + + private List filterChildren(List namespaces, String parentId, String delimiter) { + boolean rootRequest = StringUtils.isBlank(parentId) || "root".equalsIgnoreCase(parentId); + if (rootRequest) { + return namespaces; + } + + String parentPrefix = parentId + delimiter; + return namespaces.stream() + .filter(ns -> ns.startsWith(parentPrefix)) + .map( + ns -> { + String remainder = ns.substring(parentPrefix.length()); + int nextDelimiter = remainder.indexOf(delimiter); + if (nextDelimiter >= 0) { + return remainder.substring(0, nextDelimiter); + } + return remainder; + }) + .filter(child -> !child.isEmpty()) + .distinct() + .sorted(Comparator.naturalOrder()) + .collect(Collectors.toUnmodifiableList()); + } + + private int parsePageToken(String pageToken, int size) { + if (StringUtils.isBlank(pageToken)) { + return 0; + } + try { + int parsed = Integer.parseInt(pageToken); + if (parsed < 0 || parsed > size) { + throw new IllegalArgumentException("Invalid page_token value"); + } + return parsed; + } catch (NumberFormatException nfe) { + throw new IllegalArgumentException("Invalid page_token value", nfe); + } + } + + private int validatePositiveLimit(int limit, int size) { + if (limit <= 0) { + throw new IllegalArgumentException("limit must be greater than 0"); + } + return Math.min(limit, Math.max(size, 0)); + } + + public static final class NamespaceListingResult { + private final String parentId; + private final String delimiter; + private final List namespaces; + private final String nextPageToken; + + NamespaceListingResult( + String parentId, String delimiter, List namespaces, String nextPageToken) { + this.parentId = parentId; + this.delimiter = delimiter; + this.namespaces = List.copyOf(namespaces); + this.nextPageToken = nextPageToken; + } + + public String getParentId() { + return parentId; + } + + public String getDelimiter() { + return 
delimiter; + } + + public List getNamespaces() { + return namespaces; + } + + public Optional getNextPageToken() { + return Optional.ofNullable(nextPageToken); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof NamespaceListingResult)) { + return false; + } + NamespaceListingResult that = (NamespaceListingResult) o; + return Objects.equals(parentId, that.parentId) + && Objects.equals(delimiter, that.delimiter) + && Objects.equals(namespaces, that.namespaces) + && Objects.equals(nextPageToken, that.nextPageToken); + } + + @Override + public int hashCode() { + return Objects.hash(parentId, delimiter, namespaces, nextPageToken); + } + } + + public static final class TableListingResult { + private final String namespaceId; + private final String delimiter; + private final List tables; + private final String nextPageToken; + + TableListingResult( + String namespaceId, String delimiter, List tables, String nextPageToken) { + this.namespaceId = namespaceId; + this.delimiter = delimiter; + this.tables = List.copyOf(tables); + this.nextPageToken = nextPageToken; + } + + public String getNamespaceId() { + return namespaceId; + } + + public String getDelimiter() { + return delimiter; + } + + public List getTables() { + return tables; + } + + public Optional getNextPageToken() { + return Optional.ofNullable(nextPageToken); + } + } +} diff --git a/lance/lance-rest-server/build.gradle.kts b/lance/lance-rest-server/build.gradle.kts new file mode 100644 index 00000000000..03376095935 --- /dev/null +++ b/lance/lance-rest-server/build.gradle.kts @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +description = "lance-rest-server" + +plugins { + `maven-publish` + id("java") + id("idea") +} + +dependencies { + implementation(project(":api")) + implementation(project(":catalogs:catalog-common")) + implementation(project(":common")) { + exclude("*") + } + implementation(project(":core")) { + exclude("*") + } + implementation(project(":server-common")) { + exclude("*") + } + implementation(project(":lance:lance-common")) + + implementation(libs.bundles.jetty) + implementation(libs.bundles.jersey) + implementation(libs.bundles.log4j) + implementation(libs.bundles.metrics) + implementation(libs.bundles.prometheus) + implementation(libs.metrics.jersey2) + implementation(libs.guava) + implementation(libs.jackson.annotations) + implementation(libs.jackson.databind) + implementation(libs.jackson.datatype.jdk8) + implementation(libs.jackson.datatype.jsr310) + + testImplementation(libs.junit.jupiter.api) + testRuntimeOnly(libs.junit.jupiter.engine) +} + +tasks { + val copyDepends by registering(Copy::class) { + from(configurations.runtimeClasspath) + into("build/libs") + } + + jar { + finalizedBy(copyDepends) + } + + register("copyLibs", Copy::class) { + dependsOn(copyDepends, "build") + from("build/libs") + into("$rootDir/distribution/package/lance-rest-server/libs") + } + + register("copyLibsToStandalonePackage", Copy::class) { + dependsOn(copyDepends, "build") + from("build/libs") 
+ into("$rootDir/distribution/gravitino-lance-rest-server/libs") + } + + register("copyLibAndConfigs", Copy::class) { + dependsOn("copyLibs") + } + + register("copyLibAndConfigsToStandalonePackage", Copy::class) { + dependsOn("copyLibsToStandalonePackage") + } + + named("generateMetadataFileForMavenJavaPublication") { + dependsOn(copyDepends) + } +} diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java new file mode 100644 index 00000000000..e85dc37b4a3 --- /dev/null +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.lance; + +import java.util.Map; +import javax.servlet.Servlet; +import org.apache.gravitino.auxiliary.GravitinoAuxiliaryService; +import org.apache.gravitino.lance.common.config.LanceConfig; +import org.apache.gravitino.lance.common.ops.LanceCatalogService; +import org.apache.gravitino.lance.service.rest.LanceNamespaceOperations; +import org.apache.gravitino.server.web.JettyServer; +import org.apache.gravitino.server.web.JettyServerConfig; +import org.glassfish.jersey.jackson.JacksonFeature; +import org.glassfish.jersey.server.ResourceConfig; +import org.glassfish.jersey.servlet.ServletContainer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Thin REST service shell for Lance metadata. */ +public class LanceRESTService implements GravitinoAuxiliaryService { + + private static final Logger LOG = LoggerFactory.getLogger(LanceRESTService.class); + + public static final String SERVICE_NAME = "lance-rest"; + public static final String LANCE_SPEC = "/lance/*"; + + private JettyServer server; + private LanceCatalogService catalogService; + + @Override + public String shortName() { + return SERVICE_NAME; + } + + @Override + public void serviceInit(Map properties) { + LanceConfig lanceConfig = new LanceConfig(properties); + JettyServerConfig serverConfig = JettyServerConfig.fromConfig(lanceConfig); + + server = new JettyServer(); + server.initialize(serverConfig, SERVICE_NAME, false); + + catalogService = new LanceCatalogService(lanceConfig); + + ResourceConfig resourceConfig = new ResourceConfig(); + resourceConfig.register(JacksonFeature.class); + resourceConfig.register(new LanceNamespaceOperations(catalogService)); + + Servlet container = new ServletContainer(resourceConfig); + server.addServlet(container, LANCE_SPEC); + server.addCustomFilters(LANCE_SPEC); + server.addSystemFilters(LANCE_SPEC); + + LOG.info("Initialized Lance REST service for catalog {}", lanceConfig.getCatalogName()); + } + + @Override + public 
void serviceStart() { + if (server != null) { + server.start(); + LOG.info("Lance REST service started"); + } + } + + @Override + public void serviceStop() throws Exception { + if (server != null) { + server.stop(); + LOG.info("Lance REST service stopped"); + } + if (catalogService != null) { + catalogService.close(); + } + } + + public void join() { + if (server != null) { + server.join(); + } + } +} diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/server/GravitinoLanceRESTServer.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/server/GravitinoLanceRESTServer.java new file mode 100644 index 00000000000..e28bdd5c175 --- /dev/null +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/server/GravitinoLanceRESTServer.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.lance.server; + +import org.apache.gravitino.Config; +import org.apache.gravitino.GravitinoEnv; +import org.apache.gravitino.lance.LanceRESTService; +import org.apache.gravitino.lance.common.config.LanceConfig; +import org.apache.gravitino.server.ServerConfig; +import org.apache.gravitino.server.authentication.ServerAuthenticator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Bootstrap entry point for the Lance REST facade. */ +public class GravitinoLanceRESTServer { + + private static final Logger LOG = LoggerFactory.getLogger(GravitinoLanceRESTServer.class); + + public static final String CONF_FILE = "gravitino-lance-rest-server.conf"; + + private final Config serverConfig; + + private LanceRESTService lanceRESTService; + private GravitinoEnv gravitinoEnv; + + public GravitinoLanceRESTServer(Config config) { + this.serverConfig = config; + this.gravitinoEnv = GravitinoEnv.getInstance(); + this.lanceRESTService = new LanceRESTService(); + } + + private void initialize() { + gravitinoEnv.initializeBaseComponents(serverConfig); + lanceRESTService.serviceInit( + serverConfig.getConfigsWithPrefix(LanceConfig.LANCE_CONFIG_PREFIX)); + ServerAuthenticator.getInstance().initialize(serverConfig); + } + + private void start() { + gravitinoEnv.start(); + lanceRESTService.serviceStart(); + } + + private void join() { + lanceRESTService.join(); + } + + private void stop() throws Exception { + lanceRESTService.serviceStop(); + LOG.info("Gravitino Lance REST service stopped"); + } + + public static void main(String[] args) { + LOG.info("Starting Gravitino Lance REST Server"); + String confPath = System.getenv("GRAVITINO_TEST") == null ? 
"" : args[0]; + ServerConfig serverConfig = ServerConfig.loadConfig(confPath, CONF_FILE); + GravitinoLanceRESTServer lanceRESTServer = new GravitinoLanceRESTServer(serverConfig); + lanceRESTServer.initialize(); + + try { + lanceRESTServer.start(); + } catch (Exception e) { + LOG.error("Error while running lance REST server", e); + System.exit(-1); + } + LOG.info("Done, Gravitino Lance REST server started."); + + Runtime.getRuntime() + .addShutdownHook( + new Thread( + () -> { + try { + Thread.sleep(serverConfig.get(ServerConfig.SERVER_SHUTDOWN_TIMEOUT)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.error("Interrupted exception:", e); + } catch (Exception e) { + LOG.error("Error while running clean-up tasks in shutdown hook", e); + } + })); + lanceRESTServer.join(); + + LOG.info("Shutting down Gravitino Lance REST Server ... "); + try { + lanceRESTServer.stop(); + LOG.info("Gravitino Lance REST Server has shut down."); + } catch (Exception e) { + LOG.error("Error while stopping Gravitino Lance REST Server", e); + } + } +} diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListNamespacesResponse.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListNamespacesResponse.java new file mode 100644 index 00000000000..11ec7d3c3c5 --- /dev/null +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListNamespacesResponse.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.service.rest; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.List; + +@JsonInclude(JsonInclude.Include.NON_NULL) +public class LanceListNamespacesResponse { + + @JsonProperty("id") + private final String id; + + @JsonProperty("delimiter") + private final String delimiter; + + @JsonProperty("namespaces") + private final List namespaces; + + @JsonProperty("next_page_token") + private final String nextPageToken; + + public LanceListNamespacesResponse( + String id, String delimiter, List namespaces, String nextPageToken) { + this.id = id; + this.delimiter = delimiter; + this.namespaces = List.copyOf(namespaces); + this.nextPageToken = nextPageToken; + } + + public String getId() { + return id; + } + + public String getDelimiter() { + return delimiter; + } + + public List getNamespaces() { + return namespaces; + } + + public String getNextPageToken() { + return nextPageToken; + } +} diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListTablesResponse.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListTablesResponse.java new file mode 100644 index 00000000000..82e2a909787 --- /dev/null +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListTablesResponse.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.service.rest; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.List; + +@JsonInclude(JsonInclude.Include.NON_NULL) +public class LanceListTablesResponse { + + @JsonProperty("id") + private final String namespaceId; + + @JsonProperty("delimiter") + private final String delimiter; + + @JsonProperty("tables") + private final List tables; + + @JsonProperty("next_page_token") + private final String nextPageToken; + + public LanceListTablesResponse( + String namespaceId, String delimiter, List tables, String nextPageToken) { + this.namespaceId = namespaceId; + this.delimiter = delimiter; + this.tables = List.copyOf(tables); + this.nextPageToken = nextPageToken; + } + + public String getNamespaceId() { + return namespaceId; + } + + public String getDelimiter() { + return delimiter; + } + + public List getTables() { + return tables; + } + + public String getNextPageToken() { + return nextPageToken; + } +} diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java new file mode 100644 index 
00000000000..0ac9457eff9 --- /dev/null +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.service.rest; + +import java.util.NoSuchElementException; +import javax.ws.rs.BadRequestException; +import javax.ws.rs.DefaultValue; +import javax.ws.rs.Encoded; +import javax.ws.rs.GET; +import javax.ws.rs.NotFoundException; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.apache.gravitino.lance.common.ops.LanceCatalogService; + +@Path("/v1/namespace") +@Produces(MediaType.APPLICATION_JSON) +public class LanceNamespaceOperations { + + private final LanceCatalogService catalogService; + + public LanceNamespaceOperations(LanceCatalogService catalogService) { + this.catalogService = catalogService; + } + + @GET + @Path("/{id}/list") + public Response listNamespaces( + @Encoded @PathParam("id") String namespaceId, + @DefaultValue("$") @QueryParam("delimiter") String delimiter, + @QueryParam("page_token") String 
pageToken, + @QueryParam("limit") Integer limit) { + try { + LanceCatalogService.NamespaceListingResult result = + catalogService.listChildNamespaces(namespaceId, delimiter, pageToken, limit); + LanceListNamespacesResponse payload = + new LanceListNamespacesResponse( + result.getParentId(), + result.getDelimiter(), + result.getNamespaces(), + result.getNextPageToken().orElse(null)); + return Response.ok(payload).build(); + } catch (NoSuchElementException nse) { + throw new NotFoundException(nse.getMessage(), nse); + } catch (IllegalArgumentException iae) { + throw new BadRequestException(iae.getMessage(), iae); + } + } + + @GET + @Path("/{id}/table/list") + public Response listTables( + @Encoded @PathParam("id") String namespaceId, + @DefaultValue("$") @QueryParam("delimiter") String delimiter, + @QueryParam("page_token") String pageToken, + @QueryParam("limit") Integer limit) { + try { + LanceCatalogService.TableListingResult result = + catalogService.listTables(namespaceId, delimiter, pageToken, limit); + LanceListTablesResponse payload = + new LanceListTablesResponse( + result.getNamespaceId(), + result.getDelimiter(), + result.getTables(), + result.getNextPageToken().orElse(null)); + return Response.ok(payload).build(); + } catch (NoSuchElementException nse) { + throw new NotFoundException(nse.getMessage(), nse); + } catch (IllegalArgumentException iae) { + throw new BadRequestException(iae.getMessage(), iae); + } + } +} diff --git a/server-common/src/main/java/org/apache/gravitino/server/authorization/MetadataFilterHelper.java b/server-common/src/main/java/org/apache/gravitino/server/authorization/MetadataFilterHelper.java index e435bac4a1c..874424feb27 100644 --- a/server-common/src/main/java/org/apache/gravitino/server/authorization/MetadataFilterHelper.java +++ b/server-common/src/main/java/org/apache/gravitino/server/authorization/MetadataFilterHelper.java @@ -156,6 +156,35 @@ public static E[] filterByExpression( Entity.EntityType entityType, E[] 
entities, Function toNameIdentifier) { + GravitinoAuthorizer authorizer = + GravitinoAuthorizerProvider.getInstance().getGravitinoAuthorizer(); + Principal currentPrincipal = PrincipalUtils.getCurrentPrincipal(); + return filterByExpression( + metalake, expression, entityType, entities, toNameIdentifier, currentPrincipal, authorizer); + } + + /** + * Call {@link AuthorizationExpressionEvaluator} and use specified Principal and + * GravitinoAuthorizer to filter the metadata list + * + * @param metalake metalake name + * @param expression authorization expression + * @param entityType entity type + * @param entities metadata entities + * @param toNameIdentifier function to convert entity to NameIdentifier + * @param currentPrincipal current principal + * @param authorizer authorizer to filter metadata + * @return Filtered Metadata Entity + * @param Entity class + */ + public static E[] filterByExpression( + String metalake, + String expression, + Entity.EntityType entityType, + E[] entities, + Function toNameIdentifier, + Principal currentPrincipal, + GravitinoAuthorizer authorizer) { if (!enableAuthorization()) { return entities; } @@ -163,7 +192,6 @@ public static E[] filterByExpression( AuthorizationRequestContext authorizationRequestContext = new AuthorizationRequestContext(); List> futures = new ArrayList<>(); for (E entity : entities) { - Principal currentPrincipal = PrincipalUtils.getCurrentPrincipal(); futures.add( CompletableFuture.supplyAsync( () -> { @@ -172,7 +200,7 @@ public static E[] filterByExpression( currentPrincipal, () -> { AuthorizationExpressionEvaluator authorizationExpressionEvaluator = - new AuthorizationExpressionEvaluator(expression); + new AuthorizationExpressionEvaluator(expression, authorizer); NameIdentifier nameIdentifier = toNameIdentifier.apply(entity); Map nameIdentifierMap = spiltMetadataNames(metalake, entityType, nameIdentifier); diff --git 
a/server-common/src/main/java/org/apache/gravitino/server/authorization/expression/AuthorizationExpressionEvaluator.java b/server-common/src/main/java/org/apache/gravitino/server/authorization/expression/AuthorizationExpressionEvaluator.java index 78ddf58d44d..02c455e887b 100644 --- a/server-common/src/main/java/org/apache/gravitino/server/authorization/expression/AuthorizationExpressionEvaluator.java +++ b/server-common/src/main/java/org/apache/gravitino/server/authorization/expression/AuthorizationExpressionEvaluator.java @@ -39,6 +39,7 @@ public class AuthorizationExpressionEvaluator { private final String ognlAuthorizationExpression; + private final GravitinoAuthorizer authorizer; /** * Use {@link AuthorizationExpressionConverter} to convert the authorization expression into an @@ -47,8 +48,19 @@ public class AuthorizationExpressionEvaluator { * @param expression authorization expression */ public AuthorizationExpressionEvaluator(String expression) { + this(expression, GravitinoAuthorizerProvider.getInstance().getGravitinoAuthorizer()); + } + + /** + * Constructor of AuthorizationExpressionEvaluator + * + * @param expression authorization expression + * @param authorizer GravitinoAuthorizer instance + */ + public AuthorizationExpressionEvaluator(String expression, GravitinoAuthorizer authorizer) { this.ognlAuthorizationExpression = AuthorizationExpressionConverter.convertToOgnlExpression(expression); + this.authorizer = authorizer; } /** @@ -61,7 +73,24 @@ public AuthorizationExpressionEvaluator(String expression) { public boolean evaluate( Map metadataNames, AuthorizationRequestContext requestContext) { - return evaluate(metadataNames, new HashMap<>(), requestContext); + Principal currentPrincipal = PrincipalUtils.getCurrentPrincipal(); + return evaluate(metadataNames, new HashMap<>(), requestContext, currentPrincipal); + } + + /** + * Use OGNL expressions to invoke GravitinoAuthorizer for authorizing multiple types of metadata + * IDs. 
+ * + * @param metadataNames key-metadata type, value-metadata NameIdentifier + * @param requestContext authorization request context + * @param principal current principal + * @return authorization result + */ + public boolean evaluate( + Map metadataNames, + AuthorizationRequestContext requestContext, + Principal principal) { + return evaluate(metadataNames, new HashMap<>(), requestContext, principal); } /** @@ -77,11 +106,27 @@ public boolean evaluate( Map pathParams, AuthorizationRequestContext requestContext) { Principal currentPrincipal = PrincipalUtils.getCurrentPrincipal(); - GravitinoAuthorizer gravitinoAuthorizer = - GravitinoAuthorizerProvider.getInstance().getGravitinoAuthorizer(); + return evaluate(metadataNames, pathParams, requestContext, currentPrincipal); + } + + /** + * Use OGNL expressions to invoke GravitinoAuthorizer for authorizing multiple types of metadata + * IDs. + * + * @param metadataNames key-metadata type, value-metadata NameIdentifier + * @param pathParams params from request path + * @param requestContext authorization request context + * @param currentPrincipal current principal + * @return authorization result + */ + private boolean evaluate( + Map metadataNames, + Map pathParams, + AuthorizationRequestContext requestContext, + Principal currentPrincipal) { OgnlContext ognlContext = Ognl.createDefaultContext(null); ognlContext.put("principal", currentPrincipal); - ognlContext.put("authorizer", gravitinoAuthorizer); + ognlContext.put("authorizer", authorizer); ognlContext.put("authorizationContext", requestContext); ognlContext.putAll(pathParams); metadataNames.forEach( diff --git a/server-common/src/test/java/org/apache/gravitino/server/authorization/expression/TestAuthorizationExpressionEvaluator.java b/server-common/src/test/java/org/apache/gravitino/server/authorization/expression/TestAuthorizationExpressionEvaluator.java index 59def1e5365..ed3f291dc87 100644 --- 
a/server-common/src/test/java/org/apache/gravitino/server/authorization/expression/TestAuthorizationExpressionEvaluator.java +++ b/server-common/src/test/java/org/apache/gravitino/server/authorization/expression/TestAuthorizationExpressionEvaluator.java @@ -42,8 +42,6 @@ public class TestAuthorizationExpressionEvaluator { public void testEvaluator() { String expression = "CATALOG::USE_CATALOG && SCHEMA::USE_SCHEMA && (TABLE::SELECT_TABLE || TABLE::MODIFY_TABLE)"; - AuthorizationExpressionEvaluator authorizationExpressionEvaluator = - new AuthorizationExpressionEvaluator(expression); try (MockedStatic principalUtilsMocked = mockStatic(PrincipalUtils.class); MockedStatic mockStatic = mockStatic(GravitinoAuthorizerProvider.class)) { @@ -53,6 +51,9 @@ public void testEvaluator() { GravitinoAuthorizerProvider mockedProvider = mock(GravitinoAuthorizerProvider.class); mockStatic.when(GravitinoAuthorizerProvider::getInstance).thenReturn(mockedProvider); when(mockedProvider.getGravitinoAuthorizer()).thenReturn(new MockGravitinoAuthorizer()); + AuthorizationExpressionEvaluator authorizationExpressionEvaluator = + new AuthorizationExpressionEvaluator(expression); + Map metadataNames = new HashMap<>(); metadataNames.put(Entity.EntityType.METALAKE, NameIdentifierUtil.ofMetalake("testMetalake")); metadataNames.put( @@ -79,17 +80,19 @@ public void testEvaluator() { @Test public void testEvaluatorWithOwner() { String expression = "METALAKE::OWNER || CATALOG::CREATE_CATALOG"; - AuthorizationExpressionEvaluator authorizationExpressionEvaluator = - new AuthorizationExpressionEvaluator(expression); try (MockedStatic principalUtilsMocked = mockStatic(PrincipalUtils.class); MockedStatic mockStatic = mockStatic(GravitinoAuthorizerProvider.class)) { - principalUtilsMocked - .when(PrincipalUtils::getCurrentPrincipal) - .thenReturn(new UserPrincipal("tester")); GravitinoAuthorizerProvider mockedProvider = mock(GravitinoAuthorizerProvider.class); 
mockStatic.when(GravitinoAuthorizerProvider::getInstance).thenReturn(mockedProvider); when(mockedProvider.getGravitinoAuthorizer()).thenReturn(new MockGravitinoAuthorizer()); + + AuthorizationExpressionEvaluator authorizationExpressionEvaluator = + new AuthorizationExpressionEvaluator(expression); + principalUtilsMocked + .when(PrincipalUtils::getCurrentPrincipal) + .thenReturn(new UserPrincipal("tester")); + Map metadataNames = new HashMap<>(); metadataNames.put( Entity.EntityType.METALAKE, NameIdentifierUtil.ofMetalake("metalakeWithOutOwner")); diff --git a/settings.gradle.kts b/settings.gradle.kts index 5355fe7bc5f..4a3a5d468a1 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -59,6 +59,8 @@ if (gradle.startParameter.projectProperties["enableFuse"]?.toBoolean() == true) } include("iceberg:iceberg-common") include("iceberg:iceberg-rest-server") +include("lance:lance-common") +include("lance:lance-rest-server") include("authorizations:authorization-ranger", "authorizations:authorization-common", "authorizations:authorization-chain") include("trino-connector:trino-connector", "trino-connector:integration-test") include("spark-connector:spark-common") From ea1576a86becaadc77c3d5bad35c061b58d5e636 Mon Sep 17 00:00:00 2001 From: mchades Date: Wed, 22 Oct 2025 14:09:52 +0800 Subject: [PATCH 03/43] [#8835][#8836] feat(lakehouseCatalog): supports catalog and schema operations for lakehouse catalog (#8851) ### What changes were proposed in this pull request? supports catalog and schema operations for lakehouse catalog ### Why are the changes needed? Fix: #8835 Fix: #8836 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? 
tests added --- .../build.gradle.kts | 1 + .../GenericLakehouseCatalogCapability.java | 24 +- .../GenericLakehouseCatalogOperations.java | 78 ++++-- ...ricLakehouseCatalogPropertiesMetadata.java | 26 +- ...ericLakehouseSchemaPropertiesMetadata.java | 24 +- ...TestGenericLakehouseCatalogOperations.java | 232 ++++++++++++++++++ 6 files changed, 335 insertions(+), 50 deletions(-) create mode 100644 catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/TestGenericLakehouseCatalogOperations.java diff --git a/catalogs/catalog-generic-lakehouse/build.gradle.kts b/catalogs/catalog-generic-lakehouse/build.gradle.kts index c3ad842ac38..fceac14304b 100644 --- a/catalogs/catalog-generic-lakehouse/build.gradle.kts +++ b/catalogs/catalog-generic-lakehouse/build.gradle.kts @@ -42,6 +42,7 @@ dependencies { implementation(libs.commons.io) implementation(libs.commons.lang3) implementation(libs.guava) + implementation(libs.hadoop3.client.api) annotationProcessor(libs.lombok) diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogCapability.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogCapability.java index 08015f7fce1..412b82d6a71 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogCapability.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogCapability.java @@ -24,27 +24,7 @@ public class GenericLakehouseCatalogCapability implements Capability { @Override - public CapabilityResult columnNotNull() { - throw new UnsupportedOperationException( - "Not implemented yet: GenericLakehouseCatalogCapability.columnNotNull"); - } - - @Override - public CapabilityResult columnDefaultValue() { - throw new UnsupportedOperationException( - "Not implemented yet: 
GenericLakehouseCatalogCapability.columnDefaultValue"); - } - - @Override - public CapabilityResult caseSensitiveOnName(Scope scope) { - switch (scope) { - case SCHEMA: - case TABLE: - case COLUMN: - throw new UnsupportedOperationException( - "Not implemented yet: GenericLakehouseCatalogCapability.caseSensitiveOnName"); - default: - return CapabilityResult.SUPPORTED; - } + public CapabilityResult managedStorage(Scope scope) { + return CapabilityResult.SUPPORTED; } } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java index 64743488a01..b626aabc161 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java @@ -18,9 +18,18 @@ */ package org.apache.gravitino.catalog.lakehouse; +import com.google.common.annotations.VisibleForTesting; import java.util.Map; +import java.util.Optional; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.EntityStore; +import org.apache.gravitino.GravitinoEnv; import org.apache.gravitino.NameIdentifier; import org.apache.gravitino.Namespace; +import org.apache.gravitino.Schema; +import org.apache.gravitino.SchemaChange; +import org.apache.gravitino.catalog.ManagedSchemaOperations; import org.apache.gravitino.connector.CatalogInfo; import org.apache.gravitino.connector.CatalogOperations; import org.apache.gravitino.connector.HasPropertyMetadata; @@ -39,11 +48,19 @@ import org.apache.gravitino.rel.expressions.sorts.SortOrder; import org.apache.gravitino.rel.expressions.transforms.Transform; import org.apache.gravitino.rel.indexes.Index; +import 
org.apache.hadoop.fs.Path; /** Operations for interacting with a generic lakehouse catalog in Apache Gravitino. */ public class GenericLakehouseCatalogOperations implements CatalogOperations, SupportsSchemas, TableCatalog { + private static final String SLASH = "/"; + + private final ManagedSchemaOperations managedSchemaOps; + + @SuppressWarnings("unused") // todo: remove this after implementing table operations + private Optional catalogLakehouseDir; + /** * Initializes the generic lakehouse catalog operations with the provided configuration. * @@ -56,7 +73,30 @@ public class GenericLakehouseCatalogOperations public void initialize( Map conf, CatalogInfo info, HasPropertyMetadata propertiesMetadata) throws RuntimeException { - // TODO: Implement initialization logic + String catalogDir = + (String) + propertiesMetadata + .catalogPropertiesMetadata() + .getOrDefault(conf, GenericLakehouseCatalogPropertiesMetadata.LAKEHOUSE_DIR); + this.catalogLakehouseDir = + StringUtils.isNotBlank(catalogDir) + ? Optional.of(catalogDir).map(this::ensureTrailingSlash).map(Path::new) + : Optional.empty(); + } + + public GenericLakehouseCatalogOperations() { + this(GravitinoEnv.getInstance().entityStore()); + } + + @VisibleForTesting + GenericLakehouseCatalogOperations(EntityStore store) { + this.managedSchemaOps = + new ManagedSchemaOperations() { + @Override + protected EntityStore store() { + return store; + } + }; } @Override @@ -65,44 +105,38 @@ public void close() {} @Override public void testConnection( NameIdentifier catalogIdent, - org.apache.gravitino.Catalog.Type type, + Catalog.Type type, String provider, String comment, - Map properties) - throws Exception { - throw new UnsupportedOperationException("Not implemented yet."); + Map properties) { + // No-op for generic lakehouse catalog. 
} @Override - public org.apache.gravitino.NameIdentifier[] listSchemas(org.apache.gravitino.Namespace namespace) - throws NoSuchCatalogException { - throw new UnsupportedOperationException("Not implemented yet."); + public NameIdentifier[] listSchemas(Namespace namespace) throws NoSuchCatalogException { + return managedSchemaOps.listSchemas(namespace); } @Override - public org.apache.gravitino.Schema createSchema( - org.apache.gravitino.NameIdentifier ident, String comment, Map properties) + public Schema createSchema(NameIdentifier ident, String comment, Map properties) throws NoSuchCatalogException, SchemaAlreadyExistsException { - throw new UnsupportedOperationException("Not implemented yet."); + return managedSchemaOps.createSchema(ident, comment, properties); } @Override - public org.apache.gravitino.Schema loadSchema(org.apache.gravitino.NameIdentifier ident) - throws NoSuchSchemaException { - throw new UnsupportedOperationException("Not implemented yet."); + public Schema loadSchema(NameIdentifier ident) throws NoSuchSchemaException { + return managedSchemaOps.loadSchema(ident); } @Override - public org.apache.gravitino.Schema alterSchema( - org.apache.gravitino.NameIdentifier ident, org.apache.gravitino.SchemaChange... changes) + public Schema alterSchema(NameIdentifier ident, SchemaChange... changes) throws NoSuchSchemaException { - throw new UnsupportedOperationException("Not implemented yet."); + return managedSchemaOps.alterSchema(ident, changes); } @Override - public boolean dropSchema(org.apache.gravitino.NameIdentifier ident, boolean cascade) - throws NonEmptySchemaException { - throw new UnsupportedOperationException("Not implemented yet."); + public boolean dropSchema(NameIdentifier ident, boolean cascade) throws NonEmptySchemaException { + return managedSchemaOps.dropSchema(ident, cascade); } @Override @@ -139,4 +173,8 @@ public Table alterTable(NameIdentifier ident, TableChange... 
changes) public boolean dropTable(NameIdentifier ident) { throw new UnsupportedOperationException("Not implemented yet."); } + + private String ensureTrailingSlash(String path) { + return path.endsWith(SLASH) ? path : path + SLASH; + } } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java index 18543bd0a3c..01dfc1da171 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java @@ -19,18 +19,36 @@ package org.apache.gravitino.catalog.lakehouse; -import com.google.common.collect.ImmutableMap; +import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; +import java.util.List; import java.util.Map; import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; public class GenericLakehouseCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata { - private static final Map> GENERIC_LAKEHOUSE_CATALOG_PROPERTY_ENTRIES = - ImmutableMap.>builder().build(); + public static final String LAKEHOUSE_DIR = "lakehouse-dir"; + + private static final Map> PROPERTIES_METADATA; + + static { + List> propertyEntries = + ImmutableList.of( + stringOptionalPropertyEntry( + LAKEHOUSE_DIR, + "The root directory of the lakehouse catalog.", + false /* immutable */, + null, /* defaultValue */ + false /* hidden */)); + + PROPERTIES_METADATA = Maps.uniqueIndex(propertyEntries, PropertyEntry::getName); + } @Override protected Map> specificPropertyEntries() { - 
return GENERIC_LAKEHOUSE_CATALOG_PROPERTY_ENTRIES; + return PROPERTIES_METADATA; } } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java index 05da8443cd9..52a65e7698d 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java @@ -18,20 +18,36 @@ */ package org.apache.gravitino.catalog.lakehouse; -import com.google.common.collect.ImmutableMap; +import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; +import java.util.List; import java.util.Map; import org.apache.gravitino.connector.BasePropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; public class GenericLakehouseSchemaPropertiesMetadata extends BasePropertiesMetadata { - private static final Map> propertiesMetadata; + public static final String LAKEHOUSE_DIR = + GenericLakehouseCatalogPropertiesMetadata.LAKEHOUSE_DIR; + + private static final Map> PROPERTIES_METADATA; static { - propertiesMetadata = ImmutableMap.of(); + List> propertyEntries = + ImmutableList.of( + stringOptionalPropertyEntry( + LAKEHOUSE_DIR, + "The root directory of the lakehouse schema.", + false /* immutable */, + null, /* defaultValue */ + false /* hidden */)); + + PROPERTIES_METADATA = Maps.uniqueIndex(propertyEntries, PropertyEntry::getName); } @Override protected Map> specificPropertyEntries() { - return propertiesMetadata; + return PROPERTIES_METADATA; } } diff --git 
a/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/TestGenericLakehouseCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/TestGenericLakehouseCatalogOperations.java new file mode 100644 index 00000000000..67887c2f7ad --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/TestGenericLakehouseCatalogOperations.java @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.catalog.lakehouse; + +import static org.apache.gravitino.Configs.DEFAULT_ENTITY_RELATIONAL_STORE; +import static org.apache.gravitino.Configs.ENTITY_RELATIONAL_JDBC_BACKEND_DRIVER; +import static org.apache.gravitino.Configs.ENTITY_RELATIONAL_JDBC_BACKEND_MAX_CONNECTIONS; +import static org.apache.gravitino.Configs.ENTITY_RELATIONAL_JDBC_BACKEND_PASSWORD; +import static org.apache.gravitino.Configs.ENTITY_RELATIONAL_JDBC_BACKEND_PATH; +import static org.apache.gravitino.Configs.ENTITY_RELATIONAL_JDBC_BACKEND_URL; +import static org.apache.gravitino.Configs.ENTITY_RELATIONAL_JDBC_BACKEND_USER; +import static org.apache.gravitino.Configs.ENTITY_RELATIONAL_JDBC_BACKEND_WAIT_MILLISECONDS; +import static org.apache.gravitino.Configs.ENTITY_RELATIONAL_STORE; +import static org.apache.gravitino.Configs.ENTITY_STORE; +import static org.apache.gravitino.Configs.RELATIONAL_ENTITY_STORE; +import static org.apache.gravitino.Configs.STORE_DELETE_AFTER_TIME; +import static org.apache.gravitino.Configs.STORE_TRANSACTION_MAX_SKEW_TIME; +import static org.apache.gravitino.Configs.VERSION_RETENTION_COUNT; +import static org.mockito.Mockito.when; + +import java.io.File; +import java.io.IOException; +import java.time.Instant; +import java.util.Arrays; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import org.apache.commons.io.FileUtils; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.Config; +import org.apache.gravitino.Configs; +import org.apache.gravitino.EntityStore; +import org.apache.gravitino.EntityStoreFactory; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Namespace; +import org.apache.gravitino.Schema; +import org.apache.gravitino.StringIdentifier; +import org.apache.gravitino.exceptions.NoSuchCatalogException; +import org.apache.gravitino.exceptions.NoSuchSchemaException; +import 
org.apache.gravitino.exceptions.SchemaAlreadyExistsException; +import org.apache.gravitino.meta.AuditInfo; +import org.apache.gravitino.meta.BaseMetalake; +import org.apache.gravitino.meta.CatalogEntity; +import org.apache.gravitino.meta.SchemaVersion; +import org.apache.gravitino.storage.IdGenerator; +import org.apache.gravitino.storage.RandomIdGenerator; +import org.apache.gravitino.utils.NameIdentifierUtil; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +public class TestGenericLakehouseCatalogOperations { + private static final String STORE_PATH = + "/tmp/gravitino_test_entityStore_" + UUID.randomUUID().toString().replace("-", ""); + private static final String METALAKE_NAME = "metalake_for_lakehouse_test"; + private static final String CATALOG_NAME = "lakehouse_catalog_test"; + + private static EntityStore store; + private static IdGenerator idGenerator; + private static GenericLakehouseCatalogOperations ops; + + @BeforeAll + public static void setUp() throws IOException { + Config config = Mockito.mock(Config.class); + when(config.get(ENTITY_STORE)).thenReturn(RELATIONAL_ENTITY_STORE); + when(config.get(ENTITY_RELATIONAL_STORE)).thenReturn(DEFAULT_ENTITY_RELATIONAL_STORE); + when(config.get(ENTITY_RELATIONAL_JDBC_BACKEND_PATH)).thenReturn(STORE_PATH); + + // The following properties are used to create the JDBC connection; they are just for test, in + // the real world, they will be set automatically by the configuration file if you set + // ENTITY_RELATIONAL_STORE as EMBEDDED_ENTITY_RELATIONAL_STORE. 
+ when(config.get(ENTITY_RELATIONAL_JDBC_BACKEND_URL)) + .thenReturn(String.format("jdbc:h2:%s;DB_CLOSE_DELAY=-1;MODE=MYSQL", STORE_PATH)); + when(config.get(ENTITY_RELATIONAL_JDBC_BACKEND_USER)).thenReturn("gravitino"); + when(config.get(ENTITY_RELATIONAL_JDBC_BACKEND_PASSWORD)).thenReturn("gravitino"); + when(config.get(ENTITY_RELATIONAL_JDBC_BACKEND_DRIVER)).thenReturn("org.h2.Driver"); + Mockito.when(config.get(ENTITY_RELATIONAL_JDBC_BACKEND_MAX_CONNECTIONS)).thenReturn(100); + Mockito.when(config.get(ENTITY_RELATIONAL_JDBC_BACKEND_WAIT_MILLISECONDS)).thenReturn(1000L); + + File f = FileUtils.getFile(STORE_PATH); + f.deleteOnExit(); + + when(config.get(VERSION_RETENTION_COUNT)).thenReturn(1L); + when(config.get(STORE_TRANSACTION_MAX_SKEW_TIME)).thenReturn(1000L); + when(config.get(STORE_DELETE_AFTER_TIME)).thenReturn(20 * 60 * 1000L); + Mockito.when(config.get(Configs.CACHE_ENABLED)).thenReturn(false); + + store = EntityStoreFactory.createEntityStore(config); + store.initialize(config); + idGenerator = RandomIdGenerator.INSTANCE; + + // Create the metalake and catalog + AuditInfo auditInfo = + AuditInfo.builder().withCreator("test").withCreateTime(Instant.now()).build(); + BaseMetalake metalake = + BaseMetalake.builder() + .withId(idGenerator.nextId()) + .withName(METALAKE_NAME) + .withVersion(SchemaVersion.V_0_1) + .withAuditInfo(auditInfo) + .withName(METALAKE_NAME) + .build(); + store.put(metalake, false); + + CatalogEntity catalog = + CatalogEntity.builder() + .withId(idGenerator.nextId()) + .withName(CATALOG_NAME) + .withNamespace(Namespace.of(METALAKE_NAME)) + .withProvider("generic-lakehouse") + .withType(Catalog.Type.RELATIONAL) + .withAuditInfo(auditInfo) + .build(); + store.put(catalog, false); + + ops = new GenericLakehouseCatalogOperations(store); + } + + @AfterAll + public static void tearDown() throws IOException { + ops.close(); + store.close(); + FileUtils.deleteDirectory(new File(STORE_PATH)); + } + + @Test + public void testSchemaOperations() 
{ + String schemaName = randomSchemaName(); + NameIdentifier schemaIdent = + NameIdentifierUtil.ofSchema(METALAKE_NAME, CATALOG_NAME, schemaName); + StringIdentifier stringId = StringIdentifier.fromId(idGenerator.nextId()); + Map properties = StringIdentifier.newPropertiesWithId(stringId, null); + + ops.createSchema(schemaIdent, "schema comment", properties); + Schema loadedSchema = ops.loadSchema(schemaIdent); + + Assertions.assertEquals(schemaName, loadedSchema.name()); + Assertions.assertEquals("schema comment", loadedSchema.comment()); + Assertions.assertEquals(properties, loadedSchema.properties()); + + // Test create schema with the same name + Assertions.assertThrows( + SchemaAlreadyExistsException.class, + () -> ops.createSchema(schemaIdent, "schema comment", properties)); + + // Test create schema in a non-existent catalog + Assertions.assertThrows( + NoSuchCatalogException.class, + () -> + ops.createSchema( + NameIdentifierUtil.ofSchema(METALAKE_NAME, "non-existent-catalog", schemaName), + "schema comment", + properties)); + + // Test load a non-existent schema + Assertions.assertThrows( + NoSuchSchemaException.class, + () -> + ops.loadSchema( + NameIdentifierUtil.ofSchema(METALAKE_NAME, CATALOG_NAME, "non-existent-schema"))); + + // Test load a non-existent schema in a non-existent catalog + Assertions.assertThrows( + NoSuchSchemaException.class, + () -> + ops.loadSchema( + NameIdentifierUtil.ofSchema( + METALAKE_NAME, "non-existent-catalog", "non-existent-schema"))); + + // Create another schema + String schemaName2 = randomSchemaName(); + NameIdentifier schemaIdent2 = + NameIdentifierUtil.ofSchema(METALAKE_NAME, CATALOG_NAME, schemaName2); + StringIdentifier stringId2 = StringIdentifier.fromId(idGenerator.nextId()); + Map properties2 = StringIdentifier.newPropertiesWithId(stringId2, null); + + ops.createSchema(schemaIdent2, "schema comment 2", properties2); + + // Test list schemas + NameIdentifier[] idents = ops.listSchemas(Namespace.of(METALAKE_NAME, 
CATALOG_NAME)); + + Set resultSet = Arrays.stream(idents).collect(Collectors.toSet()); + Assertions.assertTrue(resultSet.contains(schemaIdent)); + Assertions.assertTrue(resultSet.contains(schemaIdent2)); + + // Test list schemas in a non-existent catalog + Assertions.assertThrows( + NoSuchCatalogException.class, + () -> ops.listSchemas(Namespace.of(METALAKE_NAME, "non-existent-catalog"))); + + // Test drop schema + Assertions.assertTrue(ops.dropSchema(schemaIdent, false)); + Assertions.assertFalse(ops.dropSchema(schemaIdent, false)); + Assertions.assertTrue(ops.dropSchema(schemaIdent2, false)); + Assertions.assertFalse(ops.dropSchema(schemaIdent2, false)); + + // Test drop non-existent schema + Assertions.assertFalse( + ops.dropSchema( + NameIdentifierUtil.ofSchema(METALAKE_NAME, CATALOG_NAME, "non-existent-schema"), + false)); + + // Test drop schema in a non-existent catalog + Assertions.assertFalse( + ops.dropSchema( + NameIdentifierUtil.ofSchema(METALAKE_NAME, "non-existent-catalog", schemaName2), + false)); + } + + private String randomSchemaName() { + return "schema_" + UUID.randomUUID().toString().replace("-", ""); + } +} From 924acbb81a1c3363f01fe35f0b6a25d19ad643d6 Mon Sep 17 00:00:00 2001 From: Mini Yu Date: Thu, 23 Oct 2025 11:12:14 +0800 Subject: [PATCH 04/43] [#8834] feat(catalogs): Add a table to store details information of Gravitino managed tables. (#8847) ### What changes were proposed in this pull request? Add a new table to store table details for Gravitino-managed tables. ### Why are the changes needed? To support the managed catalog. Fix: #8834 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? Test the process of create table locally. 
--- scripts/h2/schema-1.1.0-h2.sql | 465 ++++++++++ scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql | 33 + scripts/mysql/schema-1.1.0-mysql.sql | 457 ++++++++++ .../mysql/upgrade-1.0.0-to-1.1.0-mysql.sql | 33 + .../postgresql/schema-1.1.0-postgresql.sql | 794 ++++++++++++++++++ .../upgrade-1.0.0-to-1.1.0-postgresql.sql | 44 + 6 files changed, 1826 insertions(+) create mode 100644 scripts/h2/schema-1.1.0-h2.sql create mode 100644 scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql create mode 100644 scripts/mysql/schema-1.1.0-mysql.sql create mode 100644 scripts/mysql/upgrade-1.0.0-to-1.1.0-mysql.sql create mode 100644 scripts/postgresql/schema-1.1.0-postgresql.sql create mode 100644 scripts/postgresql/upgrade-1.0.0-to-1.1.0-postgresql.sql diff --git a/scripts/h2/schema-1.1.0-h2.sql b/scripts/h2/schema-1.1.0-h2.sql new file mode 100644 index 00000000000..98a12174234 --- /dev/null +++ b/scripts/h2/schema-1.1.0-h2.sql @@ -0,0 +1,465 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. 
The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- + +CREATE TABLE IF NOT EXISTS `metalake_meta` ( + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `metalake_name` VARCHAR(128) NOT NULL COMMENT 'metalake name', + `metalake_comment` VARCHAR(256) DEFAULT '' COMMENT 'metalake comment', + `properties` CLOB DEFAULT NULL COMMENT 'metalake properties', + `audit_info` CLOB NOT NULL COMMENT 'metalake audit info', + `schema_version` CLOB NOT NULL COMMENT 'metalake schema version info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'metalake current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'metalake last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'metalake deleted at', + PRIMARY KEY (metalake_id), + CONSTRAINT uk_mn_del UNIQUE (metalake_name, deleted_at) + ) ENGINE = InnoDB; + + +CREATE TABLE IF NOT EXISTS `catalog_meta` ( + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `catalog_name` VARCHAR(128) NOT NULL COMMENT 'catalog name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `type` VARCHAR(64) NOT NULL COMMENT 'catalog type', + `provider` VARCHAR(64) NOT NULL COMMENT 'catalog provider', + `catalog_comment` VARCHAR(256) DEFAULT '' COMMENT 'catalog comment', + `properties` CLOB DEFAULT NULL COMMENT 'catalog properties', + `audit_info` CLOB NOT NULL COMMENT 'catalog audit info', + `current_version` INT UNSIGNED 
NOT NULL DEFAULT 1 COMMENT 'catalog current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'catalog last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'catalog deleted at', + PRIMARY KEY (catalog_id), + CONSTRAINT uk_mid_cn_del UNIQUE (metalake_id, catalog_name, deleted_at) + ) ENGINE=InnoDB; + + +CREATE TABLE IF NOT EXISTS `schema_meta` ( + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `schema_name` VARCHAR(128) NOT NULL COMMENT 'schema name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_comment` VARCHAR(256) DEFAULT '' COMMENT 'schema comment', + `properties` CLOB DEFAULT NULL COMMENT 'schema properties', + `audit_info` CLOB NOT NULL COMMENT 'schema audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'schema current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'schema last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'schema deleted at', + PRIMARY KEY (schema_id), + CONSTRAINT uk_cid_sn_del UNIQUE (catalog_id, schema_name, deleted_at), + -- Aliases are used here, and indexes with the same name in H2 can only be created once. 
+ KEY idx_smid (metalake_id) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `table_meta` ( + `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', + `table_name` VARCHAR(128) NOT NULL COMMENT 'table name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `audit_info` CLOB NOT NULL COMMENT 'table audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'table current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'table last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'table deleted at', + PRIMARY KEY (table_id), + CONSTRAINT uk_sid_tn_del UNIQUE (schema_id, table_name, deleted_at), + -- Aliases are used here, and indexes with the same name in H2 can only be created once. + KEY idx_tmid (metalake_id), + KEY idx_tcid (catalog_id) + ) ENGINE=InnoDB; + + +CREATE TABLE IF NOT EXISTS `table_column_version_info` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', + `table_version` INT UNSIGNED NOT NULL COMMENT 'table version', + `column_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'column id', + `column_name` VARCHAR(128) NOT NULL COMMENT 'column name', + `column_position` INT UNSIGNED NOT NULL COMMENT 'column position, starting from 0', + `column_type` CLOB NOT NULL COMMENT 'column type', + `column_comment` VARCHAR(256) DEFAULT '' COMMENT 'column comment', + `column_nullable` TINYINT(1) NOT NULL DEFAULT 1 COMMENT 'column nullable, 0 is not nullable, 1 is nullable', + `column_auto_increment` TINYINT(1) NOT NULL DEFAULT 0 COMMENT 'column auto increment, 0 is not auto 
increment, 1 is auto increment', + `column_default_value` CLOB DEFAULT NULL COMMENT 'column default value', + `column_op_type` TINYINT(1) NOT NULL COMMENT 'column operation type, 1 is create, 2 is update, 3 is delete', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'column deleted at', + `audit_info` CLOB NOT NULL COMMENT 'column audit info', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_tid_ver_cid_del` (`table_id`, `table_version`, `column_id`, `deleted_at`), + KEY `idx_tcmid` (`metalake_id`), + KEY `idx_tccid` (`catalog_id`), + KEY `idx_tcsid` (`schema_id`) + ) ENGINE=InnoDB; + + +CREATE TABLE IF NOT EXISTS `fileset_meta` ( + `fileset_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'fileset id', + `fileset_name` VARCHAR(128) NOT NULL COMMENT 'fileset name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `type` VARCHAR(64) NOT NULL COMMENT 'fileset type', + `audit_info` CLOB NOT NULL COMMENT 'fileset audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'fileset current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'fileset last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'fileset deleted at', + PRIMARY KEY (fileset_id), + CONSTRAINT uk_sid_fn_del UNIQUE (schema_id, fileset_name, deleted_at), + -- Aliases are used here, and indexes with the same name in H2 can only be created once. 
+ KEY idx_fmid (metalake_id), + KEY idx_fcid (catalog_id) + ) ENGINE=InnoDB; + + +CREATE TABLE IF NOT EXISTS `fileset_version_info` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `fileset_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'fileset id', + `version` INT UNSIGNED NOT NULL COMMENT 'fileset info version', + `fileset_comment` VARCHAR(256) DEFAULT '' COMMENT 'fileset comment', + `properties` CLOB DEFAULT NULL COMMENT 'fileset properties', + `storage_location_name` VARCHAR(128) NOT NULL DEFAULT 'default' COMMENT 'fileset storage location name', + `storage_location` CLOB DEFAULT NULL COMMENT 'fileset storage location', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'fileset deleted at', + PRIMARY KEY (id), + CONSTRAINT uk_fid_ver_del UNIQUE (fileset_id, version, storage_location_name, deleted_at), + -- Aliases are used here, and indexes with the same name in H2 can only be created once. 
+ KEY idx_fvmid (metalake_id), + KEY idx_fvcid (catalog_id) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `topic_meta` ( + `topic_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'topic id', + `topic_name` VARCHAR(128) NOT NULL COMMENT 'topic name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `comment` VARCHAR(256) DEFAULT '' COMMENT 'topic comment', + `properties` CLOB DEFAULT NULL COMMENT 'topic properties', + `audit_info` CLOB NOT NULL COMMENT 'topic audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'topic current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'topic last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'topic deleted at', + PRIMARY KEY (topic_id), + CONSTRAINT uk_cid_tn_del UNIQUE (schema_id, topic_name, deleted_at), + -- Aliases are used here, and indexes with the same name in H2 can only be created once. 
+ KEY idx_tvmid (metalake_id), + KEY idx_tvcid (catalog_id) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `user_meta` ( + `user_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'user id', + `user_name` VARCHAR(128) NOT NULL COMMENT 'username', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `audit_info` CLOB NOT NULL COMMENT 'user audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'user current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'user last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'user deleted at', + PRIMARY KEY (`user_id`), + CONSTRAINT `uk_mid_us_del` UNIQUE (`metalake_id`, `user_name`, `deleted_at`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `role_meta` ( + `role_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'role id', + `role_name` VARCHAR(128) NOT NULL COMMENT 'role name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `properties` CLOB DEFAULT NULL COMMENT 'schema properties', + `audit_info` CLOB NOT NULL COMMENT 'role audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'role current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'role last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'role deleted at', + PRIMARY KEY (`role_id`), + CONSTRAINT `uk_mid_rn_del` UNIQUE (`metalake_id`, `role_name`, `deleted_at`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `role_meta_securable_object` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `role_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'role id', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'securable object entity id', + `type` VARCHAR(128) NOT NULL COMMENT 'securable object type', + `privilege_names` CLOB(81920) NOT NULL COMMENT 'securable object privilege names', + `privilege_conditions` CLOB(81920) NOT NULL COMMENT 'securable object privilege conditions', + 
`current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'securable object current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'securable object last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'securable object deleted at', + PRIMARY KEY (`id`), + KEY `idx_obj_rid` (`role_id`), + KEY `idx_obj_eid` (`metadata_object_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `user_role_rel` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `user_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'user id', + `role_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'role id', + `audit_info` CLOB NOT NULL COMMENT 'relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'relation deleted at', + PRIMARY KEY (`id`), + CONSTRAINT `uk_ui_ri_del` UNIQUE (`user_id`, `role_id`, `deleted_at`), + KEY `idx_rid` (`role_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `group_meta` ( + `group_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'group id', + `group_name` VARCHAR(128) NOT NULL COMMENT 'group name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `audit_info` CLOB NOT NULL COMMENT 'group audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'group current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'group last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'group deleted at', + PRIMARY KEY (`group_id`), + CONSTRAINT `uk_mid_gr_del` UNIQUE (`metalake_id`, `group_name`, `deleted_at`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `group_role_rel` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `group_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'group id', + `role_id` BIGINT(20) UNSIGNED NOT NULL
COMMENT 'role id', + `audit_info` CLOB NOT NULL COMMENT 'relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'relation deleted at', + PRIMARY KEY (`id`), + CONSTRAINT `uk_gi_ri_del` UNIQUE (`group_id`, `role_id`, `deleted_at`), + KEY `idx_gid` (`group_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `tag_meta` ( + `tag_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'tag id', + `tag_name` VARCHAR(128) NOT NULL COMMENT 'tag name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `tag_comment` VARCHAR(256) DEFAULT '' COMMENT 'tag comment', + `properties` CLOB DEFAULT NULL COMMENT 'tag properties', + `audit_info` CLOB NOT NULL COMMENT 'tag audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'tag deleted at', + PRIMARY KEY (`tag_id`), + UNIQUE KEY `uk_mn_tn_del` (`metalake_id`, `tag_name`, `deleted_at`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `tag_relation_meta` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `tag_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'tag id', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metadata object id', + `metadata_object_type` VARCHAR(64) NOT NULL COMMENT 'metadata object type', + `audit_info` CLOB NOT NULL COMMENT 'tag relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'tag relation deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_ti_mi_del` (`tag_id`, `metadata_object_id`, 
`deleted_at`), + KEY `idx_tid` (`tag_id`), + KEY `idx_mid` (`metadata_object_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `owner_meta` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `owner_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'owner id', + `owner_type` VARCHAR(64) NOT NULL COMMENT 'owner type', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metadata object id', + `metadata_object_type` VARCHAR(64) NOT NULL COMMENT 'metadata object type', + `audit_info` CLOB NOT NULL COMMENT 'owner relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'owner relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'owner relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'owner relation deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_ow_me_del` (`owner_id`, `metadata_object_id`, `metadata_object_type`, `deleted_at`), + KEY `idx_oid` (`owner_id`), + KEY `idx_meid` (`metadata_object_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `model_meta` ( + `model_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'model id', + `model_name` VARCHAR(128) NOT NULL COMMENT 'model name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `model_comment` CLOB DEFAULT NULL COMMENT 'model comment', + `model_properties` CLOB DEFAULT NULL COMMENT 'model properties', + `model_latest_version` INT UNSIGNED DEFAULT 0 COMMENT 'model latest version', + `audit_info` CLOB NOT NULL COMMENT 'model audit info', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'model deleted at', + PRIMARY KEY (`model_id`), + UNIQUE KEY `uk_sid_mn_del` (`schema_id`, `model_name`, `deleted_at`), + KEY `idx_mmid` (`metalake_id`), + KEY `idx_mcid` 
(`catalog_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `model_version_info` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `model_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'model id', + `version` INT UNSIGNED NOT NULL COMMENT 'model version', + `model_version_comment` CLOB DEFAULT NULL COMMENT 'model version comment', + `model_version_properties` CLOB DEFAULT NULL COMMENT 'model version properties', + `model_version_uri_name` VARCHAR(128) NOT NULL COMMENT 'model version uri name', + `model_version_uri` CLOB NOT NULL COMMENT 'model storage uri', + `audit_info` CLOB NOT NULL COMMENT 'model version audit info', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'model version deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_mid_ver_uri_del` (`model_id`, `version`, `model_version_uri_name`, `deleted_at`), + KEY `idx_vmid` (`metalake_id`), + KEY `idx_vcid` (`catalog_id`), + KEY `idx_vsid` (`schema_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `model_version_alias_rel` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `model_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'model id', + `model_version` INT UNSIGNED NOT NULL COMMENT 'model version', + `model_version_alias` VARCHAR(128) NOT NULL COMMENT 'model version alias', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'model version alias deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_mi_mva_del` (`model_id`, `model_version_alias`, `deleted_at`), + KEY `idx_mva` (`model_version_alias`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `policy_meta` ( + `policy_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'policy id', + `policy_name` VARCHAR(128) NOT NULL COMMENT 'policy name', + `policy_type` VARCHAR(64) NOT NULL 
COMMENT 'policy type', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `audit_info` CLOB NOT NULL COMMENT 'policy audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'policy current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'policy last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'policy deleted at', + PRIMARY KEY (`policy_id`), + UNIQUE KEY `uk_mi_pn_del` (`metalake_id`, `policy_name`, `deleted_at`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `policy_version_info` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `policy_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'policy id', + `version` INT UNSIGNED NOT NULL COMMENT 'policy info version', + `policy_comment` CLOB DEFAULT NULL COMMENT 'policy info comment', + `enabled` TINYINT(1) DEFAULT 1 COMMENT 'whether the policy is enabled, 0 is disabled, 1 is enabled', + `content` CLOB DEFAULT NULL COMMENT 'policy content', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'policy deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_pod_ver_del` (`policy_id`, `version`, `deleted_at`), + KEY `idx_pmid` (`metalake_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `policy_relation_meta` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `policy_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'policy id', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metadata object id', + `metadata_object_type` VARCHAR(64) NOT NULL COMMENT 'metadata object type', + `audit_info` CLOB NOT NULL COMMENT 'policy relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'policy relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'policy relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'policy relation 
deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_pi_mi_mo_del` (`policy_id`, `metadata_object_id`, `metadata_object_type`, `deleted_at`), + KEY `idx_pid` (`policy_id`), + KEY `idx_prmid` (`metadata_object_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `statistic_meta` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `statistic_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'statistic id', + `statistic_name` VARCHAR(128) NOT NULL COMMENT 'statistic name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `statistic_value` CLOB NOT NULL COMMENT 'statistic value', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metadata object id', + `metadata_object_type` VARCHAR(64) NOT NULL COMMENT 'metadata object type', + `audit_info` CLOB NOT NULL COMMENT 'statistic audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'statistic current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'statistic last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'statistic deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_si_mi_mo_del` (`statistic_name`, `metadata_object_id`, `deleted_at`), + KEY `idx_stid` (`statistic_id`), + KEY `idx_moid` (`metadata_object_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `job_template_meta` ( + `job_template_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'job template id', + `job_template_name` VARCHAR(128) NOT NULL COMMENT 'job template name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `job_template_comment` CLOB DEFAULT NULL COMMENT 'job template comment', + `job_template_content` CLOB NOT NULL COMMENT 'job template content', + `audit_info` CLOB NOT NULL COMMENT 'job template audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'job template current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'job template last version', + `deleted_at` BIGINT(20) 
UNSIGNED NOT NULL DEFAULT 0 COMMENT 'job template deleted at', + PRIMARY KEY (`job_template_id`), + UNIQUE KEY `uk_mid_jtn_del` (`metalake_id`, `job_template_name`, `deleted_at`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `job_run_meta` ( + `job_run_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'job run id', + `job_template_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'job template id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `job_execution_id` varchar(256) NOT NULL COMMENT 'job execution id', + `job_run_status` varchar(64) NOT NULL COMMENT 'job run status', + `job_finished_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'job finished at', + `audit_info` CLOB NOT NULL COMMENT 'job run audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'job run current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'job run last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'job run deleted at', + PRIMARY KEY (`job_run_id`), + UNIQUE KEY `uk_mid_jei_del` (`metalake_id`, `job_execution_id`, `deleted_at`), + KEY `idx_job_template_id` (`job_template_id`), + KEY `idx_job_execution_id` (`job_execution_id`) + ) ENGINE=InnoDB; + +CREATE TABLE IF NOT EXISTS `table_version_info` ( + `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', + `format` VARCHAR(64) NOT NULL COMMENT 'table format, such as Lance, Iceberg and so on', + `properties` CLOB DEFAULT NULL COMMENT 'table properties', + `partitioning` CLOB DEFAULT NULL COMMENT 'table partition info', + `distribution` CLOB DEFAULT NULL COMMENT 'table distribution info', + `sort_orders` CLOB DEFAULT NULL COMMENT 'table sort order info', + `indexes` CLOB DEFAULT NULL COMMENT 'table index info', + `comment` CLOB DEFAULT NULL COMMENT 'table comment', + `version` BIGINT(20) UNSIGNED COMMENT 'table current version', + `deleted_at` BIGINT(20) UNSIGNED DEFAULT 0 COMMENT 'table deletion timestamp, 0 means not deleted', + PRIMARY KEY (table_id), + 
UNIQUE KEY `uk_table_id_deleted_at` (`table_id`, `deleted_at`) +) ENGINE=InnoDB COMMENT 'table detail information including format, location, properties, partition, distribution, sort order, index and so on'; \ No newline at end of file diff --git a/scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql b/scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql new file mode 100644 index 00000000000..f76a2c25931 --- /dev/null +++ b/scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +CREATE TABLE IF NOT EXISTS `table_version_info` ( + `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', + `format` VARCHAR(64) NOT NULL COMMENT 'table format, such as Lance, Iceberg and so on', + `properties` CLOB DEFAULT NULL COMMENT 'table properties', + `partitioning` CLOB DEFAULT NULL COMMENT 'table partition info', + `distribution` CLOB DEFAULT NULL COMMENT 'table distribution info', + `sort_orders` CLOB DEFAULT NULL COMMENT 'table sort order info', + `indexes` CLOB DEFAULT NULL COMMENT 'table index info', + `comment` CLOB DEFAULT NULL COMMENT 'table comment', + `version` BIGINT(20) UNSIGNED COMMENT 'table current version', + `deleted_at` BIGINT(20) UNSIGNED DEFAULT 0 COMMENT 'table deletion timestamp, 0 means not deleted', + PRIMARY KEY (table_id), + UNIQUE KEY `uk_table_id_deleted_at` (`table_id`, `deleted_at`) +) ENGINE=InnoDB COMMENT 'table detail information including format, location, properties, partition, distribution, sort order, index and so on'; \ No newline at end of file diff --git a/scripts/mysql/schema-1.1.0-mysql.sql b/scripts/mysql/schema-1.1.0-mysql.sql new file mode 100644 index 00000000000..c6bd8a81e3c --- /dev/null +++ b/scripts/mysql/schema-1.1.0-mysql.sql @@ -0,0 +1,457 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file-- +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"). You may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- + +CREATE TABLE IF NOT EXISTS `metalake_meta` ( + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `metalake_name` VARCHAR(128) NOT NULL COMMENT 'metalake name', + `metalake_comment` VARCHAR(256) DEFAULT '' COMMENT 'metalake comment', + `properties` MEDIUMTEXT DEFAULT NULL COMMENT 'metalake properties', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'metalake audit info', + `schema_version` MEDIUMTEXT NOT NULL COMMENT 'metalake schema version info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'metalake current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'metalake last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'metalake deleted at', + PRIMARY KEY (`metalake_id`), + UNIQUE KEY `uk_mn_del` (`metalake_name`, `deleted_at`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'metalake metadata'; + +CREATE TABLE IF NOT EXISTS `catalog_meta` ( + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `catalog_name` VARCHAR(128) NOT NULL COMMENT 
'catalog name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `type` VARCHAR(64) NOT NULL COMMENT 'catalog type', + `provider` VARCHAR(64) NOT NULL COMMENT 'catalog provider', + `catalog_comment` VARCHAR(256) DEFAULT '' COMMENT 'catalog comment', + `properties` MEDIUMTEXT DEFAULT NULL COMMENT 'catalog properties', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'catalog audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'catalog current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'catalog last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'catalog deleted at', + PRIMARY KEY (`catalog_id`), + UNIQUE KEY `uk_mid_cn_del` (`metalake_id`, `catalog_name`, `deleted_at`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'catalog metadata'; + +CREATE TABLE IF NOT EXISTS `schema_meta` ( + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `schema_name` VARCHAR(128) NOT NULL COMMENT 'schema name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_comment` VARCHAR(256) DEFAULT '' COMMENT 'schema comment', + `properties` MEDIUMTEXT DEFAULT NULL COMMENT 'schema properties', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'schema audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'schema current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'schema last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'schema deleted at', + PRIMARY KEY (`schema_id`), + UNIQUE KEY `uk_cid_sn_del` (`catalog_id`, `schema_name`, `deleted_at`), + KEY `idx_mid` (`metalake_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'schema metadata'; + +CREATE TABLE IF NOT EXISTS `table_meta` ( + `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', + `table_name` VARCHAR(128) NOT NULL COMMENT 'table name', + 
`metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'table audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'table current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'table last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'table deleted at', + PRIMARY KEY (`table_id`), + UNIQUE KEY `uk_sid_tn_del` (`schema_id`, `table_name`, `deleted_at`), + KEY `idx_mid` (`metalake_id`), + KEY `idx_cid` (`catalog_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'table metadata'; + +CREATE TABLE IF NOT EXISTS `table_column_version_info` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', + `table_version` INT UNSIGNED NOT NULL COMMENT 'table version', + `column_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'column id', + `column_name` VARCHAR(128) NOT NULL COMMENT 'column name', + `column_position` INT UNSIGNED NOT NULL COMMENT 'column position, starting from 0', + `column_type` TEXT NOT NULL COMMENT 'column type', + `column_comment` VARCHAR(256) DEFAULT '' COMMENT 'column comment', + `column_nullable` TINYINT(1) NOT NULL DEFAULT 1 COMMENT 'column nullable, 0 is not nullable, 1 is nullable', + `column_auto_increment` TINYINT(1) NOT NULL DEFAULT 0 COMMENT 'column auto increment, 0 is not auto increment, 1 is auto increment', + `column_default_value` TEXT DEFAULT NULL COMMENT 'column default value', + `column_op_type` TINYINT(1) NOT NULL COMMENT 'column operation type, 1 is create, 2 is update, 3 is delete', + 
`deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'column deleted at', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'column audit info', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_tid_ver_cid_del` (`table_id`, `table_version`, `column_id`, `deleted_at`), + KEY `idx_mid` (`metalake_id`), + KEY `idx_cid` (`catalog_id`), + KEY `idx_sid` (`schema_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'table column version info'; + +CREATE TABLE IF NOT EXISTS `fileset_meta` ( + `fileset_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'fileset id', + `fileset_name` VARCHAR(128) NOT NULL COMMENT 'fileset name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `type` VARCHAR(64) NOT NULL COMMENT 'fileset type', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'fileset audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'fileset current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'fileset last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'fileset deleted at', + PRIMARY KEY (`fileset_id`), + UNIQUE KEY `uk_sid_fn_del` (`schema_id`, `fileset_name`, `deleted_at`), + KEY `idx_mid` (`metalake_id`), + KEY `idx_cid` (`catalog_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'fileset metadata'; + +CREATE TABLE IF NOT EXISTS `fileset_version_info` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `fileset_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'fileset id', + `version` INT UNSIGNED NOT NULL COMMENT 'fileset info version', + `fileset_comment` VARCHAR(256) DEFAULT '' COMMENT 'fileset comment', + 
`properties` MEDIUMTEXT DEFAULT NULL COMMENT 'fileset properties', + `storage_location_name` VARCHAR(256) NOT NULL DEFAULT 'default' COMMENT 'fileset storage location name', + `storage_location` MEDIUMTEXT NOT NULL COMMENT 'fileset storage location', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'fileset deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_fid_ver_sto_del` (`fileset_id`, `version`, `storage_location_name`, `deleted_at`), + KEY `idx_mid` (`metalake_id`), + KEY `idx_cid` (`catalog_id`), + KEY `idx_sid` (`schema_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'fileset version info'; + +CREATE TABLE IF NOT EXISTS `topic_meta` ( + `topic_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'topic id', + `topic_name` VARCHAR(128) NOT NULL COMMENT 'topic name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `comment` VARCHAR(256) DEFAULT '' COMMENT 'topic comment', + `properties` MEDIUMTEXT DEFAULT NULL COMMENT 'topic properties', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'topic audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'topic current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'topic last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'topic deleted at', + PRIMARY KEY (`topic_id`), + UNIQUE KEY `uk_sid_tn_del` (`schema_id`, `topic_name`, `deleted_at`), + KEY `idx_mid` (`metalake_id`), + KEY `idx_cid` (`catalog_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'topic metadata'; + +CREATE TABLE IF NOT EXISTS `user_meta` ( + `user_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'user id', + `user_name` VARCHAR(128) NOT NULL COMMENT 'username', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'user audit info', + 
`current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'user current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'user last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'user deleted at', + PRIMARY KEY (`user_id`), + UNIQUE KEY `uk_mid_us_del` (`metalake_id`, `user_name`, `deleted_at`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'user metadata'; + +CREATE TABLE IF NOT EXISTS `role_meta` ( + `role_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'role id', + `role_name` VARCHAR(128) NOT NULL COMMENT 'role name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `properties` MEDIUMTEXT DEFAULT NULL COMMENT 'schema properties', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'role audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'role current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'role last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'role deleted at', + PRIMARY KEY (`role_id`), + UNIQUE KEY `uk_mid_rn_del` (`metalake_id`, `role_name`, `deleted_at`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'role metadata'; + +CREATE TABLE IF NOT EXISTS `role_meta_securable_object` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `role_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'role id', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'The entity id of securable object', + `type` VARCHAR(128) NOT NULL COMMENT 'securable object type', + `privilege_names` TEXT(81920) NOT NULL COMMENT 'securable object privilege names', + `privilege_conditions` TEXT(81920) NOT NULL COMMENT 'securable object privilege conditions', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'securable object current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'securable object last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 
COMMENT 'securable object deleted at', + PRIMARY KEY (`id`), + KEY `idx_obj_rid` (`role_id`), + KEY `idx_obj_eid` (`metadata_object_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'securable object meta'; + +CREATE TABLE IF NOT EXISTS `user_role_rel` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `user_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'user id', + `role_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'role id', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'relation deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_ui_ri_del` (`user_id`, `role_id`, `deleted_at`), + KEY `idx_rid` (`role_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'user role relation'; + +CREATE TABLE IF NOT EXISTS `group_meta` ( + `group_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'group id', + `group_name` VARCHAR(128) NOT NULL COMMENT 'group name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'group audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'group current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'group last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'group deleted at', + PRIMARY KEY (`group_id`), + UNIQUE KEY `uk_mid_gr_del` (`metalake_id`, `group_name`, `deleted_at`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'group metadata'; + +CREATE TABLE IF NOT EXISTS `group_role_rel` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `group_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'group id', + `role_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'role id', + 
`audit_info` MEDIUMTEXT NOT NULL COMMENT 'relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'relation deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_gi_ri_del` (`group_id`, `role_id`, `deleted_at`), + KEY `idx_gid` (`group_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'group role relation'; + +CREATE TABLE IF NOT EXISTS `tag_meta` ( + `tag_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'tag id', + `tag_name` VARCHAR(128) NOT NULL COMMENT 'tag name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `tag_comment` VARCHAR(256) DEFAULT '' COMMENT 'tag comment', + `properties` MEDIUMTEXT DEFAULT NULL COMMENT 'tag properties', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'tag audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'tag deleted at', + PRIMARY KEY (`tag_id`), + UNIQUE KEY `uk_mi_tn_del` (`metalake_id`, `tag_name`, `deleted_at`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'tag metadata'; + +CREATE TABLE IF NOT EXISTS `tag_relation_meta` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `tag_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'tag id', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metadata object id', + `metadata_object_type` VARCHAR(64) NOT NULL COMMENT 'metadata object type', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'tag relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'tag relation last version', + `deleted_at` BIGINT(20) UNSIGNED
NOT NULL DEFAULT 0 COMMENT 'tag relation deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_ti_mi_mo_del` (`tag_id`, `metadata_object_id`, `metadata_object_type`, `deleted_at`), + KEY `idx_tid` (`tag_id`), + KEY `idx_mid` (`metadata_object_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'tag metadata object relation'; + +CREATE TABLE IF NOT EXISTS `owner_meta` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `owner_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'owner id', + `owner_type` VARCHAR(64) NOT NULL COMMENT 'owner type', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metadata object id', + `metadata_object_type` VARCHAR(64) NOT NULL COMMENT 'metadata object type', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'owner relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'owner relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'owner relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'owner relation deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_ow_me_del` (`owner_id`, `metadata_object_id`, `metadata_object_type`,`deleted_at`), + KEY `idx_oid` (`owner_id`), + KEY `idx_meid` (`metadata_object_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'owner relation'; + +CREATE TABLE IF NOT EXISTS `model_meta` ( + `model_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'model id', + `model_name` VARCHAR(128) NOT NULL COMMENT 'model name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `model_comment` TEXT DEFAULT NULL COMMENT 'model comment', + `model_properties` MEDIUMTEXT DEFAULT NULL COMMENT 'model properties', + `model_latest_version` INT UNSIGNED DEFAULT 0 
COMMENT 'model latest version', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'model audit info', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'model deleted at', + PRIMARY KEY (`model_id`), + UNIQUE KEY `uk_sid_mn_del` (`schema_id`, `model_name`, `deleted_at`), + KEY `idx_mid` (`metalake_id`), + KEY `idx_cid` (`catalog_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'model metadata'; + +CREATE TABLE IF NOT EXISTS `model_version_info` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `catalog_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'catalog id', + `schema_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'schema id', + `model_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'model id', + `version` INT UNSIGNED NOT NULL COMMENT 'model version', + `model_version_comment` TEXT DEFAULT NULL COMMENT 'model version comment', + `model_version_properties` MEDIUMTEXT DEFAULT NULL COMMENT 'model version properties', + `model_version_uri_name` VARCHAR(256) NOT NULL COMMENT 'model version uri name', + `model_version_uri` TEXT NOT NULL COMMENT 'model storage uri', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'model version audit info', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'model version deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_mid_ver_uri_del` (`model_id`, `version`, `model_version_uri_name`, `deleted_at`), + KEY `idx_mid` (`metalake_id`), + KEY `idx_cid` (`catalog_id`), + KEY `idx_sid` (`schema_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'model version info'; + +CREATE TABLE IF NOT EXISTS `model_version_alias_rel` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `model_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'model id', + `model_version` INT UNSIGNED NOT NULL COMMENT 'model version', + `model_version_alias` VARCHAR(128) NOT NULL COMMENT 'model version 
alias', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'model version alias deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_mi_mva_del` (`model_id`, `model_version_alias`, `deleted_at`), + KEY `idx_mva` (`model_version_alias`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'model_version_alias_rel'; + +CREATE TABLE IF NOT EXISTS `policy_meta` ( + `policy_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'policy id', + `policy_name` VARCHAR(128) NOT NULL COMMENT 'policy name', + `policy_type` VARCHAR(64) NOT NULL COMMENT 'policy type', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'policy audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'policy current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'policy last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'policy deleted at', + PRIMARY KEY (`policy_id`), + UNIQUE KEY `uk_mi_pn_del` (`metalake_id`, `policy_name`, `deleted_at`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'policy metadata'; + +CREATE TABLE IF NOT EXISTS `policy_version_info` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `policy_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'policy id', + `version` INT UNSIGNED NOT NULL COMMENT 'policy info version', + `policy_comment` TEXT DEFAULT NULL COMMENT 'policy info comment', + `enabled` TINYINT(1) DEFAULT 1 COMMENT 'whether the policy is enabled, 0 is disabled, 1 is enabled', + `content` MEDIUMTEXT DEFAULT NULL COMMENT 'policy content', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'policy deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_pod_ver_del` (`policy_id`, `version`, `deleted_at`), + KEY `idx_mid` (`metalake_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'policy 
version info'; + +CREATE TABLE IF NOT EXISTS `policy_relation_meta` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `policy_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'policy id', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metadata object id', + `metadata_object_type` VARCHAR(64) NOT NULL COMMENT 'metadata object type', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'policy relation audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'policy relation current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'policy relation last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'policy relation deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_pi_mi_mo_del` (`policy_id`, `metadata_object_id`, `metadata_object_type`, `deleted_at`), + KEY `idx_pid` (`policy_id`), + KEY `idx_mid` (`metadata_object_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'policy metadata object relation'; + +CREATE TABLE IF NOT EXISTS `statistic_meta` ( + `id` BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', + `statistic_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'statistic id', + `statistic_name` VARCHAR(128) NOT NULL COMMENT 'statistic name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `statistic_value` MEDIUMTEXT NOT NULL COMMENT 'statistic value', + `metadata_object_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metadata object id', + `metadata_object_type` VARCHAR(64) NOT NULL COMMENT 'metadata object type', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'statistic audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'statistic current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'statistic last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'statistic deleted at', + PRIMARY KEY (`id`), + UNIQUE KEY `uk_si_mi_mo_del` (`statistic_name`, 
`metadata_object_id`, `deleted_at`), + KEY `idx_stid` (`statistic_id`), + KEY `idx_moid` (`metadata_object_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'statistic metadata'; + +CREATE TABLE IF NOT EXISTS `job_template_meta` ( + `job_template_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'job template id', + `job_template_name` VARCHAR(128) NOT NULL COMMENT 'job template name', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `job_template_comment` TEXT DEFAULT NULL COMMENT 'job template comment', + `job_template_content` MEDIUMTEXT NOT NULL COMMENT 'job template content', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'job template audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'job template current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'job template last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'job template deleted at', + PRIMARY KEY (`job_template_id`), + UNIQUE KEY `uk_mid_jtn_del` (`metalake_id`, `job_template_name`, `deleted_at`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'job template metadata'; + +CREATE TABLE IF NOT EXISTS `job_run_meta` ( + `job_run_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'job run id', + `job_template_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'job template id', + `metalake_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'metalake id', + `job_execution_id` varchar(256) NOT NULL COMMENT 'job execution id', + `job_run_status` varchar(64) NOT NULL COMMENT 'job run status', + `job_finished_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'job finished at', + `audit_info` MEDIUMTEXT NOT NULL COMMENT 'job run audit info', + `current_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'job run current version', + `last_version` INT UNSIGNED NOT NULL DEFAULT 1 COMMENT 'job run last version', + `deleted_at` BIGINT(20) UNSIGNED NOT NULL DEFAULT 0 COMMENT 'job run deleted at', + PRIMARY KEY (`job_run_id`), 
+ UNIQUE KEY `uk_mid_jei_del` (`metalake_id`, `job_execution_id`, `deleted_at`), + KEY `idx_job_template_id` (`job_template_id`), + KEY `idx_job_execution_id` (`job_execution_id`) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'job run metadata'; + +CREATE TABLE IF NOT EXISTS `table_version_info` ( + `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', + `format` VARCHAR(64) NOT NULL COMMENT 'table format, such as Lance, Iceberg and so on', + `properties` MEDIUMTEXT DEFAULT NULL COMMENT 'table properties', + `partitioning` MEDIUMTEXT DEFAULT NULL COMMENT 'table partition info', + `distribution` MEDIUMTEXT DEFAULT NULL COMMENT 'table distribution info', + `sort_orders` MEDIUMTEXT DEFAULT NULL COMMENT 'table sort order info', + `indexes` MEDIUMTEXT DEFAULT NULL COMMENT 'table index info', + `comment` MEDIUMTEXT DEFAULT NULL COMMENT 'table comment', + `version` BIGINT(20) UNSIGNED COMMENT 'table current version', + `deleted_at` BIGINT(20) UNSIGNED DEFAULT 0 COMMENT 'table deletion timestamp, 0 means not deleted', + PRIMARY KEY (table_id), + UNIQUE KEY `uk_table_id_deleted_at` (`table_id`, `deleted_at`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'table detail information including format, location, properties, partition, distribution, sort order, index and so on'; + diff --git a/scripts/mysql/upgrade-1.0.0-to-1.1.0-mysql.sql b/scripts/mysql/upgrade-1.0.0-to-1.1.0-mysql.sql new file mode 100644 index 00000000000..5560993eb61 --- /dev/null +++ b/scripts/mysql/upgrade-1.0.0-to-1.1.0-mysql.sql @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +CREATE TABLE IF NOT EXISTS `table_version_info` ( + `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', + `format` VARCHAR(64) NOT NULL COMMENT 'table format, such as Lance, Iceberg and so on', + `properties` MEDIUMTEXT DEFAULT NULL COMMENT 'table properties', + `partitioning` MEDIUMTEXT DEFAULT NULL COMMENT 'table partition info', + `distribution` MEDIUMTEXT DEFAULT NULL COMMENT 'table distribution info', + `sort_orders` MEDIUMTEXT DEFAULT NULL COMMENT 'table sort order info', + `indexes` MEDIUMTEXT DEFAULT NULL COMMENT 'table index info', + `comment` MEDIUMTEXT DEFAULT NULL COMMENT 'table comment', + `version` BIGINT(20) UNSIGNED COMMENT 'table current version', + `deleted_at` BIGINT(20) UNSIGNED DEFAULT 0 COMMENT 'table deletion timestamp, 0 means not deleted', + PRIMARY KEY (table_id), + UNIQUE KEY `uk_table_id_deleted_at` (`table_id`, `deleted_at`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT 'table detail information including format, location, properties, partition, distribution, sort order, index and so on'; \ No newline at end of file diff --git a/scripts/postgresql/schema-1.1.0-postgresql.sql b/scripts/postgresql/schema-1.1.0-postgresql.sql new file mode 100644 index 00000000000..bc69e7839be --- /dev/null +++ b/scripts/postgresql/schema-1.1.0-postgresql.sql @@ -0,0 +1,794 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file-- +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"). You may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- + +-- Note: Database and schema creation is not included in this script. Please create the database and +-- schema before running this script. 
for example in psql: +-- CREATE DATABASE example_db; +-- \c example_db +-- CREATE SCHEMA example_schema; +-- set search_path to example_schema; + +CREATE TABLE IF NOT EXISTS metalake_meta ( + metalake_id BIGINT NOT NULL, + metalake_name VARCHAR(128) NOT NULL, + metalake_comment VARCHAR(256) DEFAULT '', + properties TEXT DEFAULT NULL, + audit_info TEXT NOT NULL, + schema_version TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (metalake_id), + UNIQUE (metalake_name, deleted_at) + ); +COMMENT ON TABLE metalake_meta IS 'metalake metadata'; + +COMMENT ON COLUMN metalake_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN metalake_meta.metalake_name IS 'metalake name'; +COMMENT ON COLUMN metalake_meta.metalake_comment IS 'metalake comment'; +COMMENT ON COLUMN metalake_meta.properties IS 'metalake properties'; +COMMENT ON COLUMN metalake_meta.audit_info IS 'metalake audit info'; +COMMENT ON COLUMN metalake_meta.schema_version IS 'metalake schema version info'; +COMMENT ON COLUMN metalake_meta.current_version IS 'metalake current version'; +COMMENT ON COLUMN metalake_meta.last_version IS 'metalake last version'; +COMMENT ON COLUMN metalake_meta.deleted_at IS 'metalake deleted at'; + + +CREATE TABLE IF NOT EXISTS catalog_meta ( + catalog_id BIGINT NOT NULL, + catalog_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + type VARCHAR(64) NOT NULL, + provider VARCHAR(64) NOT NULL, + catalog_comment VARCHAR(256) DEFAULT '', + properties TEXT DEFAULT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (catalog_id), + UNIQUE (metalake_id, catalog_name, deleted_at) + ); + +COMMENT ON TABLE catalog_meta IS 'catalog metadata'; + +COMMENT ON COLUMN catalog_meta.catalog_id IS 'catalog id'; +COMMENT ON COLUMN catalog_meta.catalog_name IS 'catalog name'; 
+COMMENT ON COLUMN catalog_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN catalog_meta.type IS 'catalog type'; +COMMENT ON COLUMN catalog_meta.provider IS 'catalog provider'; +COMMENT ON COLUMN catalog_meta.catalog_comment IS 'catalog comment'; +COMMENT ON COLUMN catalog_meta.properties IS 'catalog properties'; +COMMENT ON COLUMN catalog_meta.audit_info IS 'catalog audit info'; +COMMENT ON COLUMN catalog_meta.current_version IS 'catalog current version'; +COMMENT ON COLUMN catalog_meta.last_version IS 'catalog last version'; +COMMENT ON COLUMN catalog_meta.deleted_at IS 'catalog deleted at'; + + +CREATE TABLE IF NOT EXISTS schema_meta ( + schema_id BIGINT NOT NULL, + schema_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + catalog_id BIGINT NOT NULL, + schema_comment VARCHAR(256) DEFAULT '', + properties TEXT DEFAULT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (schema_id), + UNIQUE (catalog_id, schema_name, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS schema_meta_idx_metalake_id ON schema_meta (metalake_id); +COMMENT ON TABLE schema_meta IS 'schema metadata'; + +COMMENT ON COLUMN schema_meta.schema_id IS 'schema id'; +COMMENT ON COLUMN schema_meta.schema_name IS 'schema name'; +COMMENT ON COLUMN schema_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN schema_meta.catalog_id IS 'catalog id'; +COMMENT ON COLUMN schema_meta.schema_comment IS 'schema comment'; +COMMENT ON COLUMN schema_meta.properties IS 'schema properties'; +COMMENT ON COLUMN schema_meta.audit_info IS 'schema audit info'; +COMMENT ON COLUMN schema_meta.current_version IS 'schema current version'; +COMMENT ON COLUMN schema_meta.last_version IS 'schema last version'; +COMMENT ON COLUMN schema_meta.deleted_at IS 'schema deleted at'; + + +CREATE TABLE IF NOT EXISTS table_meta ( + table_id BIGINT NOT NULL, + table_name VARCHAR(128) NOT NULL, + metalake_id 
BIGINT NOT NULL, + catalog_id BIGINT NOT NULL, + schema_id BIGINT NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (table_id), + UNIQUE (schema_id, table_name, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS table_meta_idx_metalake_id ON table_meta (metalake_id); +CREATE INDEX IF NOT EXISTS table_meta_idx_catalog_id ON table_meta (catalog_id); +COMMENT ON TABLE table_meta IS 'table metadata'; + +COMMENT ON COLUMN table_meta.table_id IS 'table id'; +COMMENT ON COLUMN table_meta.table_name IS 'table name'; +COMMENT ON COLUMN table_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN table_meta.catalog_id IS 'catalog id'; +COMMENT ON COLUMN table_meta.schema_id IS 'schema id'; +COMMENT ON COLUMN table_meta.audit_info IS 'table audit info'; +COMMENT ON COLUMN table_meta.current_version IS 'table current version'; +COMMENT ON COLUMN table_meta.last_version IS 'table last version'; +COMMENT ON COLUMN table_meta.deleted_at IS 'table deleted at'; + +CREATE TABLE IF NOT EXISTS table_column_version_info ( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, + metalake_id BIGINT NOT NULL, + catalog_id BIGINT NOT NULL, + schema_id BIGINT NOT NULL, + table_id BIGINT NOT NULL, + table_version INT NOT NULL, + column_id BIGINT NOT NULL, + column_name VARCHAR(128) NOT NULL, + column_position INT NOT NULL, + column_type TEXT NOT NULL, + column_comment VARCHAR(256) DEFAULT '', + column_nullable SMALLINT NOT NULL DEFAULT 1, + column_auto_increment SMALLINT NOT NULL DEFAULT 0, + column_default_value TEXT DEFAULT NULL, + column_op_type SMALLINT NOT NULL, + deleted_at BIGINT NOT NULL DEFAULT 0, + audit_info TEXT NOT NULL, + PRIMARY KEY (id), + UNIQUE (table_id, table_version, column_id, deleted_at) + ); +CREATE INDEX table_column_version_info_idx_mid ON table_column_version_info (metalake_id); +CREATE INDEX table_column_version_info_idx_cid ON 
table_column_version_info (catalog_id); +CREATE INDEX table_column_version_info_idx_sid ON table_column_version_info (schema_id); +COMMENT ON TABLE table_column_version_info IS 'table column version information'; + +COMMENT ON COLUMN table_column_version_info.id IS 'auto increment id'; +COMMENT ON COLUMN table_column_version_info.metalake_id IS 'metalake id'; +COMMENT ON COLUMN table_column_version_info.catalog_id IS 'catalog id'; +COMMENT ON COLUMN table_column_version_info.schema_id IS 'schema id'; +COMMENT ON COLUMN table_column_version_info.table_id IS 'table id'; +COMMENT ON COLUMN table_column_version_info.table_version IS 'table version'; +COMMENT ON COLUMN table_column_version_info.column_id IS 'column id'; +COMMENT ON COLUMN table_column_version_info.column_name IS 'column name'; +COMMENT ON COLUMN table_column_version_info.column_position IS 'column position, starting from 0'; +COMMENT ON COLUMN table_column_version_info.column_type IS 'column type'; +COMMENT ON COLUMN table_column_version_info.column_comment IS 'column comment'; +COMMENT ON COLUMN table_column_version_info.column_nullable IS 'column nullable, 0 is not nullable, 1 is nullable'; +COMMENT ON COLUMN table_column_version_info.column_auto_increment IS 'column auto increment, 0 is not auto increment, 1 is auto increment'; +COMMENT ON COLUMN table_column_version_info.column_default_value IS 'column default value'; +COMMENT ON COLUMN table_column_version_info.column_op_type IS 'column operation type, 1 is create, 2 is update, 3 is delete'; +COMMENT ON COLUMN table_column_version_info.deleted_at IS 'column deleted at'; +COMMENT ON COLUMN table_column_version_info.audit_info IS 'column audit info'; + + +CREATE TABLE IF NOT EXISTS fileset_meta ( + fileset_id BIGINT NOT NULL, + fileset_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + catalog_id BIGINT NOT NULL, + schema_id BIGINT NOT NULL, + type VARCHAR(64) NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + 
last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (fileset_id), + UNIQUE (schema_id, fileset_name, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS fileset_meta_idx_metalake_id ON fileset_meta (metalake_id); +CREATE INDEX IF NOT EXISTS fileset_meta_idx_catalog_id ON fileset_meta (catalog_id); +COMMENT ON TABLE fileset_meta IS 'fileset metadata'; + +COMMENT ON COLUMN fileset_meta.fileset_id IS 'fileset id'; +COMMENT ON COLUMN fileset_meta.fileset_name IS 'fileset name'; +COMMENT ON COLUMN fileset_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN fileset_meta.catalog_id IS 'catalog id'; +COMMENT ON COLUMN fileset_meta.schema_id IS 'schema id'; +COMMENT ON COLUMN fileset_meta.type IS 'fileset type'; +COMMENT ON COLUMN fileset_meta.audit_info IS 'fileset audit info'; +COMMENT ON COLUMN fileset_meta.current_version IS 'fileset current version'; +COMMENT ON COLUMN fileset_meta.last_version IS 'fileset last version'; +COMMENT ON COLUMN fileset_meta.deleted_at IS 'fileset deleted at'; + + +CREATE TABLE IF NOT EXISTS fileset_version_info ( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, + metalake_id BIGINT NOT NULL, + catalog_id BIGINT NOT NULL, + schema_id BIGINT NOT NULL, + fileset_id BIGINT NOT NULL, + version INT NOT NULL, + fileset_comment VARCHAR(256) DEFAULT '', + properties TEXT DEFAULT NULL, + storage_location_name VARCHAR(256) NOT NULL DEFAULT 'default', + storage_location TEXT NOT NULL, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id), + UNIQUE (fileset_id, version, storage_location_name, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS fileset_version_info_idx_metalake_id ON fileset_version_info (metalake_id); +CREATE INDEX IF NOT EXISTS fileset_version_info_idx_idx_catalog_id ON fileset_version_info (catalog_id); +CREATE INDEX IF NOT EXISTS fileset_version_info_idx_idx_schema_id ON fileset_version_info (schema_id); +COMMENT ON TABLE fileset_version_info IS 'fileset version information'; + +COMMENT ON 
COLUMN fileset_version_info.id IS 'auto increment id'; +COMMENT ON COLUMN fileset_version_info.metalake_id IS 'metalake id'; +COMMENT ON COLUMN fileset_version_info.catalog_id IS 'catalog id'; +COMMENT ON COLUMN fileset_version_info.schema_id IS 'schema id'; +COMMENT ON COLUMN fileset_version_info.fileset_id IS 'fileset id'; +COMMENT ON COLUMN fileset_version_info.version IS 'fileset info version'; +COMMENT ON COLUMN fileset_version_info.fileset_comment IS 'fileset comment'; +COMMENT ON COLUMN fileset_version_info.properties IS 'fileset properties'; +COMMENT ON COLUMN fileset_version_info.storage_location_name IS 'fileset storage location name'; +COMMENT ON COLUMN fileset_version_info.storage_location IS 'fileset storage location'; +COMMENT ON COLUMN fileset_version_info.deleted_at IS 'fileset deleted at'; + + +CREATE TABLE IF NOT EXISTS topic_meta ( + topic_id BIGINT NOT NULL, + topic_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + catalog_id BIGINT NOT NULL, + schema_id BIGINT NOT NULL, + comment VARCHAR(256) DEFAULT '', + properties TEXT DEFAULT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (topic_id), + UNIQUE (schema_id, topic_name, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS topic_meta_idx_metalake_id ON topic_meta (metalake_id); +CREATE INDEX IF NOT EXISTS topic_meta_idx_catalog_id ON topic_meta (catalog_id); +COMMENT ON TABLE topic_meta IS 'topic metadata'; + +COMMENT ON COLUMN topic_meta.topic_id IS 'topic id'; +COMMENT ON COLUMN topic_meta.topic_name IS 'topic name'; +COMMENT ON COLUMN topic_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN topic_meta.catalog_id IS 'catalog id'; +COMMENT ON COLUMN topic_meta.schema_id IS 'schema id'; +COMMENT ON COLUMN topic_meta.comment IS 'topic comment'; +COMMENT ON COLUMN topic_meta.properties IS 'topic properties'; +COMMENT ON COLUMN topic_meta.audit_info IS 'topic audit 
info'; +COMMENT ON COLUMN topic_meta.current_version IS 'topic current version'; +COMMENT ON COLUMN topic_meta.last_version IS 'topic last version'; +COMMENT ON COLUMN topic_meta.deleted_at IS 'topic deleted at'; + + +CREATE TABLE IF NOT EXISTS user_meta ( + user_id BIGINT NOT NULL, + user_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (user_id), + UNIQUE (metalake_id, user_name, deleted_at) + ); +COMMENT ON TABLE user_meta IS 'user metadata'; + +COMMENT ON COLUMN user_meta.user_id IS 'user id'; +COMMENT ON COLUMN user_meta.user_name IS 'username'; +COMMENT ON COLUMN user_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN user_meta.audit_info IS 'user audit info'; +COMMENT ON COLUMN user_meta.current_version IS 'user current version'; +COMMENT ON COLUMN user_meta.last_version IS 'user last version'; +COMMENT ON COLUMN user_meta.deleted_at IS 'user deleted at'; + +CREATE TABLE IF NOT EXISTS role_meta ( + role_id BIGINT NOT NULL, + role_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + properties TEXT DEFAULT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (role_id), + UNIQUE (metalake_id, role_name, deleted_at) + ); + +COMMENT ON TABLE role_meta IS 'role metadata'; + +COMMENT ON COLUMN role_meta.role_id IS 'role id'; +COMMENT ON COLUMN role_meta.role_name IS 'role name'; +COMMENT ON COLUMN role_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN role_meta.properties IS 'role properties'; +COMMENT ON COLUMN role_meta.audit_info IS 'role audit info'; +COMMENT ON COLUMN role_meta.current_version IS 'role current version'; +COMMENT ON COLUMN role_meta.last_version IS 'role last version'; +COMMENT ON COLUMN role_meta.deleted_at IS 'role deleted at'; + + +CREATE 
TABLE IF NOT EXISTS role_meta_securable_object ( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, + role_id BIGINT NOT NULL, + metadata_object_id BIGINT NOT NULL, + type VARCHAR(128) NOT NULL, + privilege_names VARCHAR(81920) NOT NULL, + privilege_conditions VARCHAR(81920) NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id) + ); + +CREATE INDEX IF NOT EXISTS role_meta_securable_object_idx_role_id ON role_meta_securable_object (role_id); +COMMENT ON TABLE role_meta_securable_object IS 'role to securable object relation metadata'; + +COMMENT ON COLUMN role_meta_securable_object.id IS 'auto increment id'; +COMMENT ON COLUMN role_meta_securable_object.role_id IS 'role id'; +COMMENT ON COLUMN role_meta_securable_object.metadata_object_id IS 'The entity id of securable object'; +COMMENT ON COLUMN role_meta_securable_object.type IS 'securable object type'; +COMMENT ON COLUMN role_meta_securable_object.privilege_names IS 'securable object privilege names'; +COMMENT ON COLUMN role_meta_securable_object.privilege_conditions IS 'securable object privilege conditions'; +COMMENT ON COLUMN role_meta_securable_object.current_version IS 'securable object current version'; +COMMENT ON COLUMN role_meta_securable_object.last_version IS 'securable object last version'; +COMMENT ON COLUMN role_meta_securable_object.deleted_at IS 'securable object deleted at'; + + +CREATE TABLE IF NOT EXISTS user_role_rel ( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, + user_id BIGINT NOT NULL, + role_id BIGINT NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id), + UNIQUE (user_id, role_id, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS user_role_rel_idx_user_id ON user_role_rel (user_id); +COMMENT ON TABLE user_role_rel IS 'user role relation metadata'; + +COMMENT ON 
COLUMN user_role_rel.id IS 'auto increment id'; +COMMENT ON COLUMN user_role_rel.user_id IS 'user id'; +COMMENT ON COLUMN user_role_rel.role_id IS 'role id'; +COMMENT ON COLUMN user_role_rel.audit_info IS 'relation audit info'; +COMMENT ON COLUMN user_role_rel.current_version IS 'relation current version'; +COMMENT ON COLUMN user_role_rel.last_version IS 'relation last version'; +COMMENT ON COLUMN user_role_rel.deleted_at IS 'relation deleted at'; + + +CREATE TABLE IF NOT EXISTS group_meta ( + group_id BIGINT NOT NULL, + group_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (group_id), + UNIQUE (metalake_id, group_name, deleted_at) + ); +COMMENT ON TABLE group_meta IS 'group metadata'; + +COMMENT ON COLUMN group_meta.group_id IS 'group id'; +COMMENT ON COLUMN group_meta.group_name IS 'group name'; +COMMENT ON COLUMN group_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN group_meta.audit_info IS 'group audit info'; +COMMENT ON COLUMN group_meta.current_version IS 'group current version'; +COMMENT ON COLUMN group_meta.last_version IS 'group last version'; +COMMENT ON COLUMN group_meta.deleted_at IS 'group deleted at'; + + +CREATE TABLE IF NOT EXISTS group_role_rel ( + id BIGSERIAL NOT NULL, + group_id BIGINT NOT NULL, + role_id BIGINT NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id), + UNIQUE (group_id, role_id, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS group_role_rel_idx_group_id ON group_role_rel (group_id); +COMMENT ON TABLE group_role_rel IS 'relation between group and role'; +COMMENT ON COLUMN group_role_rel.id IS 'auto increment id'; +COMMENT ON COLUMN group_role_rel.group_id IS 'group id'; +COMMENT ON COLUMN group_role_rel.role_id IS 'role id'; +COMMENT 
ON COLUMN group_role_rel.audit_info IS 'relation audit info'; +COMMENT ON COLUMN group_role_rel.current_version IS 'relation current version'; +COMMENT ON COLUMN group_role_rel.last_version IS 'relation last version'; +COMMENT ON COLUMN group_role_rel.deleted_at IS 'relation deleted at'; + +CREATE TABLE IF NOT EXISTS tag_meta ( + tag_id BIGINT NOT NULL, + tag_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + tag_comment VARCHAR(256) DEFAULT '', + properties TEXT DEFAULT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (tag_id), + UNIQUE (metalake_id, tag_name, deleted_at) + ); + +COMMENT ON TABLE tag_meta IS 'tag metadata'; + +COMMENT ON COLUMN tag_meta.tag_id IS 'tag id'; +COMMENT ON COLUMN tag_meta.tag_name IS 'tag name'; +COMMENT ON COLUMN tag_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN tag_meta.tag_comment IS 'tag comment'; +COMMENT ON COLUMN tag_meta.properties IS 'tag properties'; +COMMENT ON COLUMN tag_meta.audit_info IS 'tag audit info'; + + +CREATE TABLE IF NOT EXISTS tag_relation_meta ( + id BIGINT GENERATED BY DEFAULT AS IDENTITY, + tag_id BIGINT NOT NULL, + metadata_object_id BIGINT NOT NULL, + metadata_object_type VARCHAR(64) NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id), + UNIQUE (tag_id, metadata_object_id, metadata_object_type, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS tag_relation_meta_idx_tag_id ON tag_relation_meta (tag_id); +CREATE INDEX IF NOT EXISTS tag_relation_meta_idx_metadata_object_id ON tag_relation_meta (metadata_object_id); +COMMENT ON TABLE tag_relation_meta IS 'tag metadata object relation'; +COMMENT ON COLUMN tag_relation_meta.id IS 'auto increment id'; +COMMENT ON COLUMN tag_relation_meta.tag_id IS 'tag id'; +COMMENT ON COLUMN 
tag_relation_meta.metadata_object_id IS 'metadata object id'; +COMMENT ON COLUMN tag_relation_meta.metadata_object_type IS 'metadata object type'; +COMMENT ON COLUMN tag_relation_meta.audit_info IS 'tag relation audit info'; +COMMENT ON COLUMN tag_relation_meta.current_version IS 'tag relation current version'; +COMMENT ON COLUMN tag_relation_meta.last_version IS 'tag relation last version'; +COMMENT ON COLUMN tag_relation_meta.deleted_at IS 'tag relation deleted at'; + +CREATE TABLE IF NOT EXISTS owner_meta ( + id BIGINT GENERATED BY DEFAULT AS IDENTITY, + metalake_id BIGINT NOT NULL, + owner_id BIGINT NOT NULL, + owner_type VARCHAR(64) NOT NULL, + metadata_object_id BIGINT NOT NULL, + metadata_object_type VARCHAR(64) NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id), + UNIQUE (owner_id, metadata_object_id, metadata_object_type, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS owner_meta_idx_owner_id ON owner_meta (owner_id); +CREATE INDEX IF NOT EXISTS owner_meta_idx_metadata_object_id ON owner_meta (metadata_object_id); +COMMENT ON TABLE owner_meta IS 'owner relation'; +COMMENT ON COLUMN owner_meta.id IS 'auto increment id'; +COMMENT ON COLUMN owner_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN owner_meta.owner_id IS 'owner id'; +COMMENT ON COLUMN owner_meta.owner_type IS 'owner type'; +COMMENT ON COLUMN owner_meta.metadata_object_id IS 'metadata object id'; +COMMENT ON COLUMN owner_meta.metadata_object_type IS 'metadata object type'; +COMMENT ON COLUMN owner_meta.audit_info IS 'owner relation audit info'; +COMMENT ON COLUMN owner_meta.current_version IS 'owner relation current version'; +COMMENT ON COLUMN owner_meta.last_version IS 'owner relation last version'; +COMMENT ON COLUMN owner_meta.deleted_at IS 'owner relation deleted at'; + + +CREATE TABLE IF NOT EXISTS model_meta ( + model_id BIGINT NOT NULL, + model_name 
VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + catalog_id BIGINT NOT NULL, + schema_id BIGINT NOT NULL, + model_comment VARCHAR(65535) DEFAULT NULL, + model_properties TEXT DEFAULT NULL, + model_latest_version INT NOT NULL DEFAULT 0, + audit_info TEXT NOT NULL, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (model_id), + UNIQUE (schema_id, model_name, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS model_meta_idx_metalake_id ON model_meta (metalake_id); +CREATE INDEX IF NOT EXISTS model_meta_idx_catalog_id ON model_meta (catalog_id); +COMMENT ON TABLE model_meta IS 'model metadata'; + +COMMENT ON COLUMN model_meta.model_id IS 'model id'; +COMMENT ON COLUMN model_meta.model_name IS 'model name'; +COMMENT ON COLUMN model_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN model_meta.catalog_id IS 'catalog id'; +COMMENT ON COLUMN model_meta.schema_id IS 'schema id'; +COMMENT ON COLUMN model_meta.model_comment IS 'model comment'; +COMMENT ON COLUMN model_meta.model_properties IS 'model properties'; +COMMENT ON COLUMN model_meta.model_latest_version IS 'model max version'; +COMMENT ON COLUMN model_meta.audit_info IS 'model audit info'; +COMMENT ON COLUMN model_meta.deleted_at IS 'model deleted at'; + + +CREATE TABLE IF NOT EXISTS model_version_info ( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, + metalake_id BIGINT NOT NULL, + catalog_id BIGINT NOT NULL, + schema_id BIGINT NOT NULL, + model_id BIGINT NOT NULL, + version INT NOT NULL, + model_version_comment VARCHAR(65535) DEFAULT NULL, + model_version_properties TEXT DEFAULT NULL, + model_version_uri_name VARCHAR(256) NOT NULL, + model_version_uri TEXT NOT NULL, + audit_info TEXT NOT NULL, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id), + UNIQUE (model_id, version, model_version_uri_name, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS model_version_info_idx_metalake_id ON model_version_info (metalake_id); +CREATE INDEX IF NOT EXISTS model_version_info_idx_catalog_id ON 
model_version_info (catalog_id); +CREATE INDEX IF NOT EXISTS model_version_info_idx_schema_id ON model_version_info (schema_id); +COMMENT ON TABLE model_version_info IS 'model version information'; + +COMMENT ON COLUMN model_version_info.id IS 'auto increment id'; +COMMENT ON COLUMN model_version_info.metalake_id IS 'metalake id'; +COMMENT ON COLUMN model_version_info.catalog_id IS 'catalog id'; +COMMENT ON COLUMN model_version_info.schema_id IS 'schema id'; +COMMENT ON COLUMN model_version_info.model_id IS 'model id'; +COMMENT ON COLUMN model_version_info.version IS 'model version'; +COMMENT ON COLUMN model_version_info.model_version_comment IS 'model version comment'; +COMMENT ON COLUMN model_version_info.model_version_properties IS 'model version properties'; +COMMENT ON COLUMN model_version_info.model_version_uri_name IS 'model version uri name'; +COMMENT ON COLUMN model_version_info.model_version_uri IS 'model storage uri'; +COMMENT ON COLUMN model_version_info.audit_info IS 'model version audit info'; +COMMENT ON COLUMN model_version_info.deleted_at IS 'model version deleted at'; + + +CREATE TABLE IF NOT EXISTS model_version_alias_rel ( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, + model_id BIGINT NOT NULL, + model_version INT NOT NULL, + model_version_alias VARCHAR(128) NOT NULL, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id), + UNIQUE (model_id, model_version_alias, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS model_version_alias_rel_idx_model_version_alias on model_version_alias_rel (model_version_alias); +COMMENT ON TABLE model_version_alias_rel IS 'model version alias relation'; + +COMMENT ON COLUMN model_version_alias_rel.id IS 'auto increment id'; +COMMENT ON COLUMN model_version_alias_rel.model_id IS 'model id'; +COMMENT ON COLUMN model_version_alias_rel.model_version IS 'model version'; +COMMENT ON COLUMN model_version_alias_rel.model_version_alias IS 'model version alias'; +COMMENT ON COLUMN 
model_version_alias_rel.deleted_at IS 'model version alias deleted at'; + + +CREATE TABLE IF NOT EXISTS policy_meta ( + policy_id BIGINT NOT NULL, + policy_name VARCHAR(128) NOT NULL, + policy_type VARCHAR(64) NOT NULL, + metalake_id BIGINT NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (policy_id), + UNIQUE (metalake_id, policy_name, deleted_at) + ); + +COMMENT ON TABLE policy_meta IS 'policy metadata'; +COMMENT ON COLUMN policy_meta.policy_id IS 'policy id'; +COMMENT ON COLUMN policy_meta.policy_name IS 'policy name'; +COMMENT ON COLUMN policy_meta.policy_type IS 'policy type'; +COMMENT ON COLUMN policy_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN policy_meta.audit_info IS 'policy audit info'; +COMMENT ON COLUMN policy_meta.current_version IS 'policy current version'; +COMMENT ON COLUMN policy_meta.last_version IS 'policy last version'; +COMMENT ON COLUMN policy_meta.deleted_at IS 'policy deleted at'; + + +CREATE TABLE IF NOT EXISTS policy_version_info ( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, + metalake_id BIGINT NOT NULL, + policy_id BIGINT NOT NULL, + version INT NOT NULL, + policy_comment TEXT DEFAULT NULL, + enabled BOOLEAN DEFAULT TRUE, + content TEXT DEFAULT NULL, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id), + UNIQUE (policy_id, version, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS policy_version_info_idx_metalake_id ON policy_version_info (metalake_id); +COMMENT ON TABLE policy_version_info IS 'policy version info'; +COMMENT ON COLUMN policy_version_info.id IS 'auto increment id'; +COMMENT ON COLUMN policy_version_info.metalake_id IS 'metalake id'; +COMMENT ON COLUMN policy_version_info.policy_id IS 'policy id'; +COMMENT ON COLUMN policy_version_info.version IS 'policy info version'; +COMMENT ON COLUMN policy_version_info.policy_comment IS 'policy info comment'; +COMMENT ON COLUMN 
policy_version_info.enabled IS 'whether the policy is enabled, false is disabled, true is enabled'; +COMMENT ON COLUMN policy_version_info.content IS 'policy content'; +COMMENT ON COLUMN policy_version_info.deleted_at IS 'policy deleted at'; + + +CREATE TABLE IF NOT EXISTS policy_relation_meta ( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, + policy_id BIGINT NOT NULL, + metadata_object_id BIGINT NOT NULL, + metadata_object_type VARCHAR(64) NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (id), + UNIQUE (policy_id, metadata_object_id, metadata_object_type, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS policy_relation_meta_idx_policy_id ON policy_relation_meta (policy_id); +CREATE INDEX IF NOT EXISTS policy_relation_meta_idx_metadata_object_id ON policy_relation_meta (metadata_object_id); +COMMENT ON TABLE policy_relation_meta IS 'policy metadata object relation'; +COMMENT ON COLUMN policy_relation_meta.id IS 'auto increment id'; +COMMENT ON COLUMN policy_relation_meta.policy_id IS 'policy id'; +COMMENT ON COLUMN policy_relation_meta.metadata_object_id IS 'metadata object id'; +COMMENT ON COLUMN policy_relation_meta.metadata_object_type IS 'metadata object type'; +COMMENT ON COLUMN policy_relation_meta.audit_info IS 'policy relation audit info'; +COMMENT ON COLUMN policy_relation_meta.current_version IS 'policy relation current version'; +COMMENT ON COLUMN policy_relation_meta.last_version IS 'policy relation last version'; +COMMENT ON COLUMN policy_relation_meta.deleted_at IS 'policy relation deleted at'; + +CREATE TABLE IF NOT EXISTS statistic_meta ( + id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY, + statistic_id BIGINT NOT NULL, + statistic_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + statistic_value TEXT NOT NULL, + metadata_object_id BIGINT NOT NULL, + metadata_object_type VARCHAR(64) NOT NULL, + audit_info TEXT 
NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (statistic_id), + UNIQUE (statistic_name, metadata_object_id, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS statistic_meta_idx_stid ON statistic_meta (statistic_id); +CREATE INDEX IF NOT EXISTS statistic_meta_idx_moid ON statistic_meta (metadata_object_id); +COMMENT ON TABLE statistic_meta IS 'statistic metadata'; +COMMENT ON COLUMN statistic_meta.id IS 'auto increment id'; +COMMENT ON COLUMN statistic_meta.statistic_id IS 'statistic id'; +COMMENT ON COLUMN statistic_meta.statistic_name IS 'statistic name'; +COMMENT ON COLUMN statistic_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN statistic_meta.statistic_value IS 'statistic value'; +COMMENT ON COLUMN statistic_meta.metadata_object_id IS 'metadata object id'; +COMMENT ON COLUMN statistic_meta.metadata_object_type IS 'metadata object type'; +COMMENT ON COLUMN statistic_meta.audit_info IS 'statistic audit info'; +COMMENT ON COLUMN statistic_meta.current_version IS 'statistic current version'; +COMMENT ON COLUMN statistic_meta.last_version IS 'statistic last version'; +COMMENT ON COLUMN statistic_meta.deleted_at IS 'statistic deleted at'; + +CREATE TABLE IF NOT EXISTS job_template_meta ( + job_template_id BIGINT NOT NULL, + job_template_name VARCHAR(128) NOT NULL, + metalake_id BIGINT NOT NULL, + job_template_comment TEXT DEFAULT NULL, + job_template_content TEXT NOT NULL, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (job_template_id), + UNIQUE (metalake_id, job_template_name, deleted_at) + ); + +COMMENT ON TABLE job_template_meta IS 'job template metadata'; +COMMENT ON COLUMN job_template_meta.job_template_id IS 'job template id'; +COMMENT ON COLUMN job_template_meta.job_template_name IS 'job template name'; +COMMENT ON COLUMN 
job_template_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN job_template_meta.job_template_comment IS 'job template comment'; +COMMENT ON COLUMN job_template_meta.job_template_content IS 'job template content'; +COMMENT ON COLUMN job_template_meta.audit_info IS 'job template audit info'; +COMMENT ON COLUMN job_template_meta.current_version IS 'job template current version'; +COMMENT ON COLUMN job_template_meta.last_version IS 'job template last version'; +COMMENT ON COLUMN job_template_meta.deleted_at IS 'job template deleted at'; + + +CREATE TABLE IF NOT EXISTS job_run_meta ( + job_run_id BIGINT NOT NULL, + job_template_id BIGINT NOT NULL, + metalake_id BIGINT NOT NULL, + job_execution_id VARCHAR(256) NOT NULL, + job_run_status VARCHAR(64) NOT NULL, + job_finished_at BIGINT NOT NULL DEFAULT 0, + audit_info TEXT NOT NULL, + current_version INT NOT NULL DEFAULT 1, + last_version INT NOT NULL DEFAULT 1, + deleted_at BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (job_run_id), + UNIQUE (metalake_id, job_execution_id, deleted_at) + ); + +CREATE INDEX IF NOT EXISTS job_run_meta_idx_job_template_id ON job_run_meta (job_template_id); +CREATE INDEX IF NOT EXISTS job_run_meta_idx_job_execution_id ON job_run_meta (job_execution_id); +COMMENT ON TABLE job_run_meta IS 'job run metadata'; +COMMENT ON COLUMN job_run_meta.job_run_id IS 'job run id'; +COMMENT ON COLUMN job_run_meta.job_template_id IS 'job template id'; +COMMENT ON COLUMN job_run_meta.metalake_id IS 'metalake id'; +COMMENT ON COLUMN job_run_meta.job_execution_id IS 'job execution id'; +COMMENT ON COLUMN job_run_meta.job_run_status IS 'job run status'; +COMMENT ON COLUMN job_run_meta.job_finished_at IS 'job run finished at'; +COMMENT ON COLUMN job_run_meta.audit_info IS 'job run audit info'; +COMMENT ON COLUMN job_run_meta.current_version IS 'job run current version'; +COMMENT ON COLUMN job_run_meta.last_version IS 'job run last version'; +COMMENT ON COLUMN job_run_meta.deleted_at IS 'job run deleted at'; + +CREATE 
TABLE IF NOT EXISTS table_version_info ( + table_id BIGINT PRIMARY KEY, + format VARCHAR(64) NOT NULL, + properties TEXT, + partitioning TEXT, + distribution TEXT, + sort_orders TEXT, + indexes TEXT, + "comment" TEXT, + version BIGINT, + deleted_at BIGINT DEFAULT 0, + UNIQUE (table_id, deleted_at) +); +COMMENT ON TABLE table_version_info IS 'table detail information including format, location, properties, partition, distribution, sort order, index and so on'; +COMMENT ON COLUMN table_version_info.table_id IS 'table id'; +COMMENT ON COLUMN table_version_info.format IS 'table format, such as Lance, Iceberg and so on'; +COMMENT ON COLUMN table_version_info.properties IS 'table properties'; +COMMENT ON COLUMN table_version_info.partitioning IS 'table partition info'; +COMMENT on COLUMN table_version_info.distribution IS 'table distribution info'; +COMMENT ON COLUMN table_version_info.sort_orders IS 'table sort order info'; +COMMENT ON COLUMN table_version_info.indexes IS 'table index info'; +COMMENT ON COLUMN table_version_info."comment" IS 'table comment'; +COMMENT ON COLUMN table_version_info.version IS 'table current version'; +COMMENT ON COLUMN table_version_info.deleted_at IS 'table deletion timestamp, 0 means not deleted'; diff --git a/scripts/postgresql/upgrade-1.0.0-to-1.1.0-postgresql.sql b/scripts/postgresql/upgrade-1.0.0-to-1.1.0-postgresql.sql new file mode 100644 index 00000000000..882c9a6cc27 --- /dev/null +++ b/scripts/postgresql/upgrade-1.0.0-to-1.1.0-postgresql.sql @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +CREATE TABLE IF NOT EXISTS table_version_info ( + table_id BIGINT PRIMARY KEY, + format VARCHAR(64) NOT NULL, + properties TEXT, + partitioning TEXT, + distribution TEXT, + sort_orders TEXT, + indexes TEXT, + "comment" TEXT, + version BIGINT, + deleted_at BIGINT DEFAULT 0, + UNIQUE (table_id, deleted_at) +); +COMMENT ON TABLE table_version_info IS 'table detail information including format, location, properties, partition, distribution, sort order, index and so on'; +COMMENT ON COLUMN table_version_info.table_id IS 'table id'; +COMMENT ON COLUMN table_version_info.format IS 'table format, such as Lance, Iceberg and so on'; +COMMENT ON COLUMN table_version_info.properties IS 'table properties'; +COMMENT ON COLUMN table_version_info.partitioning IS 'table partition info'; +COMMENT ON COLUMN table_version_info.distribution IS 'table distribution info'; +COMMENT ON COLUMN table_version_info.sort_orders IS 'table sort order info'; +COMMENT ON COLUMN table_version_info.indexes IS 'table index info'; +COMMENT ON COLUMN table_version_info."comment" IS 'table comment'; +COMMENT ON COLUMN table_version_info.version IS 'table current version'; +COMMENT ON COLUMN table_version_info.deleted_at IS 'table deletion timestamp, 0 means not deleted'; From 9aa61b2f8ba57f74082ecabbf61d8670a60665f1 Mon Sep 17 00:00:00 2001 From: mchades Date: Fri, 24 Oct 2025 10:47:38 +0800 Subject: [PATCH 05/43] [#8890] feat(lance-rest-server): add lance-rest-server framework (#8895) ### What changes were proposed in this pull request? 
- add lance-rest-server framework - support list namespaces ### Why are the changes needed? Fix: #8890 ### Does this PR introduce _any_ user-facing change? yes ### How was this patch tested? no tests now --- gradle/libs.versions.toml | 2 + lance/lance-common/build.gradle.kts | 4 + .../lance/common/config/LanceConfig.java | 26 ++ .../lance/common/ops/LanceCatalogService.java | 352 ------------------ .../common/ops/LanceNamespaceBackend.java | 54 +++ .../common/ops/LanceNamespaceOperations.java | 50 +++ .../common/ops/LanceTableOperations.java | 28 ++ .../lance/common/ops/NamespaceWrapper.java | 76 ++++ .../GravitinoLanceNamespaceWrapper.java | 174 +++++++++ lance/lance-rest-server/build.gradle.kts | 2 + .../gravitino/lance/LanceRESTService.java | 39 +- .../lance/service/LanceExceptionMapper.java | 88 +++++ .../rest/LanceListNamespacesResponse.java | 63 ---- .../service/rest/LanceListTablesResponse.java | 63 ---- .../rest/LanceNamespaceOperations.java | 63 +--- .../service/rest/LanceTableOperations.java | 63 ++++ 16 files changed, 617 insertions(+), 530 deletions(-) delete mode 100644 lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceCatalogService.java create mode 100644 lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceBackend.java create mode 100644 lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java create mode 100644 lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java create mode 100644 lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/NamespaceWrapper.java create mode 100644 lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java create mode 100644 lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceExceptionMapper.java delete mode 100644 
lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListNamespacesResponse.java delete mode 100644 lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListTablesResponse.java create mode 100644 lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index d0779e0ee60..db1ea15782f 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -29,6 +29,7 @@ lombok = "1.18.20" slf4j = "2.0.16" log4j = "2.24.3" lance = "0.34.0" +lance-namespace = "0.0.19" jetty = "9.4.51.v20230217" jersey = "2.41" mockito = "4.11.0" @@ -161,6 +162,7 @@ log4j-core = { group = "org.apache.logging.log4j", name = "log4j-core", version. log4j-12-api = { group = "org.apache.logging.log4j", name = "log4j-1.2-api", version.ref = "log4j" } log4j-layout-template-json = { group = "org.apache.logging.log4j", name = "log4j-layout-template-json", version.ref = "log4j" } lance = { group = "com.lancedb", name = "lance-core", version.ref = "lance" } +lance-namespace-core = { group = "com.lancedb", name = "lance-namespace-core", version.ref = "lance-namespace" } jakarta-validation-api = { group = "jakarta.validation", name = "jakarta.validation-api", version.ref = "jakarta-validation" } jetty-server = { group = "org.eclipse.jetty", name = "jetty-server", version.ref = "jetty" } jetty-servlet = { group = "org.eclipse.jetty", name = "jetty-servlet", version.ref = "jetty" } diff --git a/lance/lance-common/build.gradle.kts b/lance/lance-common/build.gradle.kts index 5048d274f66..4e91dd6c5ce 100644 --- a/lance/lance-common/build.gradle.kts +++ b/lance/lance-common/build.gradle.kts @@ -27,6 +27,9 @@ plugins { dependencies { implementation(project(":api")) implementation(project(":catalogs:catalog-common")) + implementation(project(":clients:client-java")) { + exclude("*") + } implementation(project(":common")) { 
exclude("*") } @@ -36,6 +39,7 @@ dependencies { implementation(libs.guava) implementation(libs.commons.lang3) + implementation(libs.lance.namespace.core) implementation(libs.slf4j.api) testImplementation(libs.junit.jupiter.api) diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java index f2d7e748cf8..dfe863953ee 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java @@ -30,9 +30,14 @@ public class LanceConfig extends Config implements OverwriteDefaultConfig { public static final String LANCE_CONFIG_PREFIX = "gravitino.lance-rest."; + public static final String CONFIG_NAMESPACE_BACKEND = "namespace-backend"; + public static final String CONFIG_METALAKE = "metalake-name"; + public static final String CONFIG_URI = "uri"; public static final int DEFAULT_LANCE_REST_SERVICE_HTTP_PORT = 9101; public static final int DEFAULT_LANCE_REST_SERVICE_HTTPS_PORT = 9533; + public static final String DEFAULT_NAMESPACE_BACKEND = "gravitino"; + public static final String DEFAULT_URI = "http://localhost:8090"; public static final ConfigEntry CATALOG_NAME = new ConfigBuilder(LANCE_CONFIG_PREFIX + "catalog-name") @@ -41,6 +46,27 @@ public class LanceConfig extends Config implements OverwriteDefaultConfig { .stringConf() .createWithDefault("default"); + public static final ConfigEntry NAMESPACE_BACKEND = + new ConfigBuilder(LANCE_CONFIG_PREFIX + CONFIG_NAMESPACE_BACKEND) + .doc("The backend implementation for namespace operations") + .version(ConfigConstants.VERSION_0_1_0) + .stringConf() + .createWithDefault(DEFAULT_NAMESPACE_BACKEND); + + public static final ConfigEntry METALAKE_NAME = + new ConfigBuilder(LANCE_CONFIG_PREFIX + CONFIG_METALAKE) + .doc("The Metalake name for Gravitino namespace backend") + 
.version(ConfigConstants.VERSION_0_1_0) + .stringConf() + .create(); + + public static final ConfigEntry NAMESPACE_URI = + new ConfigBuilder(LANCE_CONFIG_PREFIX + CONFIG_URI) + .doc("The URI for the namespace backend, e.g., Gravitino server URI") + .version(ConfigConstants.VERSION_0_1_0) + .stringConf() + .createWithDefault(DEFAULT_URI); + public LanceConfig(Map properties) { super(false); loadFromMap(properties, key -> true); diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceCatalogService.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceCatalogService.java deleted file mode 100644 index 67dd4c2d226..00000000000 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceCatalogService.java +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.gravitino.lance.common.ops; - -import com.google.common.collect.ImmutableMap; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Objects; -import java.util.Optional; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.stream.Collectors; -import org.apache.commons.lang3.StringUtils; -import org.apache.gravitino.lance.common.config.LanceConfig; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Thin placeholder that will later bridge Lance catalog metadata into Gravitino. - * - *

    The current implementation keeps an in-memory catalog view so the REST surface mirrors the - * Iceberg catalog experience while the Lance integration is built out for real. - */ -public class LanceCatalogService implements AutoCloseable { - - private static final Logger LOG = LoggerFactory.getLogger(LanceCatalogService.class); - - private final LanceConfig config; - private final ConcurrentMap namespaces; - - public LanceCatalogService(LanceConfig config) { - this.config = config; - this.namespaces = new ConcurrentHashMap<>(); - seedSampleMetadata(); - } - - public String catalogName() { - return config.getCatalogName(); - } - - public boolean namespaceExists(String namespace) { - return namespaces.containsKey(namespace); - } - - public Map> listNamespaces() { - Map> result = new ConcurrentHashMap<>(); - namespaces.forEach( - (name, state) -> - result.put( - name, Collections.unmodifiableMap(new ConcurrentHashMap<>(state.properties)))); - return Map.copyOf(result); - } - - public List listNamespaceNames() { - return namespaces.keySet().stream() - .sorted(Comparator.naturalOrder()) - .collect(Collectors.toUnmodifiableList()); - } - - public NamespaceListingResult listChildNamespaces( - String parentId, String delimiter, String pageToken, Integer limit) { - String normalizedParent = StringUtils.trimToEmpty(parentId); - String effectiveDelimiter = StringUtils.isBlank(delimiter) ? "$" : delimiter; - - List sortedNamespaces = listNamespaceNames(); - List filtered = filterChildren(sortedNamespaces, normalizedParent, effectiveDelimiter); - - int startingOffset = parsePageToken(pageToken, filtered.size()); - int pageLimit = limit == null ? filtered.size() : validatePositiveLimit(limit, filtered.size()); - int endIndex = Math.min(filtered.size(), startingOffset + pageLimit); - - List page = filtered.subList(startingOffset, endIndex); - String nextToken = endIndex < filtered.size() ? 
String.valueOf(endIndex) : null; - return new NamespaceListingResult(normalizedParent, effectiveDelimiter, page, nextToken); - } - - public boolean createNamespace(String namespace) { - if (StringUtils.isBlank(namespace)) { - throw new IllegalArgumentException("Namespace must be non-empty"); - } - NamespaceState state = new NamespaceState(Collections.emptyMap()); - NamespaceState existing = namespaces.putIfAbsent(namespace, state); - if (existing == null) { - LOG.info("Created Lance namespace {}", namespace); - return true; - } - return false; - } - - public boolean dropNamespace(String namespace) { - NamespaceState state = namespaces.get(namespace); - if (state == null) { - return false; - } - if (!state.tables.isEmpty()) { - LOG.info("Refusing to drop Lance namespace {} because it still owns tables", namespace); - return false; - } - boolean removed = namespaces.remove(namespace, state); - if (removed) { - LOG.info("Dropped Lance namespace {}", namespace); - } - return removed; - } - - public List listTables(String namespace) { - NamespaceState state = namespaces.get(namespace); - if (state == null) { - throw new IllegalArgumentException("Unknown namespace: " + namespace); - } - return state.tables.keySet().stream() - .sorted(Comparator.naturalOrder()) - .collect(Collectors.toUnmodifiableList()); - } - - public Optional> loadTable(String namespace, String table) { - NamespaceState state = namespaces.get(namespace); - if (state == null) { - return Optional.empty(); - } - LanceTableEntry tableEntry = state.tables.get(table); - if (tableEntry == null) { - return Optional.empty(); - } - return Optional.of(tableEntry.describe()); - } - - public TableListingResult listTables( - String namespaceId, String delimiter, String pageToken, Integer limit) { - String normalizedNamespace = StringUtils.trimToEmpty(namespaceId); - if (StringUtils.isBlank(normalizedNamespace)) { - throw new IllegalArgumentException("Namespace id must be provided"); - } - - String effectiveDelimiter 
= StringUtils.isBlank(delimiter) ? "$" : delimiter; - - NamespaceState state = namespaces.get(normalizedNamespace); - if (state == null) { - throw new NoSuchElementException("Unknown namespace: " + normalizedNamespace); - } - - List sortedTables = - state.tables.keySet().stream() - .sorted(Comparator.naturalOrder()) - .collect(Collectors.toList()); - - int startingOffset = parsePageToken(pageToken, sortedTables.size()); - int pageLimit = - limit == null ? sortedTables.size() : validatePositiveLimit(limit, sortedTables.size()); - int endIndex = Math.min(sortedTables.size(), startingOffset + pageLimit); - - List page = sortedTables.subList(startingOffset, endIndex); - String nextToken = endIndex < sortedTables.size() ? String.valueOf(endIndex) : null; - - return new TableListingResult(normalizedNamespace, effectiveDelimiter, page, nextToken); - } - - @Override - public void close() { - namespaces.clear(); - } - - private void seedSampleMetadata() { - NamespaceState defaultNamespace = - namespaces.computeIfAbsent("default", key -> new NamespaceState(Collections.emptyMap())); - defaultNamespace.tables.put( - "sample_table", - new LanceTableEntry( - "sample_table", - "default", - ImmutableMap.of( - "format", "lance", - "uri", "file:///tmp/sample_table.lance", - "summary", "Placeholder Lance table metadata"))); - } - - private static final class NamespaceState { - private final Map properties; - private final ConcurrentMap tables; - - NamespaceState(Map properties) { - this.properties = new ConcurrentHashMap<>(properties); - this.tables = new ConcurrentHashMap<>(); - } - } - - private static final class LanceTableEntry { - private final String name; - private final String namespace; - private final Map metadata; - - LanceTableEntry(String name, String namespace, Map metadata) { - this.name = name; - this.namespace = namespace; - this.metadata = new ConcurrentHashMap<>(metadata); - } - - Map describe() { - Map result = new ConcurrentHashMap<>(metadata); - 
result.put("name", name); - result.put("namespace", namespace); - return Collections.unmodifiableMap(result); - } - } - - private List filterChildren(List namespaces, String parentId, String delimiter) { - boolean rootRequest = StringUtils.isBlank(parentId) || "root".equalsIgnoreCase(parentId); - if (rootRequest) { - return namespaces; - } - - String parentPrefix = parentId + delimiter; - return namespaces.stream() - .filter(ns -> ns.startsWith(parentPrefix)) - .map( - ns -> { - String remainder = ns.substring(parentPrefix.length()); - int nextDelimiter = remainder.indexOf(delimiter); - if (nextDelimiter >= 0) { - return remainder.substring(0, nextDelimiter); - } - return remainder; - }) - .filter(child -> !child.isEmpty()) - .distinct() - .sorted(Comparator.naturalOrder()) - .collect(Collectors.toUnmodifiableList()); - } - - private int parsePageToken(String pageToken, int size) { - if (StringUtils.isBlank(pageToken)) { - return 0; - } - try { - int parsed = Integer.parseInt(pageToken); - if (parsed < 0 || parsed > size) { - throw new IllegalArgumentException("Invalid page_token value"); - } - return parsed; - } catch (NumberFormatException nfe) { - throw new IllegalArgumentException("Invalid page_token value", nfe); - } - } - - private int validatePositiveLimit(int limit, int size) { - if (limit <= 0) { - throw new IllegalArgumentException("limit must be greater than 0"); - } - return Math.min(limit, Math.max(size, 0)); - } - - public static final class NamespaceListingResult { - private final String parentId; - private final String delimiter; - private final List namespaces; - private final String nextPageToken; - - NamespaceListingResult( - String parentId, String delimiter, List namespaces, String nextPageToken) { - this.parentId = parentId; - this.delimiter = delimiter; - this.namespaces = List.copyOf(namespaces); - this.nextPageToken = nextPageToken; - } - - public String getParentId() { - return parentId; - } - - public String getDelimiter() { - return 
delimiter; - } - - public List getNamespaces() { - return namespaces; - } - - public Optional getNextPageToken() { - return Optional.ofNullable(nextPageToken); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof NamespaceListingResult)) { - return false; - } - NamespaceListingResult that = (NamespaceListingResult) o; - return Objects.equals(parentId, that.parentId) - && Objects.equals(delimiter, that.delimiter) - && Objects.equals(namespaces, that.namespaces) - && Objects.equals(nextPageToken, that.nextPageToken); - } - - @Override - public int hashCode() { - return Objects.hash(parentId, delimiter, namespaces, nextPageToken); - } - } - - public static final class TableListingResult { - private final String namespaceId; - private final String delimiter; - private final List tables; - private final String nextPageToken; - - TableListingResult( - String namespaceId, String delimiter, List tables, String nextPageToken) { - this.namespaceId = namespaceId; - this.delimiter = delimiter; - this.tables = List.copyOf(tables); - this.nextPageToken = nextPageToken; - } - - public String getNamespaceId() { - return namespaceId; - } - - public String getDelimiter() { - return delimiter; - } - - public List getTables() { - return tables; - } - - public Optional getNextPageToken() { - return Optional.ofNullable(nextPageToken); - } - } -} diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceBackend.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceBackend.java new file mode 100644 index 00000000000..57d393f5ad4 --- /dev/null +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceBackend.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.common.ops; + +import java.util.Arrays; +import org.apache.gravitino.lance.common.ops.gravitino.GravitinoLanceNamespaceWrapper; + +public enum LanceNamespaceBackend { + GRAVITINO("gravitino", GravitinoLanceNamespaceWrapper.class); + + private final String type; + private final Class wrapperClass; + + public static LanceNamespaceBackend fromType(String type) { + for (LanceNamespaceBackend backend : values()) { + if (backend.type.equalsIgnoreCase(type)) { + return backend; + } + } + throw new IllegalArgumentException( + String.format( + "Unknown backend type %s, available types: %s", + type, Arrays.toString(LanceNamespaceBackend.values()))); + } + + LanceNamespaceBackend(String type, Class wrapperClass) { + this.type = type; + this.wrapperClass = wrapperClass; + } + + public String getType() { + return type; + } + + public Class getWrapperClass() { + return wrapperClass; + } +} diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java new file mode 100644 index 00000000000..1b5da98ec04 --- /dev/null +++ 
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.common.ops; + +import com.lancedb.lance.namespace.LanceNamespaceException; +import com.lancedb.lance.namespace.model.CreateNamespaceRequest; +import com.lancedb.lance.namespace.model.CreateNamespaceResponse; +import com.lancedb.lance.namespace.model.DescribeNamespaceResponse; +import com.lancedb.lance.namespace.model.DropNamespaceRequest; +import com.lancedb.lance.namespace.model.DropNamespaceResponse; +import com.lancedb.lance.namespace.model.ListNamespacesResponse; +import java.util.Map; + +public interface LanceNamespaceOperations { + + ListNamespacesResponse listNamespaces( + String namespaceId, String delimiter, String pageToken, Integer limit); + + DescribeNamespaceResponse describeNamespace(String id, String delimiter); + + CreateNamespaceResponse createNamespace( + String id, + String delimiter, + CreateNamespaceRequest.ModeEnum mode, + Map properties); + + DropNamespaceResponse dropNamespace( + String id, + String delimiter, + DropNamespaceRequest.ModeEnum mode, + DropNamespaceRequest.BehaviorEnum 
behavior); + + void namespaceExists(String id, String delimiter) throws LanceNamespaceException; +} diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java new file mode 100644 index 00000000000..057dce8fb3c --- /dev/null +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.lance.common.ops; + +import com.lancedb.lance.namespace.model.ListTablesResponse; + +public interface LanceTableOperations { + + ListTablesResponse listTables(String id, String delimiter, String pageToken, Integer limit); + + // todo: add more table operation methods +} diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/NamespaceWrapper.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/NamespaceWrapper.java new file mode 100644 index 00000000000..936a5a70697 --- /dev/null +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/NamespaceWrapper.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.lance.common.ops; + +import org.apache.gravitino.lance.common.config.LanceConfig; + +public abstract class NamespaceWrapper { + + public static final String NAMESPACE_DELIMITER_DEFAULT = "$"; + private final LanceConfig config; + + private volatile boolean initialized = false; + private LanceNamespaceOperations namespaceOps; + private LanceTableOperations tableOps; + + public NamespaceWrapper(LanceConfig config) { + this.config = config; + } + + protected abstract void initialize(); + + protected abstract LanceNamespaceOperations newNamespaceOps(); + + protected abstract LanceTableOperations newTableOps(); + + public abstract void close() throws Exception; + + public LanceNamespaceOperations asNamespaceOps() { + // lazy initialize the operations because it may block the startup + initIfNeeded(); + return namespaceOps; + } + + public LanceTableOperations asTableOps() { + // lazy initialize the operations because it may block the startup + initIfNeeded(); + return tableOps; + } + + public LanceConfig config() { + return config; + } + + private void initAll() { + initialize(); + namespaceOps = newNamespaceOps(); + tableOps = newTableOps(); + initialized = true; + } + + private void initIfNeeded() { + if (!initialized) { + synchronized (this) { + if (!initialized) { + initAll(); + } + } + } + } +} diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java new file mode 100644 index 00000000000..59f637b5a1f --- /dev/null +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.common.ops.gravitino; + +import static org.apache.gravitino.lance.common.config.LanceConfig.METALAKE_NAME; +import static org.apache.gravitino.lance.common.config.LanceConfig.NAMESPACE_URI; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import com.lancedb.lance.namespace.LanceNamespaceException; +import com.lancedb.lance.namespace.ObjectIdentifier; +import com.lancedb.lance.namespace.model.CreateNamespaceRequest; +import com.lancedb.lance.namespace.model.CreateNamespaceResponse; +import com.lancedb.lance.namespace.model.DescribeNamespaceResponse; +import com.lancedb.lance.namespace.model.DropNamespaceRequest; +import com.lancedb.lance.namespace.model.DropNamespaceResponse; +import com.lancedb.lance.namespace.model.ListNamespacesResponse; +import com.lancedb.lance.namespace.model.ListTablesResponse; +import com.lancedb.lance.namespace.util.PageUtil; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.client.GravitinoClient; +import 
org.apache.gravitino.exceptions.NoSuchCatalogException; +import org.apache.gravitino.lance.common.config.LanceConfig; +import org.apache.gravitino.lance.common.ops.LanceNamespaceOperations; +import org.apache.gravitino.lance.common.ops.LanceTableOperations; +import org.apache.gravitino.lance.common.ops.NamespaceWrapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class GravitinoLanceNamespaceWrapper extends NamespaceWrapper + implements LanceNamespaceOperations, LanceTableOperations { + + private static final Logger LOG = LoggerFactory.getLogger(GravitinoLanceNamespaceWrapper.class); + private GravitinoClient client; + + public GravitinoLanceNamespaceWrapper(LanceConfig config) { + super(config); + } + + @Override + protected void initialize() { + String uri = config().get(NAMESPACE_URI); + String metalakeName = config().get(METALAKE_NAME); + Preconditions.checkArgument( + StringUtils.isNotBlank(metalakeName), + "Metalake name must be provided for Gravitino namespace backend"); + + this.client = GravitinoClient.builder(uri).withMetalake(metalakeName).build(); + } + + @Override + public LanceNamespaceOperations newNamespaceOps() { + return this; + } + + @Override + protected LanceTableOperations newTableOps() { + return this; + } + + @Override + public void close() { + if (client != null) { + try { + client.close(); + } catch (Exception e) { + LOG.warn("Error closing Gravitino client", e); + } + } + } + + @Override + public ListNamespacesResponse listNamespaces( + String namespaceId, String delimiter, String pageToken, Integer limit) { + ObjectIdentifier nsId = ObjectIdentifier.of(namespaceId, delimiter); + Preconditions.checkArgument( + nsId.levels() <= 2, "Expected at most 2-level namespace but got: %s", namespaceId); + + List namespaces; + switch (nsId.levels()) { + case 0: + // List catalogs of type relational and provider generic-lakehouse + namespaces = + Arrays.stream(client.listCatalogsInfo()) + .filter(this::isLakehouseCatalog) + 
.map(Catalog::name) + .collect(Collectors.toList()); + break; + + case 1: + // List schemas under the catalog + String catalogName = nsId.levelAtListPos(0); + Catalog catalog = client.loadCatalog(catalogName); + if (!isLakehouseCatalog(catalog)) { + throw new NoSuchCatalogException("Catalog not found: %s", catalogName); + } + + namespaces = Lists.newArrayList(catalog.asSchemas().listSchemas()); + break; + + default: + throw new IllegalArgumentException( + "Expected at most 2-level namespace but got: " + namespaceId); + } + + Collections.sort(namespaces); + PageUtil.Page page = + PageUtil.splitPage(namespaces, pageToken, PageUtil.normalizePageSize(limit)); + ListNamespacesResponse response = new ListNamespacesResponse(); + response.setNamespaces(Sets.newHashSet(page.items())); + response.setPageToken(page.nextPageToken()); + return response; + } + + @Override + public DescribeNamespaceResponse describeNamespace(String id, String delimiter) { + throw new UnsupportedOperationException("Not implemented yet"); + } + + @Override + public CreateNamespaceResponse createNamespace( + String id, + String delimiter, + CreateNamespaceRequest.ModeEnum mode, + Map properties) { + throw new UnsupportedOperationException("Not implemented yet"); + } + + @Override + public DropNamespaceResponse dropNamespace( + String id, + String delimiter, + DropNamespaceRequest.ModeEnum mode, + DropNamespaceRequest.BehaviorEnum behavior) { + throw new UnsupportedOperationException("Not implemented yet"); + } + + @Override + public void namespaceExists(String id, String delimiter) throws LanceNamespaceException {} + + private boolean isLakehouseCatalog(Catalog catalog) { + return catalog.type().equals(Catalog.Type.RELATIONAL) + && "generic-lakehouse".equals(catalog.provider()); + } + + @Override + public ListTablesResponse listTables( + String id, String delimiter, String pageToken, Integer limit) { + throw new UnsupportedOperationException("Not implemented yet"); + } +} diff --git 
a/lance/lance-rest-server/build.gradle.kts b/lance/lance-rest-server/build.gradle.kts index 03376095935..4e4ca7db3ce 100644 --- a/lance/lance-rest-server/build.gradle.kts +++ b/lance/lance-rest-server/build.gradle.kts @@ -36,6 +36,7 @@ dependencies { implementation(project(":server-common")) { exclude("*") } + implementation(project(":lance:lance-common")) implementation(libs.bundles.jetty) @@ -43,6 +44,7 @@ dependencies { implementation(libs.bundles.log4j) implementation(libs.bundles.metrics) implementation(libs.bundles.prometheus) + implementation(libs.lance.namespace.core) implementation(libs.metrics.jersey2) implementation(libs.guava) implementation(libs.jackson.annotations) diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java index e85dc37b4a3..123781262fe 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java @@ -18,14 +18,18 @@ */ package org.apache.gravitino.lance; +import static org.apache.gravitino.lance.common.config.LanceConfig.NAMESPACE_BACKEND; + +import java.lang.reflect.Constructor; import java.util.Map; import javax.servlet.Servlet; import org.apache.gravitino.auxiliary.GravitinoAuxiliaryService; import org.apache.gravitino.lance.common.config.LanceConfig; -import org.apache.gravitino.lance.common.ops.LanceCatalogService; -import org.apache.gravitino.lance.service.rest.LanceNamespaceOperations; +import org.apache.gravitino.lance.common.ops.LanceNamespaceBackend; +import org.apache.gravitino.lance.common.ops.NamespaceWrapper; import org.apache.gravitino.server.web.JettyServer; import org.apache.gravitino.server.web.JettyServerConfig; +import org.glassfish.hk2.utilities.binding.AbstractBinder; import org.glassfish.jersey.jackson.JacksonFeature; import 
org.glassfish.jersey.server.ResourceConfig; import org.glassfish.jersey.servlet.ServletContainer; @@ -41,7 +45,7 @@ public class LanceRESTService implements GravitinoAuxiliaryService { public static final String LANCE_SPEC = "/lance/*"; private JettyServer server; - private LanceCatalogService catalogService; + private NamespaceWrapper lanceNamespace; @Override public String shortName() { @@ -56,11 +60,18 @@ public void serviceInit(Map properties) { server = new JettyServer(); server.initialize(serverConfig, SERVICE_NAME, false); - catalogService = new LanceCatalogService(lanceConfig); + this.lanceNamespace = loadNamespaceImpl(lanceConfig); ResourceConfig resourceConfig = new ResourceConfig(); resourceConfig.register(JacksonFeature.class); - resourceConfig.register(new LanceNamespaceOperations(catalogService)); + resourceConfig.packages("org.apache.gravitino.lance.service.rest"); + resourceConfig.register( + new AbstractBinder() { + @Override + protected void configure() { + bind(lanceNamespace).to(NamespaceWrapper.class).ranked(1); + } + }); Servlet container = new ServletContainer(resourceConfig); server.addServlet(container, LANCE_SPEC); @@ -84,8 +95,8 @@ public void serviceStop() throws Exception { server.stop(); LOG.info("Lance REST service stopped"); } - if (catalogService != null) { - catalogService.close(); + if (lanceNamespace != null) { + lanceNamespace.close(); } } @@ -94,4 +105,18 @@ public void join() { server.join(); } } + + private NamespaceWrapper loadNamespaceImpl(LanceConfig lanceConfig) { + String backendType = lanceConfig.get(NAMESPACE_BACKEND); + LanceNamespaceBackend lanceNamespaceBackend = LanceNamespaceBackend.fromType(backendType); + + try { + Constructor constructor = + lanceNamespaceBackend.getWrapperClass().getConstructor(LanceConfig.class); + return constructor.newInstance(lanceConfig); + } catch (Exception e) { + LOG.error("Error loading namespace implementation for backend type: {}", backendType, e); + throw new 
RuntimeException("Failed to load namespace implementation", e); + } + } } diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceExceptionMapper.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceExceptionMapper.java new file mode 100644 index 00000000000..2465e6ed632 --- /dev/null +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceExceptionMapper.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.lance.service; + +import static org.apache.commons.lang3.exception.ExceptionUtils.getStackTrace; + +import com.lancedb.lance.namespace.LanceNamespaceException; +import com.lancedb.lance.namespace.model.ErrorResponse; +import java.util.Optional; +import javax.ws.rs.core.Response; +import javax.ws.rs.ext.ExceptionMapper; +import javax.ws.rs.ext.Provider; +import org.apache.gravitino.exceptions.NotFoundException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Provider +public class LanceExceptionMapper implements ExceptionMapper { + + private static final Logger LOG = LoggerFactory.getLogger(LanceExceptionMapper.class); + + public static Response toRESTResponse(String instance, Exception ex) { + LanceNamespaceException lanceException = + ex instanceof LanceNamespaceException + ? (LanceNamespaceException) ex + : toLanceNamespaceException(instance, ex); + + return handleLanceNamespaceException(lanceException); + } + + @Override + public Response toResponse(Exception ex) { + return toRESTResponse("", ex); + } + + private static LanceNamespaceException toLanceNamespaceException(String instance, Exception ex) { + if (ex instanceof NotFoundException) { + return LanceNamespaceException.notFound( + ex.getMessage(), ex.getClass().getSimpleName(), instance, getStackTrace(ex)); + + } else if (ex instanceof IllegalArgumentException) { + return LanceNamespaceException.badRequest( + ex.getMessage(), ex.getClass().getSimpleName(), instance, getStackTrace(ex)); + + } else if (ex instanceof UnsupportedOperationException) { + return LanceNamespaceException.unsupportedOperation( + ex.getMessage(), ex.getClass().getSimpleName(), instance, getStackTrace(ex)); + + } else { + LOG.warn("Lance REST server unexpected exception:", ex); + return LanceNamespaceException.serverError( + ex.getMessage(), ex.getClass().getSimpleName(), instance, getStackTrace(ex)); + } + } + + // Referred from lance-namespace-adapter's LanceNamespaces exception 
handling + // com.lancedb.lance.namespace.adapter.GlobalExceptionHandler + private static Response handleLanceNamespaceException(LanceNamespaceException ex) { + ErrorResponse errResp = new ErrorResponse(); + Optional errorResponse = ex.getErrorResponse(); + if (errorResponse.isPresent() && errorResponse.get().getCode() != null) { + errResp = errorResponse.get(); + + } else { + // Transform error info into ErrorResponse + errResp.setCode(ex.getCode()); + errResp.setError(ex.getMessage()); + } + + return Response.status(errResp.getCode()).entity(errResp).build(); + } +} diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListNamespacesResponse.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListNamespacesResponse.java deleted file mode 100644 index 11ec7d3c3c5..00000000000 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListNamespacesResponse.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.gravitino.lance.service.rest; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; -import java.util.List; - -@JsonInclude(JsonInclude.Include.NON_NULL) -public class LanceListNamespacesResponse { - - @JsonProperty("id") - private final String id; - - @JsonProperty("delimiter") - private final String delimiter; - - @JsonProperty("namespaces") - private final List namespaces; - - @JsonProperty("next_page_token") - private final String nextPageToken; - - public LanceListNamespacesResponse( - String id, String delimiter, List namespaces, String nextPageToken) { - this.id = id; - this.delimiter = delimiter; - this.namespaces = List.copyOf(namespaces); - this.nextPageToken = nextPageToken; - } - - public String getId() { - return id; - } - - public String getDelimiter() { - return delimiter; - } - - public List getNamespaces() { - return namespaces; - } - - public String getNextPageToken() { - return nextPageToken; - } -} diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListTablesResponse.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListTablesResponse.java deleted file mode 100644 index 82e2a909787..00000000000 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceListTablesResponse.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.gravitino.lance.service.rest; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; -import java.util.List; - -@JsonInclude(JsonInclude.Include.NON_NULL) -public class LanceListTablesResponse { - - @JsonProperty("id") - private final String namespaceId; - - @JsonProperty("delimiter") - private final String delimiter; - - @JsonProperty("tables") - private final List tables; - - @JsonProperty("next_page_token") - private final String nextPageToken; - - public LanceListTablesResponse( - String namespaceId, String delimiter, List tables, String nextPageToken) { - this.namespaceId = namespaceId; - this.delimiter = delimiter; - this.tables = List.copyOf(tables); - this.nextPageToken = nextPageToken; - } - - public String getNamespaceId() { - return namespaceId; - } - - public String getDelimiter() { - return delimiter; - } - - public List getTables() { - return tables; - } - - public String getNextPageToken() { - return nextPageToken; - } -} diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java index 0ac9457eff9..6c0477a51c8 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java @@ -18,75 +18,48 @@ */ package 
org.apache.gravitino.lance.service.rest; -import java.util.NoSuchElementException; -import javax.ws.rs.BadRequestException; +import static org.apache.gravitino.lance.common.ops.NamespaceWrapper.NAMESPACE_DELIMITER_DEFAULT; + +import com.lancedb.lance.namespace.model.ListNamespacesResponse; +import javax.inject.Inject; +import javax.ws.rs.Consumes; import javax.ws.rs.DefaultValue; import javax.ws.rs.Encoded; import javax.ws.rs.GET; -import javax.ws.rs.NotFoundException; import javax.ws.rs.Path; import javax.ws.rs.PathParam; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; -import org.apache.gravitino.lance.common.ops.LanceCatalogService; +import org.apache.gravitino.lance.common.ops.NamespaceWrapper; +import org.apache.gravitino.lance.service.LanceExceptionMapper; @Path("/v1/namespace") +@Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) public class LanceNamespaceOperations { - private final LanceCatalogService catalogService; + private final NamespaceWrapper lanceNamespace; - public LanceNamespaceOperations(LanceCatalogService catalogService) { - this.catalogService = catalogService; + @Inject + public LanceNamespaceOperations(NamespaceWrapper lanceNamespace) { + this.lanceNamespace = lanceNamespace; } @GET @Path("/{id}/list") public Response listNamespaces( @Encoded @PathParam("id") String namespaceId, - @DefaultValue("$") @QueryParam("delimiter") String delimiter, - @QueryParam("page_token") String pageToken, - @QueryParam("limit") Integer limit) { - try { - LanceCatalogService.NamespaceListingResult result = - catalogService.listChildNamespaces(namespaceId, delimiter, pageToken, limit); - LanceListNamespacesResponse payload = - new LanceListNamespacesResponse( - result.getParentId(), - result.getDelimiter(), - result.getNamespaces(), - result.getNextPageToken().orElse(null)); - return Response.ok(payload).build(); - } catch (NoSuchElementException nse) { - 
throw new NotFoundException(nse.getMessage(), nse); - } catch (IllegalArgumentException iae) { - throw new BadRequestException(iae.getMessage(), iae); - } - } - - @GET - @Path("/{id}/table/list") - public Response listTables( - @Encoded @PathParam("id") String namespaceId, - @DefaultValue("$") @QueryParam("delimiter") String delimiter, + @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter") String delimiter, @QueryParam("page_token") String pageToken, @QueryParam("limit") Integer limit) { try { - LanceCatalogService.TableListingResult result = - catalogService.listTables(namespaceId, delimiter, pageToken, limit); - LanceListTablesResponse payload = - new LanceListTablesResponse( - result.getNamespaceId(), - result.getDelimiter(), - result.getTables(), - result.getNextPageToken().orElse(null)); - return Response.ok(payload).build(); - } catch (NoSuchElementException nse) { - throw new NotFoundException(nse.getMessage(), nse); - } catch (IllegalArgumentException iae) { - throw new BadRequestException(iae.getMessage(), iae); + ListNamespacesResponse response = + lanceNamespace.asNamespaceOps().listNamespaces(namespaceId, delimiter, pageToken, limit); + return Response.ok(response).build(); + } catch (Exception e) { + return LanceExceptionMapper.toRESTResponse(namespaceId, e); } } } diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java new file mode 100644 index 00000000000..10f7399c40a --- /dev/null +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.service.rest; + +import com.lancedb.lance.namespace.model.ListTablesResponse; +import javax.inject.Inject; +import javax.ws.rs.Consumes; +import javax.ws.rs.DefaultValue; +import javax.ws.rs.Encoded; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.apache.gravitino.lance.common.ops.NamespaceWrapper; +import org.apache.gravitino.lance.service.LanceExceptionMapper; + +@Path("/v1/namespace/{id}/table") +@Consumes(MediaType.APPLICATION_JSON) +@Produces(MediaType.APPLICATION_JSON) +public class LanceTableOperations { + + private final NamespaceWrapper lanceNamespace; + + @Inject + public LanceTableOperations(NamespaceWrapper lanceNamespace) { + this.lanceNamespace = lanceNamespace; + } + + @GET + @Path("/list") + public Response listTables( + @Encoded @PathParam("id") String namespaceId, + @DefaultValue("$") @QueryParam("delimiter") String delimiter, + @QueryParam("page_token") String pageToken, + @QueryParam("limit") Integer limit) { + try { + ListTablesResponse response = + lanceNamespace.asTableOps().listTables(namespaceId, delimiter, pageToken, limit); + return Response.ok(response).build(); + } catch (Exception e) { + return 
LanceExceptionMapper.toRESTResponse(namespaceId, e); + } + } +} From f6d49d95e58947ec27fef0ea5bc14850637421ee Mon Sep 17 00:00:00 2001 From: Junda Yang Date: Thu, 23 Oct 2025 23:54:43 -0700 Subject: [PATCH 06/43] [#8891] feat(lance-rest-server): Add standalone and auxiliary mode support for Lance REST service (#8903) ### What changes were proposed in this pull request? 1. Created service discovery file (META-INF/services/org.apache.gravitino.auxiliary.GravitinoAuxiliaryService) 2. Added unit tests (TestLanceConfig.java) ### Why are the changes needed? Fix: #8891 To enable flexible deployment of Lance REST service: Standalone mode (independent JVM): - Lance needs to connect to Gravitino server via HTTP to access catalog/schema/table metadata - GRAVITINO_URI and GRAVITINO_METALAKE configs specify the connection parameters - Use case: Deploy Lance REST separately for isolation or scaling Auxiliary mode (embedded in Gravitino server): - Lance runs in the same JVM as Gravitino, enabling direct memory access (zero overhead) - META-INF/services file enables automatic service discovery via Java ServiceLoader - Use case: Simplified deployment with shared resources and unified monitoring ### Does this PR introduce _any_ user-facing change? Yes - New configuration properties For Standalone Mode: ``` # Required: Gravitino server URI gravitino.lance-rest.gravitino-uri=http://gravitino-server:8090 # Required: Metalake name gravitino.lance-rest.gravitino-metalake=production ``` For Auxiliary Mode: ``` # Enable Lance REST as auxiliary service gravitino.auxService.names=iceberg-rest,lance-rest # Classpath for Lance REST JAR gravitino.lance-rest.classpath=lance/lance-rest-server/build/libs ``` Deployment modes supported: Standalone: Lance REST runs independently, connects to Gravitino via HTTP Auxiliary: Lance REST runs embedded in Gravitino, uses direct memory access No breaking changes - all changes are additive and backward compatible. ### How was this patch tested? 1. 
unit test 2. manual test **Standalone mode test:** ``` junda@java(junda.devpod-us-or) ~/gravitino/distribution/package/bin % ./gravitino.sh start Gravitino Server is running[PID:3907790] junda@java(junda.devpod-us-or) ~ % curl http://localhost:8090/api/version {"code":0,"version":{"version":"1.1.0-SNAPSHOT","compileDate":"23/10/2025 21:03:17","gitCommit":"35bd819975badea2493b4dbd9d0de68425229945"}}% junda@java(junda.devpod-us-or) ~/gravitino/distribution/package/bin % ./gravitino-lance-rest-server.sh start GravitinoLanceRESTServer is running[PID:3930096] junda@java(junda.devpod-us-or) ~ % curl "http://localhost:9101/lance/v1/namespace/root/list" {"id":"root","delimiter":"$","namespaces":["default"]}% ``` **Auxiliary mode test:** ``` junda@java(junda.devpod-us-or) ~/gravitino/distribution/package % cd /home/user/gravitino && cat >> distribution/package/conf/gravitino.conf << 'EOF' # ===== Lance REST Auxiliary Service ===== gravitino.auxService.names = iceberg-rest,lance-rest gravitino.lance-rest.classpath = /home/user/gravitino/lance/lance-rest-server/build/libs gravitino.lance-rest.catalog-name = lance_catalog EOF junda@java(junda.devpod-us-or) ~/gravitino/distribution/package % ./bin/gravitino.sh start Gravitino Server is running[PID:274396] junda@java(junda.devpod-us-or) ~/gravitino/distribution/package % tail -50 logs/gravitino-server.log | grep -i lance | grep -i started 2025-10-23 21:34:41.495 INFO [main] [org.apache.gravitino.server.web.JettyServer.start(JettyServer.java:202)] - lance-rest web server started on host 0.0.0.0 port 9101. 
2025-10-23 21:34:41.495 INFO [main] [org.apache.gravitino.lance.LanceRESTService.serviceStart(LanceRESTService.java:77)] - Lance REST service started ``` --- lance/lance-common/build.gradle.kts | 1 + .../lance/common/config/LanceConfig.java | 8 ++ .../lance/common/config/TestLanceConfig.java | 114 ++++++++++++++++++ ...vitino.auxiliary.GravitinoAuxiliaryService | 19 +++ 4 files changed, 142 insertions(+) create mode 100644 lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java create mode 100644 lance/lance-rest-server/src/main/resources/META-INF/services/org.apache.gravitino.auxiliary.GravitinoAuxiliaryService diff --git a/lance/lance-common/build.gradle.kts b/lance/lance-common/build.gradle.kts index 4e91dd6c5ce..27057950f3a 100644 --- a/lance/lance-common/build.gradle.kts +++ b/lance/lance-common/build.gradle.kts @@ -42,6 +42,7 @@ dependencies { implementation(libs.lance.namespace.core) implementation(libs.slf4j.api) + testImplementation(project(":server-common")) testImplementation(libs.junit.jupiter.api) testRuntimeOnly(libs.junit.jupiter.engine) } diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java index dfe863953ee..b6614c87ee3 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java @@ -80,6 +80,14 @@ public String getCatalogName() { return get(CATALOG_NAME); } + public String getNamespaceUri() { + return get(NAMESPACE_URI); + } + + public String getGravitinoMetalake() { + return get(METALAKE_NAME); + } + @Override public Map getOverwriteDefaultConfig() { return ImmutableMap.of( diff --git a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java 
b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java new file mode 100644 index 00000000000..176634f3090 --- /dev/null +++ b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.lance.common.config; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.gravitino.server.web.JettyServerConfig; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestLanceConfig { + @Test + public void testLoadLanceConfig() { + Map properties = + ImmutableMap.of("gravitino.lance-rest.catalog-name", "test_catalog"); + + LanceConfig lanceConfig = new LanceConfig(); + lanceConfig.loadFromMap(properties, k -> k.startsWith("gravitino.lance-rest.")); + Assertions.assertEquals("test_catalog", lanceConfig.getCatalogName()); + + LanceConfig lanceConfig2 = new LanceConfig(properties); + Assertions.assertEquals("test_catalog", lanceConfig2.getCatalogName()); + } + + @Test + public void testDefaultCatalogName() { + // Test default catalog name when not specified + Map properties = ImmutableMap.of(); + LanceConfig lanceConfig = new LanceConfig(properties); + Assertions.assertEquals("default", lanceConfig.getCatalogName()); + } + + @Test + public void testLanceHttpPort() { + Map properties = ImmutableMap.of(); + LanceConfig lanceConfig = new LanceConfig(properties); + JettyServerConfig jettyServerConfig = JettyServerConfig.fromConfig(lanceConfig); + Assertions.assertEquals( + LanceConfig.DEFAULT_LANCE_REST_SERVICE_HTTP_PORT, jettyServerConfig.getHttpPort()); + Assertions.assertEquals( + LanceConfig.DEFAULT_LANCE_REST_SERVICE_HTTPS_PORT, jettyServerConfig.getHttpsPort()); + + properties = + ImmutableMap.of( + JettyServerConfig.WEBSERVER_HTTP_PORT.getKey(), + "9101", + JettyServerConfig.WEBSERVER_HTTPS_PORT.getKey(), + "9533"); + lanceConfig = new LanceConfig(properties); + jettyServerConfig = JettyServerConfig.fromConfig(lanceConfig); + Assertions.assertEquals(9101, jettyServerConfig.getHttpPort()); + Assertions.assertEquals(9533, jettyServerConfig.getHttpsPort()); + } + + @Test + public void testGravitinoUriAndMetalake() { + // Test default values + Map 
properties = ImmutableMap.of(); + LanceConfig lanceConfig = new LanceConfig(properties); + Assertions.assertEquals("http://localhost:8090", lanceConfig.getNamespaceUri()); + Assertions.assertNull(lanceConfig.getGravitinoMetalake()); // No default, must be configured + + // Test custom values + properties = + ImmutableMap.of( + LanceConfig.NAMESPACE_URI.getKey(), + "http://gravitino-server:8090", + LanceConfig.METALAKE_NAME.getKey(), + "production"); + lanceConfig = new LanceConfig(properties); + Assertions.assertEquals("http://gravitino-server:8090", lanceConfig.getNamespaceUri()); + Assertions.assertEquals("production", lanceConfig.getGravitinoMetalake()); + } + + @Test + public void testCompleteConfiguration() { + // Test all configurations together for auxiliary mode + Map properties = + ImmutableMap.builder() + .put(LanceConfig.CATALOG_NAME.getKey(), "lance_catalog") + .put(LanceConfig.NAMESPACE_URI.getKey(), "http://gravitino-prod:8090") + .put(LanceConfig.METALAKE_NAME.getKey(), "production") + .put(LanceConfig.NAMESPACE_BACKEND.getKey(), "gravitino") + .put(JettyServerConfig.WEBSERVER_HTTP_PORT.getKey(), "9101") + .build(); + + LanceConfig lanceConfig = new LanceConfig(properties); + + // Verify all config values + Assertions.assertEquals("lance_catalog", lanceConfig.getCatalogName()); + Assertions.assertEquals("http://gravitino-prod:8090", lanceConfig.getNamespaceUri()); + Assertions.assertEquals("production", lanceConfig.getGravitinoMetalake()); + + JettyServerConfig jettyConfig = JettyServerConfig.fromConfig(lanceConfig); + Assertions.assertEquals(9101, jettyConfig.getHttpPort()); + } +} diff --git a/lance/lance-rest-server/src/main/resources/META-INF/services/org.apache.gravitino.auxiliary.GravitinoAuxiliaryService b/lance/lance-rest-server/src/main/resources/META-INF/services/org.apache.gravitino.auxiliary.GravitinoAuxiliaryService new file mode 100644 index 00000000000..97c3f356dff --- /dev/null +++ 
b/lance/lance-rest-server/src/main/resources/META-INF/services/org.apache.gravitino.auxiliary.GravitinoAuxiliaryService @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +org.apache.gravitino.lance.LanceRESTService From 618fc6042d6d76fe5c5111790290f52e73cd403d Mon Sep 17 00:00:00 2001 From: Junda Yang Date: Tue, 21 Oct 2025 02:37:04 -0700 Subject: [PATCH 07/43] [#8861] Improve(test): fix testPolicyAndTagCacheWeigher (#8862) ### What changes were proposed in this pull request? Bump timeout from 5 seconds to 10 seconds ### Why are the changes needed? ``` TestCacheConfig > testPolicyAndTagCacheWeigher() FAILED org.awaitility.core.ConditionTimeoutException: Condition with Lambda expression in org.apache.gravitino.cache.TestCacheConfig was not fulfilled within 5 seconds. 
at app//org.awaitility.core.ConditionAwaiter.await(ConditionAwaiter.java:167) at app//org.awaitility.core.CallableCondition.await(CallableCondition.java:78) at app//org.awaitility.core.CallableCondition.await(CallableCondition.java:26) at app//org.awaitility.core.ConditionFactory.until(ConditionFactory.java:1006) at app//org.awaitility.core.ConditionFactory.until(ConditionFactory.java:975) at app//org.apache.gravitino.cache.TestCacheConfig.testPolicyAndTagCacheWeigher(TestCacheConfig.java:162) ``` see failure: https://github.com/apache/gravitino/actions/runs/18668130387/job/53223578211?pr=8859 Fix: #8861 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Updated unit tests --- .../test/java/org/apache/gravitino/cache/TestCacheConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java b/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java index 3944d9a282b..675305ef91c 100644 --- a/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java +++ b/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java @@ -157,7 +157,7 @@ void testPolicyAndTagCacheWeigher() throws InterruptedException { // There should no tag entities in the cache, because the weight of each tag entity is 100 that // is higher than the maximum weight of the fileset entity which is 200. Awaitility.await() - .atMost(Duration.ofSeconds(5)) + .atMost(Duration.ofSeconds(10)) .pollInterval(Duration.ofMillis(10)) .until( () -> From 6c66f9d496b386ffac130508606dc6d39d739740 Mon Sep 17 00:00:00 2001 From: Junda Yang Date: Wed, 22 Oct 2025 00:17:16 -0700 Subject: [PATCH 08/43] [#8871]Improve(core): fix EntityCacheWeigher and testPolicyAndTagCacheWeigher (#8871) ### What changes were proposed in this pull request? 
Fix EntityCacheWeigher priorities and update cache eviction test This commit corrects the entity weight configuration in EntityCacheWeigher and updates the corresponding test to validate the new behavior. The previous weight configuration was inverted: - Schema: weight=500 (incorrectly high, causing unwanted evictions) - Tag/Policy: weight=100 (incorrectly low, preventing desired evictions) This violated the Caffeine cache principle where higher weights lead to higher eviction priority. ### Why are the changes needed? Correct EntityCacheWeigher and fix the flaky test of testPolicyAndTagCacheWeigher Fix: #8861 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test updated --- .../gravitino/cache/EntityCacheWeigher.java | 25 ++++--- .../gravitino/cache/TestCacheConfig.java | 75 ++++++++++++++----- 2 files changed, 73 insertions(+), 27 deletions(-) diff --git a/core/src/main/java/org/apache/gravitino/cache/EntityCacheWeigher.java b/core/src/main/java/org/apache/gravitino/cache/EntityCacheWeigher.java index edc3ca6b9b4..f9f1212dd70 100644 --- a/core/src/main/java/org/apache/gravitino/cache/EntityCacheWeigher.java +++ b/core/src/main/java/org/apache/gravitino/cache/EntityCacheWeigher.java @@ -32,27 +32,34 @@ import org.slf4j.LoggerFactory; /** - * A {@link Weigher} implementation that calculates the weight of an entity based on its type. The - * weight is calculated as follows: + * A {@link Weigher} implementation that calculates the weight of an entity based on its type. In + * Caffeine's weight-based eviction, higher weights make entities MORE likely to be evicted (all + * else being equal), as the cache prefers to retain lighter entries within the maximum weight + * limit. + * + *

    Weight assignments (lower weight = higher retention priority): * *

      *
    • Metalake: 0, which means that it will never be evicted from the cache unless timeout occurs * or manually cleared. *
    • Catalog: 0, which means that it will never be evicted from the cache unless timeout occurs * or manually cleared. - *
    • Schema: 500 - *
    • Tag: 100 - *
    • Policy: 100 - *
    • Other: 200 + *
    • Schema: 100 (lowest weight, highest retention priority) + *
    • Other (e.g., Fileset): 200 (medium weight, medium retention priority) + *
    • Tag: 500 (highest weight, lowest retention priority) + *
    • Policy: 500 (highest weight, lowest retention priority) *
    + * + *

    Note: Caffeine's W-TinyLFU algorithm considers both access frequency and weight. Frequently + * accessed heavier entries may still be retained over infrequently accessed lighter entries. */ public class EntityCacheWeigher implements Weigher> { public static final int METALAKE_WEIGHT = 0; // 0 means never evict public static final int CATALOG_WEIGHT = 0; - public static final int SCHEMA_WEIGHT = 500; // higher weight means it will less likely be evicted + public static final int SCHEMA_WEIGHT = 100; // Lower weight = higher retention priority public static final int OTHER_WEIGHT = 200; - public static final int TAG_WEIGHT = 100; - public static final int POLICY_WEIGHT = 100; + public static final int TAG_WEIGHT = 500; + public static final int POLICY_WEIGHT = 500; private static final Logger LOG = LoggerFactory.getLogger(EntityCacheWeigher.class.getName()); private static final EntityCacheWeigher INSTANCE = new EntityCacheWeigher(); private static final Map ENTITY_WEIGHTS = diff --git a/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java b/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java index 675305ef91c..6c21b3c9c4a 100644 --- a/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java +++ b/core/src/test/java/org/apache/gravitino/cache/TestCacheConfig.java @@ -53,14 +53,14 @@ void testDefaultCacheConfig() { Assertions.assertTrue(config.get(Configs.CACHE_WEIGHER_ENABLED)); Assertions.assertEquals(10_000, config.get(Configs.CACHE_MAX_ENTRIES)); Assertions.assertEquals(3_600_000L, config.get(Configs.CACHE_EXPIRATION_TIME)); - Assertions.assertEquals(9_000_000L, EntityCacheWeigher.getMaxWeight()); + Assertions.assertEquals(24_200_000L, EntityCacheWeigher.getMaxWeight()); Assertions.assertEquals("caffeine", config.get(Configs.CACHE_IMPLEMENTATION)); } @Test void testPolicyAndTagCacheWeigher() throws InterruptedException { Caffeine builder = Caffeine.newBuilder(); - builder.maximumWeight(2000); + 
builder.maximumWeight(5000); builder.weigher(EntityCacheWeigher.getInstance()); Cache> cache = builder.build(); @@ -152,22 +152,60 @@ void testPolicyAndTagCacheWeigher() throws InterruptedException { List.of(fileset)); } + // Access filesets 5-14 twice to increase their frequency to 5 (insert + 4 gets) + for (int access = 0; access < 4; access++) { + for (int i = 5; i < 15; i++) { + String filesetName = "fileset" + i; + cache.getIfPresent( + EntityCacheRelationKey.of( + NameIdentifier.of(new String[] {"metalake1", "catalog1", "schema1", filesetName}), + Entity.EntityType.FILESET)); + } + } + Thread.sleep(1000); - // There should no tag entities in the cache, because the weight of each tag entity is 100 that - // is higher than the maximum weight of the fileset entity which is 200. - Awaitility.await() - .atMost(Duration.ofSeconds(10)) - .pollInterval(Duration.ofMillis(10)) - .until( - () -> - IntStream.of(0, 1, 2, 3) - .mapToObj(i -> NameIdentifierUtil.ofTag("metalake", "tag" + i)) - .allMatch( - tagNameIdent -> - cache.getIfPresent( - EntityCacheRelationKey.of(tagNameIdent, Entity.EntityType.TAG)) - == null)); + // Count how many filesets are still in cache + // Weight calculation: base(100) + filesets(15×200=3000) + tags(10×500=5000) = 8100 > 5000 limit + // Filesets 5-14 have freq=5, tags have freq=1. 
With frequency advantage + lighter weight, + // filesets should be strongly prioritized by Caffeine's W-TinyLFU + long remainingFilesets = + IntStream.range(5, 15) + .mapToObj(i -> "fileset" + i) + .filter( + filesetName -> + cache.getIfPresent( + EntityCacheRelationKey.of( + NameIdentifier.of( + new String[] {"metalake1", "catalog1", "schema1", filesetName}), + Entity.EntityType.FILESET)) + != null) + .count(); + + // Count how many tags are still in cache + long remainingTags = + IntStream.range(0, 10) + .mapToObj(i -> NameIdentifierUtil.ofTag("metalake", "tag" + i)) + .filter( + tagNameIdent -> + cache.getIfPresent( + EntityCacheRelationKey.of(tagNameIdent, Entity.EntityType.TAG)) + != null) + .count(); + + // Verify weight-based eviction: filesets (weight=200, freq=5) should be strongly + // prioritized over tags (weight=500, freq=1) due to both higher frequency and lighter weight + Assertions.assertTrue( + remainingFilesets + remainingTags < 20, + String.format( + "Expected significant eviction due to weight limit (max=5000). Found filesets=%d, tags=%d (total=%d/20)", + remainingFilesets, remainingTags, remainingFilesets + remainingTags)); + + Assertions.assertTrue( + remainingFilesets > remainingTags, + String.format( + "Expected filesets (weight=200, freq=5) to be prioritized over tags (weight=500, freq=1). Found filesets=%d, tags=%d", + remainingFilesets, remainingTags)); } @Test @@ -240,11 +278,12 @@ void testCaffeineCacheWithWeight() throws Exception { NameIdentifier.of("metalake1.catalog" + i), Entity.EntityType.CATALOG))); } - // Only some of the 100 schemas are still in the cache, to be exact, 5000 / 500 = 10 schemas. + // Only some of the 100 schemas are still in the cache. + // With new weights: schema=100, so approximately 5000 / 100 = 50 schemas fit. 
Awaitility.await() .atMost(Duration.ofSeconds(5)) .pollInterval(Duration.ofMillis(10)) - .until(() -> cache.asMap().size() == 10 + 3 + 5000 / 500); + .until(() -> cache.asMap().size() == 10 + 3 + 5000 / 100); } @Test From 059d27918c652e006d31522cb52ff260dfdf0076 Mon Sep 17 00:00:00 2001 From: Yuhui Date: Fri, 24 Oct 2025 17:57:32 +0800 Subject: [PATCH 09/43] [HOTFIX][#8830] fix (gvfs-fuse) : Disable the CI to build fuse-filesystem module (#8907) ### What changes were proposed in this pull request? Disable the CI to build fuse-filesystem module. ### Why are the changes needed? #8827 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI (cherry picked from commit 7c359976986d2f446681dcdcdae92828d4db6a9d) --- .github/workflows/gvfs-fuse-build-test.yml | 9 +++++---- clients/filesystem-fuse/Makefile | 4 +++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/gvfs-fuse-build-test.yml b/.github/workflows/gvfs-fuse-build-test.yml index 09965be35e1..a2f0fb8040e 100644 --- a/.github/workflows/gvfs-fuse-build-test.yml +++ b/.github/workflows/gvfs-fuse-build-test.yml @@ -2,10 +2,11 @@ name: Build gvfs-fuse and testing # Controls when the workflow will run on: - push: - branches: [ "main", "branch-*" ] - pull_request: - branches: [ "main", "branch-*" ] + # Temporarily disable + # push: + # branches: [ "main", "branch-*" ] + # pull_request: + # branches: [ "main", "branch-*" ] workflow_dispatch: concurrency: diff --git a/clients/filesystem-fuse/Makefile b/clients/filesystem-fuse/Makefile index bbfc5419f67..890e33244a2 100644 --- a/clients/filesystem-fuse/Makefile +++ b/clients/filesystem-fuse/Makefile @@ -54,7 +54,9 @@ install-taplo-cli: check-toml: install-taplo-cli taplo check -check: check-fmt check-clippy check-cargo-sort +# todo: Disable some checks due to build failures caused by Rust dependency library upgrades. 
+# check: check-fmt check-clippy check-cargo-sort check-toml cargo-machete +check: check-fmt check-cargo-sort doc-test: cargo test --no-fail-fast --doc --all-features --workspace From 67179e6c283b39c6d198fe7d5a73eb7e9ea6f0de Mon Sep 17 00:00:00 2001 From: Junda Yang Date: Fri, 24 Oct 2025 05:17:39 -0700 Subject: [PATCH 10/43] [#8909] feat(lance): Add metric systems to lance server (#8904) ### What changes were proposed in this pull request? 1. Added Lance REST server metrics constant (MetricsSource.java) 2. Added MapperConfig rule to transform Dropwizard metrics to Prometheus format 3. Integrated metrics collection in Lance REST service (LanceRESTService.java) 4. Enables automatic exposure of metrics at /metrics and /prometheus/metrics endpoints ### Why are the changes needed? Observability is needed for the new lance rest service. Fix: #8909 ### Does this PR introduce _any_ user-facing change? Yes - New metrics endpoints Standalone mode: - http://localhost:9101/metrics (Dropwizard JSON format) - http://localhost:9101/prometheus/metrics (Prometheus text format) Auxiliary mode: - Metrics exposed on all service ports (8090, 9001, 9101) showing unified metrics from all services ### How was this patch tested? 
Unit tests --------- Co-authored-by: Jerry Shao --- .../apache/gravitino/metrics/MetricsSystem.java | 4 ++++ .../gravitino/metrics/source/MetricsSource.java | 1 + .../metrics/TestExtractMetricNameAndLabel.java | 16 ++++++++++++++++ .../apache/gravitino/lance/LanceRESTService.java | 12 ++++++++++++ .../service/rest/LanceNamespaceOperations.java | 5 +++++ .../lance/service/rest/LanceTableOperations.java | 9 ++++++++- 6 files changed, 46 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/gravitino/metrics/MetricsSystem.java b/core/src/main/java/org/apache/gravitino/metrics/MetricsSystem.java index dde401bfd18..27ae051a7d2 100644 --- a/core/src/main/java/org/apache/gravitino/metrics/MetricsSystem.java +++ b/core/src/main/java/org/apache/gravitino/metrics/MetricsSystem.java @@ -203,6 +203,10 @@ static List getMetricNameAndLabelRules() { MetricsSource.ICEBERG_REST_SERVER_METRIC_NAME + ".*.*", MetricsSource.ICEBERG_REST_SERVER_METRIC_NAME + "_${1}", ImmutableMap.of("operation", "${0}")), + new MapperConfig( + MetricsSource.LANCE_REST_SERVER_METRIC_NAME + ".*.*", + MetricsSource.LANCE_REST_SERVER_METRIC_NAME + "_${1}", + ImmutableMap.of("operation", "${0}")), new MapperConfig( MetricsSource.GRAVITINO_SERVER_METRIC_NAME + ".*.*", MetricsSource.GRAVITINO_SERVER_METRIC_NAME + "_${1}", diff --git a/core/src/main/java/org/apache/gravitino/metrics/source/MetricsSource.java b/core/src/main/java/org/apache/gravitino/metrics/source/MetricsSource.java index cee2d6afe52..1d079f49aea 100644 --- a/core/src/main/java/org/apache/gravitino/metrics/source/MetricsSource.java +++ b/core/src/main/java/org/apache/gravitino/metrics/source/MetricsSource.java @@ -40,6 +40,7 @@ public abstract class MetricsSource { // metrics source name public static final String ICEBERG_REST_SERVER_METRIC_NAME = "iceberg-rest-server"; + public static final String LANCE_REST_SERVER_METRIC_NAME = "lance-rest-server"; public static final String GRAVITINO_SERVER_METRIC_NAME = "gravitino-server"; 
public static final String GRAVITINO_RELATIONAL_STORE_METRIC_NAME = "gravitino-relational-store"; public static final String GRAVITINO_CATALOG_METRIC_PREFIX = "gravitino-catalog"; diff --git a/core/src/test/java/org/apache/gravitino/metrics/TestExtractMetricNameAndLabel.java b/core/src/test/java/org/apache/gravitino/metrics/TestExtractMetricNameAndLabel.java index 4f3a1feba2d..c6a57f3c87f 100644 --- a/core/src/test/java/org/apache/gravitino/metrics/TestExtractMetricNameAndLabel.java +++ b/core/src/test/java/org/apache/gravitino/metrics/TestExtractMetricNameAndLabel.java @@ -62,6 +62,13 @@ void testMapperConfig() { + Collector.sanitizeMetricName(MetricNames.SERVER_IDLE_THREAD_NUM), ImmutableMap.of()); + checkResult( + MetricsSource.LANCE_REST_SERVER_METRIC_NAME + "." + MetricNames.SERVER_IDLE_THREAD_NUM, + Collector.sanitizeMetricName(MetricsSource.LANCE_REST_SERVER_METRIC_NAME) + + "_" + + Collector.sanitizeMetricName(MetricNames.SERVER_IDLE_THREAD_NUM), + ImmutableMap.of()); + checkResult( MetricsSource.ICEBERG_REST_SERVER_METRIC_NAME + ".update-table." @@ -71,6 +78,15 @@ void testMapperConfig() { + Collector.sanitizeMetricName(MetricNames.HTTP_PROCESS_DURATION), ImmutableMap.of("operation", "update-table")); + checkResult( + MetricsSource.LANCE_REST_SERVER_METRIC_NAME + + ".load-namespace." + + MetricNames.HTTP_PROCESS_DURATION, + Collector.sanitizeMetricName(MetricsSource.LANCE_REST_SERVER_METRIC_NAME) + + "_" + + Collector.sanitizeMetricName(MetricNames.HTTP_PROCESS_DURATION), + ImmutableMap.of("operation", "load-namespace")); + checkResult( MetricsSource.GRAVITINO_SERVER_METRIC_NAME + ".update-table." 
diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java index 123781262fe..d1409c8e12f 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java @@ -23,10 +23,14 @@ import java.lang.reflect.Constructor; import java.util.Map; import javax.servlet.Servlet; +import org.apache.gravitino.GravitinoEnv; import org.apache.gravitino.auxiliary.GravitinoAuxiliaryService; import org.apache.gravitino.lance.common.config.LanceConfig; import org.apache.gravitino.lance.common.ops.LanceNamespaceBackend; import org.apache.gravitino.lance.common.ops.NamespaceWrapper; +import org.apache.gravitino.metrics.MetricsSystem; +import org.apache.gravitino.metrics.source.MetricsSource; +import org.apache.gravitino.server.web.HttpServerMetricsSource; import org.apache.gravitino.server.web.JettyServer; import org.apache.gravitino.server.web.JettyServerConfig; import org.glassfish.hk2.utilities.binding.AbstractBinder; @@ -58,6 +62,8 @@ public void serviceInit(Map properties) { JettyServerConfig serverConfig = JettyServerConfig.fromConfig(lanceConfig); server = new JettyServer(); + // Get MetricsSystem directly from GravitinoEnv for zero-overhead access + MetricsSystem metricsSystem = GravitinoEnv.getInstance().metricsSystem(); server.initialize(serverConfig, SERVICE_NAME, false); this.lanceNamespace = loadNamespaceImpl(lanceConfig); @@ -73,6 +79,12 @@ protected void configure() { } }); + // Register metrics with shared MetricsSystem + HttpServerMetricsSource httpServerMetricsSource = + new HttpServerMetricsSource( + MetricsSource.LANCE_REST_SERVER_METRIC_NAME, resourceConfig, server); + metricsSystem.register(httpServerMetricsSource); + Servlet container = new ServletContainer(resourceConfig); server.addServlet(container, 
LANCE_SPEC); server.addCustomFilters(LANCE_SPEC); diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java index 6c0477a51c8..2d07357f30c 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java @@ -20,6 +20,8 @@ import static org.apache.gravitino.lance.common.ops.NamespaceWrapper.NAMESPACE_DELIMITER_DEFAULT; +import com.codahale.metrics.annotation.ResponseMetered; +import com.codahale.metrics.annotation.Timed; import com.lancedb.lance.namespace.model.ListNamespacesResponse; import javax.inject.Inject; import javax.ws.rs.Consumes; @@ -34,6 +36,7 @@ import javax.ws.rs.core.Response; import org.apache.gravitino.lance.common.ops.NamespaceWrapper; import org.apache.gravitino.lance.service.LanceExceptionMapper; +import org.apache.gravitino.metrics.MetricNames; @Path("/v1/namespace") @Consumes(MediaType.APPLICATION_JSON) @@ -49,6 +52,8 @@ public LanceNamespaceOperations(NamespaceWrapper lanceNamespace) { @GET @Path("/{id}/list") + @Timed(name = "list-namespaces." 
+ MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "list-namespaces", absolute = true) public Response listNamespaces( @Encoded @PathParam("id") String namespaceId, @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter") String delimiter, diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java index 10f7399c40a..1f30d1b326b 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java @@ -18,6 +18,10 @@ */ package org.apache.gravitino.lance.service.rest; +import static org.apache.gravitino.lance.common.ops.NamespaceWrapper.NAMESPACE_DELIMITER_DEFAULT; + +import com.codahale.metrics.annotation.ResponseMetered; +import com.codahale.metrics.annotation.Timed; import com.lancedb.lance.namespace.model.ListTablesResponse; import javax.inject.Inject; import javax.ws.rs.Consumes; @@ -32,6 +36,7 @@ import javax.ws.rs.core.Response; import org.apache.gravitino.lance.common.ops.NamespaceWrapper; import org.apache.gravitino.lance.service.LanceExceptionMapper; +import org.apache.gravitino.metrics.MetricNames; @Path("/v1/namespace/{id}/table") @Consumes(MediaType.APPLICATION_JSON) @@ -47,9 +52,11 @@ public LanceTableOperations(NamespaceWrapper lanceNamespace) { @GET @Path("/list") + @Timed(name = "list-tables." 
+ MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "list-tables", absolute = true) public Response listTables( @Encoded @PathParam("id") String namespaceId, - @DefaultValue("$") @QueryParam("delimiter") String delimiter, + @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter") String delimiter, @QueryParam("page_token") String pageToken, @QueryParam("limit") Integer limit) { try { From d093824b7268aad1735ead114e5e760f28832fea Mon Sep 17 00:00:00 2001 From: Mini Yu Date: Fri, 24 Oct 2025 22:56:03 +0800 Subject: [PATCH 11/43] [#8838] feat(catalogs): Support create/load/list table operation for lance table. (#8879) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Add support create and load table operations for lance table. ### Why are the changes needed? It's a need. Fix: #8838 Fix: #8837 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? 
Currently, I have only tested it locally ```shell ➜ [/Users/yuqi/Downloads] curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \ -H "Content-Type: application/json" -d '{ "name": "lance_table14", "comment": "This is an example table", "columns": [ { "name": "id", "type": "integer", "comment": "id column comment", "nullable": false, "autoIncrement": true, "defaultValue": { "type": "literal", "dataType": "integer", "value": "-1" } } ], "indexes": [ { "indexType": "primary_key", "name": "PRIMARY", "fieldNames": [["id"]] } ], "properties": { "format": "lance", "location": "/tmp/lance_catalog/schema/lance_table14" } }' http://localhost:8090/api/metalakes/test/catalogs/lance_catalog/schemas/schema/tables {"code":0,"table":{"name":"lance_table14","comment":"This is an example table","columns":[{"name":"id","type":"integer","comment":"id column comment","nullable":false,"autoIncrement":true,"defaultValue":{"type":"literal","dataType":"integer","value":"-1"}}],"properties":{"format":"lance","location":"/tmp/lance_catalog/schema/lance_table14/"},"audit":{"creator":"anonymous","createTime":"2025-10-23T03:18:39.123151Z"},"distribution":{"strategy":"none","number":0,"funcArgs":[]},"sortOrders":[],"partitioning":[],"indexes":[{"indexType":"PRIMARY_KEY","name":"PRIMARY","fieldNames":[["id"]]}]}} ➜ [/Users/yuqi/Downloads] curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \ -H "Content-Type: application/json" http://localhost:8090/api/metalakes/test/catalogs/lance_catalog/schemas/schema/tables/lance_table14 {"code":0,"table":{"name":"lance_table14","comment":"This is an example table","columns":[{"name":"id","type":"integer","comment":"id column 
comment","nullable":false,"autoIncrement":false,"defaultValue":{"type":"literal","dataType":"integer","value":"-1"}}],"properties":{"format":"lance","location":"/tmp/lance_catalog/schema/lance_table14/"},"audit":{"creator":"anonymous","createTime":"2025-10-23T03:18:39.123151Z"},"distribution":{"strategy":"none","number":0,"funcArgs":[]},"sortOrders":[],"partitioning":[],"indexes":[{"indexType":"PRIMARY_KEY","name":"PRIMARY","fieldNames":[["id"]]}]}} ➜ [/Users/yuqi/Downloads] curl -X GET -H "Accept: application/vnd.gravitino.v1+json" \ -H "Content-Type: application/json" http://localhost:8090/api/metalakes/test/catalogs/lance_catalog/schemas/schema/tables {"code":0,"identifiers":[{"namespace":["test","lance_catalog","schema"],"name":"lance_table10"},{"namespace":["test","lance_catalog","schema"],"name":"lance_table11"},{"namespace":["test","lance_catalog","schema"],"name":"lance_table12"},{"namespace":["test","lance_catalog","schema"],"name":"lance_table13"},{"namespace":["test","lance_catalog","schema"],"name":"lance_table14"}]} ➜ [/Users/yuqi/Downloads] ``` And the lance location ```shell ➜ [/tmp/lance_catalog/schema] ls lance_table10 lance_table11 lance_table12 lance_table13 lance_table14 ➜ [/tmp/lance_catalog/schema] cd lance_table14 ➜ [/tmp/lance_catalog/schema/lance_table14] ls -al total 0 drwxr-xr-x@ 4 yuqi wheel 128 10 23 11:18 . drwxr-xr-x@ 7 yuqi wheel 224 10 23 11:18 .. drwxr-xr-x@ 3 yuqi wheel 96 10 23 11:18 _transactions drwxr-xr-x@ 3 yuqi wheel 96 10 23 11:18 _versions ➜ [/tmp/lance_catalog/schema/lance_table14] ls -al _versions total 8 drwxr-xr-x@ 3 yuqi wheel 96 10 23 11:18 . drwxr-xr-x@ 4 yuqi wheel 128 10 23 11:18 .. 
-rw-r--r--@ 1 yuqi wheel 225 10 23 11:18 1.manifest ➜ [/tmp/lance_catalog/schema/lance_table14] ``` --- api/build.gradle.kts | 2 + .../apache/gravitino/rel/GenericTable.java | 47 +++++ .../apache/gravitino/rel/indexes/Indexes.java | 87 ++++++++ .../org/apache/gravitino/rel/TestIndex.java | 57 ++++++ .../build.gradle.kts | 1 + .../GenericLakehouseCatalogOperations.java | 134 ++++++++++++- .../lakehouse/LakehouseCatalogOperations.java | 25 +++ .../lance/LanceCatalogOperations.java | 173 ++++++++++++++++ .../lance/LanceDataTypeConverter.java | 123 ++++++++++++ .../gravitino/config/ConfigConstants.java | 5 +- .../catalog/TableOperationDispatcher.java | 74 ++++++- .../connector/GenericLakehouseColumn.java | 56 ++++++ .../connector/GenericLakehouseTable.java | 86 ++++++++ .../gravitino/meta/GenericTableEntity.java | 186 ++++++++++++++++++ .../apache/gravitino/meta/TableEntity.java | 10 +- .../relational/mapper/TableVersionMapper.java | 36 ++++ .../TableVersionSQLProviderFactory.java | 62 ++++++ .../DefaultMapperPackageProvider.java | 4 +- .../base/TableMetaBaseSQLProvider.java | 41 ++-- .../base/TableVersionBaseSQLProvider.java | 79 ++++++++ .../TableVersionPostgreSQLProvider.java | 24 +++ .../storage/relational/po/TablePO.java | 46 +++++ .../relational/service/TableMetaService.java | 16 +- .../relational/utils/POConverters.java | 61 +++++- .../relational/utils/SessionUtils.java | 12 ++ scripts/h2/schema-1.1.0-h2.sql | 19 -- scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql | 36 ++-- scripts/mysql/schema-1.1.0-mysql.sql | 19 -- .../mysql/upgrade-1.0.0-to-1.1.0-mysql.sql | 36 ++-- .../postgresql/schema-1.1.0-postgresql.sql | 19 -- .../upgrade-1.0.0-to-1.1.0-postgresql.sql | 36 ++-- 31 files changed, 1466 insertions(+), 146 deletions(-) create mode 100644 api/src/main/java/org/apache/gravitino/rel/GenericTable.java create mode 100644 api/src/test/java/org/apache/gravitino/rel/TestIndex.java create mode 100644 
catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseCatalogOperations.java create mode 100644 catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java create mode 100644 catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java create mode 100644 core/src/main/java/org/apache/gravitino/connector/GenericLakehouseColumn.java create mode 100644 core/src/main/java/org/apache/gravitino/connector/GenericLakehouseTable.java create mode 100644 core/src/main/java/org/apache/gravitino/meta/GenericTableEntity.java create mode 100644 core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionMapper.java create mode 100644 core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionSQLProviderFactory.java create mode 100644 core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableVersionBaseSQLProvider.java create mode 100644 core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/TableVersionPostgreSQLProvider.java diff --git a/api/build.gradle.kts b/api/build.gradle.kts index b4399b13c0c..f0fe3ba5ee9 100644 --- a/api/build.gradle.kts +++ b/api/build.gradle.kts @@ -26,6 +26,8 @@ dependencies { implementation(libs.commons.lang3) implementation(libs.commons.collections4) implementation(libs.guava) + implementation(libs.jackson.annotations) + implementation(libs.jackson.databind) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.params) diff --git a/api/src/main/java/org/apache/gravitino/rel/GenericTable.java b/api/src/main/java/org/apache/gravitino/rel/GenericTable.java new file mode 100644 index 00000000000..4796421c53c --- /dev/null +++ b/api/src/main/java/org/apache/gravitino/rel/GenericTable.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + 
* or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.rel; + +/** A generic table interface that extends the Table interface. */ +public interface GenericTable extends Table { + + /** + * Formats the table as a string representation. + * + * @return the formatted string representation of the table + */ + String format(); + + /** + * Gets the location of the table. + * + * @return the location of the table + */ + String location(); + + /** + * Indicates whether the table is external. 
+ * + * @return true if the table is external, false otherwise + */ + default boolean external() { + return false; + } +} diff --git a/api/src/main/java/org/apache/gravitino/rel/indexes/Indexes.java b/api/src/main/java/org/apache/gravitino/rel/indexes/Indexes.java index ce10fd0a0fa..d1b1a1f5239 100644 --- a/api/src/main/java/org/apache/gravitino/rel/indexes/Indexes.java +++ b/api/src/main/java/org/apache/gravitino/rel/indexes/Indexes.java @@ -18,6 +18,22 @@ */ package org.apache.gravitino.rel.indexes; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import java.io.IOException; +import java.util.List; +import java.util.Locale; + /** Helper methods to create index to pass into Apache Gravitino. */ public class Indexes { @@ -73,10 +89,81 @@ public static Index of(Index.IndexType indexType, String name, String[][] fieldN .build(); } + /** Custom JSON serializer for Index objects. 
*/ + public static class IndexSerializer extends JsonSerializer { + @Override + public void serialize(Index value, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + gen.writeStartObject(); + gen.writeStringField("indexType", value.type().name().toUpperCase(Locale.ROOT)); + if (null != value.name()) { + gen.writeStringField("name", value.name()); + } + gen.writeFieldName("fieldNames"); + gen.writeObject(value.fieldNames()); + gen.writeEndObject(); + } + } + + /** Custom JSON deserializer for Index objects. */ + public static class IndexDeserializer extends JsonDeserializer { + + @Override + public Index deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { + JsonNode node = p.getCodec().readTree(p); + Preconditions.checkArgument( + node != null && !node.isNull() && node.isObject(), + "Index must be a valid JSON object, but found: %s", + node); + + IndexImpl.Builder builder = IndexImpl.builder(); + Preconditions.checkArgument( + node.has("indexType"), "Cannot parse index from missing type: %s", node); + String indexType = getString("indexType", node); + builder.withIndexType(Index.IndexType.valueOf(indexType.toUpperCase(Locale.ROOT))); + if (node.has("name")) { + builder.withName(getString("name", node)); + } + Preconditions.checkArgument( + node.has("fieldNames"), "Cannot parse index from missing field names: %s", node); + List fieldNames = Lists.newArrayList(); + node.get("fieldNames").forEach(field -> fieldNames.add(getStringArray((ArrayNode) field))); + builder.withFieldNames(fieldNames.toArray(new String[0][0])); + return builder.build(); + } + + private static String[] getStringArray(ArrayNode node) { + String[] array = new String[node.size()]; + for (int i = 0; i < node.size(); i++) { + array[i] = node.get(i).asText(); + } + return array; + } + + private static String getString(String property, JsonNode node) { + Preconditions.checkArgument(node.has(property), "Cannot parse missing string: %s", property); + 
JsonNode pNode = node.get(property); + return convertToString(property, pNode); + } + + private static String convertToString(String property, JsonNode pNode) { + Preconditions.checkArgument( + pNode != null && !pNode.isNull() && pNode.isTextual(), + "Cannot parse to a string value %s: %s", + property, + pNode); + return pNode.asText(); + } + } + /** The user side implementation of the index. */ + @JsonSerialize(using = IndexSerializer.class) + @JsonDeserialize(using = IndexDeserializer.class) public static final class IndexImpl implements Index { private final IndexType indexType; + private final String name; + private final String[][] fieldNames; /** diff --git a/api/src/test/java/org/apache/gravitino/rel/TestIndex.java b/api/src/test/java/org/apache/gravitino/rel/TestIndex.java new file mode 100644 index 00000000000..4a807fbb7be --- /dev/null +++ b/api/src/test/java/org/apache/gravitino/rel/TestIndex.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.rel; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.databind.cfg.EnumFeature; +import com.fasterxml.jackson.databind.json.JsonMapper; +import org.apache.gravitino.rel.indexes.Index; +import org.apache.gravitino.rel.indexes.Indexes; +import org.apache.gravitino.rel.indexes.Indexes.IndexImpl; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestIndex { + + @Test + void testIndexSerialization() throws JsonProcessingException { + String[][] fields = {{"column1"}, {"column2", "subcolumn"}}; + Index index = Indexes.unique("test_index", fields); + + JsonMapper jsonMapper = + JsonMapper.builder() + .configure(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS, false) + .configure(EnumFeature.WRITE_ENUMS_TO_LOWERCASE, true) + .enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS) + .disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES) + .configure(DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL, true) + .build(); + + String json = jsonMapper.writeValueAsString(index); + + Index deserializedIndex = jsonMapper.readValue(json, IndexImpl.class); + Assertions.assertEquals(index.type(), deserializedIndex.type()); + Assertions.assertEquals(index.name(), deserializedIndex.name()); + Assertions.assertArrayEquals(index.fieldNames(), deserializedIndex.fieldNames()); + } +} diff --git a/catalogs/catalog-generic-lakehouse/build.gradle.kts b/catalogs/catalog-generic-lakehouse/build.gradle.kts index fceac14304b..704dbda7e36 100644 --- a/catalogs/catalog-generic-lakehouse/build.gradle.kts +++ b/catalogs/catalog-generic-lakehouse/build.gradle.kts @@ -43,6 +43,7 @@ dependencies { implementation(libs.commons.lang3) implementation(libs.guava) implementation(libs.hadoop3.client.api) + 
implementation(libs.lance) annotationProcessor(libs.lombok) diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java index b626aabc161..acac35528e2 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java @@ -18,11 +18,17 @@ */ package org.apache.gravitino.catalog.lakehouse; +import static org.apache.gravitino.Entity.EntityType.TABLE; + import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Maps; +import java.io.IOException; +import java.util.List; import java.util.Map; import java.util.Optional; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Catalog; +import org.apache.gravitino.Entity; import org.apache.gravitino.EntityStore; import org.apache.gravitino.GravitinoEnv; import org.apache.gravitino.NameIdentifier; @@ -30,16 +36,20 @@ import org.apache.gravitino.Schema; import org.apache.gravitino.SchemaChange; import org.apache.gravitino.catalog.ManagedSchemaOperations; +import org.apache.gravitino.catalog.lakehouse.lance.LanceCatalogOperations; import org.apache.gravitino.connector.CatalogInfo; import org.apache.gravitino.connector.CatalogOperations; import org.apache.gravitino.connector.HasPropertyMetadata; import org.apache.gravitino.connector.SupportsSchemas; import org.apache.gravitino.exceptions.NoSuchCatalogException; +import org.apache.gravitino.exceptions.NoSuchEntityException; import org.apache.gravitino.exceptions.NoSuchSchemaException; import org.apache.gravitino.exceptions.NoSuchTableException; import org.apache.gravitino.exceptions.NonEmptySchemaException; import 
org.apache.gravitino.exceptions.SchemaAlreadyExistsException; import org.apache.gravitino.exceptions.TableAlreadyExistsException; +import org.apache.gravitino.meta.GenericTableEntity; +import org.apache.gravitino.meta.SchemaEntity; import org.apache.gravitino.rel.Column; import org.apache.gravitino.rel.Table; import org.apache.gravitino.rel.TableCatalog; @@ -61,6 +71,11 @@ public class GenericLakehouseCatalogOperations @SuppressWarnings("unused") // todo: remove this after implementing table operations private Optional catalogLakehouseDir; + private static final Map SUPPORTED_FORMATS = + Maps.newHashMap(); + + private CatalogInfo catalogInfo; + private HasPropertyMetadata propertiesMetadata; /** * Initializes the generic lakehouse catalog operations with the provided configuration. * @@ -141,7 +156,25 @@ public boolean dropSchema(NameIdentifier ident, boolean cascade) throws NonEmpty @Override public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaException { - throw new UnsupportedOperationException("Not implemented yet."); + EntityStore store = GravitinoEnv.getInstance().entityStore(); + NameIdentifier identifier = NameIdentifier.of(namespace.levels()); + try { + store.get(identifier, Entity.EntityType.SCHEMA, SchemaEntity.class); + } catch (NoSuchTableException e) { + throw new NoSuchEntityException(e, "Schema %s does not exist", namespace); + } catch (IOException ioe) { + throw new RuntimeException("Failed to get schema " + identifier); + } + + try { + List tableEntityList = + store.list(namespace, GenericTableEntity.class, TABLE); + return tableEntityList.stream() + .map(e -> NameIdentifier.of(namespace, e.name())) + .toArray(NameIdentifier[]::new); + } catch (IOException e) { + throw new RuntimeException("Failed to list tables under schema " + namespace, e); + } } @Override @@ -160,7 +193,66 @@ public Table createTable( SortOrder[] sortOrders, Index[] indexes) throws NoSuchSchemaException, TableAlreadyExistsException { - throw new 
UnsupportedOperationException("Not implemented yet."); + String format = properties.getOrDefault("format", "lance"); + String tableLocation = calculateTableLocation(ident, properties); + Map newProperties = Maps.newHashMap(properties); + newProperties.put("location", tableLocation); + + LakehouseCatalogOperations lakehouseCatalogOperations = + SUPPORTED_FORMATS.compute( + format, + (k, v) -> + v == null + ? createLakehouseCatalogOperations( + format, properties, catalogInfo, propertiesMetadata) + : v); + + return lakehouseCatalogOperations.createTable( + ident, columns, comment, newProperties, partitions, distribution, sortOrders, indexes); + } + + private String calculateTableLocation( + NameIdentifier tableIdent, Map tableProperties) { + String tableLocation = tableProperties.get("location"); + if (StringUtils.isNotBlank(tableLocation)) { + return ensureTrailingSlash(tableLocation); + } + + String schemaLocation; + try { + Schema schema = loadSchema(NameIdentifier.of(tableIdent.namespace().levels())); + schemaLocation = schema.properties().get("location"); + } catch (NoSuchSchemaException e) { + throw new RuntimeException( + String.format( + "Failed to load schema for table %s to determine default location.", tableIdent), + e); + } + + // If we do not set location in table properties, and schema location is set, use schema + // location + // as the base path. + if (StringUtils.isNotBlank(schemaLocation)) { + return ensureTrailingSlash(schemaLocation) + tableIdent.name() + SLASH; + } + + // If the schema location is not set, use catalog lakehouse dir as the base path. Or else, throw + // an exception. 
+ if (catalogLakehouseDir.isEmpty()) { + throw new RuntimeException( + String.format( + "No location specified for table %s, you need to set location either in catalog, schema, or table properties", + tableIdent)); + } + + String catalogLakehousePath = catalogLakehouseDir.get().toString(); + String[] nsLevels = tableIdent.namespace().levels(); + String schemaName = nsLevels[nsLevels.length - 1]; + return ensureTrailingSlash(catalogLakehousePath) + + schemaName + + SLASH + + tableIdent.name() + + SLASH; } @Override @@ -171,10 +263,46 @@ public Table alterTable(NameIdentifier ident, TableChange... changes) @Override public boolean dropTable(NameIdentifier ident) { - throw new UnsupportedOperationException("Not implemented yet."); + EntityStore store = GravitinoEnv.getInstance().entityStore(); + Namespace namespace = ident.namespace(); + try { + GenericTableEntity tableEntity = + store.get(ident, Entity.EntityType.TABLE, GenericTableEntity.class); + Map tableProperties = tableEntity.getProperties(); + String format = tableProperties.getOrDefault("format", "lance"); + LakehouseCatalogOperations lakehouseCatalogOperations = + SUPPORTED_FORMATS.compute( + format, + (k, v) -> + v == null + ? createLakehouseCatalogOperations( + format, tableProperties, catalogInfo, propertiesMetadata) + : v); + return lakehouseCatalogOperations.dropTable(ident); + } catch (IOException e) { + throw new RuntimeException("Failed to list tables under schema " + namespace, e); + } } private String ensureTrailingSlash(String path) { return path.endsWith(SLASH) ? 
path : path + SLASH; } + + private LakehouseCatalogOperations createLakehouseCatalogOperations( + String format, + Map properties, + CatalogInfo catalogInfo, + HasPropertyMetadata propertiesMetadata) { + LakehouseCatalogOperations operations; + switch (format.toLowerCase()) { + case "lance": + operations = new LanceCatalogOperations(); + break; + default: + throw new UnsupportedOperationException("Unsupported lakehouse format: " + format); + } + + operations.initialize(properties, catalogInfo, propertiesMetadata); + return operations; + } } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseCatalogOperations.java new file mode 100644 index 00000000000..66c7147626f --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseCatalogOperations.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.catalog.lakehouse; + +import org.apache.gravitino.connector.CatalogOperations; +import org.apache.gravitino.rel.TableCatalog; + +public interface LakehouseCatalogOperations extends CatalogOperations, TableCatalog {} diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java new file mode 100644 index 00000000000..3e1146b7ad9 --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.catalog.lakehouse.lance; + +import com.google.common.collect.ImmutableMap; +import com.lancedb.lance.Dataset; +import com.lancedb.lance.WriteParams; +import java.io.IOException; +import java.time.Instant; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Namespace; +import org.apache.gravitino.catalog.lakehouse.LakehouseCatalogOperations; +import org.apache.gravitino.connector.CatalogInfo; +import org.apache.gravitino.connector.GenericLakehouseTable; +import org.apache.gravitino.connector.HasPropertyMetadata; +import org.apache.gravitino.exceptions.NoSuchSchemaException; +import org.apache.gravitino.exceptions.NoSuchTableException; +import org.apache.gravitino.exceptions.TableAlreadyExistsException; +import org.apache.gravitino.meta.AuditInfo; +import org.apache.gravitino.rel.Column; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.rel.TableChange; +import org.apache.gravitino.rel.expressions.distributions.Distribution; +import org.apache.gravitino.rel.expressions.sorts.SortOrder; +import org.apache.gravitino.rel.expressions.transforms.Transform; +import org.apache.gravitino.rel.indexes.Index; +import org.apache.gravitino.utils.PrincipalUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class LanceCatalogOperations implements LakehouseCatalogOperations { + + private Map lancePropertiesMap; + + @Override + public void initialize( + Map config, CatalogInfo info, HasPropertyMetadata propertiesMetadata) + throws RuntimeException { + lancePropertiesMap = ImmutableMap.copyOf(config); + } + + @Override + public void testConnection( + NameIdentifier catalogIdent, + 
Catalog.Type type, + String provider, + String comment, + Map properties) + throws Exception {} + + @Override + public void close() throws IOException {} + + @Override + public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaException { + return new NameIdentifier[0]; + } + + @Override + public Table loadTable(NameIdentifier ident) throws NoSuchTableException { + // Should not come here. + return null; + } + + @Override + public Table createTable( + NameIdentifier ident, + Column[] columns, + String comment, + Map properties, + Transform[] partitions, + Distribution distribution, + SortOrder[] sortOrders, + Index[] indexes) + throws NoSuchSchemaException, TableAlreadyExistsException { + // Ignore partitions, distributions, sortOrders, and indexes for Lance tables; + String location = properties.get("location"); + try (Dataset dataset = + Dataset.create( + new RootAllocator(), + location, + convertColumnsToSchema(columns), + new WriteParams.Builder().build())) { + GenericLakehouseTable.Builder builder = GenericLakehouseTable.builder(); + return builder + .withName(ident.name()) + .withColumns(columns) + .withComment(comment) + .withProperties(properties) + .withDistribution(distribution) + .withIndexes(indexes) + .withAuditInfo( + AuditInfo.builder() + .withCreator(PrincipalUtils.getCurrentUserName()) + .withCreateTime(Instant.now()) + .build()) + .withPartitioning(partitions) + .withSortOrders(sortOrders) + .withFormat("lance") + .build(); + } + } + + private org.apache.arrow.vector.types.pojo.Schema convertColumnsToSchema(Column[] columns) { + LanceDataTypeConverter converter = new LanceDataTypeConverter(); + List fields = + Arrays.stream(columns) + .map( + col -> { + boolean nullable = col.nullable(); + if (nullable) { + return new org.apache.arrow.vector.types.pojo.Field( + col.name(), + org.apache.arrow.vector.types.pojo.FieldType.nullable( + converter.fromGravitino(col.dataType())), + null); + } + + // not nullable + return new 
org.apache.arrow.vector.types.pojo.Field( + col.name(), + org.apache.arrow.vector.types.pojo.FieldType.notNullable( + converter.fromGravitino(col.dataType())), + null); + }) + .collect(Collectors.toList()); + return new org.apache.arrow.vector.types.pojo.Schema(fields); + } + + @Override + public Table alterTable(NameIdentifier ident, TableChange... changes) + throws NoSuchTableException, IllegalArgumentException { + // Use another PRs to implement alter table for Lance tables + return null; + } + + @Override + public boolean dropTable(NameIdentifier ident) { + try { + String location = lancePropertiesMap.get("location"); + // Remove the directory on storage + FileSystem fs = FileSystem.get(new Configuration()); + return fs.delete(new Path(location), true); + } catch (IOException e) { + throw new RuntimeException("Failed to drop Lance table: " + ident.name(), e); + } + } +} diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java new file mode 100644 index 00000000000..117863659e1 --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.catalog.lakehouse.lance; + +import org.apache.arrow.vector.types.DateUnit; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.Bool; +import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; +import org.apache.arrow.vector.types.pojo.ArrowType.Int; +import org.apache.gravitino.connector.DataTypeConverter; +import org.apache.gravitino.rel.types.Type; +import org.apache.gravitino.rel.types.Types; +import org.apache.gravitino.rel.types.Types.FixedType; + +public class LanceDataTypeConverter implements DataTypeConverter { + + @Override + public ArrowType fromGravitino(Type type) { + switch (type.name()) { + case BOOLEAN: + return Bool.INSTANCE; + case BYTE: + return new Int(8, true); + case SHORT: + return new Int(16, true); + case INTEGER: + return new Int(32, true); + case LONG: + return new Int(64, true); + case FLOAT: + return new FloatingPoint(FloatingPointPrecision.SINGLE); + case DOUBLE: + return new FloatingPoint(FloatingPointPrecision.DOUBLE); + case DECIMAL: + // Lance uses FIXED_SIZE_BINARY for decimal types + return new ArrowType.FixedSizeBinary(16); // assuming 16 bytes for decimal + case DATE: + return new ArrowType.Date(DateUnit.DAY); + case TIME: + return new ArrowType.Time(TimeUnit.MILLISECOND, 32); + case TIMESTAMP: + return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); + case VARCHAR: + case STRING: + return new 
ArrowType.Utf8(); + case FIXED: + FixedType fixedType = (FixedType) type; + return new ArrowType.FixedSizeBinary(fixedType.length()); + case BINARY: + return new ArrowType.Binary(); + default: + throw new UnsupportedOperationException("Unsupported Gravitino type: " + type.name()); + } + } + + @Override + public Type toGravitino(ArrowType arrowType) { + if (arrowType instanceof Bool) { + return Types.BooleanType.get(); + } else if (arrowType instanceof Int intType) { + switch (intType.getBitWidth()) { + case 8 -> { + return Types.ByteType.get(); + } + case 16 -> { + return Types.ShortType.get(); + } + case 32 -> { + return Types.IntegerType.get(); + } + case 64 -> { + return Types.LongType.get(); + } + default -> throw new UnsupportedOperationException( + "Unsupported Int bit width: " + intType.getBitWidth()); + } + } else if (arrowType instanceof FloatingPoint floatingPoint) { + switch (floatingPoint.getPrecision()) { + case SINGLE: + return Types.FloatType.get(); + case DOUBLE: + return Types.DoubleType.get(); + default: + throw new UnsupportedOperationException( + "Unsupported FloatingPoint precision: " + floatingPoint.getPrecision()); + } + } else if (arrowType instanceof ArrowType.FixedSizeBinary) { + ArrowType.FixedSizeBinary fixedSizeBinary = (ArrowType.FixedSizeBinary) arrowType; + return Types.FixedType.of(fixedSizeBinary.getByteWidth()); + } else if (arrowType instanceof ArrowType.Date) { + return Types.DateType.get(); + } else if (arrowType instanceof ArrowType.Time) { + return Types.TimeType.get(); + } else if (arrowType instanceof ArrowType.Timestamp) { + return Types.TimestampType.withoutTimeZone(); + } else if (arrowType instanceof ArrowType.Utf8) { + return Types.StringType.get(); + } else if (arrowType instanceof ArrowType.Binary) { + return Types.BinaryType.get(); + } else { + throw new UnsupportedOperationException("Unsupported Arrow type: " + arrowType); + } + } +} diff --git a/common/src/main/java/org/apache/gravitino/config/ConfigConstants.java 
b/common/src/main/java/org/apache/gravitino/config/ConfigConstants.java index 17c08ac0e9f..0ef761a6ca0 100644 --- a/common/src/main/java/org/apache/gravitino/config/ConfigConstants.java +++ b/common/src/main/java/org/apache/gravitino/config/ConfigConstants.java @@ -80,6 +80,9 @@ private ConfigConstants() {} /** The version number for the 1.0.0 release. */ public static final String VERSION_1_0_0 = "1.0.0"; + /** The version number for the 1.1.0 release. */ + public static final String VERSION_1_1_0 = "1.1.0"; + /** The current version of backend storage initialization script. */ - public static final String CURRENT_SCRIPT_VERSION = VERSION_1_0_0; + public static final String CURRENT_SCRIPT_VERSION = VERSION_1_1_0; } diff --git a/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java b/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java index a777f2a5118..1cbab0d6ed3 100644 --- a/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java +++ b/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java @@ -27,6 +27,7 @@ import com.google.common.base.Objects; import com.google.common.collect.Lists; +import java.io.IOException; import java.time.Instant; import java.util.Arrays; import java.util.Collections; @@ -36,6 +37,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.commons.lang3.tuple.Pair; +import org.apache.gravitino.Catalog; import org.apache.gravitino.EntityAlreadyExistsException; import org.apache.gravitino.EntityStore; import org.apache.gravitino.GravitinoEnv; @@ -52,8 +54,10 @@ import org.apache.gravitino.lock.TreeLockUtils; import org.apache.gravitino.meta.AuditInfo; import org.apache.gravitino.meta.ColumnEntity; +import org.apache.gravitino.meta.GenericTableEntity; import org.apache.gravitino.meta.TableEntity; import org.apache.gravitino.rel.Column; +import org.apache.gravitino.rel.GenericTable; import 
org.apache.gravitino.rel.Table; import org.apache.gravitino.rel.TableChange; import org.apache.gravitino.rel.expressions.distributions.Distribution; @@ -487,6 +491,19 @@ private EntityCombinedTable importTable(NameIdentifier identifier) { } private EntityCombinedTable internalLoadTable(NameIdentifier ident) { + NameIdentifier catalogIdent = getCatalogIdentifier(ident); + if (isGenericLakehouseCatalog(catalogIdent)) { + try { + GenericTableEntity tableEntity = store.get(ident, TABLE, GenericTableEntity.class); + if (tableEntity != null) { + GenericTable genericTable = tableEntity.toGenericTable(); + return EntityCombinedTable.of(genericTable).withImported(true); + } + } catch (IOException ioe) { + throw new RuntimeException("Failed to load table entity " + ident, ioe); + } + } + NameIdentifier catalogIdentifier = getCatalogIdentifier(ident); Table table = doWithCatalog( @@ -597,18 +614,46 @@ private Table internalCreateTable( .mapToObj(i -> ColumnEntity.toColumnEntity(columns[i], i, idGenerator.nextId(), audit)) .collect(Collectors.toList()); - TableEntity tableEntity = - TableEntity.builder() - .withId(uid) - .withName(ident.name()) - .withNamespace(ident.namespace()) - .withColumns(columnEntityList) - .withAuditInfo(audit) - .build(); + TableEntity tableEntity; + if (isGenericLakehouseCatalog(catalogIdent)) { + // For generic lakehouse catalog, we only create the table entity with basic info. 
+ GenericTable genericTable = (GenericTable) table; + tableEntity = + GenericTableEntity.getBuilder() + .withId(uid) + .withName(ident.name()) + .withNamespace(ident.namespace()) + .withFormat(genericTable.format()) + .withAuditInfo(audit) + .withColumns(columnEntityList) + .withIndexes(table.index()) + .withDistribution(table.distribution()) + .withFormat(genericTable.format()) + .withPartitions(table.partitioning()) + .withSortOrder(table.sortOrder()) + .withProperties(genericTable.properties()) + .withComment(genericTable.comment()) + .build(); + } else { + tableEntity = + TableEntity.builder() + .withId(uid) + .withName(ident.name()) + .withNamespace(ident.namespace()) + .withColumns(columnEntityList) + .withAuditInfo(audit) + .build(); + } try { store.put(tableEntity, true /* overwrite */); } catch (Exception e) { + if (isGenericLakehouseCatalog(catalogIdent)) { + // Drop table + doWithCatalog( + catalogIdent, c -> c.doWithTableOps(t -> t.dropTable(ident)), RuntimeException.class); + } + LOG.error(FormattedErrorMessages.STORE_OP_FAILURE, "put", ident, e); return EntityCombinedTable.of(table) .withHiddenProperties( @@ -616,6 +661,7 @@ private Table internalCreateTable( catalogIdent, HasPropertyMetadata::tablePropertiesMetadata, table.properties())); } + // For managed table, we can use table entity to indicate the table is created successfully. 
return EntityCombinedTable.of(table, tableEntity) .withHiddenProperties( getHiddenPropertyNames( @@ -630,6 +676,18 @@ private List toColumnEntities(Column[] columns, AuditInfo audit) { .collect(Collectors.toList()); } + private boolean isGenericLakehouseCatalog(NameIdentifier catalogIdent) { + CatalogManager catalogManager = GravitinoEnv.getInstance().catalogManager(); + try { + Catalog catalog = catalogManager.loadCatalog(catalogIdent); + return catalog.type() == Catalog.Type.RELATIONAL + && catalog.provider().equals("generic-lakehouse"); + } catch (NoSuchEntityException e) { + LOG.warn("Catalog not found: {}", catalogIdent, e); + return false; + } + } + private boolean isSameColumn(Column left, int columnPosition, ColumnEntity right) { return Objects.equal(left.name(), right.name()) && columnPosition == right.position() diff --git a/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseColumn.java b/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseColumn.java new file mode 100644 index 00000000000..b84b2652566 --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseColumn.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.connector; + +import org.apache.gravitino.tag.SupportsTags; + +public class GenericLakehouseColumn extends BaseColumn { + @Override + public SupportsTags supportsTags() { + return super.supportsTags(); + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder extends BaseColumnBuilder { + + /** Creates a new instance of {@link Builder}. */ + private Builder() {} + + /** + * Internal method to build a HiveColumn instance using the provided values. + * + * @return A new HiveColumn instance with the configured values. + */ + @Override + protected GenericLakehouseColumn internalBuild() { + GenericLakehouseColumn hiveColumn = new GenericLakehouseColumn(); + + hiveColumn.name = name; + hiveColumn.comment = comment; + hiveColumn.dataType = dataType; + hiveColumn.nullable = nullable; + hiveColumn.defaultValue = defaultValue == null ? DEFAULT_VALUE_NOT_SET : defaultValue; + return hiveColumn; + } + } +} diff --git a/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseTable.java b/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseTable.java new file mode 100644 index 00000000000..a9379a5b316 --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseTable.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.connector; + +import org.apache.gravitino.rel.GenericTable; + +public class GenericLakehouseTable extends BaseTable implements GenericTable { + @SuppressWarnings("unused") + private String schemaName; + + private String format; + + public static Builder builder() { + return new Builder(); + } + + @Override + public String format() { + return format; + } + + @Override + public String location() { + return properties.get("location"); + } + + @Override + public boolean external() { + return properties.get("external") != null && Boolean.parseBoolean(properties.get("external")); + } + + @Override + protected TableOperations newOps() throws UnsupportedOperationException { + throw new UnsupportedOperationException("Not implemented yet"); + } + + public static class Builder extends BaseTableBuilder { + + private String schemaName; + private String format; + + public Builder withSchemaName(String schemaName) { + this.schemaName = schemaName; + return this; + } + + public Builder withFormat(String format) { + this.format = format; + return this; + } + + @Override + protected GenericLakehouseTable internalBuild() { + GenericLakehouseTable genericLakehouseTable = new GenericLakehouseTable(); + genericLakehouseTable.schemaName = this.schemaName; + genericLakehouseTable.format = this.format; + genericLakehouseTable.columns = this.columns; + genericLakehouseTable.comment = this.comment; + genericLakehouseTable.properties = this.properties; + genericLakehouseTable.auditInfo = this.auditInfo; + genericLakehouseTable.distribution 
= this.distribution; + genericLakehouseTable.indexes = this.indexes; + genericLakehouseTable.name = this.name; + genericLakehouseTable.partitioning = this.partitioning; + genericLakehouseTable.sortOrders = this.sortOrders; + return genericLakehouseTable; + } + } +} diff --git a/core/src/main/java/org/apache/gravitino/meta/GenericTableEntity.java b/core/src/main/java/org/apache/gravitino/meta/GenericTableEntity.java new file mode 100644 index 00000000000..4b2dd9ad039 --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/meta/GenericTableEntity.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.meta; + +import com.google.common.collect.Maps; +import java.util.Map; +import lombok.Getter; +import org.apache.gravitino.Field; +import org.apache.gravitino.connector.GenericLakehouseColumn; +import org.apache.gravitino.connector.GenericLakehouseTable; +import org.apache.gravitino.rel.GenericTable; +import org.apache.gravitino.rel.expressions.distributions.Distribution; +import org.apache.gravitino.rel.expressions.sorts.SortOrder; +import org.apache.gravitino.rel.expressions.transforms.Transform; +import org.apache.gravitino.rel.indexes.Index; + +@Getter +public class GenericTableEntity extends TableEntity { + public static final Field FORMAT = Field.required("format", Long.class, "The table's format"); + public static final Field PROPERTIES = + Field.optional("properties", Map.class, "The table's properties"); + + public static final Field PARTITIONS = + Field.optional("partitions", Transform[].class, "The table's partition"); + + public static final Field SORT_ORDER = + Field.optional("sortOrders", SortOrder[].class, "The table's sort order"); + + public static final Field DISTRIBUTION = + Field.optional("distribution", Distribution.class, "The table's distribution"); + + public static final Field INDEXES = + Field.optional("indexes", Index[].class, "The table's indexes"); + + public static final Field COMMENT = + Field.optional("comment", String.class, "The table's comment"); + + public GenericTableEntity() { + super(); + } + + @Override + public Map fields() { + Map superFields = super.fields(); + Map result = Maps.newHashMap(superFields); + result.put(FORMAT, format); + result.put(PROPERTIES, properties); + result.put(PARTITIONS, partitions); + result.put(SORT_ORDER, sortOrder); + result.put(DISTRIBUTION, distribution); + result.put(INDEXES, indexes); + result.put(COMMENT, comment); + + return result; + } + + private String format; + @Getter private Map properties; + private Transform[] partitions; + private SortOrder[] 
sortOrder; + private Distribution distribution; + private Index[] indexes; + private String comment; + + public static class Builder { + private final GenericTableEntity tableEntity; + + public Builder() { + this.tableEntity = new GenericTableEntity(); + } + + public Builder withId(Long id) { + tableEntity.id = id; + return this; + } + + public Builder withName(String name) { + tableEntity.name = name; + return this; + } + + public Builder withAuditInfo(AuditInfo auditInfo) { + tableEntity.auditInfo = auditInfo; + return this; + } + + public Builder withColumns(java.util.List columns) { + tableEntity.columns = columns; + return this; + } + + public Builder withNamespace(org.apache.gravitino.Namespace namespace) { + tableEntity.namespace = namespace; + return this; + } + + public Builder withFormat(String format) { + tableEntity.format = format; + return this; + } + + public Builder withProperties(Map properties) { + tableEntity.properties = properties; + return this; + } + + public Builder withPartitions(Transform[] partitions) { + tableEntity.partitions = partitions; + return this; + } + + public Builder withSortOrder(SortOrder[] sortOrder) { + tableEntity.sortOrder = sortOrder; + return this; + } + + public Builder withDistribution(Distribution distribution) { + tableEntity.distribution = distribution; + return this; + } + + public Builder withIndexes(Index[] indexes) { + tableEntity.indexes = indexes; + return this; + } + + public Builder withComment(String comment) { + tableEntity.comment = comment; + return this; + } + + public GenericTableEntity build() { + return tableEntity; + } + } + + public static GenericTableEntity.Builder getBuilder() { + return new GenericTableEntity.Builder(); + } + + public GenericTable toGenericTable() { + return GenericLakehouseTable.builder() + .withFormat(format) + .withProperties(properties) + .withAuditInfo(auditInfo) + .withSortOrders(sortOrder) + .withPartitioning(partitions) + .withDistribution(distribution) + .withColumns( 
+ columns.stream() + .map(this::toGenericLakehouseColumn) + .toArray(GenericLakehouseColumn[]::new)) + .withIndexes(indexes) + .withName(name) + .withComment(comment) + .build(); + } + + private GenericLakehouseColumn toGenericLakehouseColumn(ColumnEntity columnEntity) { + return GenericLakehouseColumn.builder() + .withName(columnEntity.name()) + .withComment(columnEntity.comment()) + .withAutoIncrement(columnEntity.autoIncrement()) + .withNullable(columnEntity.nullable()) + .withType(columnEntity.dataType()) + .withDefaultValue(columnEntity.defaultValue()) + .build(); + } +} diff --git a/core/src/main/java/org/apache/gravitino/meta/TableEntity.java b/core/src/main/java/org/apache/gravitino/meta/TableEntity.java index 9d15be7df65..595defed086 100644 --- a/core/src/main/java/org/apache/gravitino/meta/TableEntity.java +++ b/core/src/main/java/org/apache/gravitino/meta/TableEntity.java @@ -42,15 +42,15 @@ public class TableEntity implements Entity, Auditable, HasIdentifier { public static final Field COLUMNS = Field.optional("columns", List.class, "The columns of the table"); - private Long id; + protected Long id; - private String name; + protected String name; - private AuditInfo auditInfo; + protected AuditInfo auditInfo; - private Namespace namespace; + protected Namespace namespace; - private List columns; + protected List columns; /** * Returns a map of the fields and their corresponding values for this table. diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionMapper.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionMapper.java new file mode 100644 index 00000000000..a723c3db4a8 --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionMapper.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.storage.relational.mapper; + +import org.apache.gravitino.storage.relational.po.TablePO; +import org.apache.ibatis.annotations.InsertProvider; +import org.apache.ibatis.annotations.Param; + +public interface TableVersionMapper { + String TABLE_NAME = "table_version_info"; + + @InsertProvider(type = TableVersionSQLProviderFactory.class, method = "insertTableVersion") + void insertTableVersion(@Param("tablePO") TablePO tablePO); + + @InsertProvider( + type = TableVersionSQLProviderFactory.class, + method = "insertTableVersionOnDuplicateKeyUpdate") + void insertTableVersionOnDuplicateKeyUpdate(@Param("tablePO") TablePO tablePO); +} diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionSQLProviderFactory.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionSQLProviderFactory.java new file mode 100644 index 00000000000..ab27353c002 --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionSQLProviderFactory.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.storage.relational.mapper; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.gravitino.storage.relational.JDBCBackend.JDBCBackendType; +import org.apache.gravitino.storage.relational.mapper.provider.base.TableVersionBaseSQLProvider; +import org.apache.gravitino.storage.relational.mapper.provider.postgresql.TableVersionPostgreSQLProvider; +import org.apache.gravitino.storage.relational.po.TablePO; +import org.apache.gravitino.storage.relational.session.SqlSessionFactoryHelper; +import org.apache.ibatis.annotations.Param; + +public class TableVersionSQLProviderFactory { + + private static final Map + TABLE_VERSION_SQL_PROVIDER_MAP = + ImmutableMap.of( + JDBCBackendType.MYSQL, new TableVersionSQLProviderFactory.TableVersionMySQLProvider(), + JDBCBackendType.H2, new TableVersionSQLProviderFactory.TableVersionH2Provider(), + JDBCBackendType.POSTGRESQL, new TableVersionPostgreSQLProvider()); + + public static TableVersionBaseSQLProvider getProvider() { + String databaseId = + SqlSessionFactoryHelper.getInstance() + .getSqlSessionFactory() + .getConfiguration() + .getDatabaseId(); + + JDBCBackendType jdbcBackendType = JDBCBackendType.fromString(databaseId); + return 
TABLE_VERSION_SQL_PROVIDER_MAP.get(jdbcBackendType); + } + + static class TableVersionMySQLProvider extends TableVersionBaseSQLProvider {} + + static class TableVersionH2Provider extends TableVersionBaseSQLProvider {} + + public static String insertTableVersion(@Param("tablePO") TablePO tablePO) { + return getProvider().insertTableVersion(tablePO); + } + + public static String insertTableVersionOnDuplicateKeyUpdate(@Param("tablePO") TablePO tablePO) { + return getProvider().insertTableVersionOnDuplicateKeyUpdate(tablePO); + } +} diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/DefaultMapperPackageProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/DefaultMapperPackageProvider.java index f214bd1962f..aaf22ccda88 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/DefaultMapperPackageProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/DefaultMapperPackageProvider.java @@ -41,6 +41,7 @@ import org.apache.gravitino.storage.relational.mapper.StatisticMetaMapper; import org.apache.gravitino.storage.relational.mapper.TableColumnMapper; import org.apache.gravitino.storage.relational.mapper.TableMetaMapper; +import org.apache.gravitino.storage.relational.mapper.TableVersionMapper; import org.apache.gravitino.storage.relational.mapper.TagMetaMapper; import org.apache.gravitino.storage.relational.mapper.TagMetadataObjectRelMapper; import org.apache.gravitino.storage.relational.mapper.TopicMetaMapper; @@ -78,6 +79,7 @@ public List> getMapperClasses() { TagMetaMapper.class, TopicMetaMapper.class, UserMetaMapper.class, - UserRoleRelMapper.class); + UserRoleRelMapper.class, + TableVersionMapper.class); } } diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableMetaBaseSQLProvider.java 
b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableMetaBaseSQLProvider.java index 9360e2c3544..8065476a613 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableMetaBaseSQLProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableMetaBaseSQLProvider.java @@ -21,20 +21,29 @@ import static org.apache.gravitino.storage.relational.mapper.TableMetaMapper.TABLE_NAME; import java.util.List; +import org.apache.gravitino.storage.relational.mapper.TableVersionMapper; import org.apache.gravitino.storage.relational.po.TablePO; import org.apache.ibatis.annotations.Param; public class TableMetaBaseSQLProvider { public String listTablePOsBySchemaId(@Param("schemaId") Long schemaId) { - return "SELECT table_id as tableId, table_name as tableName," - + " metalake_id as metalakeId, catalog_id as catalogId," - + " schema_id as schemaId, audit_info as auditInfo," - + " current_version as currentVersion, last_version as lastVersion," - + " deleted_at as deletedAt" + return "SELECT tm.table_id as tableId, tm.table_name as tableName," + + " tm.metalake_id as metalakeId, tm.catalog_id as catalogId," + + " tm.schema_id as schemaId, tm.audit_info as auditInfo," + + " tm.current_version as currentVersion, tm.last_version as lastVersion," + + " tm.deleted_at as deletedAt," + + " tv.format as format, " + + " tv.properties as properties," + + " tv.partitioning as partitions, tv.sort_orders as sortOrders," + + " tv.distribution as distribution, tv.indexes as indexes," + + " tv.comment as comment" + " FROM " + TABLE_NAME - + " WHERE schema_id = #{schemaId} AND deleted_at = 0"; + + " tm LEFT JOIN " + + TableVersionMapper.TABLE_NAME + + " tv ON tm.table_id = tv.table_id AND tm.current_version = tv.version AND tv.deleted_at = 0" + + " WHERE tm.schema_id = #{schemaId} AND tm.deleted_at = 0"; } public String listTablePOsByTableIds(List tableIds) { @@ -65,14 +74,22 @@ public 
String selectTableIdBySchemaIdAndName( public String selectTableMetaBySchemaIdAndName( @Param("schemaId") Long schemaId, @Param("tableName") String name) { - return "SELECT table_id as tableId, table_name as tableName," - + " metalake_id as metalakeId, catalog_id as catalogId," - + " schema_id as schemaId, audit_info as auditInfo," - + " current_version as currentVersion, last_version as lastVersion," - + " deleted_at as deletedAt" + return "SELECT tm.table_id as tableId, tm.table_name as tableName," + + " tm.metalake_id as metalakeId, tm.catalog_id as catalogId," + + " tm.schema_id as schemaId, tm.audit_info as auditInfo," + + " tm.current_version as currentVersion, tm.last_version as lastVersion," + + " tm.deleted_at as deletedAt," + + " tv.format as format, " + + " tv.properties as properties," + + " tv.partitioning as partitions, tv.sort_orders as sortOrders," + + " tv.distribution as distribution, tv.indexes as indexes," + + " tv.comment as comment" + " FROM " + TABLE_NAME - + " WHERE schema_id = #{schemaId} AND table_name = #{tableName} AND deleted_at = 0"; + + " tm LEFT JOIN " + + TableVersionMapper.TABLE_NAME + + " tv ON tm.table_id = tv.table_id AND tm.current_version = tv.version AND tv.deleted_at = 0" + + " WHERE tm.schema_id = #{schemaId} AND tm.table_name = #{tableName} AND tm.deleted_at = 0"; } public String selectTableMetaById(@Param("tableId") Long tableId) { diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableVersionBaseSQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableVersionBaseSQLProvider.java new file mode 100644 index 00000000000..3501abe10cf --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableVersionBaseSQLProvider.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.storage.relational.mapper.provider.base; + +import static org.apache.gravitino.storage.relational.mapper.TableVersionMapper.TABLE_NAME; + +import org.apache.gravitino.storage.relational.po.TablePO; +import org.apache.ibatis.annotations.Param; + +public class TableVersionBaseSQLProvider { + + public String insertTableVersion(@Param("tablePO") TablePO tablePO) { + return "INSERT INTO " + + TABLE_NAME + + " (table_id, format, properties, partitioning," + + " distribution, sort_orders, indexes, comment," + + " version, last_version, deleted_at)" + + " VALUES (" + + " #{tablePO.tableId}," + + " #{tablePO.format}," + + " #{tablePO.properties}," + + " #{tablePO.partitions}," + + " #{tablePO.distribution}," + + " #{tablePO.sortOrders}," + + " #{tablePO.indexes}," + + " #{tablePO.comment}," + + " #{tablePO.currentVersion}," + + " #{tablePO.lastVersion}," + + " #{tablePO.deletedAt}" + + " )"; + } + + public String insertTableVersionOnDuplicateKeyUpdate(@Param("tablePO") TablePO tablePO) { + return "INSERT INTO " + + TABLE_NAME + + " (table_id, format, properties, partitioning," + + " distribution, sort_orders, indexes, comment," + + " version, deleted_at)" + + " VALUES (" + + " #{tablePO.tableId}," + + " #{tablePO.format}," + + " 
#{tablePO.properties}," + + " #{tablePO.partitions}," + + " #{tablePO.distribution}," + + " #{tablePO.sortOrders}," + + " #{tablePO.indexes}," + + " #{tablePO.comment}," + + " #{tablePO.currentVersion}," + + " #{tablePO.deletedAt}" + + " )" + + " ON DUPLICATE KEY UPDATE" + + " format = #{tablePO.format}," + + " properties = #{tablePO.properties}," + + " partitioning = #{tablePO.partitions}," + + " distribution = #{tablePO.distribution}," + + " sort_orders = #{tablePO.sortOrders}," + + " indexes = #{tablePO.indexes}," + + " comment = #{tablePO.comment}," + + " version = #{tablePO.currentVersion}," + + " deleted_at = #{tablePO.deletedAt}"; + } +} diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/TableVersionPostgreSQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/TableVersionPostgreSQLProvider.java new file mode 100644 index 00000000000..e0a7413b1cc --- /dev/null +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/TableVersionPostgreSQLProvider.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.storage.relational.mapper.provider.postgresql; + +import org.apache.gravitino.storage.relational.mapper.provider.base.TableVersionBaseSQLProvider; + +public class TableVersionPostgreSQLProvider extends TableVersionBaseSQLProvider {} diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/po/TablePO.java b/core/src/main/java/org/apache/gravitino/storage/relational/po/TablePO.java index 693105e7727..56fea38337a 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/po/TablePO.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/po/TablePO.java @@ -20,7 +20,9 @@ import com.google.common.base.Objects; import com.google.common.base.Preconditions; +import lombok.Getter; +@Getter public class TablePO { private Long tableId; private String tableName; @@ -32,6 +34,15 @@ public class TablePO { private Long lastVersion; private Long deletedAt; + private String format; + + private String properties; + private String partitions; + private String sortOrders; + private String distribution; + private String indexes; + private String comment; + public Long getTableId() { return tableId; } @@ -154,6 +165,41 @@ public Builder withDeletedAt(Long deletedAt) { return this; } + public Builder withFormat(String format) { + tablePO.format = format; + return this; + } + + public Builder withProperties(String properties) { + tablePO.properties = properties; + return this; + } + + public Builder withPartitions(String partitions) { + tablePO.partitions = partitions; + return this; + } + + public Builder withSortOrders(String sortOrders) { + tablePO.sortOrders = sortOrders; + return this; + } + + public Builder withDistribution(String distribution) { + tablePO.distribution = distribution; + return this; + } + + public Builder withIndexes(String indexes) { + tablePO.indexes = indexes; + return this; + } + + public Builder withComment(String comment) { + tablePO.comment = comment; + return this; + } + private 
void validate() { Preconditions.checkArgument(tablePO.tableId != null, "Table id is required"); Preconditions.checkArgument(tablePO.tableName != null, "Table name is required"); diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/service/TableMetaService.java b/core/src/main/java/org/apache/gravitino/storage/relational/service/TableMetaService.java index 326ba63b5f2..f4bbf7a6f6d 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/service/TableMetaService.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/service/TableMetaService.java @@ -40,6 +40,7 @@ import org.apache.gravitino.storage.relational.mapper.SecurableObjectMapper; import org.apache.gravitino.storage.relational.mapper.StatisticMetaMapper; import org.apache.gravitino.storage.relational.mapper.TableMetaMapper; +import org.apache.gravitino.storage.relational.mapper.TableVersionMapper; import org.apache.gravitino.storage.relational.mapper.TagMetadataObjectRelMapper; import org.apache.gravitino.storage.relational.po.ColumnPO; import org.apache.gravitino.storage.relational.po.TablePO; @@ -118,12 +119,12 @@ public void insertTable(TableEntity tableEntity, boolean overwrite) throws IOExc fillTablePOBuilderParentEntityId(builder, tableEntity.namespace()); AtomicReference tablePORef = new AtomicReference<>(); + TablePO po = POConverters.initializeTablePOWithVersion(tableEntity, builder); SessionUtils.doMultipleWithCommit( () -> SessionUtils.doWithoutCommit( TableMetaMapper.class, mapper -> { - TablePO po = POConverters.initializeTablePOWithVersion(tableEntity, builder); tablePORef.set(po); if (overwrite) { mapper.insertTableMetaOnDuplicateKeyUpdate(po); @@ -131,6 +132,18 @@ public void insertTable(TableEntity tableEntity, boolean overwrite) throws IOExc mapper.insertTableMeta(po); } }), + () -> + SessionUtils.doWithCommit( + TableVersionMapper.class, + mapper -> { + if (po.getFormat() != null) { + if (overwrite) { + 
mapper.insertTableVersionOnDuplicateKeyUpdate(po); + } else { + mapper.insertTableVersion(po); + } + } + }), () -> { // We need to delete the columns first if we want to overwrite the table. if (overwrite) { @@ -292,7 +305,6 @@ private TablePO getTablePOBySchemaIdAndName(Long schemaId, String tableName) { SessionUtils.getWithoutCommit( TableMetaMapper.class, mapper -> mapper.selectTableMetaBySchemaIdAndName(schemaId, tableName)); - if (tablePO == null) { throw new NoSuchEntityException( NoSuchEntityException.NO_SUCH_ENTITY_MESSAGE, diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java b/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java index 127cb022e85..62bc11f8915 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java @@ -45,6 +45,7 @@ import org.apache.gravitino.meta.CatalogEntity; import org.apache.gravitino.meta.ColumnEntity; import org.apache.gravitino.meta.FilesetEntity; +import org.apache.gravitino.meta.GenericTableEntity; import org.apache.gravitino.meta.GroupEntity; import org.apache.gravitino.meta.ModelEntity; import org.apache.gravitino.meta.ModelVersionEntity; @@ -60,6 +61,7 @@ import org.apache.gravitino.policy.PolicyContent; import org.apache.gravitino.rel.Column; import org.apache.gravitino.rel.expressions.Expression; +import org.apache.gravitino.rel.indexes.Indexes.IndexImpl; import org.apache.gravitino.rel.types.Type; import org.apache.gravitino.storage.relational.po.CatalogPO; import org.apache.gravitino.storage.relational.po.ColumnPO; @@ -390,14 +392,44 @@ public static List fromSchemaPOs(List schemaPOs, Namespa public static TablePO initializeTablePOWithVersion( TableEntity tableEntity, TablePO.Builder builder) { try { - return builder + builder .withTableId(tableEntity.id()) .withTableName(tableEntity.name()) 
.withAuditInfo(JsonUtils.anyFieldMapper().writeValueAsString(tableEntity.auditInfo())) .withCurrentVersion(INIT_VERSION) .withLastVersion(INIT_VERSION) - .withDeletedAt(DEFAULT_DELETED_AT) - .build(); + .withDeletedAt(DEFAULT_DELETED_AT); + + if (tableEntity instanceof GenericTableEntity genericTable) { + builder.withFormat(genericTable.getFormat()); + builder.withComment(genericTable.getComment()); + builder.withProperties( + genericTable.getProperties() == null + ? null + : JsonUtils.anyFieldMapper().writeValueAsString(genericTable.getProperties())); + + // TODO store the following information to databases; + /** + * builder.withDistribution( genericTable.getDistribution() == null ? null : + * JsonUtils.anyFieldMapper().writeValueAsString(genericTable.getDistribution())); + * builder.withPartitions( genericTable.getPartitions() == null ? null : + * JsonUtils.anyFieldMapper().writeValueAsString(genericTable.getPartitions())); + */ + builder.withIndexes( + genericTable.getIndexes() == null + ? null + : JsonUtils.anyFieldMapper().writeValueAsString(genericTable.getIndexes())); + builder.withProperties( + genericTable.getProperties() == null + ? null + : JsonUtils.anyFieldMapper().writeValueAsString(genericTable.getProperties())); + builder.withSortOrders( + genericTable.getSortOrder() == null + ? 
null + : JsonUtils.anyFieldMapper().writeValueAsString(genericTable.getSortOrder())); + } + + return builder.build(); } catch (JsonProcessingException e) { throw new RuntimeException("Failed to serialize json object:", e); } @@ -455,6 +487,29 @@ public static TableEntity fromTablePO(TablePO tablePO, Namespace namespace) { public static TableEntity fromTableAndColumnPOs( TablePO tablePO, List columnPOs, Namespace namespace) { try { + if (tablePO.getFormat() != null) { + return GenericTableEntity.getBuilder() + .withId(tablePO.getTableId()) + .withName(tablePO.getTableName()) + .withNamespace(namespace) + .withColumns(fromColumnPOs(columnPOs)) + .withAuditInfo( + JsonUtils.anyFieldMapper().readValue(tablePO.getAuditInfo(), AuditInfo.class)) + // TODO add field partition, distribution and sort order; + .withIndexes( + StringUtils.isBlank(tablePO.getIndexes()) + ? null + : JsonUtils.anyFieldMapper().readValue(tablePO.getIndexes(), IndexImpl[].class)) + .withFormat(tablePO.getFormat()) + .withComment(tablePO.getComment()) + .withProperties( + StringUtils.isBlank(tablePO.getProperties()) + ? null + : JsonUtils.anyFieldMapper().readValue(tablePO.getProperties(), Map.class)) + .withColumns(fromColumnPOs(columnPOs)) + .build(); + } + return TableEntity.builder() .withId(tablePO.getTableId()) .withName(tablePO.getTableName()) diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/utils/SessionUtils.java b/core/src/main/java/org/apache/gravitino/storage/relational/utils/SessionUtils.java index 752d89533d2..0482bfecfd5 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/utils/SessionUtils.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/utils/SessionUtils.java @@ -106,4 +106,16 @@ public static void doMultipleWithCommit(Runnable... 
operations) { throw e; } } + + public static void beginTransaction() { + SqlSessions.getSqlSession(); + } + + public static void commitTransaction() { + SqlSessions.commitAndCloseSqlSession(); + } + + public static void rollbackTransaction() { + SqlSessions.rollbackAndCloseSqlSession(); + } } diff --git a/scripts/h2/schema-1.1.0-h2.sql b/scripts/h2/schema-1.1.0-h2.sql index 98a12174234..6172915f1f3 100644 --- a/scripts/h2/schema-1.1.0-h2.sql +++ b/scripts/h2/schema-1.1.0-h2.sql @@ -1,22 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -- -- Licensed to the Apache Software Foundation (ASF) under one -- or more contributor license agreements. See the NOTICE file-- diff --git a/scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql b/scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql index f76a2c25931..cf42a02b57c 100644 --- a/scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql +++ b/scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql @@ -1,21 +1,21 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file-- +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"). You may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- CREATE TABLE IF NOT EXISTS `table_version_info` ( `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', diff --git a/scripts/mysql/schema-1.1.0-mysql.sql b/scripts/mysql/schema-1.1.0-mysql.sql index c6bd8a81e3c..ca9b351b034 100644 --- a/scripts/mysql/schema-1.1.0-mysql.sql +++ b/scripts/mysql/schema-1.1.0-mysql.sql @@ -1,22 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -- -- Licensed to the Apache Software Foundation (ASF) under one -- or more contributor license agreements. See the NOTICE file-- diff --git a/scripts/mysql/upgrade-1.0.0-to-1.1.0-mysql.sql b/scripts/mysql/upgrade-1.0.0-to-1.1.0-mysql.sql index 5560993eb61..6663150f15a 100644 --- a/scripts/mysql/upgrade-1.0.0-to-1.1.0-mysql.sql +++ b/scripts/mysql/upgrade-1.0.0-to-1.1.0-mysql.sql @@ -1,21 +1,21 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. 
See the NOTICE file-- +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"). You may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- CREATE TABLE IF NOT EXISTS `table_version_info` ( `table_id` BIGINT(20) UNSIGNED NOT NULL COMMENT 'table id', diff --git a/scripts/postgresql/schema-1.1.0-postgresql.sql b/scripts/postgresql/schema-1.1.0-postgresql.sql index bc69e7839be..c5bc6b32055 100644 --- a/scripts/postgresql/schema-1.1.0-postgresql.sql +++ b/scripts/postgresql/schema-1.1.0-postgresql.sql @@ -1,22 +1,3 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -- -- Licensed to the Apache Software Foundation (ASF) under one -- or more contributor license agreements. See the NOTICE file-- diff --git a/scripts/postgresql/upgrade-1.0.0-to-1.1.0-postgresql.sql b/scripts/postgresql/upgrade-1.0.0-to-1.1.0-postgresql.sql index 882c9a6cc27..42d06e30a83 100644 --- a/scripts/postgresql/upgrade-1.0.0-to-1.1.0-postgresql.sql +++ b/scripts/postgresql/upgrade-1.0.0-to-1.1.0-postgresql.sql @@ -1,21 +1,21 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file-- +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"). You may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. 
See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- CREATE TABLE IF NOT EXISTS table_version_info ( From 662097ad51cd44bc031fe02ecff2d16c613c89ba Mon Sep 17 00:00:00 2001 From: mchades Date: Sat, 25 Oct 2025 00:30:04 +0800 Subject: [PATCH 12/43] [#8892] feat(Lance-REST-Server): implement namespace operation APIs for LRS (#8902) ### What changes were proposed in this pull request? implement namespace operation APIs for LRS ### Why are the changes needed? Fix: #8892 ### Does this PR introduce _any_ user-facing change? yes, new REST APIs added ### How was this patch tested? not now --- .../gravitino-lance-rest-server.conf.template | 9 +- conf/gravitino.conf.template | 17 +- .../lance/common/config/LanceConfig.java | 17 +- .../common/ops/LanceNamespaceOperations.java | 8 +- .../GravitinoLanceNamespaceWrapper.java | 332 +++++++++++++++++- .../lance/common/config/TestLanceConfig.java | 13 +- .../gravitino/lance/LanceRESTService.java | 2 +- .../rest/LanceNamespaceOperations.java | 94 ++++- .../gravitino/server/TestServerConfig.java | 3 +- 9 files changed, 447 insertions(+), 48 deletions(-) diff --git a/conf/gravitino-lance-rest-server.conf.template b/conf/gravitino-lance-rest-server.conf.template index 32609bffcaa..137daf145d7 100644 --- a/conf/gravitino-lance-rest-server.conf.template +++ b/conf/gravitino-lance-rest-server.conf.template @@ -40,6 +40,9 @@ gravitino.lance-rest.requestHeaderSize = 131072 # The response header size of the built-in web server gravitino.lance-rest.responseHeaderSize = 131072 -# THE CONFIGURATION FOR Lance CATALOG -# The logical Lance catalog served by this REST endpoint -gravitino.lance-rest.catalog-name = default +# THE CONFIGURATION FOR Lance namespace backend +# The backend Lance namespace for Lance REST service, it's recommended to use Gravitino +gravitino.lance-rest.namespace-backend = gravitino +gravitino.lance-rest.uri = http://localhost:8090 +# replace metalake with your 
metalake name in Gravitino +# gravitino.lance-rest.metalake-name = metalake diff --git a/conf/gravitino.conf.template b/conf/gravitino.conf.template index 418d14f14ce..a1fdb005cad 100644 --- a/conf/gravitino.conf.template +++ b/conf/gravitino.conf.template @@ -81,8 +81,9 @@ gravitino.authorization.enable = false gravitino.authorization.serviceAdmins = anonymous # THE CONFIGURATION FOR AUXILIARY SERVICE -# Auxiliary service names, separate by ',' +# Auxiliary service names, separate by ',' such as iceberg-rest,lance-rest gravitino.auxService.names = iceberg-rest + # Iceberg REST service classpath gravitino.iceberg-rest.classpath = iceberg-rest-server/libs, iceberg-rest-server/conf # Iceberg REST service host @@ -93,3 +94,17 @@ gravitino.iceberg-rest.httpPort = 9001 gravitino.iceberg-rest.catalog-backend = memory # The warehouse directory of Iceberg catalog for Iceberg REST service gravitino.iceberg-rest.warehouse = /tmp/ + +# Lance REST service classpath +gravitino.lance-rest.classpath = lance-rest-server/libs +# Lance REST service host +gravitino.lance-rest.host = 0.0.0.0 +# Lance REST service http port +gravitino.lance-rest.httpPort = 9101 + +# THE CONFIGURATION FOR Lance namespace backend +# The backend Lance namespace for Lance REST service, it's recommended to use Gravitino +gravitino.lance-rest.namespace-backend = gravitino +gravitino.lance-rest.uri = http://localhost:8090 +# replace metalake with your metalake name in Gravitino +# gravitino.lance-rest.metalake-name = metalake diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java index b6614c87ee3..3703189ba87 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java @@ -39,29 +39,22 @@ public class LanceConfig extends Config 
implements OverwriteDefaultConfig { public static final String DEFAULT_NAMESPACE_BACKEND = "gravitino"; public static final String DEFAULT_URI = "http://localhost:8090"; - public static final ConfigEntry CATALOG_NAME = - new ConfigBuilder(LANCE_CONFIG_PREFIX + "catalog-name") - .doc("Logical Lance catalog served by the REST endpoint") - .version(ConfigConstants.VERSION_0_1_0) - .stringConf() - .createWithDefault("default"); - public static final ConfigEntry NAMESPACE_BACKEND = - new ConfigBuilder(LANCE_CONFIG_PREFIX + CONFIG_NAMESPACE_BACKEND) + new ConfigBuilder(CONFIG_NAMESPACE_BACKEND) .doc("The backend implementation for namespace operations") .version(ConfigConstants.VERSION_0_1_0) .stringConf() .createWithDefault(DEFAULT_NAMESPACE_BACKEND); public static final ConfigEntry METALAKE_NAME = - new ConfigBuilder(LANCE_CONFIG_PREFIX + CONFIG_METALAKE) + new ConfigBuilder(CONFIG_METALAKE) .doc("The Metalake name for Gravitino namespace backend") .version(ConfigConstants.VERSION_0_1_0) .stringConf() .create(); public static final ConfigEntry NAMESPACE_URI = - new ConfigBuilder(LANCE_CONFIG_PREFIX + CONFIG_URI) + new ConfigBuilder(CONFIG_URI) .doc("The URI for the namespace backend, e.g., Gravitino server URI") .version(ConfigConstants.VERSION_0_1_0) .stringConf() @@ -76,8 +69,8 @@ public LanceConfig() { super(false); } - public String getCatalogName() { - return get(CATALOG_NAME); + public String getNamespaceBackend() { + return get(NAMESPACE_BACKEND); } public String getNamespaceUri() { diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java index 1b5da98ec04..226de4dbd78 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java @@ -32,19 +32,19 @@ 
public interface LanceNamespaceOperations { ListNamespacesResponse listNamespaces( String namespaceId, String delimiter, String pageToken, Integer limit); - DescribeNamespaceResponse describeNamespace(String id, String delimiter); + DescribeNamespaceResponse describeNamespace(String namespaceId, String delimiter); CreateNamespaceResponse createNamespace( - String id, + String namespaceId, String delimiter, CreateNamespaceRequest.ModeEnum mode, Map properties); DropNamespaceResponse dropNamespace( - String id, + String namespaceId, String delimiter, DropNamespaceRequest.ModeEnum mode, DropNamespaceRequest.BehaviorEnum behavior); - void namespaceExists(String id, String delimiter) throws LanceNamespaceException; + void namespaceExists(String namespaceId, String delimiter) throws LanceNamespaceException; } diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java index 59f637b5a1f..cb1b85752a6 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java @@ -23,6 +23,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.lancedb.lance.namespace.LanceNamespaceException; import com.lancedb.lance.namespace.ObjectIdentifier; @@ -33,16 +34,30 @@ import com.lancedb.lance.namespace.model.DropNamespaceResponse; import com.lancedb.lance.namespace.model.ListNamespacesResponse; import com.lancedb.lance.namespace.model.ListTablesResponse; +import com.lancedb.lance.namespace.util.CommonUtil; import com.lancedb.lance.namespace.util.PageUtil; import java.util.Arrays; import 
java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.function.IntFunction; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Catalog; +import org.apache.gravitino.CatalogChange; +import org.apache.gravitino.Schema; +import org.apache.gravitino.SchemaChange; import org.apache.gravitino.client.GravitinoClient; +import org.apache.gravitino.exceptions.CatalogAlreadyExistsException; import org.apache.gravitino.exceptions.NoSuchCatalogException; +import org.apache.gravitino.exceptions.NoSuchSchemaException; +import org.apache.gravitino.exceptions.NonEmptyCatalogException; +import org.apache.gravitino.exceptions.NonEmptySchemaException; +import org.apache.gravitino.exceptions.SchemaAlreadyExistsException; import org.apache.gravitino.lance.common.config.LanceConfig; import org.apache.gravitino.lance.common.ops.LanceNamespaceOperations; import org.apache.gravitino.lance.common.ops.LanceTableOperations; @@ -102,7 +117,6 @@ public ListNamespacesResponse listNamespaces( List namespaces; switch (nsId.levels()) { case 0: - // List catalogs of type relational and provider generic-lakehouse namespaces = Arrays.stream(client.listCatalogsInfo()) .filter(this::isLakehouseCatalog) @@ -111,16 +125,14 @@ public ListNamespacesResponse listNamespaces( break; case 1: - // List schemas under the catalog - String catalogName = nsId.levelAtListPos(0); - Catalog catalog = client.loadCatalog(catalogName); - if (!isLakehouseCatalog(catalog)) { - throw new NoSuchCatalogException("Catalog not found: %s", catalogName); - } - + Catalog catalog = loadAndValidateLakehouseCatalog(nsId.levelAtListPos(0)); namespaces = Lists.newArrayList(catalog.asSchemas().listSchemas()); break; + case 2: + namespaces = Lists.newArrayList(); + break; + default: throw new 
IllegalArgumentException( "Expected at most 2-level namespace but got: " + namespaceId); @@ -136,34 +148,101 @@ public ListNamespacesResponse listNamespaces( } @Override - public DescribeNamespaceResponse describeNamespace(String id, String delimiter) { - throw new UnsupportedOperationException("Not implemented yet"); + public DescribeNamespaceResponse describeNamespace(String namespaceId, String delimiter) { + ObjectIdentifier nsId = ObjectIdentifier.of(namespaceId, delimiter); + Preconditions.checkArgument( + nsId.levels() <= 2 && nsId.levels() > 0, + "Expected at most 2-level and at least 1-level namespace but got: %s", + namespaceId); + + Catalog catalog = loadAndValidateLakehouseCatalog(nsId.levelAtListPos(0)); + Map properties = Maps.newHashMap(); + + switch (nsId.levels()) { + case 1: + Optional.ofNullable(catalog.properties()).ifPresent(properties::putAll); + break; + case 2: + String schemaName = nsId.levelAtListPos(1); + Schema schema = catalog.asSchemas().loadSchema(schemaName); + Optional.ofNullable(schema.properties()).ifPresent(properties::putAll); + break; + default: + throw new IllegalArgumentException( + "Expected at most 2-level and at least 1-level namespace but got: " + namespaceId); + } + + DescribeNamespaceResponse response = new DescribeNamespaceResponse(); + response.setProperties(properties); + return response; } @Override public CreateNamespaceResponse createNamespace( - String id, + String namespaceId, String delimiter, CreateNamespaceRequest.ModeEnum mode, Map properties) { - throw new UnsupportedOperationException("Not implemented yet"); + ObjectIdentifier nsId = ObjectIdentifier.of(namespaceId, delimiter); + Preconditions.checkArgument( + nsId.levels() <= 2 && nsId.levels() > 0, + "Expected at most 2-level and at least 1-level namespace but got: %s", + namespaceId); + + switch (nsId.levels()) { + case 1: + return createOrUpdateCatalog(nsId.levelAtListPos(0), mode, properties); + case 2: + return createOrUpdateSchema( + 
nsId.levelAtListPos(0), nsId.levelAtListPos(1), mode, properties); + default: + throw new IllegalArgumentException( + "Expected at most 2-level and at least 1-level namespace but got: " + namespaceId); + } } @Override public DropNamespaceResponse dropNamespace( - String id, + String namespaceId, String delimiter, DropNamespaceRequest.ModeEnum mode, DropNamespaceRequest.BehaviorEnum behavior) { - throw new UnsupportedOperationException("Not implemented yet"); + ObjectIdentifier nsId = ObjectIdentifier.of(namespaceId, delimiter); + Preconditions.checkArgument( + nsId.levels() <= 2 && nsId.levels() > 0, + "Expected at most 2-level and at least 1-level namespace but got: %s", + namespaceId); + + switch (nsId.levels()) { + case 1: + return dropCatalog(nsId.levelAtListPos(0), mode, behavior); + case 2: + return dropSchema(nsId.levelAtListPos(0), nsId.levelAtListPos(1), mode, behavior); + default: + throw new IllegalArgumentException( + "Expected at most 2-level and at least 1-level namespace but got: " + namespaceId); + } } @Override - public void namespaceExists(String id, String delimiter) throws LanceNamespaceException {} + public void namespaceExists(String namespaceId, String delimiter) throws LanceNamespaceException { + ObjectIdentifier nsId = ObjectIdentifier.of(namespaceId, delimiter); + Preconditions.checkArgument( + nsId.levels() <= 2 && nsId.levels() > 0, + "Expected at most 2-level and at least 1-level namespace but got: %s", + namespaceId); - private boolean isLakehouseCatalog(Catalog catalog) { - return catalog.type().equals(Catalog.Type.RELATIONAL) - && "generic-lakehouse".equals(catalog.provider()); + Catalog catalog = loadAndValidateLakehouseCatalog(nsId.levelAtListPos(0)); + if (nsId.levels() == 2) { + String schemaName = nsId.levelAtListPos(1); + if (!catalog.asSchemas().schemaExists(schemaName)) { + throw LanceNamespaceException.notFound( + "Schema not found: " + schemaName, + NoSuchSchemaException.class.getSimpleName(), + schemaName, + 
CommonUtil.formatCurrentStackTrace()); + } + } } @Override @@ -171,4 +250,221 @@ public ListTablesResponse listTables( String id, String delimiter, String pageToken, Integer limit) { throw new UnsupportedOperationException("Not implemented yet"); } + + private boolean isLakehouseCatalog(Catalog catalog) { + return catalog.type().equals(Catalog.Type.RELATIONAL) + && "generic-lakehouse".equals(catalog.provider()); + } + + private Catalog loadAndValidateLakehouseCatalog(String catalogName) { + Catalog catalog; + try { + catalog = client.loadCatalog(catalogName); + } catch (NoSuchCatalogException e) { + throw LanceNamespaceException.notFound( + "Catalog not found: " + catalogName, + NoSuchCatalogException.class.getSimpleName(), + catalogName, + CommonUtil.formatCurrentStackTrace()); + } + if (!isLakehouseCatalog(catalog)) { + throw LanceNamespaceException.notFound( + "Catalog is not a lakehouse catalog: " + catalogName, + NoSuchCatalogException.class.getSimpleName(), + catalogName, + CommonUtil.formatCurrentStackTrace()); + } + return catalog; + } + + private CreateNamespaceResponse createOrUpdateCatalog( + String catalogName, CreateNamespaceRequest.ModeEnum mode, Map properties) { + CreateNamespaceResponse response = new CreateNamespaceResponse(); + + Catalog catalog; + try { + catalog = client.loadCatalog(catalogName); + } catch (NoSuchCatalogException e) { + // Catalog does not exist, create it + Catalog createdCatalog = + client.createCatalog( + catalogName, + Catalog.Type.RELATIONAL, + "generic-lakehouse", + "created by Lance REST server", + properties); + response.setProperties( + createdCatalog.properties() == null ? 
Maps.newHashMap() : createdCatalog.properties()); + return response; + } + + // Catalog exists, validate type + if (!isLakehouseCatalog(catalog)) { + throw LanceNamespaceException.conflict( + "Catalog already exists but is not a lakehouse catalog: " + catalogName, + CatalogAlreadyExistsException.class.getSimpleName(), + catalogName, + CommonUtil.formatCurrentStackTrace()); + } + + // Catalog exists, handle based on mode + switch (mode) { + case EXIST_OK: + response.setProperties(Maps.newHashMap()); + return response; + case CREATE: + throw LanceNamespaceException.conflict( + "Catalog already exists: " + catalogName, + CatalogAlreadyExistsException.class.getSimpleName(), + catalogName, + CommonUtil.formatCurrentStackTrace()); + case OVERWRITE: + CatalogChange[] changes = + buildChanges( + properties, + catalog.properties(), + CatalogChange::setProperty, + CatalogChange::removeProperty, + CatalogChange[]::new); + Catalog alteredCatalog = client.alterCatalog(catalogName, changes); + Optional.ofNullable(alteredCatalog.properties()).ifPresent(response::setProperties); + return response; + default: + throw new IllegalArgumentException("Unknown mode: " + mode); + } + } + + private CreateNamespaceResponse createOrUpdateSchema( + String catalogName, + String schemaName, + CreateNamespaceRequest.ModeEnum mode, + Map properties) { + CreateNamespaceResponse response = new CreateNamespaceResponse(); + Catalog loadedCatalog = loadAndValidateLakehouseCatalog(catalogName); + + Schema schema; + try { + schema = loadedCatalog.asSchemas().loadSchema(schemaName); + } catch (NoSuchSchemaException e) { + // Schema does not exist, create it + Schema createdSchema = loadedCatalog.asSchemas().createSchema(schemaName, null, properties); + response.setProperties( + createdSchema.properties() == null ? 
Maps.newHashMap() : createdSchema.properties()); + return response; + } + + // Schema exists, handle based on mode + switch (mode) { + case EXIST_OK: + response.setProperties(Maps.newHashMap()); + return response; + case CREATE: + throw LanceNamespaceException.conflict( + "Schema already exists: " + schemaName, + SchemaAlreadyExistsException.class.getSimpleName(), + schemaName, + CommonUtil.formatCurrentStackTrace()); + case OVERWRITE: + SchemaChange[] changes = + buildChanges( + properties, + schema.properties(), + SchemaChange::setProperty, + SchemaChange::removeProperty, + SchemaChange[]::new); + Schema alteredSchema = loadedCatalog.asSchemas().alterSchema(schemaName, changes); + Optional.ofNullable(alteredSchema.properties()).ifPresent(response::setProperties); + return response; + default: + throw new IllegalArgumentException("Unknown mode: " + mode); + } + } + + private DropNamespaceResponse dropCatalog( + String catalogName, + DropNamespaceRequest.ModeEnum mode, + DropNamespaceRequest.BehaviorEnum behavior) { + try { + boolean dropped = + client.dropCatalog(catalogName, behavior == DropNamespaceRequest.BehaviorEnum.CASCADE); + if (dropped) { + return new DropNamespaceResponse(); + } else { + // Catalog did not exist + if (mode == DropNamespaceRequest.ModeEnum.FAIL) { + throw LanceNamespaceException.notFound( + "Catalog not found: " + catalogName, + NoSuchCatalogException.class.getSimpleName(), + catalogName, + CommonUtil.formatCurrentStackTrace()); + } + return new DropNamespaceResponse(); // SKIP mode + } + } catch (NonEmptyCatalogException e) { + throw LanceNamespaceException.badRequest( + String.format("Catalog %s is not empty.", catalogName), + NonEmptyCatalogException.class.getSimpleName(), + catalogName, + CommonUtil.formatCurrentStackTrace()); + } + } + + private DropNamespaceResponse dropSchema( + String catalogName, + String schemaName, + DropNamespaceRequest.ModeEnum mode, + DropNamespaceRequest.BehaviorEnum behavior) { + try { + boolean dropped = 
+ client + .loadCatalog(catalogName) + .asSchemas() + .dropSchema(schemaName, behavior == DropNamespaceRequest.BehaviorEnum.CASCADE); + if (dropped) { + return new DropNamespaceResponse(); + } else { + // Schema did not exist + if (mode == DropNamespaceRequest.ModeEnum.FAIL) { + throw LanceNamespaceException.notFound( + "Schema not found: " + schemaName, + NoSuchSchemaException.class.getSimpleName(), + schemaName, + CommonUtil.formatCurrentStackTrace()); + } + return new DropNamespaceResponse(); // SKIP mode + } + } catch (NoSuchCatalogException e) { + throw LanceNamespaceException.notFound( + "Catalog not found: " + catalogName, + NoSuchCatalogException.class.getSimpleName(), + catalogName, + CommonUtil.formatCurrentStackTrace()); + } catch (NonEmptySchemaException e) { + throw LanceNamespaceException.badRequest( + String.format("Schema %s is not empty.", schemaName), + NonEmptySchemaException.class.getSimpleName(), + schemaName, + CommonUtil.formatCurrentStackTrace()); + } + } + + private T[] buildChanges( + Map newProps, + Map oldProps, + BiFunction setPropertyFunc, + Function removePropertyFunc, + IntFunction arrayCreator) { + Stream setPropertiesStream = + newProps.entrySet().stream() + .map(entry -> setPropertyFunc.apply(entry.getKey(), entry.getValue())); + + Stream removePropertiesStream = + oldProps == null + ? 
Stream.empty() + : oldProps.keySet().stream() + .filter(key -> !newProps.containsKey(key)) + .map(removePropertyFunc); + + return Stream.concat(setPropertiesStream, removePropertiesStream).toArray(arrayCreator); + } } diff --git a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java index 176634f3090..44577a2dfac 100644 --- a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java +++ b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java @@ -29,22 +29,22 @@ public class TestLanceConfig { @Test public void testLoadLanceConfig() { Map properties = - ImmutableMap.of("gravitino.lance-rest.catalog-name", "test_catalog"); + ImmutableMap.of("gravitino.lance-rest.namespace-backend", "test_catalog"); LanceConfig lanceConfig = new LanceConfig(); lanceConfig.loadFromMap(properties, k -> k.startsWith("gravitino.lance-rest.")); - Assertions.assertEquals("test_catalog", lanceConfig.getCatalogName()); + Assertions.assertEquals("gravitino", lanceConfig.getNamespaceBackend()); LanceConfig lanceConfig2 = new LanceConfig(properties); - Assertions.assertEquals("test_catalog", lanceConfig2.getCatalogName()); + Assertions.assertEquals("gravitino", lanceConfig2.getNamespaceBackend()); } @Test public void testDefaultCatalogName() { - // Test default catalog name when not specified + // Test default namespace backend name when not specified Map properties = ImmutableMap.of(); LanceConfig lanceConfig = new LanceConfig(properties); - Assertions.assertEquals("default", lanceConfig.getCatalogName()); + Assertions.assertEquals("gravitino", lanceConfig.getNamespaceBackend()); } @Test @@ -94,7 +94,6 @@ public void testCompleteConfiguration() { // Test all configurations together for auxiliary mode Map properties = ImmutableMap.builder() - .put(LanceConfig.CATALOG_NAME.getKey(), 
"lance_catalog") .put(LanceConfig.NAMESPACE_URI.getKey(), "http://gravitino-prod:8090") .put(LanceConfig.METALAKE_NAME.getKey(), "production") .put(LanceConfig.NAMESPACE_BACKEND.getKey(), "gravitino") @@ -104,7 +103,7 @@ public void testCompleteConfiguration() { LanceConfig lanceConfig = new LanceConfig(properties); // Verify all config values - Assertions.assertEquals("lance_catalog", lanceConfig.getCatalogName()); + Assertions.assertEquals("gravitino", lanceConfig.getNamespaceBackend()); Assertions.assertEquals("http://gravitino-prod:8090", lanceConfig.getNamespaceUri()); Assertions.assertEquals("production", lanceConfig.getGravitinoMetalake()); diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java index d1409c8e12f..2d9f3e8823d 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java @@ -90,7 +90,7 @@ protected void configure() { server.addCustomFilters(LANCE_SPEC); server.addSystemFilters(LANCE_SPEC); - LOG.info("Initialized Lance REST service for catalog {}", lanceConfig.getCatalogName()); + LOG.info("Initialized Lance REST service for backend {}", lanceConfig.getNamespaceBackend()); } @Override diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java index 2d07357f30c..dd548541add 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java @@ -22,12 +22,19 @@ import com.codahale.metrics.annotation.ResponseMetered; import 
com.codahale.metrics.annotation.Timed; +import com.lancedb.lance.namespace.model.CreateNamespaceRequest; +import com.lancedb.lance.namespace.model.CreateNamespaceResponse; +import com.lancedb.lance.namespace.model.DescribeNamespaceResponse; +import com.lancedb.lance.namespace.model.DropNamespaceRequest; +import com.lancedb.lance.namespace.model.DropNamespaceResponse; import com.lancedb.lance.namespace.model.ListNamespacesResponse; +import java.util.regex.Pattern; import javax.inject.Inject; import javax.ws.rs.Consumes; import javax.ws.rs.DefaultValue; import javax.ws.rs.Encoded; import javax.ws.rs.GET; +import javax.ws.rs.POST; import javax.ws.rs.Path; import javax.ws.rs.PathParam; import javax.ws.rs.Produces; @@ -61,10 +68,95 @@ public Response listNamespaces( @QueryParam("limit") Integer limit) { try { ListNamespacesResponse response = - lanceNamespace.asNamespaceOps().listNamespaces(namespaceId, delimiter, pageToken, limit); + lanceNamespace + .asNamespaceOps() + .listNamespaces(namespaceId, Pattern.quote(delimiter), pageToken, limit); return Response.ok(response).build(); } catch (Exception e) { return LanceExceptionMapper.toRESTResponse(namespaceId, e); } } + + @POST + @Path("/{id}/describe") + @Timed(name = "describe-namespaces." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "describe-namespaces", absolute = true) + public Response describeNamespace( + @Encoded @PathParam("id") String namespaceId, + @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter") String delimiter) { + try { + DescribeNamespaceResponse response = + lanceNamespace.asNamespaceOps().describeNamespace(namespaceId, Pattern.quote(delimiter)); + return Response.ok(response).build(); + } catch (Exception e) { + return LanceExceptionMapper.toRESTResponse(namespaceId, e); + } + } + + @POST + @Path("/{id}/create") + @Timed(name = "create-namespaces." 
+ MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "create-namespaces", absolute = true) + public Response createNamespace( + @Encoded @PathParam("id") String namespaceId, + @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter") String delimiter, + CreateNamespaceRequest request) { + try { + CreateNamespaceResponse response = + lanceNamespace + .asNamespaceOps() + .createNamespace( + namespaceId, + Pattern.quote(delimiter), + request.getMode() == null + ? CreateNamespaceRequest.ModeEnum.CREATE + : request.getMode(), + request.getProperties()); + return Response.ok(response).build(); + } catch (Exception e) { + return LanceExceptionMapper.toRESTResponse(namespaceId, e); + } + } + + @POST + @Path("/{id}/drop") + @Timed(name = "drop-namespaces." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "drop-namespaces", absolute = true) + public Response dropNamespace( + @Encoded @PathParam("id") String namespaceId, + @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter") String delimiter, + DropNamespaceRequest request) { + try { + DropNamespaceResponse response = + lanceNamespace + .asNamespaceOps() + .dropNamespace( + namespaceId, + Pattern.quote(delimiter), + request.getMode() == null + ? DropNamespaceRequest.ModeEnum.FAIL + : request.getMode(), + request.getBehavior() == null + ? DropNamespaceRequest.BehaviorEnum.RESTRICT + : request.getBehavior()); + return Response.ok(response).build(); + } catch (Exception e) { + return LanceExceptionMapper.toRESTResponse(namespaceId, e); + } + } + + @POST + @Path("/{id}/exists") + @Timed(name = "namespace-exists." 
+ MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "namespace-exists", absolute = true) + public Response namespaceExists( + @Encoded @PathParam("id") String namespaceId, + @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter") String delimiter) { + try { + lanceNamespace.asNamespaceOps().namespaceExists(namespaceId, Pattern.quote(delimiter)); + return Response.ok().build(); + } catch (Exception e) { + return LanceExceptionMapper.toRESTResponse(namespaceId, e); + } + } } diff --git a/server-common/src/test/java/org/apache/gravitino/server/TestServerConfig.java b/server-common/src/test/java/org/apache/gravitino/server/TestServerConfig.java index fc9193e0126..e46e27b8079 100644 --- a/server-common/src/test/java/org/apache/gravitino/server/TestServerConfig.java +++ b/server-common/src/test/java/org/apache/gravitino/server/TestServerConfig.java @@ -67,7 +67,8 @@ public void checkGravitinoConfFile() for (Map.Entry entry : properties.entrySet()) { String propKey = (String) entry.getKey(); if (propKey.startsWith(AuxiliaryServiceManager.GRAVITINO_AUX_SERVICE_PREFIX) - || propKey.startsWith("gravitino.iceberg-rest.")) { + || propKey.startsWith("gravitino.iceberg-rest.") + || propKey.startsWith("gravitino.lance-rest.")) { continue; } Assertions.assertTrue( From 96a1caa72c4e596eddf12509a8cec7b343f23708 Mon Sep 17 00:00:00 2001 From: Mini Yu Date: Sat, 25 Oct 2025 09:14:14 +0800 Subject: [PATCH 13/43] [#8893] feat(lance-rest-server): Support create/load table operations for lance (#8911) ### What changes were proposed in this pull request? 1. Support alter lance table 2. Support API for table operations in Lance REST server. ### Why are the changes needed? It's a feature. Fix: #8893 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? 
Test locally with Spark and curl --------- Co-authored-by: mchades --- .../apache/gravitino/rel/indexes/Index.java | 45 +++ .../GenericLakehouseCatalogOperations.java | 20 +- .../lance/LanceCatalogOperations.java | 120 +++++- .../lance/LanceDataTypeConverter.java | 69 ++++ .../client/TestRelationalCatalog.java | 10 - .../apache/gravitino/dto/rel/TableDTO.java | 2 - .../dto/requests/TableCreateRequest.java | 3 - .../dto/responses/TableResponse.java | 3 - .../org/apache/gravitino/json/JsonUtils.java | 22 +- .../gravitino/dto/rel/TestTableDTO.java | 13 +- .../catalog/TableOperationDispatcher.java | 51 +++ .../common/ops/LanceNamespaceOperations.java | 3 + .../common/ops/LanceTableOperations.java | 15 +- .../ops/arrow/ArrowRecordBatchList.java | 40 ++ .../GravitinoLanceNamespaceWrapper.java | 344 +++++++++++++++++- .../gravitino/lance/common/TestArrowIPC.java | 83 +++++ lance/lance-rest-server/build.gradle.kts | 2 + .../rest/LanceNamespaceOperations.java | 17 + .../service/rest/LanceTableOperations.java | 89 +++-- .../jdbc/mysql/MySQLMetadataAdapter.java | 2 + 20 files changed, 882 insertions(+), 71 deletions(-) create mode 100644 lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/arrow/ArrowRecordBatchList.java create mode 100644 lance/lance-common/src/test/java/org/apache/gravitino/lance/common/TestArrowIPC.java diff --git a/api/src/main/java/org/apache/gravitino/rel/indexes/Index.java b/api/src/main/java/org/apache/gravitino/rel/indexes/Index.java index 4b92977fd5b..fd3beb0e67f 100644 --- a/api/src/main/java/org/apache/gravitino/rel/indexes/Index.java +++ b/api/src/main/java/org/apache/gravitino/rel/indexes/Index.java @@ -67,5 +67,50 @@ enum IndexType { * UNIQUE KEY helps in avoiding redundancy and ensuring data accuracy in the database. 
*/ UNIQUE_KEY, + + // The following index types are specific to Lance, for more, please see: IndexType in LanceDB + /** + * SCALAR index is used to optimize searches on scalar data types such as integers, floats, + * strings, etc. Currently, this type is only applicable to Lance. + */ + SCALAR, + /** + * BTREE index is a balanced tree data structure that maintains sorted data and allows for + * logarithmic time complexity for search, insert, and delete operations. Currently, this type + * is only applicable to Lance. + */ + BTREE, + /** + * Bitmap index is a type of database index that uses bit arrays (bitmaps) to represent the + * presence or absence of values in a column, enabling efficient querying and data retrieval. + * Currently, this type is only applicable to Lance. + */ + BITMAP, + /** + * LABEL_LIST index is used to optimize searches on columns containing lists of labels or tags. + * Currently, this type is only applicable to Lance. + */ + LABEL_LIST, + /** + * INVERTED index is a data structure used to optimize full-text searches by mapping terms to + * their locations within a dataset, allowing for quick retrieval of documents containing + * specific words or phrases. Currently, this type is only applicable to Lance. + */ + INVERTED, + /** + * VECTOR index is used to optimize similarity searches in high-dimensional vector spaces. + * Currently, this type is only applicable to Lance. 
+ */ + VECTOR, + /** IVF_FLAT (Inverted File with Flat quantization) is an indexing method used for efficient */ + IVF_FLAT, + /** IVF_SQ (Inverted File with Scalar Quantization) is an indexing method used for efficient */ + IVF_SQ, + /** IVF_PQ (Inverted File with Product Quantization) is an indexing method used for efficient */ + IVF_PQ, + /** IVF_HNSW_FLAT */ + IVF_HNSW_SQ, + /** IVF_HNSW_PQ */ + IVF_HNSW_PQ; } } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java index acac35528e2..0f85532e8c2 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java @@ -258,7 +258,25 @@ private String calculateTableLocation( @Override public Table alterTable(NameIdentifier ident, TableChange... changes) throws NoSuchTableException, IllegalArgumentException { - throw new UnsupportedOperationException("Not implemented yet."); + EntityStore store = GravitinoEnv.getInstance().entityStore(); + Namespace namespace = ident.namespace(); + try { + GenericTableEntity tableEntity = + store.get(ident, Entity.EntityType.TABLE, GenericTableEntity.class); + Map tableProperties = tableEntity.getProperties(); + String format = tableProperties.getOrDefault("format", "lance"); + LakehouseCatalogOperations lakehouseCatalogOperations = + SUPPORTED_FORMATS.compute( + format, + (k, v) -> + v == null + ? 
createLakehouseCatalogOperations( + format, tableProperties, catalogInfo, propertiesMetadata) + : v); + return lakehouseCatalogOperations.alterTable(ident, changes); + } catch (IOException e) { + throw new RuntimeException("Failed to list tables under schema " + namespace, e); + } } @Override diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java index 3e1146b7ad9..342826a882d 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java @@ -20,34 +20,49 @@ package org.apache.gravitino.catalog.lakehouse.lance; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; import com.lancedb.lance.Dataset; import com.lancedb.lance.WriteParams; +import com.lancedb.lance.index.DistanceType; +import com.lancedb.lance.index.IndexParams; +import com.lancedb.lance.index.IndexType; +import com.lancedb.lance.index.vector.VectorIndexParams; import java.io.IOException; import java.time.Instant; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.commons.lang3.ArrayUtils; import org.apache.gravitino.Catalog; +import org.apache.gravitino.Entity; +import org.apache.gravitino.EntityStore; +import org.apache.gravitino.GravitinoEnv; import org.apache.gravitino.NameIdentifier; import org.apache.gravitino.Namespace; import org.apache.gravitino.catalog.lakehouse.LakehouseCatalogOperations; import 
org.apache.gravitino.connector.CatalogInfo; import org.apache.gravitino.connector.GenericLakehouseTable; import org.apache.gravitino.connector.HasPropertyMetadata; +import org.apache.gravitino.exceptions.NoSuchEntityException; import org.apache.gravitino.exceptions.NoSuchSchemaException; import org.apache.gravitino.exceptions.NoSuchTableException; import org.apache.gravitino.exceptions.TableAlreadyExistsException; import org.apache.gravitino.meta.AuditInfo; +import org.apache.gravitino.meta.GenericTableEntity; import org.apache.gravitino.rel.Column; +import org.apache.gravitino.rel.GenericTable; import org.apache.gravitino.rel.Table; import org.apache.gravitino.rel.TableChange; import org.apache.gravitino.rel.expressions.distributions.Distribution; import org.apache.gravitino.rel.expressions.sorts.SortOrder; import org.apache.gravitino.rel.expressions.transforms.Transform; import org.apache.gravitino.rel.indexes.Index; +import org.apache.gravitino.rel.indexes.Indexes.IndexImpl; import org.apache.gravitino.utils.PrincipalUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -133,20 +148,31 @@ private org.apache.arrow.vector.types.pojo.Schema convertColumnsToSchema(Column[ .map( col -> { boolean nullable = col.nullable(); + ArrowType parentType = converter.fromGravitino(col.dataType()); + List childTypes = converter.getChildTypes(col.dataType()); + List childFields = + childTypes.stream() + .map( + childType -> + new org.apache.arrow.vector.types.pojo.Field( + "", + org.apache.arrow.vector.types.pojo.FieldType.nullable( + childType), + null)) + .collect(Collectors.toList()); + if (nullable) { return new org.apache.arrow.vector.types.pojo.Field( col.name(), - org.apache.arrow.vector.types.pojo.FieldType.nullable( - converter.fromGravitino(col.dataType())), - null); + org.apache.arrow.vector.types.pojo.FieldType.nullable(parentType), + childFields); } // not nullable return new org.apache.arrow.vector.types.pojo.Field( 
col.name(), - org.apache.arrow.vector.types.pojo.FieldType.notNullable( - converter.fromGravitino(col.dataType())), - null); + org.apache.arrow.vector.types.pojo.FieldType.notNullable(parentType), + childFields); }) .collect(Collectors.toList()); return new org.apache.arrow.vector.types.pojo.Schema(fields); @@ -155,8 +181,86 @@ private org.apache.arrow.vector.types.pojo.Schema convertColumnsToSchema(Column[ @Override public Table alterTable(NameIdentifier ident, TableChange... changes) throws NoSuchTableException, IllegalArgumentException { - // Use another PRs to implement alter table for Lance tables - return null; + // Lance only supports adding indexes for now. + List addedIndexes = Lists.newArrayList(); + + for (TableChange change : changes) { + if (change instanceof TableChange.AddIndex addIndexChange) { + Index index = + IndexImpl.builder() + .withIndexType(addIndexChange.getType()) + .withName(addIndexChange.getName()) + .withFieldNames(addIndexChange.getFieldNames()) + .build(); + addedIndexes.add(index); + } + } + + EntityStore entityStore = GravitinoEnv.getInstance().entityStore(); + GenericTableEntity entity; + try { + entity = entityStore.get(ident, Entity.EntityType.TABLE, GenericTableEntity.class); + } catch (NoSuchEntityException e) { + throw new NoSuchTableException("No such table: %s", ident); + } catch (IOException ioe) { + throw new RuntimeException("Failed to load table entity for: " + ident, ioe); + } + + String location = entity.getProperties().get("location"); + try (Dataset dataset = Dataset.open(location, new RootAllocator())) { + // For Lance, we only support adding indexes, so in fact, we can't handle drop index here. 
+ for (Index index : addedIndexes) { + IndexType indexType = IndexType.valueOf(index.type().name()); + IndexParams indexParams = getIndexParamsByIndexType(indexType); + + dataset.createIndex( + Arrays.stream(index.fieldNames()) + .map(fieldPath -> String.join(".", fieldPath)) + .collect(Collectors.toList()), + indexType, + Optional.of(index.name()), + indexParams, + true); + } + } catch (Exception e) { + throw new RuntimeException("Failed to alter Lance table: " + ident, e); + } + + GenericTable oldTable = entity.toGenericTable(); + Index[] newIndexes = oldTable.index(); + for (Index index : addedIndexes) { + newIndexes = ArrayUtils.add(newIndexes, index); + } + + return GenericLakehouseTable.builder() + .withFormat(oldTable.format()) + .withProperties(oldTable.properties()) + .withAuditInfo((AuditInfo) oldTable.auditInfo()) + .withSortOrders(oldTable.sortOrder()) + .withPartitioning(oldTable.partitioning()) + .withDistribution(oldTable.distribution()) + .withColumns(oldTable.columns()) + .withIndexes(newIndexes) + .withName(oldTable.name()) + .withComment(oldTable.comment()) + .build(); + } + + private IndexParams getIndexParamsByIndexType(IndexType indexType) { + switch (indexType) { + case SCALAR: + return new IndexParams.Builder().build(); + case VECTOR: + // TODO make these parameters configurable + int numberOfDimensions = 3; // this value should be determined dynamically based on the data + // Add properties to Index to set this value. 
+ return new IndexParams.Builder() + .setVectorIndexParams( + VectorIndexParams.ivfPq(2, 8, numberOfDimensions, DistanceType.L2, 2)) + .build(); + default: + throw new IllegalArgumentException("Unsupported index type: " + indexType); + } } @Override diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java index 117863659e1..d7966edd5ee 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java @@ -19,6 +19,8 @@ package org.apache.gravitino.catalog.lakehouse.lance; +import com.google.common.collect.Lists; +import java.util.List; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.TimeUnit; @@ -27,9 +29,11 @@ import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.gravitino.connector.DataTypeConverter; +import org.apache.gravitino.json.JsonUtils; import org.apache.gravitino.rel.types.Type; import org.apache.gravitino.rel.types.Types; import org.apache.gravitino.rel.types.Types.FixedType; +import org.apache.gravitino.rel.types.Types.UnparsedType; public class LanceDataTypeConverter implements DataTypeConverter { @@ -67,6 +71,30 @@ public ArrowType fromGravitino(Type type) { return new ArrowType.FixedSizeBinary(fixedType.length()); case BINARY: return new ArrowType.Binary(); + case UNPARSED: + String typeStr = ((UnparsedType) type).unparsedType().toString(); + try { + Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class); + if (t instanceof Types.ListType) { + return 
ArrowType.List.INSTANCE; + } else if (t instanceof Types.MapType) { + return new ArrowType.Map(false); + } else if (t instanceof Types.StructType) { + return ArrowType.Struct.INSTANCE; + } else { + throw new UnsupportedOperationException( + "Unsupported UnparsedType conversion: " + t.simpleString()); + } + } catch (Exception e) { + // FixedSizeListArray(integer, 3) + if (typeStr.startsWith("FixedSizeListArray")) { + int size = + Integer.parseInt( + typeStr.substring(typeStr.indexOf(',') + 1, typeStr.indexOf(')')).trim()); + return new ArrowType.FixedSizeList(size); + } + throw new UnsupportedOperationException("Failed to parse UnparsedType: " + typeStr, e); + } default: throw new UnsupportedOperationException("Unsupported Gravitino type: " + type.name()); } @@ -116,8 +144,49 @@ public Type toGravitino(ArrowType arrowType) { return Types.StringType.get(); } else if (arrowType instanceof ArrowType.Binary) { return Types.BinaryType.get(); + // TODO handle complex types like List, Map, Struct } else { throw new UnsupportedOperationException("Unsupported Arrow type: " + arrowType); } } + + public List getChildTypes(Type parentType) { + if (parentType.name() != Type.Name.UNPARSED) { + return List.of(); + } + + List arrowTypes = Lists.newArrayList(); + String typeStr = ((UnparsedType) parentType).unparsedType().toString(); + try { + Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class); + if (t instanceof Types.ListType listType) { + arrowTypes.add(fromGravitino(listType.elementType())); + } else if (t instanceof Types.MapType mapType) { + arrowTypes.add(fromGravitino(mapType.keyType())); + arrowTypes.add(fromGravitino(mapType.valueType())); + } else { + // TODO support struct type. 
+ throw new UnsupportedOperationException( + "Unsupported UnparsedType conversion: " + t.simpleString()); + } + + return arrowTypes; + } catch (Exception e) { + // FixedSizeListArray(integer, 3) + + try { + if (typeStr.startsWith("FixedSizeListArray")) { + String type = typeStr.substring(typeStr.indexOf('(') + 1, typeStr.indexOf(',')).trim(); + Type childType = JsonUtils.anyFieldMapper().readValue("\"" + type + "\"", Type.class); + arrowTypes.add(fromGravitino(childType)); + + return arrowTypes; + } + } catch (Exception e1) { + throw new UnsupportedOperationException("Failed to parse UnparsedType: " + typeStr, e1); + } + + throw new UnsupportedOperationException("Failed to parse UnparsedType: " + typeStr, e); + } + } } diff --git a/clients/client-java/src/test/java/org/apache/gravitino/client/TestRelationalCatalog.java b/clients/client-java/src/test/java/org/apache/gravitino/client/TestRelationalCatalog.java index c5e36247bbf..ff0a29c0325 100644 --- a/clients/client-java/src/test/java/org/apache/gravitino/client/TestRelationalCatalog.java +++ b/clients/client-java/src/test/java/org/apache/gravitino/client/TestRelationalCatalog.java @@ -571,16 +571,6 @@ public void testCreatePartitionedTable() throws JsonProcessingException { tableId, fromDTOs(columns), "comment", emptyMap, errorPartitioning)); Assertions.assertTrue(ex2.getMessage().contains("not found in table")); - // Test empty columns - Throwable ex3 = - Assertions.assertThrows( - IllegalArgumentException.class, - () -> - tableCatalog.createTable( - tableId, new Column[0], "comment", emptyMap, errorPartitioning)); - Assertions.assertTrue( - ex3.getMessage().contains("\"columns\" field is required and cannot be empty")); - // Test partitioning with assignments Partitioning[] partitioningWithAssignments = { RangePartitioningDTO.of( diff --git a/common/src/main/java/org/apache/gravitino/dto/rel/TableDTO.java b/common/src/main/java/org/apache/gravitino/dto/rel/TableDTO.java index c1fb160a381..a23a1cd37cf 100644 --- 
a/common/src/main/java/org/apache/gravitino/dto/rel/TableDTO.java +++ b/common/src/main/java/org/apache/gravitino/dto/rel/TableDTO.java @@ -314,8 +314,6 @@ public S withIndex(IndexDTO[] indexes) { public TableDTO build() { Preconditions.checkArgument(name != null && !name.isEmpty(), "name cannot be null or empty"); Preconditions.checkArgument(audit != null, "audit cannot be null"); - Preconditions.checkArgument( - columns != null && columns.length > 0, "columns cannot be null or empty"); return new TableDTO( name, diff --git a/common/src/main/java/org/apache/gravitino/dto/requests/TableCreateRequest.java b/common/src/main/java/org/apache/gravitino/dto/requests/TableCreateRequest.java index d6d22ddb9f5..8a97c9e2dd7 100644 --- a/common/src/main/java/org/apache/gravitino/dto/requests/TableCreateRequest.java +++ b/common/src/main/java/org/apache/gravitino/dto/requests/TableCreateRequest.java @@ -122,9 +122,6 @@ public TableCreateRequest( public void validate() throws IllegalArgumentException { Preconditions.checkArgument( StringUtils.isNotBlank(name), "\"name\" field is required and cannot be empty"); - Preconditions.checkArgument( - columns != null && columns.length != 0, - "\"columns\" field is required and cannot be empty"); if (sortOrders != null) { Arrays.stream(sortOrders).forEach(sortOrder -> sortOrder.validate(columns)); diff --git a/common/src/main/java/org/apache/gravitino/dto/responses/TableResponse.java b/common/src/main/java/org/apache/gravitino/dto/responses/TableResponse.java index d3dfa215992..a1cc3262952 100644 --- a/common/src/main/java/org/apache/gravitino/dto/responses/TableResponse.java +++ b/common/src/main/java/org/apache/gravitino/dto/responses/TableResponse.java @@ -63,9 +63,6 @@ public void validate() throws IllegalArgumentException { Preconditions.checkArgument(table != null, "table must not be null"); Preconditions.checkArgument( StringUtils.isNotBlank(table.name()), "table 'name' must not be null and empty"); - Preconditions.checkArgument( 
- table.columns() != null && table.columns().length > 0, - "table 'columns' must not be null and empty"); Preconditions.checkArgument(table.auditInfo() != null, "table 'audit' must not be null"); Preconditions.checkArgument( table.partitioning() != null, "table 'partitions' must not be null"); diff --git a/common/src/main/java/org/apache/gravitino/json/JsonUtils.java b/common/src/main/java/org/apache/gravitino/json/JsonUtils.java index 5fd2ab3f8fa..fb9a33f268a 100644 --- a/common/src/main/java/org/apache/gravitino/json/JsonUtils.java +++ b/common/src/main/java/org/apache/gravitino/json/JsonUtils.java @@ -715,7 +715,7 @@ private static Type readDataType(JsonNode node) { String text = node.asText().toLowerCase(); return text.equals(Types.NullType.get().simpleString()) ? Types.NullType.get() - : fromPrimitiveTypeString(text); + : fromPrimitiveTypeString(text, node.asText()); } if (node.isObject() && node.has(TYPE)) { @@ -834,49 +834,49 @@ private static void writeExternalType(Types.ExternalType externalType, JsonGener gen.writeEndObject(); } - private static Type fromPrimitiveTypeString(String typeString) { - Type.PrimitiveType primitiveType = TYPES.get(typeString); + private static Type fromPrimitiveTypeString(String lowerTypeString, String orignalTypeString) { + Type.PrimitiveType primitiveType = TYPES.get(lowerTypeString); if (primitiveType != null) { return primitiveType; } - Matcher fixed = FIXED.matcher(typeString); + Matcher fixed = FIXED.matcher(lowerTypeString); if (fixed.matches()) { return Types.FixedType.of(Integer.parseInt(fixed.group(1))); } - Matcher fixedChar = FIXEDCHAR.matcher(typeString); + Matcher fixedChar = FIXEDCHAR.matcher(lowerTypeString); if (fixedChar.matches()) { return Types.FixedCharType.of(Integer.parseInt(fixedChar.group(1))); } - Matcher varchar = VARCHAR.matcher(typeString); + Matcher varchar = VARCHAR.matcher(lowerTypeString); if (varchar.matches()) { return Types.VarCharType.of(Integer.parseInt(varchar.group(1))); } - Matcher 
decimal = DECIMAL.matcher(typeString); + Matcher decimal = DECIMAL.matcher(lowerTypeString); if (decimal.matches()) { return Types.DecimalType.of( Integer.parseInt(decimal.group(1)), Integer.parseInt(decimal.group(2))); } - Matcher time = TIME.matcher(typeString); + Matcher time = TIME.matcher(lowerTypeString); if (time.matches()) { return Types.TimeType.of(Integer.parseInt(time.group(1))); } - Matcher timestampTz = TIMESTAMP_TZ.matcher(typeString); + Matcher timestampTz = TIMESTAMP_TZ.matcher(lowerTypeString); if (timestampTz.matches()) { return Types.TimestampType.withTimeZone(Integer.parseInt(timestampTz.group(1))); } - Matcher timestamp = TIMESTAMP.matcher(typeString); + Matcher timestamp = TIMESTAMP.matcher(lowerTypeString); if (timestamp.matches()) { return Types.TimestampType.withoutTimeZone(Integer.parseInt(timestamp.group(1))); } - return Types.UnparsedType.of(typeString); + return Types.UnparsedType.of(orignalTypeString); } private static Types.StructType readStructType(JsonNode node) { diff --git a/common/src/test/java/org/apache/gravitino/dto/rel/TestTableDTO.java b/common/src/test/java/org/apache/gravitino/dto/rel/TestTableDTO.java index 5f0733c5ea0..44de2e03d40 100644 --- a/common/src/test/java/org/apache/gravitino/dto/rel/TestTableDTO.java +++ b/common/src/test/java/org/apache/gravitino/dto/rel/TestTableDTO.java @@ -19,11 +19,11 @@ package org.apache.gravitino.dto.rel; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertThrows; import java.time.Instant; import org.apache.gravitino.dto.AuditDTO; import org.apache.gravitino.rel.types.Types; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; public class TestTableDTO { @@ -32,7 +32,10 @@ public void testBuildWithoutColumns() { AuditDTO audit = AuditDTO.builder().withCreator("creator").withCreateTime(Instant.now()).build(); TableDTO.Builder builder = TableDTO.builder().withName("t1").withAudit(audit); - 
assertThrows(IllegalArgumentException.class, builder::build); + Assertions.assertDoesNotThrow( + () -> { + builder.build(); + }); } @Test @@ -41,7 +44,11 @@ public void testBuildWithEmptyColumns() { AuditDTO.builder().withCreator("creator").withCreateTime(Instant.now()).build(); TableDTO.Builder builder = TableDTO.builder().withName("t1").withAudit(audit).withColumns(new ColumnDTO[0]); - assertThrows(IllegalArgumentException.class, builder::build); + + Assertions.assertDoesNotThrow( + () -> { + builder.build(); + }); } @Test diff --git a/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java b/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java index 1cbab0d6ed3..27119f0c999 100644 --- a/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java +++ b/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java @@ -262,6 +262,57 @@ public Table alterTable(NameIdentifier ident, TableChange... changes) tableId = te.id(); } + if (isGenericLakehouseCatalog(catalogIdent)) { + // For generic lakehouse catalog, we only update the table entity with basic info. 
+ GenericTableEntity genericTableEntity = + operateOnEntity( + ident, id -> store.get(id, TABLE, GenericTableEntity.class), "GET", tableId); + if (genericTableEntity == null) { + throw new NoSuchTableException("No such table: %s", ident); + } + + GenericTable genericTable = (GenericTable) alteredTable; + GenericTableEntity updatedGenericTableEntity = + operateOnEntity( + ident, + id -> + store.update( + id, + GenericTableEntity.class, + TABLE, + tableEntity -> + GenericTableEntity.getBuilder() + .withId(tableEntity.id()) + .withName(alteredTable.name()) + .withNamespace(getNewNamespace(ident, changes)) + .withFormat(genericTable.format()) + .withAuditInfo( + AuditInfo.builder() + .withCreator(tableEntity.auditInfo().creator()) + .withCreateTime(tableEntity.auditInfo().createTime()) + .withLastModifier( + PrincipalUtils.getCurrentPrincipal().getName()) + .withLastModifiedTime(Instant.now()) + .build()) + .withColumns(tableEntity.columns()) + .withIndexes(genericTable.index()) + .withDistribution(genericTable.distribution()) + .withPartitions(genericTable.partitioning()) + .withSortOrder(genericTable.sortOrder()) + .withProperties(genericTable.properties()) + .withComment(genericTable.comment()) + .build()), + "UPDATE", + tableId); + + return EntityCombinedTable.of(alteredTable, updatedGenericTableEntity) + .withHiddenProperties( + getHiddenPropertyNames( + getCatalogIdentifier(ident), + HasPropertyMetadata::tablePropertiesMetadata, + alteredTable.properties())); + } + TableEntity updatedTableEntity = operateOnEntity( ident, diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java index 226de4dbd78..49141665a53 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java +++ 
b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceNamespaceOperations.java @@ -25,6 +25,7 @@ import com.lancedb.lance.namespace.model.DropNamespaceRequest; import com.lancedb.lance.namespace.model.DropNamespaceResponse; import com.lancedb.lance.namespace.model.ListNamespacesResponse; +import com.lancedb.lance.namespace.model.ListTablesResponse; import java.util.Map; public interface LanceNamespaceOperations { @@ -47,4 +48,6 @@ DropNamespaceResponse dropNamespace( DropNamespaceRequest.BehaviorEnum behavior); void namespaceExists(String namespaceId, String delimiter) throws LanceNamespaceException; + + ListTablesResponse listTables(String id, String delimiter, String pageToken, Integer limit); } diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java index 057dce8fb3c..b8a967cd30b 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/LanceTableOperations.java @@ -18,11 +18,20 @@ */ package org.apache.gravitino.lance.common.ops; -import com.lancedb.lance.namespace.model.ListTablesResponse; +import com.lancedb.lance.namespace.model.CreateTableResponse; +import com.lancedb.lance.namespace.model.DescribeTableResponse; +import java.util.Map; public interface LanceTableOperations { - ListTablesResponse listTables(String id, String delimiter, String pageToken, Integer limit); + DescribeTableResponse describeTable(String tableId, String delimiter); - // todo: add more table operation methods + CreateTableResponse createTable( + String tableId, + String mode, + String delimiter, + String tableLocation, + Map tableProperties, + String rootCatalog, + byte[] arrowStreamBody); } diff --git 
a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/arrow/ArrowRecordBatchList.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/arrow/ArrowRecordBatchList.java new file mode 100644 index 00000000000..b0c6a089d53 --- /dev/null +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/arrow/ArrowRecordBatchList.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.lance.common.ops.arrow; + +import java.util.List; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; + +public class ArrowRecordBatchList { + private final Schema schema; + + @SuppressWarnings("unused") + private final List recordBatches; + + public Schema getSchema() { + return schema; + } + + public ArrowRecordBatchList(Schema schema, List recordBatches) { + this.schema = schema; + this.recordBatches = recordBatches; + } +} diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java index cb1b85752a6..1f9e41a2cb0 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java @@ -20,7 +20,9 @@ import static org.apache.gravitino.lance.common.config.LanceConfig.METALAKE_NAME; import static org.apache.gravitino.lance.common.config.LanceConfig.NAMESPACE_URI; +import static org.apache.gravitino.rel.Column.DEFAULT_VALUE_NOT_SET; +import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -28,14 +30,22 @@ import com.lancedb.lance.namespace.LanceNamespaceException; import com.lancedb.lance.namespace.ObjectIdentifier; import com.lancedb.lance.namespace.model.CreateNamespaceRequest; +import com.lancedb.lance.namespace.model.CreateNamespaceRequest.ModeEnum; import com.lancedb.lance.namespace.model.CreateNamespaceResponse; +import com.lancedb.lance.namespace.model.CreateTableResponse; import com.lancedb.lance.namespace.model.DescribeNamespaceResponse; +import com.lancedb.lance.namespace.model.DescribeTableResponse; 
import com.lancedb.lance.namespace.model.DropNamespaceRequest; import com.lancedb.lance.namespace.model.DropNamespaceResponse; +import com.lancedb.lance.namespace.model.JsonArrowDataType; +import com.lancedb.lance.namespace.model.JsonArrowField; +import com.lancedb.lance.namespace.model.JsonArrowSchema; import com.lancedb.lance.namespace.model.ListNamespacesResponse; import com.lancedb.lance.namespace.model.ListTablesResponse; import com.lancedb.lance.namespace.util.CommonUtil; import com.lancedb.lance.namespace.util.PageUtil; +import java.io.ByteArrayInputStream; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -44,11 +54,27 @@ import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.IntFunction; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.types.DateUnit; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.Bool; +import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; +import org.apache.arrow.vector.types.pojo.ArrowType.Int; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Catalog; import org.apache.gravitino.CatalogChange; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Namespace; import org.apache.gravitino.Schema; import org.apache.gravitino.SchemaChange; import org.apache.gravitino.client.GravitinoClient; @@ -58,10 +84,18 @@ import 
org.apache.gravitino.exceptions.NonEmptyCatalogException; import org.apache.gravitino.exceptions.NonEmptySchemaException; import org.apache.gravitino.exceptions.SchemaAlreadyExistsException; +import org.apache.gravitino.json.JsonUtils; import org.apache.gravitino.lance.common.config.LanceConfig; import org.apache.gravitino.lance.common.ops.LanceNamespaceOperations; import org.apache.gravitino.lance.common.ops.LanceTableOperations; import org.apache.gravitino.lance.common.ops.NamespaceWrapper; +import org.apache.gravitino.lance.common.ops.arrow.ArrowRecordBatchList; +import org.apache.gravitino.rel.Column; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.rel.types.Type; +import org.apache.gravitino.rel.types.Types; +import org.apache.gravitino.rel.types.Types.FixedType; +import org.apache.gravitino.rel.types.Types.UnparsedType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -245,12 +279,6 @@ public void namespaceExists(String namespaceId, String delimiter) throws LanceNa } } - @Override - public ListTablesResponse listTables( - String id, String delimiter, String pageToken, Integer limit) { - throw new UnsupportedOperationException("Not implemented yet"); - } - private boolean isLakehouseCatalog(Catalog catalog) { return catalog.type().equals(Catalog.Type.RELATIONAL) && "generic-lakehouse".equals(catalog.provider()); @@ -467,4 +495,308 @@ private T[] buildChanges( return Stream.concat(setPropertiesStream, removePropertiesStream).toArray(arrayCreator); } + + @Override + public ListTablesResponse listTables( + String id, String delimiter, String pageToken, Integer limit) { + ObjectIdentifier nsId = ObjectIdentifier.of(id, Pattern.quote(delimiter)); + Preconditions.checkArgument( + nsId.levels() <= 2, "Expected at most 2-level namespace but got: %s", nsId.levels()); + String catalogName = nsId.levelAtListPos(0); + Catalog catalog = loadAndValidateLakehouseCatalog(catalogName); + String schemaName = nsId.levelAtListPos(1); + List tables 
= + Arrays.stream(catalog.asTableCatalog().listTables(Namespace.of(schemaName))) + .map(ident -> Joiner.on(delimiter).join(catalogName, schemaName, ident.name())) + .collect(Collectors.toList()); + + Collections.sort(tables); + PageUtil.Page page = PageUtil.splitPage(tables, pageToken, PageUtil.normalizePageSize(limit)); + ListNamespacesResponse response = new ListNamespacesResponse(); + response.setNamespaces(Sets.newHashSet(page.items())); + response.setPageToken(page.nextPageToken()); + + return new ListTablesResponse() + .tables(response.getNamespaces()) + .pageToken(response.getPageToken()); + } + + @Override + public DescribeTableResponse describeTable(String tableId, String delimiter) { + ObjectIdentifier nsId = ObjectIdentifier.of(tableId, Pattern.quote(delimiter)); + Preconditions.checkArgument( + nsId.levels() <= 3, "Expected at most 3-level namespace but got: %s", nsId.levels()); + + String catalogName = nsId.levelAtListPos(0); + Catalog catalog = loadAndValidateLakehouseCatalog(catalogName); + NameIdentifier tableIdentifier = + NameIdentifier.of(nsId.levelAtListPos(1), nsId.levelAtListPos(2)); + + Table table = catalog.asTableCatalog().loadTable(tableIdentifier); + DescribeTableResponse response = new DescribeTableResponse(); + response.setProperties(table.properties()); + response.setLocation(table.properties().get("location")); + response.setSchema(toJsonArrowSchema(table.columns())); + return response; + } + + private JsonArrowSchema toJsonArrowSchema(Column[] columns) { + List fields = new ArrayList<>(); + for (Column column : columns) { + ArrowType arrowType = fromGravitinoType(column.dataType()); + FieldType fieldType = new FieldType(column.nullable(), arrowType, null, null); + Field field = new Field(column.name(), fieldType, null); + + JsonArrowDataType jsonArrowDataType = new JsonArrowDataType(); + // other filed needs to be set accordingly such as list, map, struct + jsonArrowDataType.setType(arrowType.toString()); + + JsonArrowField 
arrowField = new JsonArrowField(); + arrowField.setName(field.getName()); + arrowField.setType(jsonArrowDataType); + + fields.add(arrowField); + } + + JsonArrowSchema jsonArrowSchema = new JsonArrowSchema(); + jsonArrowSchema.setFields(fields); + return jsonArrowSchema; + } + + @Override + public CreateTableResponse createTable( + String tableId, + String mode, + String delimiter, + String tableLocation, + Map tableProperties, + String rootCatalog, + byte[] arrowStreamBody) { + ObjectIdentifier nsId = ObjectIdentifier.of(tableId, Pattern.quote(delimiter)); + Preconditions.checkArgument( + nsId.levels() <= 3, "Expected at most 3-level namespace but got: %s", nsId.levels()); + if (rootCatalog != null) { + List levels = nsId.listStyleId(); + List newLevels = Lists.newArrayList(rootCatalog); + newLevels.addAll(levels); + nsId = ObjectIdentifier.of(newLevels); + } + + // Parser column information. + List columns = Lists.newArrayList(); + if (arrowStreamBody != null) { + ArrowRecordBatchList recordBatchList = parseArrowIpcStream(arrowStreamBody); + columns = extractColumns(recordBatchList); + } + + String catalogName = nsId.levelAtListPos(0); + Catalog catalog = loadAndValidateLakehouseCatalog(catalogName); + + NameIdentifier tableIdentifier = + NameIdentifier.of(nsId.levelAtListPos(1), nsId.levelAtListPos(2)); + + Map createTableProperties = Maps.newHashMap(tableProperties); + createTableProperties.put("location", tableLocation); + createTableProperties.put("mode", mode); + // TODO considering the mode (create, exist_ok, overwrite) + + ModeEnum createMode = ModeEnum.fromValue(mode.toLowerCase()); + switch (createMode) { + case EXIST_OK: + if (catalog.asTableCatalog().tableExists(tableIdentifier)) { + CreateTableResponse response = new CreateTableResponse(); + Table existingTable = catalog.asTableCatalog().loadTable(tableIdentifier); + response.setProperties(existingTable.properties()); + response.setLocation(existingTable.properties().get("location")); + 
response.setVersion(0L); + return response; + } + break; + case CREATE: + if (catalog.asTableCatalog().tableExists(tableIdentifier)) { + throw LanceNamespaceException.conflict( + "Table already exists: " + tableId, + SchemaAlreadyExistsException.class.getSimpleName(), + tableId, + CommonUtil.formatCurrentStackTrace()); + } + break; + case OVERWRITE: + if (catalog.asTableCatalog().tableExists(tableIdentifier)) { + catalog.asTableCatalog().dropTable(tableIdentifier); + } + break; + default: + throw new IllegalArgumentException("Unknown mode: " + mode); + } + + Table t = + catalog + .asTableCatalog() + .createTable( + tableIdentifier, + columns.toArray(new Column[0]), + tableLocation, + createTableProperties); + + CreateTableResponse response = new CreateTableResponse(); + response.setProperties(t.properties()); + response.setLocation(tableLocation); + response.setVersion(0L); + return response; + } + + private ArrowRecordBatchList parseArrowIpcStream(byte[] stream) { + try (BufferAllocator allocator = new RootAllocator(); + ByteArrayInputStream bais = new ByteArrayInputStream(stream); + ArrowStreamReader reader = new ArrowStreamReader(bais, allocator)) { + + org.apache.arrow.vector.types.pojo.Schema schema = reader.getVectorSchemaRoot().getSchema(); + List batches = new ArrayList<>(); + + while (reader.loadNextBatch()) { + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + if (root.getRowCount() > 0) { + batches.add(root); + } + } + return new ArrowRecordBatchList(schema, batches); + } catch (Exception e) { + throw new RuntimeException("Failed to parse Arrow IPC stream", e); + } + } + + private List extractColumns(ArrowRecordBatchList recordBatchList) { + List columns = new ArrayList<>(); + org.apache.arrow.vector.types.pojo.Schema arrowSchema = recordBatchList.getSchema(); + + for (org.apache.arrow.vector.types.pojo.Field field : arrowSchema.getFields()) { + columns.add(toGravitinoColumn(field)); + } + return columns; + } + + private Column 
toGravitinoColumn(Field field) { + return Column.of( + field.getName(), + toGravitinoType(field), + field.getMetadata().get("comment"), + field.isNullable(), + false, + DEFAULT_VALUE_NOT_SET); + } + + private ArrowType fromGravitinoType(Type type) { + switch (type.name()) { + case BOOLEAN: + return Bool.INSTANCE; + case BYTE: + return new Int(8, true); + case SHORT: + return new Int(16, true); + case INTEGER: + return new Int(32, true); + case LONG: + return new Int(64, true); + case FLOAT: + return new FloatingPoint(FloatingPointPrecision.SINGLE); + case DOUBLE: + return new FloatingPoint(FloatingPointPrecision.DOUBLE); + case DECIMAL: + // Lance uses FIXED_SIZE_BINARY for decimal types + return new ArrowType.FixedSizeBinary(16); // assuming 16 bytes for decimal + case DATE: + return new ArrowType.Date(DateUnit.DAY); + case TIME: + return new ArrowType.Time(TimeUnit.MILLISECOND, 32); + case TIMESTAMP: + return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); + case VARCHAR: + case STRING: + return new ArrowType.Utf8(); + case FIXED: + FixedType fixedType = (FixedType) type; + return new ArrowType.FixedSizeBinary(fixedType.length()); + case BINARY: + return new ArrowType.Binary(); + case UNPARSED: + String typeStr = ((UnparsedType) type).unparsedType().toString(); + try { + Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class); + if (t instanceof Types.ListType) { + return ArrowType.List.INSTANCE; + } else if (t instanceof Types.MapType) { + return new ArrowType.Map(false); + } else if (t instanceof Types.StructType) { + return ArrowType.Struct.INSTANCE; + } else { + throw new UnsupportedOperationException( + "Unsupported UnparsedType conversion: " + t.simpleString()); + } + } catch (Exception e) { + // FixedSizeListArray(integer, 3) + if (typeStr.startsWith("FixedSizeListArray")) { + int size = + Integer.parseInt( + typeStr.substring(typeStr.indexOf(',') + 1, typeStr.indexOf(')')).trim()); + return new ArrowType.FixedSizeList(size); + } + throw 
new UnsupportedOperationException("Failed to parse UnparsedType: " + typeStr, e); + } + default: + throw new UnsupportedOperationException("Unsupported Gravitino type: " + type.name()); + } + } + + private Type toGravitinoType(Field field) { + FieldType parentType = field.getFieldType(); + ArrowType arrowType = parentType.getType(); + if (arrowType instanceof Bool) { + return Types.BooleanType.get(); + } else if (arrowType instanceof Int) { + Int intType = (Int) arrowType; + switch (intType.getBitWidth()) { + case 8 -> { + return Types.ByteType.get(); + } + case 16 -> { + return Types.ShortType.get(); + } + case 32 -> { + return Types.IntegerType.get(); + } + case 64 -> { + return Types.LongType.get(); + } + default -> throw new UnsupportedOperationException( + "Unsupported Int bit width: " + intType.getBitWidth()); + } + } else if (arrowType instanceof FloatingPoint) { + FloatingPoint floatingPoint = (FloatingPoint) arrowType; + switch (floatingPoint.getPrecision()) { + case SINGLE: + return Types.FloatType.get(); + case DOUBLE: + return Types.DoubleType.get(); + default: + throw new UnsupportedOperationException( + "Unsupported FloatingPoint precision: " + floatingPoint.getPrecision()); + } + } else if (arrowType instanceof ArrowType.FixedSizeBinary) { + ArrowType.FixedSizeBinary fixedSizeBinary = (ArrowType.FixedSizeBinary) arrowType; + return Types.FixedType.of(fixedSizeBinary.getByteWidth()); + } else if (arrowType instanceof ArrowType.Date) { + return Types.DateType.get(); + } else if (arrowType instanceof ArrowType.Time) { + return Types.TimeType.get(); + } else if (arrowType instanceof ArrowType.Timestamp) { + return Types.TimestampType.withoutTimeZone(); + } else if (arrowType instanceof ArrowType.Utf8) { + return Types.StringType.get(); + } else if (arrowType instanceof ArrowType.Binary) { + return Types.BinaryType.get(); + } else { + return Types.UnparsedType.of(arrowType.toString()); + } + } } diff --git 
a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/TestArrowIPC.java b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/TestArrowIPC.java new file mode 100644 index 00000000000..71f1bfc587c --- /dev/null +++ b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/TestArrowIPC.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.lance.common; + +import java.io.File; +import java.io.FileOutputStream; +import java.util.Arrays; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.Text; +import org.junit.jupiter.api.Test; + +public class TestArrowIPC { + + private static final String FILENAME = "/tmp/initial_data.arrow"; + private static final int RECORD_COUNT = 3; + + @Test + void testIPC() throws Exception { + try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { + + Schema schema = + new Schema( + Arrays.asList( + Field.nullable("id", new ArrowType.Int(32, true)), + Field.nullable("value", new ArrowType.Utf8()))); + + try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + IntVector idVector = (IntVector) root.getVector("id"); + VarCharVector valueVector = (VarCharVector) root.getVector("value"); + + idVector.allocateNew(); + valueVector.allocateNew(); + + for (int i = 0; i < RECORD_COUNT; i++) { + idVector.setSafe(i, i + 1); + valueVector.setSafe(i, new Text("Row_" + (i + 1))); + } + + idVector.setValueCount(RECORD_COUNT); + valueVector.setValueCount(RECORD_COUNT); + root.setRowCount(RECORD_COUNT); + + File outFile = new File(FILENAME); + try (FileOutputStream fos = new FileOutputStream(outFile); + ArrowStreamWriter writer = new ArrowStreamWriter(root, null, fos)) { + + writer.start(); + writer.writeBatch(); + writer.end(); + } + + System.out.println( + "✅ Successfully generated Arrow IPC Stream file: " + outFile.getAbsolutePath()); + System.out.println("--- Ready for cURL test ---"); + } + } + 
} +} diff --git a/lance/lance-rest-server/build.gradle.kts b/lance/lance-rest-server/build.gradle.kts index 4e4ca7db3ce..0d7a2c98526 100644 --- a/lance/lance-rest-server/build.gradle.kts +++ b/lance/lance-rest-server/build.gradle.kts @@ -38,6 +38,8 @@ dependencies { } implementation(project(":lance:lance-common")) + implementation(libs.lance) + implementation(libs.commons.lang3) implementation(libs.bundles.jetty) implementation(libs.bundles.jersey) diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java index dd548541add..493d0acace8 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceNamespaceOperations.java @@ -28,6 +28,7 @@ import com.lancedb.lance.namespace.model.DropNamespaceRequest; import com.lancedb.lance.namespace.model.DropNamespaceResponse; import com.lancedb.lance.namespace.model.ListNamespacesResponse; +import com.lancedb.lance.namespace.model.ListTablesResponse; import java.util.regex.Pattern; import javax.inject.Inject; import javax.ws.rs.Consumes; @@ -159,4 +160,20 @@ public Response namespaceExists( return LanceExceptionMapper.toRESTResponse(namespaceId, e); } } + + @GET + @Path("{id}/table/list") + public Response listTables( + @PathParam("id") String namespaceId, + @DefaultValue("$") @QueryParam("delimiter") String delimiter, + @QueryParam("page_token") String pageToken, + @QueryParam("limit") Integer limit) { + try { + ListTablesResponse response = + lanceNamespace.asNamespaceOps().listTables(namespaceId, delimiter, pageToken, limit); + return Response.ok(response).build(); + } catch (Exception e) { + return LanceExceptionMapper.toRESTResponse(namespaceId, e); + } + } } diff --git 
a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java index 1f30d1b326b..359fc94c424 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceTableOperations.java @@ -18,27 +18,27 @@ */ package org.apache.gravitino.lance.service.rest; -import static org.apache.gravitino.lance.common.ops.NamespaceWrapper.NAMESPACE_DELIMITER_DEFAULT; - -import com.codahale.metrics.annotation.ResponseMetered; -import com.codahale.metrics.annotation.Timed; -import com.lancedb.lance.namespace.model.ListTablesResponse; +import com.fasterxml.jackson.core.type.TypeReference; +import com.lancedb.lance.namespace.model.CreateTableResponse; +import com.lancedb.lance.namespace.model.DescribeTableResponse; +import com.lancedb.lance.namespace.util.JsonUtil; +import java.util.Map; import javax.inject.Inject; import javax.ws.rs.Consumes; import javax.ws.rs.DefaultValue; -import javax.ws.rs.Encoded; -import javax.ws.rs.GET; +import javax.ws.rs.HeaderParam; +import javax.ws.rs.POST; import javax.ws.rs.Path; import javax.ws.rs.PathParam; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; +import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.lance.common.ops.NamespaceWrapper; import org.apache.gravitino.lance.service.LanceExceptionMapper; -import org.apache.gravitino.metrics.MetricNames; -@Path("/v1/namespace/{id}/table") +@Path("/v1/table/{id}") @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) public class LanceTableOperations { @@ -50,21 +50,68 @@ public LanceTableOperations(NamespaceWrapper lanceNamespace) { this.lanceNamespace = lanceNamespace; } - @GET - @Path("/list") - 
@Timed(name = "list-tables." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) - @ResponseMetered(name = "list-tables", absolute = true) - public Response listTables( - @Encoded @PathParam("id") String namespaceId, - @DefaultValue(NAMESPACE_DELIMITER_DEFAULT) @QueryParam("delimiter") String delimiter, - @QueryParam("page_token") String pageToken, - @QueryParam("limit") Integer limit) { + @POST + @Path("/describe") + public Response describeTable( + @PathParam("id") String tableId, + @DefaultValue("$") @QueryParam("delimiter") String delimiter) { + try { + DescribeTableResponse response = + lanceNamespace.asTableOps().describeTable(tableId, delimiter); + return Response.ok(response).build(); + } catch (Exception e) { + return LanceExceptionMapper.toRESTResponse(tableId, e); + } + } + + @POST + @Path("/create") + @Consumes("application/vnd.apache.arrow.stream") + @Produces("application/json") + public Response createTable( + @PathParam("id") String tableId, + @QueryParam("mode") @DefaultValue("create") String mode, // create, exist_ok, overwrite + @QueryParam("delimiter") @DefaultValue("$") String delimiter, + @HeaderParam("x-lance-table-location") String tableLocation, + @HeaderParam("x-lance-table-properties") String tableProperties, + @HeaderParam("x-lance-root-catalog") String rootCatalog, + byte[] arrowStreamBody) { + try { + Map props = + JsonUtil.mapper().readValue(tableProperties, new TypeReference>() {}); + CreateTableResponse response = + lanceNamespace + .asTableOps() + .createTable( + tableId, mode, delimiter, tableLocation, props, rootCatalog, arrowStreamBody); + return Response.ok(response).build(); + } catch (Exception e) { + return LanceExceptionMapper.toRESTResponse(tableId, e); + } + } + + @POST + @Path("/create-empty") + public Response createEmptyTable( + @PathParam("id") String tableId, + @QueryParam("mode") @DefaultValue("create") String mode, // create, exist_ok, overwrite + @QueryParam("delimiter") @DefaultValue("$") String delimiter, + 
@HeaderParam("x-lance-table-location") String tableLocation, + @HeaderParam("x-lance-root-catalog") String rootCatalog, + @HeaderParam("x-lance-table-properties") String tableProperties) { try { - ListTablesResponse response = - lanceNamespace.asTableOps().listTables(namespaceId, delimiter, pageToken, limit); + Map props = + StringUtils.isBlank(tableProperties) + ? Map.of() + : JsonUtil.mapper() + .readValue(tableProperties, new TypeReference>() {}); + CreateTableResponse response = + lanceNamespace + .asTableOps() + .createTable(tableId, mode, delimiter, tableLocation, props, rootCatalog, null); return Response.ok(response).build(); } catch (Exception e) { - return LanceExceptionMapper.toRESTResponse(namespaceId, e); + return LanceExceptionMapper.toRESTResponse(tableId, e); } } } diff --git a/trino-connector/trino-connector/src/main/java/org/apache/gravitino/trino/connector/catalog/jdbc/mysql/MySQLMetadataAdapter.java b/trino-connector/trino-connector/src/main/java/org/apache/gravitino/trino/connector/catalog/jdbc/mysql/MySQLMetadataAdapter.java index 1c8d051cd57..d77fd620168 100644 --- a/trino-connector/trino-connector/src/main/java/org/apache/gravitino/trino/connector/catalog/jdbc/mysql/MySQLMetadataAdapter.java +++ b/trino-connector/trino-connector/src/main/java/org/apache/gravitino/trino/connector/catalog/jdbc/mysql/MySQLMetadataAdapter.java @@ -283,6 +283,8 @@ public ConnectorTableMetadata getTableMetadata(GravitinoTable gravitinoTable) { .collect(Collectors.toUnmodifiableList()); uniqueKeys.add(String.format("%s:%s", index.name(), Strings.join(columns, ','))); break; + default: + throw new UnsupportedOperationException("Unsupported index type: " + index.type()); } } if (!primaryKeys.isEmpty()) { From bedc52221669b072bfd2135fa644e2530ade2008 Mon Sep 17 00:00:00 2001 From: Jerry Shao Date: Tue, 28 Oct 2025 11:18:10 +0800 Subject: [PATCH 14/43] [MINOR] Improve(lance-namespace): Slightly improve the configuration and build script (#8918) ### What changes were 
proposed in this pull request? This PR improves the configuration key and also the build kts for lance namespace service. ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Local test. --- .../gravitino-lance-rest-server.conf.template | 7 ++-- conf/gravitino.conf.template | 10 +++--- lance/lance-common/build.gradle.kts | 9 +----- .../lance/common/config/LanceConfig.java | 22 ++++++------- .../GravitinoLanceNamespaceWrapper.java | 6 ++-- .../lance/common/config/TestLanceConfig.java | 32 +++---------------- lance/lance-rest-server/build.gradle.kts | 1 - .../gravitino/lance/LanceRESTService.java | 4 ++- 8 files changed, 32 insertions(+), 59 deletions(-) diff --git a/conf/gravitino-lance-rest-server.conf.template b/conf/gravitino-lance-rest-server.conf.template index 137daf145d7..d74fec3eb7e 100644 --- a/conf/gravitino-lance-rest-server.conf.template +++ b/conf/gravitino-lance-rest-server.conf.template @@ -43,6 +43,7 @@ gravitino.lance-rest.responseHeaderSize = 131072 # THE CONFIGURATION FOR Lance namespace backend # The backend Lance namespace for Lance REST service, it's recommended to use Gravitino gravitino.lance-rest.namespace-backend = gravitino -gravitino.lance-rest.uri = http://localhost:8090 -# replace metalake with your metalake name in Gravitino -# gravitino.lance-rest.metalake-name = metalake +# The uri of the Lance REST service gravitino namespace backend +gravitino.lance-rest.gravitino.uri = http://localhost:8090 +# The metalake name used for Lance REST service gravitino namespace backend, please create the metalake before using it, and configure the metalake name here. 
+# gravitino.lance-rest.gravitino.metalake-name = metalake diff --git a/conf/gravitino.conf.template b/conf/gravitino.conf.template index a1fdb005cad..44a9e50f82c 100644 --- a/conf/gravitino.conf.template +++ b/conf/gravitino.conf.template @@ -81,9 +81,8 @@ gravitino.authorization.enable = false gravitino.authorization.serviceAdmins = anonymous # THE CONFIGURATION FOR AUXILIARY SERVICE -# Auxiliary service names, separate by ',' such as iceberg-rest,lance-rest +# Auxiliary service names, separate by ',', currently support iceberg-rest and lance-rest gravitino.auxService.names = iceberg-rest - # Iceberg REST service classpath gravitino.iceberg-rest.classpath = iceberg-rest-server/libs, iceberg-rest-server/conf # Iceberg REST service host @@ -105,6 +104,7 @@ gravitino.lance-rest.httpPort = 9101 # THE CONFIGURATION FOR Lance namespace backend # The backend Lance namespace for Lance REST service, it's recommended to use Gravitino gravitino.lance-rest.namespace-backend = gravitino -gravitino.lance-rest.uri = http://localhost:8090 -# replace metalake with your metalake name in Gravitino -# gravitino.lance-rest.metalake-name = metalake +# The uri of the Lance REST service gravitino namespace backend +gravitino.lance-rest.gravitino.uri = http://localhost:8090 +# The metalake name used for Lance REST service gravitino namespace backend, please create the metalake first before using it, and configure the metalake name here. 
+# gravitino.lance-rest.gravitino.metalake-name = metalake diff --git a/lance/lance-common/build.gradle.kts b/lance/lance-common/build.gradle.kts index 27057950f3a..43cf5f42b85 100644 --- a/lance/lance-common/build.gradle.kts +++ b/lance/lance-common/build.gradle.kts @@ -25,14 +25,7 @@ plugins { } dependencies { - implementation(project(":api")) - implementation(project(":catalogs:catalog-common")) - implementation(project(":clients:client-java")) { - exclude("*") - } - implementation(project(":common")) { - exclude("*") - } + implementation(project(":clients:client-java-runtime", configuration = "shadow")) implementation(project(":core")) { exclude("*") } diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java index 3703189ba87..f517d1a349c 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java @@ -36,29 +36,29 @@ public class LanceConfig extends Config implements OverwriteDefaultConfig { public static final int DEFAULT_LANCE_REST_SERVICE_HTTP_PORT = 9101; public static final int DEFAULT_LANCE_REST_SERVICE_HTTPS_PORT = 9533; - public static final String DEFAULT_NAMESPACE_BACKEND = "gravitino"; - public static final String DEFAULT_URI = "http://localhost:8090"; + public static final String GRAVITINO_NAMESPACE_BACKEND = "gravitino"; + public static final String GRAVITINO_URI = "http://localhost:8090"; public static final ConfigEntry NAMESPACE_BACKEND = new ConfigBuilder(CONFIG_NAMESPACE_BACKEND) .doc("The backend implementation for namespace operations") .version(ConfigConstants.VERSION_0_1_0) .stringConf() - .createWithDefault(DEFAULT_NAMESPACE_BACKEND); + .createWithDefault(GRAVITINO_NAMESPACE_BACKEND); public static final ConfigEntry METALAKE_NAME = - new 
ConfigBuilder(CONFIG_METALAKE) - .doc("The Metalake name for Gravitino namespace backend") + new ConfigBuilder(LANCE_CONFIG_PREFIX + GRAVITINO_NAMESPACE_BACKEND + "." + CONFIG_METALAKE) + .doc("The Metalake name for Lance Gravitino namespace backend") .version(ConfigConstants.VERSION_0_1_0) .stringConf() .create(); - public static final ConfigEntry NAMESPACE_URI = - new ConfigBuilder(CONFIG_URI) - .doc("The URI for the namespace backend, e.g., Gravitino server URI") + public static final ConfigEntry NAMESPACE_BACKEND_URI = + new ConfigBuilder(LANCE_CONFIG_PREFIX + GRAVITINO_NAMESPACE_BACKEND + "." + CONFIG_URI) + .doc("The URI of the namespace backend, e.g., Gravitino server URI") .version(ConfigConstants.VERSION_0_1_0) .stringConf() - .createWithDefault(DEFAULT_URI); + .createWithDefault(GRAVITINO_URI); public LanceConfig(Map properties) { super(false); @@ -73,8 +73,8 @@ public String getNamespaceBackend() { return get(NAMESPACE_BACKEND); } - public String getNamespaceUri() { - return get(NAMESPACE_URI); + public String getNamespaceBackendUri() { + return get(NAMESPACE_BACKEND_URI); } public String getGravitinoMetalake() { diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java index 1f9e41a2cb0..fe6404a424c 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/ops/gravitino/GravitinoLanceNamespaceWrapper.java @@ -19,7 +19,7 @@ package org.apache.gravitino.lance.common.ops.gravitino; import static org.apache.gravitino.lance.common.config.LanceConfig.METALAKE_NAME; -import static org.apache.gravitino.lance.common.config.LanceConfig.NAMESPACE_URI; +import static 
org.apache.gravitino.lance.common.config.LanceConfig.NAMESPACE_BACKEND_URI; import static org.apache.gravitino.rel.Column.DEFAULT_VALUE_NOT_SET; import com.google.common.base.Joiner; @@ -111,11 +111,11 @@ public GravitinoLanceNamespaceWrapper(LanceConfig config) { @Override protected void initialize() { - String uri = config().get(NAMESPACE_URI); + String uri = config().get(NAMESPACE_BACKEND_URI); String metalakeName = config().get(METALAKE_NAME); Preconditions.checkArgument( StringUtils.isNotBlank(metalakeName), - "Metalake name must be provided for Gravitino namespace backend"); + "Metalake name must be provided for Lance Gravitino namespace backend"); this.client = GravitinoClient.builder(uri).withMetalake(metalakeName).build(); } diff --git a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java index 44577a2dfac..6544ca7a3ad 100644 --- a/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java +++ b/lance/lance-common/src/test/java/org/apache/gravitino/lance/common/config/TestLanceConfig.java @@ -26,27 +26,6 @@ import org.junit.jupiter.api.Test; public class TestLanceConfig { - @Test - public void testLoadLanceConfig() { - Map properties = - ImmutableMap.of("gravitino.lance-rest.namespace-backend", "test_catalog"); - - LanceConfig lanceConfig = new LanceConfig(); - lanceConfig.loadFromMap(properties, k -> k.startsWith("gravitino.lance-rest.")); - Assertions.assertEquals("gravitino", lanceConfig.getNamespaceBackend()); - - LanceConfig lanceConfig2 = new LanceConfig(properties); - Assertions.assertEquals("gravitino", lanceConfig2.getNamespaceBackend()); - } - - @Test - public void testDefaultCatalogName() { - // Test default namespace backend name when not specified - Map properties = ImmutableMap.of(); - LanceConfig lanceConfig = new LanceConfig(properties); - 
Assertions.assertEquals("gravitino", lanceConfig.getNamespaceBackend()); - } - @Test public void testLanceHttpPort() { Map properties = ImmutableMap.of(); @@ -74,18 +53,18 @@ public void testGravitinoUriAndMetalake() { // Test default values Map properties = ImmutableMap.of(); LanceConfig lanceConfig = new LanceConfig(properties); - Assertions.assertEquals("http://localhost:8090", lanceConfig.getNamespaceUri()); + Assertions.assertEquals("http://localhost:8090", lanceConfig.getNamespaceBackendUri()); Assertions.assertNull(lanceConfig.getGravitinoMetalake()); // No default, must be configured // Test custom values properties = ImmutableMap.of( - LanceConfig.NAMESPACE_URI.getKey(), + LanceConfig.NAMESPACE_BACKEND_URI.getKey(), "http://gravitino-server:8090", LanceConfig.METALAKE_NAME.getKey(), "production"); lanceConfig = new LanceConfig(properties); - Assertions.assertEquals("http://gravitino-server:8090", lanceConfig.getNamespaceUri()); + Assertions.assertEquals("http://gravitino-server:8090", lanceConfig.getNamespaceBackendUri()); Assertions.assertEquals("production", lanceConfig.getGravitinoMetalake()); } @@ -94,7 +73,7 @@ public void testCompleteConfiguration() { // Test all configurations together for auxiliary mode Map properties = ImmutableMap.builder() - .put(LanceConfig.NAMESPACE_URI.getKey(), "http://gravitino-prod:8090") + .put(LanceConfig.NAMESPACE_BACKEND_URI.getKey(), "http://gravitino-prod:8090") .put(LanceConfig.METALAKE_NAME.getKey(), "production") .put(LanceConfig.NAMESPACE_BACKEND.getKey(), "gravitino") .put(JettyServerConfig.WEBSERVER_HTTP_PORT.getKey(), "9101") @@ -103,8 +82,7 @@ public void testCompleteConfiguration() { LanceConfig lanceConfig = new LanceConfig(properties); // Verify all config values - Assertions.assertEquals("gravitino", lanceConfig.getNamespaceBackend()); - Assertions.assertEquals("http://gravitino-prod:8090", lanceConfig.getNamespaceUri()); + Assertions.assertEquals("http://gravitino-prod:8090", 
lanceConfig.getNamespaceBackendUri()); Assertions.assertEquals("production", lanceConfig.getGravitinoMetalake()); JettyServerConfig jettyConfig = JettyServerConfig.fromConfig(lanceConfig); diff --git a/lance/lance-rest-server/build.gradle.kts b/lance/lance-rest-server/build.gradle.kts index 0d7a2c98526..7befc28b35a 100644 --- a/lance/lance-rest-server/build.gradle.kts +++ b/lance/lance-rest-server/build.gradle.kts @@ -26,7 +26,6 @@ plugins { dependencies { implementation(project(":api")) - implementation(project(":catalogs:catalog-common")) implementation(project(":common")) { exclude("*") } diff --git a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java index 2d9f3e8823d..8c800e49d64 100644 --- a/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java +++ b/lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java @@ -48,6 +48,8 @@ public class LanceRESTService implements GravitinoAuxiliaryService { public static final String SERVICE_NAME = "lance-rest"; public static final String LANCE_SPEC = "/lance/*"; + private static final String LANCE_REST_SPEC_PACKAGE = "org.apache.gravitino.lance.service.rest"; + private JettyServer server; private NamespaceWrapper lanceNamespace; @@ -70,7 +72,7 @@ public void serviceInit(Map properties) { ResourceConfig resourceConfig = new ResourceConfig(); resourceConfig.register(JacksonFeature.class); - resourceConfig.packages("org.apache.gravitino.lance.service.rest"); + resourceConfig.packages(LANCE_REST_SPEC_PACKAGE); resourceConfig.register( new AbstractBinder() { @Override From 4a92762501a1c1b8e680746414d69c81cd1cda05 Mon Sep 17 00:00:00 2001 From: mchades Date: Tue, 28 Oct 2025 11:33:39 +0800 Subject: [PATCH 15/43] [#8919] improve(lance-table): Supports object store configurations for Lance table (#8920) ### What changes were proposed in this pull 
request? - The properties with the prefix `lance.storage.` will be used for lance table storage ### Why are the changes needed? Fix: #8919 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? by hand --- .../GenericLakehouseCatalogOperations.java | 67 +++++++++++++------ ...ricLakehouseCatalogPropertiesMetadata.java | 10 ++- ...ericLakehouseSchemaPropertiesMetadata.java | 10 ++- ...nericLakehouseTablePropertiesMetadata.java | 30 +++++++-- .../lance/LanceCatalogOperations.java | 14 +++- 5 files changed, 104 insertions(+), 27 deletions(-) diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java index 0f85532e8c2..8e823a18a9e 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java @@ -19,6 +19,7 @@ package org.apache.gravitino.catalog.lakehouse; import static org.apache.gravitino.Entity.EntityType.TABLE; +import static org.apache.gravitino.catalog.lakehouse.GenericLakehouseTablePropertiesMetadata.LOCATION; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Maps; @@ -26,6 +27,8 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; +import org.apache.commons.collections4.MapUtils; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Catalog; import org.apache.gravitino.Entity; @@ -67,15 +70,14 @@ public class GenericLakehouseCatalogOperations private static final String SLASH = "/"; private final ManagedSchemaOperations managedSchemaOps; - - @SuppressWarnings("unused") // todo: remove this after 
implementing table operations - private Optional catalogLakehouseDir; - private static final Map SUPPORTED_FORMATS = Maps.newHashMap(); + private Optional catalogLakehouseDir; + private Map catalogConfig; private CatalogInfo catalogInfo; private HasPropertyMetadata propertiesMetadata; + /** * Initializes the generic lakehouse catalog operations with the provided configuration. * @@ -97,6 +99,9 @@ public void initialize( StringUtils.isNotBlank(catalogDir) ? Optional.of(catalogDir).map(this::ensureTrailingSlash).map(Path::new) : Optional.empty(); + this.catalogConfig = conf; + this.catalogInfo = info; + this.propertiesMetadata = propertiesMetadata; } public GenericLakehouseCatalogOperations() { @@ -193,11 +198,15 @@ public Table createTable( SortOrder[] sortOrders, Index[] indexes) throws NoSuchSchemaException, TableAlreadyExistsException { - String format = properties.getOrDefault("format", "lance"); - String tableLocation = calculateTableLocation(ident, properties); + Schema schema = loadSchema(NameIdentifier.of(ident.namespace().levels())); + String tableLocation = calculateTableLocation(schema, ident, properties); + Map tableStorageProps = calculateTableStorageProps(schema, properties); + Map newProperties = Maps.newHashMap(properties); - newProperties.put("location", tableLocation); + newProperties.put(LOCATION, tableLocation); + newProperties.putAll(tableStorageProps); + String format = properties.getOrDefault("format", "lance"); LakehouseCatalogOperations lakehouseCatalogOperations = SUPPORTED_FORMATS.compute( format, @@ -212,22 +221,13 @@ public Table createTable( } private String calculateTableLocation( - NameIdentifier tableIdent, Map tableProperties) { - String tableLocation = tableProperties.get("location"); + Schema schema, NameIdentifier tableIdent, Map tableProperties) { + String tableLocation = tableProperties.get(LOCATION); if (StringUtils.isNotBlank(tableLocation)) { return ensureTrailingSlash(tableLocation); } - String schemaLocation; - try { - 
Schema schema = loadSchema(NameIdentifier.of(tableIdent.namespace().levels())); - schemaLocation = schema.properties().get("location"); - } catch (NoSuchSchemaException e) { - throw new RuntimeException( - String.format( - "Failed to load schema for table %s to determine default location.", tableIdent), - e); - } + String schemaLocation = schema.properties() == null ? null : schema.properties().get(LOCATION); // If we do not set location in table properties, and schema location is set, use schema // location @@ -323,4 +323,33 @@ private LakehouseCatalogOperations createLakehouseCatalogOperations( operations.initialize(properties, catalogInfo, propertiesMetadata); return operations; } + + /** + * Calculate the table storage properties by merging catalog config, schema properties and table + * properties. The precedence is: table properties > schema properties > catalog config. + * + * @param schema The schema of the table. + * @param tableProps The table properties. + * @return The merged table storage properties. 
+ */ + private Map calculateTableStorageProps( + Schema schema, Map tableProps) { + Map storageProps = getLanceTableStorageOptions(catalogConfig); + storageProps.putAll(getLanceTableStorageOptions(schema.properties())); + storageProps.putAll(getLanceTableStorageOptions(tableProps)); + return storageProps; + } + + private Map getLanceTableStorageOptions(Map properties) { + if (MapUtils.isEmpty(properties)) { + return Maps.newHashMap(); + } + return properties.entrySet().stream() + .filter( + e -> + e.getKey() + .startsWith( + GenericLakehouseTablePropertiesMetadata.LANCE_TABLE_STORAGE_OPTION_PREFIX)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java index 01dfc1da171..e381558c321 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java @@ -19,6 +19,7 @@ package org.apache.gravitino.catalog.lakehouse; +import static org.apache.gravitino.catalog.lakehouse.GenericLakehouseTablePropertiesMetadata.LANCE_TABLE_STORAGE_OPTION_PREFIX; import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; import com.google.common.collect.ImmutableList; @@ -42,7 +43,14 @@ public class GenericLakehouseCatalogPropertiesMetadata extends BaseCatalogProper "The root directory of the lakehouse catalog.", false /* immutable */, null, /* defaultValue */ - false /* hidden */)); + false /* hidden */), + PropertyEntry.stringOptionalPropertyPrefixEntry( + LANCE_TABLE_STORAGE_OPTION_PREFIX, + "The storage options passed to Lance table.", + 
false /* immutable */, + null /* default value*/, + false /* hidden */, + false /* reserved */)); PROPERTIES_METADATA = Maps.uniqueIndex(propertyEntries, PropertyEntry::getName); } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java index 52a65e7698d..a6da0ac2ded 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java @@ -18,6 +18,7 @@ */ package org.apache.gravitino.catalog.lakehouse; +import static org.apache.gravitino.catalog.lakehouse.GenericLakehouseTablePropertiesMetadata.LANCE_TABLE_STORAGE_OPTION_PREFIX; import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; import com.google.common.collect.ImmutableList; @@ -41,7 +42,14 @@ public class GenericLakehouseSchemaPropertiesMetadata extends BasePropertiesMeta "The root directory of the lakehouse schema.", false /* immutable */, null, /* defaultValue */ - false /* hidden */)); + false /* hidden */), + PropertyEntry.stringOptionalPropertyPrefixEntry( + LANCE_TABLE_STORAGE_OPTION_PREFIX, + "The storage options passed to Lance table.", + false /* immutable */, + null /* default value*/, + false /* hidden */, + false /* reserved */)); PROPERTIES_METADATA = Maps.uniqueIndex(propertyEntries, PropertyEntry::getName); } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java index 362b10dbe4a..e9a61a6b0fc 100644 --- 
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java @@ -18,21 +18,43 @@ */ package org.apache.gravitino.catalog.lakehouse; -import com.google.common.collect.ImmutableMap; +import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; +import java.util.List; import java.util.Map; import org.apache.gravitino.connector.BasePropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; public class GenericLakehouseTablePropertiesMetadata extends BasePropertiesMetadata { + public static final String LOCATION = "location"; + public static final String LANCE_TABLE_STORAGE_OPTION_PREFIX = "lance.storage."; - private static final Map> propertiesMetadata; + private static final Map> PROPERTIES_METADATA; static { - propertiesMetadata = ImmutableMap.of(); + List> propertyEntries = + ImmutableList.of( + stringOptionalPropertyEntry( + LOCATION, + "The root directory of the lakehouse table.", + true /* immutable */, + null, /* defaultValue */ + false /* hidden */), + PropertyEntry.stringOptionalPropertyPrefixEntry( + LANCE_TABLE_STORAGE_OPTION_PREFIX, + "The storage options passed to Lance table.", + false /* immutable */, + null /* default value*/, + false /* hidden */, + false /* reserved */)); + + PROPERTIES_METADATA = Maps.uniqueIndex(propertyEntries, PropertyEntry::getName); } @Override protected Map> specificPropertyEntries() { - return propertiesMetadata; + return PROPERTIES_METADATA; } } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java 
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java index 342826a882d..dcfe6bd4896 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java @@ -19,6 +19,9 @@ package org.apache.gravitino.catalog.lakehouse.lance; +import static org.apache.gravitino.catalog.lakehouse.GenericLakehouseTablePropertiesMetadata.LANCE_TABLE_STORAGE_OPTION_PREFIX; +import static org.apache.gravitino.catalog.lakehouse.GenericLakehouseTablePropertiesMetadata.LOCATION; + import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.lancedb.lance.Dataset; @@ -114,13 +117,20 @@ public Table createTable( Index[] indexes) throws NoSuchSchemaException, TableAlreadyExistsException { // Ignore partitions, distributions, sortOrders, and indexes for Lance tables; - String location = properties.get("location"); + String location = properties.get(LOCATION); + Map storageProps = + properties.entrySet().stream() + .filter(e -> e.getKey().startsWith(LANCE_TABLE_STORAGE_OPTION_PREFIX)) + .collect( + Collectors.toMap( + e -> e.getKey().substring(LANCE_TABLE_STORAGE_OPTION_PREFIX.length()), + Map.Entry::getValue)); try (Dataset dataset = Dataset.create( new RootAllocator(), location, convertColumnsToSchema(columns), - new WriteParams.Builder().build())) { + new WriteParams.Builder().withStorageOptions(storageProps).build())) { GenericLakehouseTable.Builder builder = GenericLakehouseTable.builder(); return builder .withName(ident.name()) From d0c58f9fe161090c2c14cee1c2028dd0f2972b75 Mon Sep 17 00:00:00 2001 From: Mini Yu Date: Tue, 28 Oct 2025 20:22:26 +0800 Subject: [PATCH 16/43] [#8921] improvement(catalogs): Add ITs for lance table operations. 
(#8923) ### What changes were proposed in this pull request? Add some ITs to cover lance table operations and fix a bug by the way. ### Why are the changes needed? It's an improvement. Fix: #8921 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? It's just the test itself. --- .../build.gradle.kts | 1 + .../GenericLakehouseCatalogOperations.java | 39 +- .../test/CatalogGenericLakehouseLanceIT.java | 419 ++++++++++++++++++ .../base/TableVersionBaseSQLProvider.java | 5 +- .../TableVersionPostgreSQLProvider.java | 37 +- .../relational/service/TableMetaService.java | 8 + .../relational/utils/POConverters.java | 40 +- .../gravitino/storage/TestEntityStorage.java | 135 ++++++ 8 files changed, 653 insertions(+), 31 deletions(-) create mode 100644 catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/integration/test/CatalogGenericLakehouseLanceIT.java diff --git a/catalogs/catalog-generic-lakehouse/build.gradle.kts b/catalogs/catalog-generic-lakehouse/build.gradle.kts index 704dbda7e36..df401dcde41 100644 --- a/catalogs/catalog-generic-lakehouse/build.gradle.kts +++ b/catalogs/catalog-generic-lakehouse/build.gradle.kts @@ -43,6 +43,7 @@ dependencies { implementation(libs.commons.lang3) implementation(libs.guava) implementation(libs.hadoop3.client.api) + implementation(libs.hadoop3.client.runtime) implementation(libs.lance) annotationProcessor(libs.lombok) diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java index 8e823a18a9e..358c2dcab5c 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java +++
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java @@ -62,10 +62,14 @@ import org.apache.gravitino.rel.expressions.transforms.Transform; import org.apache.gravitino.rel.indexes.Index; import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** Operations for interacting with a generic lakehouse catalog in Apache Gravitino. */ public class GenericLakehouseCatalogOperations implements CatalogOperations, SupportsSchemas, TableCatalog { + private static final Logger LOG = + LoggerFactory.getLogger(GenericLakehouseCatalogOperations.class); private static final String SLASH = "/"; @@ -282,24 +286,27 @@ public Table alterTable(NameIdentifier ident, TableChange... changes) @Override public boolean dropTable(NameIdentifier ident) { EntityStore store = GravitinoEnv.getInstance().entityStore(); - Namespace namespace = ident.namespace(); + GenericTableEntity tableEntity; try { - GenericTableEntity tableEntity = - store.get(ident, Entity.EntityType.TABLE, GenericTableEntity.class); - Map tableProperties = tableEntity.getProperties(); - String format = tableProperties.getOrDefault("format", "lance"); - LakehouseCatalogOperations lakehouseCatalogOperations = - SUPPORTED_FORMATS.compute( - format, - (k, v) -> - v == null - ? 
createLakehouseCatalogOperations( - format, tableProperties, catalogInfo, propertiesMetadata) - : v); - return lakehouseCatalogOperations.dropTable(ident); - } catch (IOException e) { - throw new RuntimeException("Failed to list tables under schema " + namespace, e); + tableEntity = store.get(ident, Entity.EntityType.TABLE, GenericTableEntity.class); + } catch (NoSuchEntityException e) { + LOG.warn("Table {} does not exist, skip dropping.", ident); + return false; + } catch (IOException ioe) { + throw new RuntimeException("Failed to get table " + ident); } + + Map tableProperties = tableEntity.getProperties(); + String format = tableProperties.getOrDefault("format", "lance"); + LakehouseCatalogOperations lakehouseCatalogOperations = + SUPPORTED_FORMATS.compute( + format, + (k, v) -> + v == null + ? createLakehouseCatalogOperations( + format, tableProperties, catalogInfo, propertiesMetadata) + : v); + return lakehouseCatalogOperations.dropTable(ident); } private String ensureTrailingSlash(String path) { diff --git a/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/integration/test/CatalogGenericLakehouseLanceIT.java b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/integration/test/CatalogGenericLakehouseLanceIT.java new file mode 100644 index 00000000000..c4790c4d85c --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/integration/test/CatalogGenericLakehouseLanceIT.java @@ -0,0 +1,419 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse.integration.test; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.lancedb.lance.Dataset; +import com.lancedb.lance.Fragment; +import com.lancedb.lance.FragmentMetadata; +import com.lancedb.lance.Transaction; +import com.lancedb.lance.WriteParams; +import com.lancedb.lance.ipc.LanceScanner; +import com.lancedb.lance.ipc.ScanOptions; +import com.lancedb.lance.operation.Append; +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Schema; +import org.apache.gravitino.client.GravitinoMetalake; +import org.apache.gravitino.integration.test.container.ContainerSuite; +import 
org.apache.gravitino.integration.test.util.BaseIT; +import org.apache.gravitino.integration.test.util.GravitinoITUtils; +import org.apache.gravitino.rel.Column; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.rel.expressions.transforms.Transforms; +import org.apache.gravitino.rel.types.Types; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CatalogGenericLakehouseLanceIT extends BaseIT { + private static final Logger LOG = LoggerFactory.getLogger(CatalogGenericLakehouseLanceIT.class); + public static final String metalakeName = + GravitinoITUtils.genRandomName("CatalogGenericLakeLanceIT_metalake"); + public String catalogName = GravitinoITUtils.genRandomName("CatalogGenericLakeLanceI_catalog"); + public String SCHEMA_PREFIX = "CatalogGenericLakeLance_schema"; + public String schemaName = GravitinoITUtils.genRandomName(SCHEMA_PREFIX); + public String TABLE_PREFIX = "CatalogGenericLakeLance_table"; + public String tableName = GravitinoITUtils.genRandomName(TABLE_PREFIX); + public static final String TABLE_COMMENT = "table_comment"; + public static final String LANCE_COL_NAME1 = "lance_col_name1"; + public static final String LANCE_COL_NAME2 = "lance_col_name2"; + public static final String LANCE_COL_NAME3 = "lance_col_name3"; + protected final String provider = "generic-lakehouse"; + protected final ContainerSuite containerSuite = ContainerSuite.getInstance(); + protected GravitinoMetalake metalake; + protected Catalog catalog; + protected String tempDirectory; + + @BeforeAll + public void startup() throws Exception { + createMetalake(); + createCatalog(); + createSchema(); + + // Create a temp directory for test use + Path tempDir = Files.createTempDirectory("myTempDir"); + tempDirectory = tempDir.toString(); + File file = 
new File(tempDirectory); + file.deleteOnExit(); + } + + @AfterAll + public void stop() throws IOException { + if (client != null) { + Arrays.stream(catalog.asSchemas().listSchemas()) + .filter(schema -> !schema.equals("default")) + .forEach( + (schema -> { + catalog.asSchemas().dropSchema(schema, true); + })); + Arrays.stream(metalake.listCatalogs()) + .forEach( + catalogName -> { + metalake.dropCatalog(catalogName, true); + }); + client.dropMetalake(metalakeName, true); + } + try { + closer.close(); + } catch (Exception e) { + LOG.error("Failed to close CloseableGroup", e); + } + + client = null; + } + + @AfterEach + public void resetSchema() throws InterruptedException { + catalog.asSchemas().dropSchema(schemaName, true); + createSchema(); + } + + @Test + public void testCreateLanceTable() throws InterruptedException { + // Create a table from Gravitino API + Column[] columns = createColumns(); + NameIdentifier nameIdentifier = NameIdentifier.of(schemaName, tableName); + + Map properties = createProperties(); + String tableLocation = tempDirectory + "/" + tableName; + properties.put("format", "lance"); + properties.put("location", tableLocation); + + Table createdTable = + catalog + .asTableCatalog() + .createTable( + nameIdentifier, + columns, + TABLE_COMMENT, + properties, + Transforms.EMPTY_TRANSFORM, + null, + null); + + Assertions.assertEquals(createdTable.name(), tableName); + Map createdTableProperties = createdTable.properties(); + Assertions.assertEquals("lance", createdTableProperties.get("format")); + + Assertions.assertEquals(TABLE_COMMENT, createdTable.comment()); + Assertions.assertEquals(3, createdTable.columns().length); + columnEquals(columns, createdTable.columns()); + String expectedTableLocation = tempDirectory + "/" + tableName + "/"; + Assertions.assertEquals(expectedTableLocation, createdTableProperties.get("location")); + Assertions.assertTrue(new File(expectedTableLocation).exists()); + + // Drop table + 
catalog.asTableCatalog().dropTable(nameIdentifier); + catalog.asSchemas().dropSchema(schemaName, true); + + Map schemaProperties = createSchemaProperties(); + String schemaLocation = tempDirectory + "/schema_location"; + schemaProperties.put("location", schemaLocation); + catalog.asSchemas().createSchema(schemaName, "comment", schemaProperties); + properties = createProperties(); + properties.put("format", "lance"); + + createdTable = + catalog + .asTableCatalog() + .createTable( + nameIdentifier, + columns, + TABLE_COMMENT, + properties, + Transforms.EMPTY_TRANSFORM, + null, + null); + Assertions.assertEquals(createdTable.name(), tableName); + createdTableProperties = createdTable.properties(); + Assertions.assertEquals("lance", createdTableProperties.get("format")); + + Assertions.assertEquals(TABLE_COMMENT, createdTable.comment()); + Assertions.assertEquals(3, createdTable.columns().length); + columnEquals(columns, createdTable.columns()); + expectedTableLocation = schemaLocation + "/" + tableName + "/"; + Assertions.assertEquals(expectedTableLocation, createdTableProperties.get("location")); + Assertions.assertTrue(new File(expectedTableLocation).exists()); + + // Now try to load table + Table loadedTable = catalog.asTableCatalog().loadTable(nameIdentifier); + Assertions.assertEquals(createdTable.name(), loadedTable.name()); + Map loadedTableProperties = loadedTable.properties(); + Assertions.assertEquals("lance", loadedTableProperties.get("format")); + Assertions.assertEquals(expectedTableLocation, loadedTableProperties.get("location")); + Assertions.assertEquals(TABLE_COMMENT, loadedTable.comment()); + + // Now test list tables + List tableIdentifiers = + Arrays.asList(catalog.asTableCatalog().listTables(nameIdentifier.namespace())); + Assertions.assertEquals(1, tableIdentifiers.size()); + Assertions.assertEquals(nameIdentifier, tableIdentifiers.get(0)); + } + + @Test + void testLanceTableFormat() { + String tableName = 
GravitinoITUtils.genRandomName(TABLE_PREFIX); + Column[] columns = createColumns(); + NameIdentifier nameIdentifier = NameIdentifier.of(schemaName, tableName); + + Map properties = createProperties(); + String tableLocation = tempDirectory + "/" + tableName; + properties.put("format", "lance"); + properties.put("location", tableLocation); + + catalog + .asTableCatalog() + .createTable( + nameIdentifier, + columns, + TABLE_COMMENT, + properties, + Transforms.EMPTY_TRANSFORM, + null, + null); + + // Now try to read the lance directory and check it. + try (Dataset dataset = Dataset.open(tableLocation)) { + org.apache.arrow.vector.types.pojo.Schema lanceSchema = dataset.getSchema(); + List fields = lanceSchema.getFields(); + for (Field field : fields) { + if (field.getName().equals(LANCE_COL_NAME1)) { + Assertions.assertEquals(new ArrowType.Int(32, true), field.getType()); + } else if (field.getName().equals(LANCE_COL_NAME2)) { + Assertions.assertEquals(new ArrowType.Int(64, true), field.getType()); + } else if (field.getName().equals(LANCE_COL_NAME3)) { + Assertions.assertEquals(new ArrowType.Utf8(), field.getType()); + } else { + Assertions.fail("Unexpected column name in lance table: " + field.getName()); + } + } + + // Now try to write some data to the dataset + Transaction trans = + dataset + .newTransactionBuilder() + .operation( + Append.builder() + .fragments( + createFragmentMetadata( + tableLocation, + Arrays.asList( + new LanceDataValue(1, 100L, "first"), + new LanceDataValue(2, 200L, "second"), + new LanceDataValue(3, 300L, "third")), + lanceSchema)) + .build()) + .writeParams(ImmutableMap.of()) + .build(); + + Dataset newDataset = dataset.commitTransaction(trans); + try (LanceScanner scanner = + newDataset.newScan( + new ScanOptions.Builder() + .columns(Arrays.asList(LANCE_COL_NAME1, LANCE_COL_NAME2, LANCE_COL_NAME3)) + .batchSize(1000) + .build())) { + + List dataValues = Lists.newArrayList(); + try (ArrowReader reader = scanner.scanBatches()) { + while 
(reader.loadNextBatch()) { + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + List fieldVectors = root.getFieldVectors(); + + IntVector col1Vector = (IntVector) fieldVectors.get(0); + BigIntVector col2Vector = (BigIntVector) fieldVectors.get(1); + VarCharVector col3Vector = (VarCharVector) fieldVectors.get(2); + + for (int i = 0; i < root.getRowCount(); i++) { + int col1 = col1Vector.get(i); + long col2 = col2Vector.get(i); + String col3 = new String(col3Vector.get(i), StandardCharsets.UTF_8); + dataValues.add(new LanceDataValue(col1, col2, col3)); + } + } + } + + Assertions.assertEquals(3, dataValues.size()); + Assertions.assertEquals(1, dataValues.get(0).col1); + Assertions.assertEquals(100L, dataValues.get(0).col2); + Assertions.assertEquals("first", dataValues.get(0).col3); + } catch (Exception e) { + throw new RuntimeException(e); + } + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + static class LanceDataValue { + public Integer col1; + public Long col2; + public String col3; + + public LanceDataValue(Integer col1, Long col2, String col3) { + this.col1 = col1; + this.col2 = col2; + this.col3 = col3; + } + } + + private List createFragmentMetadata( + String tableLocation, + List updates, + org.apache.arrow.vector.types.pojo.Schema schema) + throws JsonProcessingException { + List fragmentMetas; + int count = 0; + RootAllocator rootAllocator = new RootAllocator(); + try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, rootAllocator)) { + for (FieldVector vector : root.getFieldVectors()) { + vector.setInitialCapacity(count); + } + root.allocateNew(); + + IntVector col1Vector = (IntVector) root.getVector(LANCE_COL_NAME1); + BigIntVector col2Vector = (BigIntVector) root.getVector(LANCE_COL_NAME2); + VarCharVector col3Vector = (VarCharVector) root.getVector(LANCE_COL_NAME3); + + int index = 0; + for (LanceDataValue data : updates) { + col1Vector.setSafe(index, data.col1); + col2Vector.setSafe(index, data.col2); + 
col3Vector.setSafe(index, data.col3.getBytes(StandardCharsets.UTF_8)); + index++; + } + root.setRowCount(index); + + fragmentMetas = + Fragment.create(tableLocation, rootAllocator, root, new WriteParams.Builder().build()); + return fragmentMetas; + } + } + + protected Map createSchemaProperties() { + Map properties = new HashMap<>(); + properties.put("key1", "val1"); + properties.put("key2", "val2"); + return properties; + } + + private void columnEquals(Column[] expect, Column[] actual) { + Assertions.assertEquals(expect.length, actual.length); + + for (int i = 0; i < expect.length; i++) { + Column expectCol = expect[i]; + Column actualCol = actual[i]; + + Assertions.assertEquals(expectCol.name(), actualCol.name()); + Assertions.assertEquals(expectCol.dataType(), actualCol.dataType()); + Assertions.assertEquals(expectCol.comment(), actualCol.comment()); + } + } + + private void createMetalake() { + GravitinoMetalake[] gravitinoMetalakes = client.listMetalakes(); + Assertions.assertEquals(0, gravitinoMetalakes.length); + + client.createMetalake(metalakeName, "comment", Collections.emptyMap()); + GravitinoMetalake loadMetalake = client.loadMetalake(metalakeName); + Assertions.assertEquals(metalakeName, loadMetalake.name()); + + metalake = loadMetalake; + } + + protected void createCatalog() { + Map properties = Maps.newHashMap(); + metalake.createCatalog(catalogName, Catalog.Type.RELATIONAL, provider, "comment", properties); + + catalog = metalake.loadCatalog(catalogName); + } + + private void createSchema() throws InterruptedException { + Map schemaProperties = createSchemaProperties(); + String comment = "comment"; + catalog.asSchemas().createSchema(schemaName, comment, schemaProperties); + Schema loadSchema = catalog.asSchemas().loadSchema(schemaName); + Assertions.assertEquals(schemaName, loadSchema.name()); + Assertions.assertEquals(comment, loadSchema.comment()); + Assertions.assertEquals("val1", loadSchema.properties().get("key1")); + 
Assertions.assertEquals("val2", loadSchema.properties().get("key2")); + } + + private Column[] createColumns() { + Column col1 = Column.of(LANCE_COL_NAME1, Types.IntegerType.get(), "col_1_comment"); + Column col2 = Column.of(LANCE_COL_NAME2, Types.LongType.get(), "col_2_comment"); + Column col3 = Column.of(LANCE_COL_NAME3, Types.StringType.get(), "col_3_comment"); + return new Column[] {col1, col2, col3}; + } + + protected Map createProperties() { + Map properties = Maps.newHashMap(); + properties.put("key1", "val1"); + properties.put("key2", "val2"); + + return properties; + } +} diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableVersionBaseSQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableVersionBaseSQLProvider.java index 3501abe10cf..c39b8cbabbc 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableVersionBaseSQLProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableVersionBaseSQLProvider.java @@ -29,9 +29,9 @@ public class TableVersionBaseSQLProvider { public String insertTableVersion(@Param("tablePO") TablePO tablePO) { return "INSERT INTO " + TABLE_NAME - + " (table_id, format, properties, partitioning" + + " (table_id, format, properties, partitioning," + " distribution, sort_orders, indexes, comment," - + " version, last_version, deleted_at)" + + " version, deleted_at)" + " VALUES (" + " #{tablePO.tableId}," + " #{tablePO.format}," @@ -42,7 +42,6 @@ public String insertTableVersion(@Param("tablePO") TablePO tablePO) { + " #{tablePO.indexes}," + " #{tablePO.comment}," + " #{tablePO.currentVersion}," - + " #{tablePO.lastVersion}," + " #{tablePO.deletedAt}" + " )"; } diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/TableVersionPostgreSQLProvider.java 
b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/TableVersionPostgreSQLProvider.java index e0a7413b1cc..13eebeaa2ca 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/TableVersionPostgreSQLProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/postgresql/TableVersionPostgreSQLProvider.java @@ -19,6 +19,41 @@ package org.apache.gravitino.storage.relational.mapper.provider.postgresql; +import static org.apache.gravitino.storage.relational.mapper.TableVersionMapper.TABLE_NAME; + import org.apache.gravitino.storage.relational.mapper.provider.base.TableVersionBaseSQLProvider; +import org.apache.gravitino.storage.relational.po.TablePO; +import org.apache.ibatis.annotations.Param; + +public class TableVersionPostgreSQLProvider extends TableVersionBaseSQLProvider { -public class TableVersionPostgreSQLProvider extends TableVersionBaseSQLProvider {} + public String insertTableVersionOnDuplicateKeyUpdate(@Param("tablePO") TablePO tablePO) { + return "INSERT INTO " + + TABLE_NAME + + " (table_id, format, properties, partitioning," + + " distribution, sort_orders, indexes, comment," + + " version, deleted_at)" + + " VALUES (" + + " #{tablePO.tableId}," + + " #{tablePO.format}," + + " #{tablePO.properties}," + + " #{tablePO.partitions}," + + " #{tablePO.distribution}," + + " #{tablePO.sortOrders}," + + " #{tablePO.indexes}," + + " #{tablePO.comment}," + + " #{tablePO.currentVersion}," + + " #{tablePO.deletedAt}" + + " )" + + " ON CONFLICT (table_id, deleted_at) DO UPDATE SET" + + " format = #{tablePO.format}," + + " properties = #{tablePO.properties}," + + " partitioning = #{tablePO.partitions}," + + " distribution = #{tablePO.distribution}," + + " sort_orders = #{tablePO.sortOrders}," + + " indexes = #{tablePO.indexes}," + + " comment = #{tablePO.comment}," + + " version = #{tablePO.currentVersion}," + + " deleted_at = #{tablePO.deletedAt}"; + } +} diff 
--git a/core/src/main/java/org/apache/gravitino/storage/relational/service/TableMetaService.java b/core/src/main/java/org/apache/gravitino/storage/relational/service/TableMetaService.java index f4bbf7a6f6d..7a42d95db64 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/service/TableMetaService.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/service/TableMetaService.java @@ -210,6 +210,14 @@ public TableEntity updateTable( SessionUtils.getWithoutCommit( TableMetaMapper.class, mapper -> mapper.updateTableMeta(newTablePO, oldTablePO, newSchemaId))), + () -> + SessionUtils.doWithCommit( + TableVersionMapper.class, + mapper -> { + if (newTablePO.getFormat() != null) { + mapper.insertTableVersionOnDuplicateKeyUpdate(newTablePO); + } + }), () -> { if (updateResult.get() > 0 && (isColumnChanged || isSchemaChanged)) { TableColumnMetaService.getInstance() diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java b/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java index 62bc11f8915..fa8f06a9f8e 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/utils/POConverters.java @@ -457,17 +457,35 @@ public static TablePO updateTablePOWithVersionAndSchemaId( } try { - return TablePO.builder() - .withTableId(oldTablePO.getTableId()) - .withTableName(newTable.name()) - .withMetalakeId(oldTablePO.getMetalakeId()) - .withCatalogId(oldTablePO.getCatalogId()) - .withSchemaId(newSchemaId) - .withAuditInfo(JsonUtils.anyFieldMapper().writeValueAsString(newTable.auditInfo())) - .withCurrentVersion(currentVersion) - .withLastVersion(lastVersion) - .withDeletedAt(DEFAULT_DELETED_AT) - .build(); + TablePO.Builder builder = + TablePO.builder() + .withTableId(oldTablePO.getTableId()) + .withTableName(newTable.name()) + .withMetalakeId(oldTablePO.getMetalakeId()) + 
.withCatalogId(oldTablePO.getCatalogId()) + .withSchemaId(newSchemaId) + .withAuditInfo(JsonUtils.anyFieldMapper().writeValueAsString(newTable.auditInfo())) + .withCurrentVersion(currentVersion) + .withLastVersion(lastVersion) + .withDeletedAt(DEFAULT_DELETED_AT); + + // Note: GenericTableEntity will be removed in the refactor PR, so here just keep the old + // logic to make the UT pass. + if (newTable instanceof GenericTableEntity genericTable) { + builder.withFormat(genericTable.getFormat()); + builder.withComment(genericTable.getComment()); + builder.withProperties( + genericTable.getProperties() == null + ? null + : JsonUtils.anyFieldMapper().writeValueAsString(genericTable.getProperties())); + builder.withIndexes( + genericTable.getIndexes() == null + ? null + : JsonUtils.anyFieldMapper().writeValueAsString(genericTable.getIndexes())); + // TODO other fields in the refactor PRs. + } + + return builder.build(); } catch (JsonProcessingException e) { throw new RuntimeException("Failed to serialize json object:", e); } diff --git a/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java b/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java index 0e8298a2a04..ce9f0f5c738 100644 --- a/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java +++ b/core/src/test/java/org/apache/gravitino/storage/TestEntityStorage.java @@ -79,6 +79,7 @@ import org.apache.gravitino.meta.CatalogEntity; import org.apache.gravitino.meta.ColumnEntity; import org.apache.gravitino.meta.FilesetEntity; +import org.apache.gravitino.meta.GenericTableEntity; import org.apache.gravitino.meta.GroupEntity; import org.apache.gravitino.meta.ModelEntity; import org.apache.gravitino.meta.ModelVersionEntity; @@ -2642,4 +2643,138 @@ void testInvalidRelationCache(String type) throws Exception { destroy(type); } } + + @ParameterizedTest + @MethodSource("storageProvider") + void testLanceTableCreateAndUpdate(String type) { + Config config = 
Mockito.mock(Config.class); + init(type, config); + + AuditInfo auditInfo = + AuditInfo.builder().withCreator("creator").withCreateTime(Instant.now()).build(); + + try (EntityStore store = EntityStoreFactory.createEntityStore(config)) { + store.initialize(config); + + BaseMetalake metalake = + createBaseMakeLake(RandomIdGenerator.INSTANCE.nextId(), "metalake", auditInfo); + store.put(metalake, false); + + CatalogEntity catalogEntity = + CatalogEntity.builder() + .withId(RandomIdGenerator.INSTANCE.nextId()) + .withName("catalog") + .withNamespace(NamespaceUtil.ofCatalog("metalake")) + .withType(Catalog.Type.RELATIONAL) + .withProvider("generic-lakehouse") + .withComment("This is a generic-lakehouse") + .withProperties(ImmutableMap.of()) + .withAuditInfo(auditInfo) + .build(); + + store.put(catalogEntity, false); + + SchemaEntity schemaEntity = + SchemaEntity.builder() + .withId(RandomIdGenerator.INSTANCE.nextId()) + .withName("schema") + .withNamespace(NamespaceUtil.ofSchema("metalake", "catalog")) + .withComment("This is a schema for generic-lakehouse") + .withProperties(ImmutableMap.of()) + .withAuditInfo(auditInfo) + .build(); + store.put(schemaEntity, false); + + long column1Id = RandomIdGenerator.INSTANCE.nextId(); + GenericTableEntity table = + GenericTableEntity.getBuilder() + .withId(RandomIdGenerator.INSTANCE.nextId()) + .withNamespace(NamespaceUtil.ofTable("metalake", "catalog", "schema")) + .withName("table") + .withAuditInfo(auditInfo) + .withColumns( + Lists.newArrayList( + ColumnEntity.builder() + .withId(column1Id) + .withName("column1") + .withDataType(Types.StringType.get()) + .withComment("test column") + .withPosition(1) + .withAuditInfo(auditInfo) + .build())) + .withComment("This is a lance table") + .withFormat("lance") + .withProperties(ImmutableMap.of("location", "/tmp/test", "format", "lance")) + .build(); + store.put(table, false); + GenericTableEntity fetchedTable = + store.get(table.nameIdentifier(), Entity.EntityType.TABLE, 
GenericTableEntity.class); + + // check table properties + Assertions.assertEquals("/tmp/test", fetchedTable.getProperties().get("location")); + Assertions.assertEquals("lance", fetchedTable.getProperties().get("format")); + Assertions.assertEquals("This is a lance table", fetchedTable.getComment()); + Assertions.assertEquals(1, fetchedTable.columns().size()); + Assertions.assertEquals("column1", fetchedTable.columns().get(0).name()); + + // Now try to update the table + GenericTableEntity updatedTable = + GenericTableEntity.getBuilder() + .withId(table.id()) + .withNamespace(table.namespace()) + .withName(table.name()) + .withAuditInfo(auditInfo) + .withFormat("lance") + .withColumns( + Lists.newArrayList( + ColumnEntity.builder() + .withId(column1Id) + .withName("column1") + .withDataType(Types.StringType.get()) + .withComment("updated test column") + .withPosition(1) + .withAuditInfo(auditInfo) + .build(), + ColumnEntity.builder() + .withId(RandomIdGenerator.INSTANCE.nextId()) + .withName("column2") + .withDataType(Types.IntegerType.get()) + .withComment("new column") + .withPosition(2) + .withAuditInfo(auditInfo) + .build())) + .withComment("This is an updated lance table") + .withProperties(ImmutableMap.of("location", "/tmp/updated_test", "format", "lance")) + .build(); + + store.update( + table.nameIdentifier(), + GenericTableEntity.class, + Entity.EntityType.TABLE, + e -> updatedTable); + GenericTableEntity fetchedUpdatedTable = + store.get(table.nameIdentifier(), Entity.EntityType.TABLE, GenericTableEntity.class); + + // check updated table properties + Assertions.assertEquals( + "/tmp/updated_test", fetchedUpdatedTable.getProperties().get("location")); + Assertions.assertEquals("lance", fetchedUpdatedTable.getProperties().get("format")); + Assertions.assertEquals("This is an updated lance table", fetchedUpdatedTable.getComment()); + Assertions.assertEquals(2, fetchedUpdatedTable.columns().size()); + for (ColumnEntity column : fetchedUpdatedTable.columns()) 
{ + if (column.name().equals("column1")) { + Assertions.assertEquals("updated test column", column.comment()); + } + } + + Assertions.assertTrue( + fetchedUpdatedTable.columns().stream() + .filter(c -> c.name().equals("column2")) + .findFirst() + .isPresent()); + destroy(type); + } catch (IOException e) { + throw new RuntimeException(e); + } + } } From 4c6934df5cdd88aa8710bfa2cfe7a16e09e9aa72 Mon Sep 17 00:00:00 2001 From: Jerry Shao Date: Tue, 28 Oct 2025 20:28:57 +0800 Subject: [PATCH 17/43] [#8933] fix(CI): Fix the lance rest start issue (#8934) ### What changes were proposed in this pull request? Fix the CI failure when configuring the lance rest aux service. ### Why are the changes needed? Fix: #8933 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing CI. --- .../build.gradle.kts | 2 ++ .../test/CatalogIcebergBaseIT.java | 2 +- .../test/CatalogIcebergKerberosHiveIT.java | 2 +- conf/gravitino.conf.template | 2 +- .../integration/test/FlinkEnvIT.java | 2 +- .../integration/test/MiniGravitino.java | 25 ++++++++++++++++--- .../test/MiniGravitinoContext.java | 6 ++--- .../integration/test/util/BaseIT.java | 5 ++-- .../integration/test/SparkEnvIT.java | 2 +- spark-connector/v3.3/spark/build.gradle.kts | 1 + spark-connector/v3.4/spark/build.gradle.kts | 1 + spark-connector/v3.5/spark/build.gradle.kts | 1 + 12 files changed, 36 insertions(+), 15 deletions(-) diff --git a/catalogs/catalog-lakehouse-iceberg/build.gradle.kts b/catalogs/catalog-lakehouse-iceberg/build.gradle.kts index 25f750a6be9..1b1cd9a29e3 100644 --- a/catalogs/catalog-lakehouse-iceberg/build.gradle.kts +++ b/catalogs/catalog-lakehouse-iceberg/build.gradle.kts @@ -143,6 +143,8 @@ tasks.test { exclude("**/integration/test/**") } else { dependsOn(tasks.jar) + dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":lance:lance-rest-server:jar") } } diff --git 
a/catalogs/catalog-lakehouse-iceberg/src/test/java/org/apache/gravitino/catalog/lakehouse/iceberg/integration/test/CatalogIcebergBaseIT.java b/catalogs/catalog-lakehouse-iceberg/src/test/java/org/apache/gravitino/catalog/lakehouse/iceberg/integration/test/CatalogIcebergBaseIT.java index 1260d7d795e..b0d4f3b5eb2 100644 --- a/catalogs/catalog-lakehouse-iceberg/src/test/java/org/apache/gravitino/catalog/lakehouse/iceberg/integration/test/CatalogIcebergBaseIT.java +++ b/catalogs/catalog-lakehouse-iceberg/src/test/java/org/apache/gravitino/catalog/lakehouse/iceberg/integration/test/CatalogIcebergBaseIT.java @@ -124,7 +124,7 @@ public abstract class CatalogIcebergBaseIT extends BaseIT { @BeforeAll public void startup() throws Exception { - ignoreIcebergRestService = false; + super.ignoreAuxRestService = false; super.startIntegrationTest(); containerSuite.startHiveContainer(); initIcebergCatalogProperties(); diff --git a/catalogs/catalog-lakehouse-iceberg/src/test/java/org/apache/gravitino/catalog/lakehouse/iceberg/integration/test/CatalogIcebergKerberosHiveIT.java b/catalogs/catalog-lakehouse-iceberg/src/test/java/org/apache/gravitino/catalog/lakehouse/iceberg/integration/test/CatalogIcebergKerberosHiveIT.java index 1017ccb4e10..cce787ec850 100644 --- a/catalogs/catalog-lakehouse-iceberg/src/test/java/org/apache/gravitino/catalog/lakehouse/iceberg/integration/test/CatalogIcebergKerberosHiveIT.java +++ b/catalogs/catalog-lakehouse-iceberg/src/test/java/org/apache/gravitino/catalog/lakehouse/iceberg/integration/test/CatalogIcebergKerberosHiveIT.java @@ -127,7 +127,7 @@ public void startIntegrationTest() { // Config kerberos configuration for Gravitino server addKerberosConfig(); - ignoreIcebergRestService = false; + super.ignoreAuxRestService = false; // Start Gravitino server super.startIntegrationTest(); } catch (Exception e) { diff --git a/conf/gravitino.conf.template b/conf/gravitino.conf.template index 44a9e50f82c..f3af89c12b9 100644 --- a/conf/gravitino.conf.template 
+++ b/conf/gravitino.conf.template @@ -82,7 +82,7 @@ gravitino.authorization.serviceAdmins = anonymous # THE CONFIGURATION FOR AUXILIARY SERVICE # Auxiliary service names, separate by ',', currently support iceberg-rest and lance-rest -gravitino.auxService.names = iceberg-rest +gravitino.auxService.names = iceberg-rest,lance-rest # Iceberg REST service classpath gravitino.iceberg-rest.classpath = iceberg-rest-server/libs, iceberg-rest-server/conf # Iceberg REST service host diff --git a/flink-connector/flink/src/test/java/org/apache/gravitino/flink/connector/integration/test/FlinkEnvIT.java b/flink-connector/flink/src/test/java/org/apache/gravitino/flink/connector/integration/test/FlinkEnvIT.java index d372eb92afd..ca79144f5b6 100644 --- a/flink-connector/flink/src/test/java/org/apache/gravitino/flink/connector/integration/test/FlinkEnvIT.java +++ b/flink-connector/flink/src/test/java/org/apache/gravitino/flink/connector/integration/test/FlinkEnvIT.java @@ -112,7 +112,7 @@ protected String flinkByPass(String key) { protected abstract String getProvider(); private void initIcebergRestServiceEnv() { - ignoreIcebergRestService = false; + super.ignoreAuxRestService = false; Map icebergRestServiceConfigs = new HashMap<>(); icebergRestServiceConfigs.put( "gravitino." 
diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/MiniGravitino.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/MiniGravitino.java index d0ce1710d7f..3cff6940c6e 100644 --- a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/MiniGravitino.java +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/MiniGravitino.java @@ -78,7 +78,7 @@ public MiniGravitino(MiniGravitinoContext context) throws IOException { mockConfDir.mkdirs(); } - private void removeIcebergRestConfiguration(Properties properties) { + private void removeAuxRestConfiguration(Properties properties) { // Disable Iceberg REST service properties.remove( AuxiliaryServiceManager.GRAVITINO_AUX_SERVICE_PREFIX @@ -103,9 +103,9 @@ public void start() throws Exception { serverConfig.loadPropertiesFromFile( new File(ITUtils.joinPath(mockConfDir.getAbsolutePath(), "gravitino.conf"))); - // Remove Iceberg rest service. - if (context.ignoreIcebergRestService) { - removeIcebergRestConfiguration(properties); + // Disable auxiliary rest service. + if (context.ignoreAuxRestService) { + removeAuxRestConfiguration(properties); ITUtils.overwriteConfigFile( ITUtils.joinPath(mockConfDir.getAbsolutePath(), "gravitino.conf"), properties); } @@ -230,6 +230,22 @@ Map getIcebergRestServiceConfigs() throws IOException { return customConfigs; } + private Map getLanceRestServiceConfigs() throws IOException { + Map customConfigs = new HashMap<>(); + + String lanceJarPath = Paths.get("lance", "lance-rest-server", "build", "libs").toString(); + String lanceConfigPath = + Paths.get("lance", "lance-rest-server", "src", "main", "resources").toString(); + customConfigs.put( + "gravitino.lance-rest." + AuxiliaryServiceManager.AUX_SERVICE_CLASSPATH, + String.join(",", lanceJarPath, lanceConfigPath)); + + customConfigs.put( + "gravitino.lance-rest." 
+ JettyServerConfig.WEBSERVER_HTTP_PORT.getKey(), + String.valueOf(RESTUtils.findAvailablePort(4000, 5000))); + return customConfigs; + } + // Customize the config file private void customizeConfigFile(String configTempFileName, String configFileName) throws IOException { @@ -239,6 +255,7 @@ private void customizeConfigFile(String configTempFileName, String configFileNam String.valueOf(RESTUtils.findAvailablePort(2000, 3000))); configMap.putAll(getIcebergRestServiceConfigs()); + configMap.putAll(getLanceRestServiceConfigs()); configMap.putAll(context.customConfig); ITUtils.rewriteConfigFile(configTempFileName, configFileName, configMap); diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/MiniGravitinoContext.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/MiniGravitinoContext.java index 5b2acfac331..601138ecfd0 100644 --- a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/MiniGravitinoContext.java +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/MiniGravitinoContext.java @@ -23,10 +23,10 @@ public class MiniGravitinoContext { Map customConfig; - final boolean ignoreIcebergRestService; + final boolean ignoreAuxRestService; - public MiniGravitinoContext(Map customConfig, boolean ignoreIcebergRestService) { + public MiniGravitinoContext(Map customConfig, boolean ignoreAuxRestService) { this.customConfig = customConfig; - this.ignoreIcebergRestService = ignoreIcebergRestService; + this.ignoreAuxRestService = ignoreAuxRestService; } } diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/BaseIT.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/BaseIT.java index 72923341b93..1ae60a0fecb 100644 --- a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/BaseIT.java +++ 
b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/BaseIT.java @@ -102,7 +102,7 @@ public class BaseIT { protected Map customConfigs = new HashMap<>(); - protected boolean ignoreIcebergRestService = true; + protected boolean ignoreAuxRestService = true; public String DOWNLOAD_MYSQL_JDBC_DRIVER_URL = "https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.26/mysql-connector-java-8.0.26.jar"; @@ -330,8 +330,7 @@ public void startIntegrationTest() throws Exception { serverConfig = new ServerConfig(); customConfigs.put(ENTITY_RELATIONAL_JDBC_BACKEND_PATH.getKey(), file.getAbsolutePath()); if (testMode != null && testMode.equals(ITUtils.EMBEDDED_TEST_MODE)) { - MiniGravitinoContext context = - new MiniGravitinoContext(customConfigs, ignoreIcebergRestService); + MiniGravitinoContext context = new MiniGravitinoContext(customConfigs, ignoreAuxRestService); miniGravitino = new MiniGravitino(context); miniGravitino.start(); serverConfig = miniGravitino.getServerConfig(); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java index 273d6d3bcd0..9c2d2d5110b 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java @@ -155,7 +155,7 @@ private void initHiveEnv() { protected void initCatalogEnv() throws Exception {} private void initIcebergRestServiceEnv() { - ignoreIcebergRestService = false; + super.ignoreAuxRestService = false; Map icebergRestServiceConfigs = new HashMap<>(); icebergRestServiceConfigs.put( "gravitino." 
diff --git a/spark-connector/v3.3/spark/build.gradle.kts b/spark-connector/v3.3/spark/build.gradle.kts index 9023404980b..1cc7f6d1a18 100644 --- a/spark-connector/v3.3/spark/build.gradle.kts +++ b/spark-connector/v3.3/spark/build.gradle.kts @@ -175,6 +175,7 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":lance:lance-rest-server:jar") dependsOn(":catalogs:catalog-lakehouse-paimon:jar") dependsOn(":catalogs:catalog-jdbc-mysql:jar") dependsOn(":catalogs:catalog-jdbc-postgresql:jar") diff --git a/spark-connector/v3.4/spark/build.gradle.kts b/spark-connector/v3.4/spark/build.gradle.kts index 8a9b527dcec..519799d0764 100644 --- a/spark-connector/v3.4/spark/build.gradle.kts +++ b/spark-connector/v3.4/spark/build.gradle.kts @@ -175,6 +175,7 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":lance:lance-rest-server:jar") dependsOn(":catalogs:catalog-lakehouse-paimon:jar") dependsOn(":catalogs:catalog-jdbc-mysql:jar") dependsOn(":catalogs:catalog-jdbc-postgresql:jar") diff --git a/spark-connector/v3.5/spark/build.gradle.kts b/spark-connector/v3.5/spark/build.gradle.kts index edbed1ca2db..cc1d8c9f887 100644 --- a/spark-connector/v3.5/spark/build.gradle.kts +++ b/spark-connector/v3.5/spark/build.gradle.kts @@ -177,6 +177,7 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":lance:lance-rest-server:jar") dependsOn(":catalogs:catalog-lakehouse-paimon:jar") dependsOn(":catalogs:catalog-jdbc-mysql:jar") dependsOn(":catalogs:catalog-jdbc-postgresql:jar") From c4a96189b2951fe42cc90c5d58949525fc05a6d5 Mon Sep 17 00:00:00 2001 From: Mini Yu Date: Wed, 29 Oct 2025 21:23:01 +0800 Subject: [PATCH 18/43] [#8959] fix(lance-rest): Fix 
configuration name error in Lance configurations. (#8960) ### What changes were proposed in this pull request? Remove excessive LANCE_CONFIG_PREFIX; ### Why are the changes needed? Prefix LANCE_CONFIG_PREFIX has been removed when Lance server starts. Fix: #8959 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? Test locally. --- .../org/apache/gravitino/lance/common/config/LanceConfig.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java index f517d1a349c..f4556e43e4f 100644 --- a/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java +++ b/lance/lance-common/src/main/java/org/apache/gravitino/lance/common/config/LanceConfig.java @@ -47,14 +47,14 @@ public class LanceConfig extends Config implements OverwriteDefaultConfig { .createWithDefault(GRAVITINO_NAMESPACE_BACKEND); public static final ConfigEntry METALAKE_NAME = - new ConfigBuilder(LANCE_CONFIG_PREFIX + GRAVITINO_NAMESPACE_BACKEND + "." + CONFIG_METALAKE) + new ConfigBuilder(GRAVITINO_NAMESPACE_BACKEND + "." + CONFIG_METALAKE) .doc("The Metalake name for Lance Gravitino namespace backend") .version(ConfigConstants.VERSION_0_1_0) .stringConf() .create(); public static final ConfigEntry NAMESPACE_BACKEND_URI = - new ConfigBuilder(LANCE_CONFIG_PREFIX + GRAVITINO_NAMESPACE_BACKEND + "." + CONFIG_URI) + new ConfigBuilder(GRAVITINO_NAMESPACE_BACKEND + "." 
+ CONFIG_URI) .doc("The URI of the namespace backend, e.g., Gravitino server URI") .version(ConfigConstants.VERSION_0_1_0) .stringConf() From bc1b77a3bb357dd26bd64f025899b0069ece17b3 Mon Sep 17 00:00:00 2001 From: mchades Date: Wed, 29 Oct 2025 23:13:59 +0800 Subject: [PATCH 19/43] [#8946] improvement(lance): supports more dataTypes for lance table creation (#8947) ### What changes were proposed in this pull request? supports more dataTypes for lance table creation ### Why are the changes needed? Fix: #8946 ### Does this PR introduce _any_ user-facing change? yes, more column data types supports ### How was this patch tested? tests added --- .../lance/LanceCatalogOperations.java | 37 +- .../lance/LanceDataTypeConverter.java | 268 +++++++------- .../lance/TestLanceDataTypeConverter.java | 327 ++++++++++++++++++ docs/generic-lakehouse-catalog.md | 140 ++++++++ 4 files changed, 615 insertions(+), 157 deletions(-) create mode 100644 catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceDataTypeConverter.java create mode 100644 docs/generic-lakehouse-catalog.md diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java index dcfe6bd4896..9572c656d23 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java @@ -38,7 +38,6 @@ import java.util.Optional; import java.util.stream.Collectors; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.commons.lang3.ArrayUtils; import org.apache.gravitino.Catalog; @@ -129,7 
+128,7 @@ public Table createTable( Dataset.create( new RootAllocator(), location, - convertColumnsToSchema(columns), + convertColumnsToArrowSchema(columns), new WriteParams.Builder().withStorageOptions(storageProps).build())) { GenericLakehouseTable.Builder builder = GenericLakehouseTable.builder(); return builder @@ -151,39 +150,13 @@ public Table createTable( } } - private org.apache.arrow.vector.types.pojo.Schema convertColumnsToSchema(Column[] columns) { - LanceDataTypeConverter converter = new LanceDataTypeConverter(); + private org.apache.arrow.vector.types.pojo.Schema convertColumnsToArrowSchema(Column[] columns) { List fields = Arrays.stream(columns) .map( - col -> { - boolean nullable = col.nullable(); - ArrowType parentType = converter.fromGravitino(col.dataType()); - List childTypes = converter.getChildTypes(col.dataType()); - List childFields = - childTypes.stream() - .map( - childType -> - new org.apache.arrow.vector.types.pojo.Field( - "", - org.apache.arrow.vector.types.pojo.FieldType.nullable( - childType), - null)) - .collect(Collectors.toList()); - - if (nullable) { - return new org.apache.arrow.vector.types.pojo.Field( - col.name(), - org.apache.arrow.vector.types.pojo.FieldType.nullable(parentType), - childFields); - } - - // not nullable - return new org.apache.arrow.vector.types.pojo.Field( - col.name(), - org.apache.arrow.vector.types.pojo.FieldType.notNullable(parentType), - childFields); - }) + col -> + LanceDataTypeConverter.CONVERTER.toArrowField( + col.name(), col.dataType(), col.nullable())) .collect(Collectors.toList()); return new org.apache.arrow.vector.types.pojo.Schema(fields); } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java index d7966edd5ee..9cd5783de1b 100644 --- 
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceDataTypeConverter.java @@ -19,82 +19,183 @@ package org.apache.gravitino.catalog.lakehouse.lance; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import java.util.Arrays; import java.util.List; +import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Bool; import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; import org.apache.arrow.vector.types.pojo.ArrowType.Int; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.gravitino.connector.DataTypeConverter; import org.apache.gravitino.json.JsonUtils; import org.apache.gravitino.rel.types.Type; import org.apache.gravitino.rel.types.Types; import org.apache.gravitino.rel.types.Types.FixedType; -import org.apache.gravitino.rel.types.Types.UnparsedType; public class LanceDataTypeConverter implements DataTypeConverter { + public static final LanceDataTypeConverter CONVERTER = new LanceDataTypeConverter(); + + public Field toArrowField(String name, Type type, boolean nullable) { + switch (type.name()) { + case LIST: + Types.ListType listType = (Types.ListType) type; + FieldType listField = new FieldType(nullable, ArrowType.List.INSTANCE, null); + return new Field( + name, + listField, + Lists.newArrayList( + toArrowField("element", listType.elementType(), 
listType.elementNullable()))); + + case STRUCT: + Types.StructType structType = (Types.StructType) type; + FieldType structField = new FieldType(nullable, ArrowType.Struct.INSTANCE, null); + return new Field( + name, + structField, + Arrays.stream(structType.fields()) + .map(field -> toArrowField(field.name(), field.type(), field.nullable())) + .toList()); + + case MAP: + Types.MapType mapType = (Types.MapType) type; + FieldType mapField = new FieldType(nullable, new ArrowType.Map(false), null); + return new Field( + name, + mapField, + Lists.newArrayList( + toArrowField( + MapVector.DATA_VECTOR_NAME, + Types.StructType.of( + Types.StructType.Field.of( + // Note: Arrow MapVector requires key field to be non-nullable + MapVector.KEY_NAME, + mapType.keyType(), + false /*nullable*/, + null /*comment*/), + Types.StructType.Field.of( + MapVector.VALUE_NAME, + mapType.valueType(), + mapType.valueNullable(), + null)), + false /*nullable*/))); + + case UNION: + Types.UnionType unionType = (Types.UnionType) type; + List types = + Arrays.stream(unionType.types()) + .map( + t -> + toArrowField( + t.simpleString(), t, true /*nullable*/) // union members are nullable + ) + .toList(); + int[] typeIds = + types.stream() + .mapToInt( + f -> + org.apache.arrow.vector.types.Types.getMinorTypeForArrowType(f.getType()) + .ordinal()) + .toArray(); + FieldType unionField = + new FieldType(nullable, new ArrowType.Union(UnionMode.Sparse, typeIds), null); + return new Field(name, unionField, types); + + case EXTERNAL: + Types.ExternalType externalType = (Types.ExternalType) type; + Field field; + try { + field = JsonUtils.anyFieldMapper().readValue(externalType.catalogString(), Field.class); + } catch (JsonProcessingException e) { + throw new RuntimeException( + "Failed to parse external type catalog string: " + externalType.catalogString(), e); + } + Preconditions.checkArgument( + name.equals(field.getName()), + "expected field name %s but got %s", + name, + field.getName()); + 
Preconditions.checkArgument( + nullable == field.isNullable(), + "expected field nullable %s but got %s", + nullable, + field.isNullable()); + return field; + + default: + // non-complex type + FieldType fieldType = new FieldType(nullable, fromGravitino(type), null); + return new Field(name, fieldType, null); + } + } + @Override public ArrowType fromGravitino(Type type) { switch (type.name()) { case BOOLEAN: return Bool.INSTANCE; case BYTE: - return new Int(8, true); + return new Int(8, ((Types.ByteType) type).signed()); case SHORT: - return new Int(16, true); + return new Int(8 * 2, ((Types.ShortType) type).signed()); case INTEGER: - return new Int(32, true); + return new Int(8 * 4, ((Types.IntegerType) type).signed()); case LONG: - return new Int(64, true); + return new Int(8 * 8, ((Types.LongType) type).signed()); case FLOAT: return new FloatingPoint(FloatingPointPrecision.SINGLE); case DOUBLE: return new FloatingPoint(FloatingPointPrecision.DOUBLE); + case STRING: + return ArrowType.Utf8.INSTANCE; + case BINARY: + return ArrowType.Binary.INSTANCE; case DECIMAL: - // Lance uses FIXED_SIZE_BINARY for decimal types - return new ArrowType.FixedSizeBinary(16); // assuming 16 bytes for decimal + Types.DecimalType decimalType = (Types.DecimalType) type; + return new ArrowType.Decimal(decimalType.precision(), decimalType.scale(), 8 * 16); case DATE: return new ArrowType.Date(DateUnit.DAY); - case TIME: - return new ArrowType.Time(TimeUnit.MILLISECOND, 32); case TIMESTAMP: - return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - case VARCHAR: - case STRING: - return new ArrowType.Utf8(); + Types.TimestampType timestampType = (Types.TimestampType) type; + TimeUnit timeUnit = TimeUnit.MICROSECOND; + if (timestampType.hasPrecisionSet()) { + timeUnit = + switch (timestampType.precision()) { + case 0 -> TimeUnit.SECOND; + case 3 -> TimeUnit.MILLISECOND; + case 6 -> TimeUnit.MICROSECOND; + case 9 -> TimeUnit.NANOSECOND; + default -> throw new 
UnsupportedOperationException( + "Expected precision to be one of 0, 3, 6, 9 but got: " + + timestampType.precision()); + }; + } + if (timestampType.hasTimeZone()) { + // todo: need timeZoneId for timestamp with time zone + return new ArrowType.Timestamp(timeUnit, "UTC"); + } + return new ArrowType.Timestamp(timeUnit, null); + case TIME: + return new ArrowType.Time(TimeUnit.NANOSECOND, 8 * 8); + case NULL: + return ArrowType.Null.INSTANCE; + case INTERVAL_YEAR: + return new ArrowType.Interval(IntervalUnit.YEAR_MONTH); + case INTERVAL_DAY: + return new ArrowType.Duration(TimeUnit.MICROSECOND); case FIXED: FixedType fixedType = (FixedType) type; return new ArrowType.FixedSizeBinary(fixedType.length()); - case BINARY: - return new ArrowType.Binary(); - case UNPARSED: - String typeStr = ((UnparsedType) type).unparsedType().toString(); - try { - Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class); - if (t instanceof Types.ListType) { - return ArrowType.List.INSTANCE; - } else if (t instanceof Types.MapType) { - return new ArrowType.Map(false); - } else if (t instanceof Types.StructType) { - return ArrowType.Struct.INSTANCE; - } else { - throw new UnsupportedOperationException( - "Unsupported UnparsedType conversion: " + t.simpleString()); - } - } catch (Exception e) { - // FixedSizeListArray(integer, 3) - if (typeStr.startsWith("FixedSizeListArray")) { - int size = - Integer.parseInt( - typeStr.substring(typeStr.indexOf(',') + 1, typeStr.indexOf(')')).trim()); - return new ArrowType.FixedSizeList(size); - } - throw new UnsupportedOperationException("Failed to parse UnparsedType: " + typeStr, e); - } default: throw new UnsupportedOperationException("Unsupported Gravitino type: " + type.name()); } @@ -102,91 +203,8 @@ public ArrowType fromGravitino(Type type) { @Override public Type toGravitino(ArrowType arrowType) { - if (arrowType instanceof Bool) { - return Types.BooleanType.get(); - } else if (arrowType instanceof Int intType) { - switch 
(intType.getBitWidth()) { - case 8 -> { - return Types.ByteType.get(); - } - case 16 -> { - return Types.ShortType.get(); - } - case 32 -> { - return Types.IntegerType.get(); - } - case 64 -> { - return Types.LongType.get(); - } - default -> throw new UnsupportedOperationException( - "Unsupported Int bit width: " + intType.getBitWidth()); - } - } else if (arrowType instanceof FloatingPoint floatingPoint) { - switch (floatingPoint.getPrecision()) { - case SINGLE: - return Types.FloatType.get(); - case DOUBLE: - return Types.DoubleType.get(); - default: - throw new UnsupportedOperationException( - "Unsupported FloatingPoint precision: " + floatingPoint.getPrecision()); - } - } else if (arrowType instanceof ArrowType.FixedSizeBinary) { - ArrowType.FixedSizeBinary fixedSizeBinary = (ArrowType.FixedSizeBinary) arrowType; - return Types.FixedType.of(fixedSizeBinary.getByteWidth()); - } else if (arrowType instanceof ArrowType.Date) { - return Types.DateType.get(); - } else if (arrowType instanceof ArrowType.Time) { - return Types.TimeType.get(); - } else if (arrowType instanceof ArrowType.Timestamp) { - return Types.TimestampType.withoutTimeZone(); - } else if (arrowType instanceof ArrowType.Utf8) { - return Types.StringType.get(); - } else if (arrowType instanceof ArrowType.Binary) { - return Types.BinaryType.get(); - // TODO handle complex types like List, Map, Struct - } else { - throw new UnsupportedOperationException("Unsupported Arrow type: " + arrowType); - } - } - - public List getChildTypes(Type parentType) { - if (parentType.name() != Type.Name.UNPARSED) { - return List.of(); - } - - List arrowTypes = Lists.newArrayList(); - String typeStr = ((UnparsedType) parentType).unparsedType().toString(); - try { - Type t = JsonUtils.anyFieldMapper().readValue(typeStr, Type.class); - if (t instanceof Types.ListType listType) { - arrowTypes.add(fromGravitino(listType.elementType())); - } else if (t instanceof Types.MapType mapType) { - 
arrowTypes.add(fromGravitino(mapType.keyType())); - arrowTypes.add(fromGravitino(mapType.valueType())); - } else { - // TODO support struct type. - throw new UnsupportedOperationException( - "Unsupported UnparsedType conversion: " + t.simpleString()); - } - - return arrowTypes; - } catch (Exception e) { - // FixedSizeListArray(integer, 3) - - try { - if (typeStr.startsWith("FixedSizeListArray")) { - String type = typeStr.substring(typeStr.indexOf('(') + 1, typeStr.indexOf(',')).trim(); - Type childType = JsonUtils.anyFieldMapper().readValue("\"" + type + "\"", Type.class); - arrowTypes.add(fromGravitino(childType)); - - return arrowTypes; - } - } catch (Exception e1) { - throw new UnsupportedOperationException("Failed to parse UnparsedType: " + typeStr, e1); - } - - throw new UnsupportedOperationException("Failed to parse UnparsedType: " + typeStr, e); - } + // since the table metadata will load from Gravitino storage directly, we don't need to + // implement this method for now. + throw new UnsupportedOperationException("toGravitino is not implemented yet."); } } diff --git a/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceDataTypeConverter.java b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceDataTypeConverter.java new file mode 100644 index 00000000000..cf28ee74342 --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/lance/TestLanceDataTypeConverter.java @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse.lance; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.function.Consumer; +import java.util.stream.Stream; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.UnionMode; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.gravitino.rel.types.Type; +import org.apache.gravitino.rel.types.Types; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.MethodSource; + +public class TestLanceDataTypeConverter { + private static final LanceDataTypeConverter CONVERTER = LanceDataTypeConverter.CONVERTER; + + // Gravitino complex type definitions for testing + private static final Types.StructType SIMPLE_STRUCT = + Types.StructType.of( + Types.StructType.Field.of("id", Types.LongType.get(), false, null), + Types.StructType.Field.of("name", Types.StringType.get(), true, null)); + + private static final Types.StructType NESTED_STRUCT = + 
Types.StructType.of( + Types.StructType.Field.of("id", Types.LongType.get(), false, null), + Types.StructType.Field.of( + "address", + Types.StructType.of( + Types.StructType.Field.of("street", Types.StringType.get(), false, null), + Types.StructType.Field.of("city", Types.StringType.get(), false, null)), + true, + null)); + private static final String NESTED_STRUCT_JSON = + "{\"name\":\"person_nested_json\",\"nullable\":false,\"type\":{\"name\":\"struct\"},\"children\":[" + + "{\"name\":\"id\",\"nullable\":false,\"type\":{\"name\":\"int\",\"bitWidth\":64,\"isSigned\":true},\"children\":[]}," + + "{\"name\":\"address\",\"nullable\":true,\"type\":{\"name\":\"struct\"},\"children\":[" + + "{\"name\":\"street\",\"nullable\":false,\"type\":{\"name\":\"utf8\"},\"children\":[]}," + + "{\"name\":\"city\",\"nullable\":false,\"type\":{\"name\":\"utf8\"},\"children\":[]}" + + "]}" + + "]}"; + + private static final Types.ListType LIST_OF_STRUCTS = + Types.ListType.of( + Types.StructType.of( + Types.StructType.Field.of("sku", Types.StringType.get(), false, null), + Types.StructType.Field.of("quantity", Types.IntegerType.get(), false, null)), + true); + + // Field validators for Arrow conversion tests + private static Consumer INT_VALIDATOR = + field -> assertInstanceOf(ArrowType.Int.class, field.getFieldType().getType()); + private static Consumer STRING_VALIDATOR = + field -> assertInstanceOf(ArrowType.Utf8.class, field.getFieldType().getType()); + private static Consumer LARGE_UTF8_VALIDATOR = + field -> assertInstanceOf(ArrowType.LargeUtf8.class, field.getFieldType().getType()); + private static Consumer BOOLEAN_VALIDATOR = + field -> assertInstanceOf(ArrowType.Bool.class, field.getFieldType().getType()); + private static Consumer DECIMAL_VALIDATOR = + field -> { + assertInstanceOf(ArrowType.Decimal.class, field.getFieldType().getType()); + ArrowType.Decimal decimal = (ArrowType.Decimal) field.getFieldType().getType(); + + assertEquals(10, decimal.getPrecision()); + 
assertEquals(2, decimal.getScale()); + }; + private static Consumer LIST_VALIDATOR = + field -> { + assertInstanceOf(ArrowType.List.class, field.getFieldType().getType()); + assertEquals(1, field.getChildren().size()); + + Field elementField = field.getChildren().get(0); + assertEquals("element", elementField.getName()); + assertTrue(elementField.isNullable()); + assertInstanceOf(ArrowType.Int.class, elementField.getFieldType().getType()); + }; + private static Consumer MAP_VALIDATOR = + field -> { + assertInstanceOf(ArrowType.Map.class, field.getFieldType().getType()); + assertEquals(1, field.getChildren().size()); + + Field structField = field.getChildren().get(0); + assertEquals(MapVector.DATA_VECTOR_NAME, structField.getName()); + assertEquals(2, structField.getChildren().size()); + + Field keyField = structField.getChildren().get(0); + assertEquals(MapVector.KEY_NAME, keyField.getName()); + assertFalse(keyField.isNullable()); + assertInstanceOf(ArrowType.Utf8.class, keyField.getFieldType().getType()); + + Field valueField = structField.getChildren().get(1); + assertEquals(MapVector.VALUE_NAME, valueField.getName()); + assertTrue(valueField.isNullable()); + assertInstanceOf(ArrowType.Int.class, valueField.getFieldType().getType()); + }; + private static Consumer STRUCT_VALIDATOR = + field -> { + assertInstanceOf(ArrowType.Struct.class, field.getFieldType().getType()); + assertEquals(2, field.getChildren().size()); + + Field idField = field.getChildren().get(0); + assertEquals("id", idField.getName()); + assertFalse(idField.isNullable()); + assertInstanceOf(ArrowType.Int.class, idField.getFieldType().getType()); + + Field nameField = field.getChildren().get(1); + assertEquals("name", nameField.getName()); + assertTrue(nameField.isNullable()); + assertInstanceOf(ArrowType.Utf8.class, nameField.getFieldType().getType()); + }; + private static Consumer NESTED_STRUCT_VALIDATOR = + field -> { + assertInstanceOf(ArrowType.Struct.class, field.getFieldType().getType()); 
+ assertEquals(2, field.getChildren().size()); + + Field addressField = field.getChildren().get(1); + assertEquals("address", addressField.getName()); + assertTrue(addressField.isNullable()); + + assertInstanceOf(ArrowType.Struct.class, addressField.getFieldType().getType()); + assertEquals(2, addressField.getChildren().size()); + }; + private static Consumer LIST_OF_STRUCTS_VALIDATOR = + field -> { + assertInstanceOf(ArrowType.List.class, field.getFieldType().getType()); + assertEquals(1, field.getChildren().size()); + + Field elementField = field.getChildren().get(0); + assertEquals("element", elementField.getName()); + assertTrue(elementField.isNullable()); + assertInstanceOf(ArrowType.Struct.class, elementField.getFieldType().getType()); + assertEquals(2, elementField.getChildren().size()); + }; + private static Consumer UNION_VALIDATOR = + field -> { + assertInstanceOf(ArrowType.Union.class, field.getFieldType().getType()); + ArrowType.Union unionType = (ArrowType.Union) field.getFieldType().getType(); + assertEquals(UnionMode.Sparse, unionType.getMode()); + assertEquals(2, field.getChildren().size()); + assertInstanceOf(ArrowType.Int.class, field.getChildren().get(0).getFieldType().getType()); + assertInstanceOf(ArrowType.Utf8.class, field.getChildren().get(1).getFieldType().getType()); + }; + + @ParameterizedTest + @DisplayName("Test conversion of Integer types (Byte, Short, Integer, Long)") + @CsvSource({"BYTE, 8, true", "SHORT, 16, true", "INTEGER, 32, true", "LONG, 64, true"}) + public void testFromGravitinoIntegerTypes( + String typeName, int expectedBitWidth, boolean expectedSigned) { + Type type = + switch (typeName) { + case "BYTE" -> Types.ByteType.get(); + case "SHORT" -> Types.ShortType.get(); + case "INTEGER" -> Types.IntegerType.get(); + case "LONG" -> Types.LongType.get(); + default -> throw new IllegalArgumentException("Unknown type: " + typeName); + }; + + ArrowType arrowType = CONVERTER.fromGravitino(type); + 
assertInstanceOf(ArrowType.Int.class, arrowType); + + ArrowType.Int intType = (ArrowType.Int) arrowType; + assertEquals(expectedBitWidth, intType.getBitWidth()); + assertEquals(expectedSigned, intType.getIsSigned()); + } + + @Test + public void testFromGravitinoTimestampWithTz() { + Types.TimestampType timestampType = Types.TimestampType.withTimeZone(); + ArrowType arrowType = CONVERTER.fromGravitino(timestampType); + assertInstanceOf(ArrowType.Timestamp.class, arrowType); + + ArrowType.Timestamp tsArrow = (ArrowType.Timestamp) arrowType; + assertEquals(TimeUnit.MICROSECOND, tsArrow.getUnit()); + assertEquals("UTC", tsArrow.getTimezone()); + } + + @Test + public void testExternalTypeConversion() { + String expectedColumnName = "col_name"; + boolean expectedNullable = true; + Types.ExternalType externalType = + Types.ExternalType.of( + "{\"name\":\"col_name\",\"nullable\":true," + + "\"type\":{\"name\":\"largeutf8\"},\"children\":[]}"); + Field arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, expectedNullable); + assertEquals(expectedColumnName, arrowField.getName()); + assertEquals(expectedNullable, arrowField.isNullable()); + assertInstanceOf(ArrowType.LargeUtf8.class, arrowField.getFieldType().getType()); + + externalType = + Types.ExternalType.of( + "{\"name\":\"col_name\",\"nullable\":true," + + "\"type\":{\"name\":\"largebinary\"},\"children\":[]}"); + arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, expectedNullable); + assertEquals(expectedColumnName, arrowField.getName()); + assertEquals(expectedNullable, arrowField.isNullable()); + assertInstanceOf(ArrowType.LargeBinary.class, arrowField.getFieldType().getType()); + + externalType = + Types.ExternalType.of( + "{\"name\":\"col_name\",\"nullable\":true," + + "\"type\":{\"name\":\"largelist\"}," + + "\"children\":[" + + "{\"name\":\"element\",\"nullable\":true," + + "\"type\":{\"name\":\"int\", \"bitWidth\":32, \"isSigned\": true}," + + "\"children\":[]}]}"); + 
arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, expectedNullable); + assertEquals(expectedColumnName, arrowField.getName()); + assertEquals(expectedNullable, arrowField.isNullable()); + assertInstanceOf(ArrowType.LargeList.class, arrowField.getFieldType().getType()); + + externalType = + Types.ExternalType.of( + "{\"name\":\"col_name\",\"nullable\":true," + + "\"type\":{\"name\":\"fixedsizelist\", \"listSize\":10}," + + "\"children\":[" + + "{\"name\":\"element\",\"nullable\":true," + + "\"type\":{\"name\":\"int\", \"bitWidth\":32, \"isSigned\": true}," + + "\"children\":[]}]}"); + arrowField = CONVERTER.toArrowField(expectedColumnName, externalType, expectedNullable); + assertEquals(expectedColumnName, arrowField.getName()); + assertEquals(expectedNullable, arrowField.isNullable()); + assertInstanceOf(ArrowType.FixedSizeList.class, arrowField.getFieldType().getType()); + assertEquals(10, ((ArrowType.FixedSizeList) arrowField.getFieldType().getType()).getListSize()); + } + + @ParameterizedTest(name = "[{index}] name={0}, type={1}, nullable={2}") + @MethodSource("toArrowFieldArguments") + @DisplayName("Test toArrowField for various types") + public void testToArrowField( + String name, Type gravitinoType, boolean nullable, Consumer validator) { + Field field = CONVERTER.toArrowField(name, gravitinoType, nullable); + + assertEquals(name, field.getName()); + assertEquals(nullable, field.isNullable()); + validator.accept(field); + } + + @Test + void testUnsupportedTypeThrowsException() { + Types.UnparsedType unparsedType = Types.UnparsedType.of("UNKNOWN_TYPE"); + assertThrows(UnsupportedOperationException.class, () -> CONVERTER.fromGravitino(unparsedType)); + } + + @Test + void testToGravitinoNotImplemented() { + assertThrows( + UnsupportedOperationException.class, () -> CONVERTER.toGravitino(ArrowType.Utf8.INSTANCE)); + } + + private static Stream toArrowFieldArguments() { + return Stream.of( + // Simple types + Arguments.of("age", 
Types.IntegerType.get(), true, INT_VALIDATOR), + Arguments.of("id", Types.LongType.get(), false, INT_VALIDATOR), + Arguments.of("name", Types.StringType.get(), true, STRING_VALIDATOR), + Arguments.of( + "description", + Types.ExternalType.of( + "{\n" + + " \"name\": \"description\",\n" + + " \"nullable\": true,\n" + + " \"type\": {\n" + + " \"name\": \"largeutf8\"\n" + + " }\n" + + "}"), + true, + LARGE_UTF8_VALIDATOR), + Arguments.of("active", Types.BooleanType.get(), false, BOOLEAN_VALIDATOR), + // Decimal + Arguments.of("price", Types.DecimalType.of(10, 2), false, DECIMAL_VALIDATOR), + // List + Arguments.of( + "numbers", Types.ListType.of(Types.IntegerType.get(), true), false, LIST_VALIDATOR), + // Map + Arguments.of( + "properties", + Types.MapType.of(Types.StringType.get(), Types.IntegerType.get(), true), + true, + MAP_VALIDATOR), + // Struct + Arguments.of("person", SIMPLE_STRUCT, true, STRUCT_VALIDATOR), + // Nested Struct + Arguments.of("person_nested", NESTED_STRUCT, false, NESTED_STRUCT_VALIDATOR), + Arguments.of( + "person_nested_json", + Types.ExternalType.of(NESTED_STRUCT_JSON), + false, + NESTED_STRUCT_VALIDATOR), + // List of Structs + Arguments.of("items", LIST_OF_STRUCTS, false, LIST_OF_STRUCTS_VALIDATOR), + // Union + Arguments.of( + "union_field", + Types.UnionType.of(Types.IntegerType.get(), Types.StringType.get()), + true, + UNION_VALIDATOR)); + } +} diff --git a/docs/generic-lakehouse-catalog.md b/docs/generic-lakehouse-catalog.md new file mode 100644 index 00000000000..35eaeb46602 --- /dev/null +++ b/docs/generic-lakehouse-catalog.md @@ -0,0 +1,140 @@ +--- +title: "Lakehouse catalog" +slug: /lakehouse-catalog +keywords: + - lakehouse + - lance + - metadata +license: "This software is licensed under the Apache License version 2." +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## Introduction + +TBD. + +### Requirements and limitations + +TBD. + +## Catalog + +### Catalog capabilities + +TBD. 
+ +### Catalog properties + +TBD. + +### Catalog operations + +TBD. + +## Schema + +### Schema capabilities + +TBD. + +### Schema properties + +TBD. + +### Schema operations + +Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational-metadata-using-gravitino.md#schema-operations) for more details. + +## Table + +### Table capabilities + +TBD. + +### Table partitions + +TBD. + +### Table sort orders + +TBD. + +### Table distributions + +TBD. + +### Table column types + +Since Lance uses Apache Arrow as the table schema, the following table shows the mapping between Gravitino types and Arrow types: + +| Gravitino Type | Arrow Type | +|----------------------------------|-----------------------------------------| +| `Struct` | `Struct` | +| `Map` | `Map` | +| `List` | `Array` | +| `Boolean` | `Boolean` | +| `Byte` | `Int8` | +| `Short` | `Int16` | +| `Integer` | `Int32` | +| `Long` | `Int64` | +| `Float` | `Float` | +| `Double` | `Double` | +| `String` | `Utf8` | +| `Binary` | `Binary` | +| `Decimal(p, s)` | `Decimal(p, s)` (128-bit) | +| `Date` | `Date` | +| `Timestamp`/`Timestamp(6)` | `TimestampType withoutZone` | +| `Timestamp(0)` | `TimestampType Second withoutZone` | +| `Timestamp(3)` | `TimestampType Millisecond withoutZone` | +| `Timestamp(9)` | `TimestampType Nanosecond withoutZone` | +| `Timestamp_tz`/`Timestamp_tz(6)` | `TimestampType Microsecond withUtc` | +| `Timestamp_tz(0)` | `TimestampType Second withUtc` | +| `Timestamp_tz(3)` | `TimestampType Millisecond withUtc` | +| `Timestamp_tz(9)` | `TimestampType Nanosecond withUtc` | +| `Time`/`Time(9)` | `Time Nanosecond` | +| `Null` | `Null` | +| `Fixed(n)` | `Fixed-Size Binary(n)` | +| `Interval_year` | `Interval(YearMonth)` | +| `Interval_day` | `Duration(Microsecond)` | +| `External(arrow_field_json_str)` | Any Arrow Field (see note below) | + +`External(arrow_field_json_str)`: + +As the table above shows, Gravitino provides mappings for most common data types. 
However, +in some cases, you may need to use an Arrow data type that is not directly supported by Gravitino. + +To address this, Gravitino introduces the `External(arrow_field_json_str)` type, +which allows you to define any Arrow data type by providing the JSON string of an Arrow `Field`. + +The JSON string must conform to the Apache Arrow `Field` [specification](https://github.com/apache/arrow-java/blob/ed81e5981a2bee40584b3a411ed755cb4cc5b91f/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java#L80C1-L86C68), +including details such as the field name, data type, and nullability. +Here are some examples of how to use `External` type for various Arrow types that are not natively supported by Gravitino: + +| Arrow Type | External type | +|-------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Large Utf8` | `External("{\"name\":\"col_name\",\"nullable\":true,\"type\":{\"name\":\"largeutf8\"},\"children\":[]}")` | +| `Large Binary` | `External("{\"name\":\"col_name\",\"nullable\":true,\"type\":{\"name\":\"largebinary\"},\"children\":[]}")` | +| `Large List` | `External("{\"name\":\"col_name\",\"nullable\":true,\"type\":{\"name\":\"largelist\"},\"children\":[{\"name\":\"element\",\"nullable\":true,\"type\":{\"name\":\"int\", \"bitWidth\":32, \"isSigned\": true},\"children\":[]}]}")` | +| `Fixed-Size List` | `External("{\"name\":\"col_name\",\"nullable\":true,\"type\":{\"name\":\"fixedsizelist\", \"listSize\":10},\"children\":[{\"name\":\"element\",\"nullable\":true,\"type\":{\"name\":\"int\", \"bitWidth\":32, \"isSigned\": true},\"children\":[]}]}")` | + +**Important considerations:** +- The `name` attribute and `nullable` attribute in the JSON string must exactly match the corresponding column name and nullable in the Gravitino table. 
+- The `children` array should be empty for primitive types. For complex types like `Struct` or `List`, it must contain the definitions of the child fields. + +### Table properties + +TBD. + +### Table indexes + +TBD. + +### Table operations + +Please refer to [Manage Relational Metadata Using Gravitino](./manage-relational-metadata-using-gravitino.md#table-operations) for more details. + +## Object store configuration + +TBD. From f313b70f9c59969a7d95d99b72a02d23c17bee05 Mon Sep 17 00:00:00 2001 From: Mini Yu Date: Thu, 30 Oct 2025 15:01:12 +0800 Subject: [PATCH 20/43] [#8915] improvment(catalogs): Polish code for PR #8879 (#8922) ### What changes were proposed in this pull request? This PR trys to resolve the comments that have not been addressed in #8879 ### Why are the changes needed? It's an improvement. Fix: #8915 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? Test locally, and we will add ITs in https://github.com/apache/gravitino/issues/8921 --- .../java/org/apache/gravitino/rel/Table.java | 31 +++ .../build.gradle.kts | 1 - .../GenericLakehouseCatalogOperations.java | 197 ++++++++++++------ ...ricLakehouseCatalogPropertiesMetadata.java | 4 +- ...ericLakehouseSchemaPropertiesMetadata.java | 6 +- ...nericLakehouseTablePropertiesMetadata.java | 19 +- .../lakehouse/LakehouseCatalogOperations.java | 14 ++ .../lakehouse/LakehouseTableFormat.java | 38 ++-- .../lance/LanceCatalogOperations.java | 126 +++++++---- .../lakehouse/utils/EntityConverter.java | 42 ++++ .../lakehouse/TestPropertiesMetadata.java | 102 +++++++++ .../lakehouse/utils/TestEntityConverter.java | 78 +++++++ .../catalog/EntityCombinedTable.java | 10 + .../catalog/TableOperationDispatcher.java | 181 +++++++--------- .../connector/GenericLakehouseColumn.java | 1 + .../connector/GenericLakehouseTable.java | 14 +- .../gravitino/meta/GenericTableEntity.java | 186 ----------------- .../apache/gravitino/meta/TableEntity.java | 112 +++++++++- 
.../relational/mapper/TableMetaMapper.java | 3 - .../relational/mapper/TableVersionMapper.java | 14 ++ .../TableVersionSQLProviderFactory.java | 10 + .../base/TableMetaBaseSQLProvider.java | 43 ++-- .../base/TableVersionBaseSQLProvider.java | 16 ++ .../TableVersionPostgreSQLProvider.java | 11 +- .../relational/service/TableMetaService.java | 46 ++-- .../relational/utils/POConverters.java | 119 ++++------- .../catalog/TestOperationDispatcher.java | 1 + .../hook/TestTableHookDispatcher.java | 7 + .../org/apache/gravitino/meta/TestEntity.java | 35 +++- .../gravitino/storage/TestEntityStorage.java | 27 +-- .../storage/relational/TestJDBCBackend.java | 123 +++++++++++ .../relational/utils/TestPOConverters.java | 3 +- .../rest/LanceNamespaceOperations.java | 2 + .../service/rest/LanceTableOperations.java | 9 + scripts/h2/schema-1.1.0-h2.sql | 5 +- scripts/h2/upgrade-1.0.0-to-1.1.0-h2.sql | 5 +- scripts/mysql/schema-1.1.0-mysql.sql | 5 +- .../mysql/upgrade-1.0.0-to-1.1.0-mysql.sql | 5 +- .../postgresql/schema-1.1.0-postgresql.sql | 8 +- .../upgrade-1.0.0-to-1.1.0-postgresql.sql | 8 +- 40 files changed, 1062 insertions(+), 605 deletions(-) rename api/src/main/java/org/apache/gravitino/rel/GenericTable.java => catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseTableFormat.java (58%) create mode 100644 catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/utils/EntityConverter.java create mode 100644 catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/TestPropertiesMetadata.java create mode 100644 catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/utils/TestEntityConverter.java delete mode 100644 core/src/main/java/org/apache/gravitino/meta/GenericTableEntity.java diff --git a/api/src/main/java/org/apache/gravitino/rel/Table.java b/api/src/main/java/org/apache/gravitino/rel/Table.java index 619694b1a28..316ba188394 100644 
--- a/api/src/main/java/org/apache/gravitino/rel/Table.java +++ b/api/src/main/java/org/apache/gravitino/rel/Table.java @@ -99,6 +99,37 @@ default Map properties() { return Collections.emptyMap(); } + /** + * Table format of the table. For example, in a file-based table, it could be "parquet", "Lance", + * "Iceberg", etc. + * + * @return the table format name, for more information: LakehouseTableFormat + */ + default String format() { + throw new UnsupportedOperationException("Table format is not supported."); + } + + /** + * Gets the location of the table if the table has a location. For example, in a file-based table, + * it could be the root path where the table data is stored. + * + * @return the location of the table as a string. + */ + default String location() { + throw new UnsupportedOperationException("Table location is not supported."); + } + + /** + * Indicates whether the table is external. An external table is a table that is not managed by + * the catalog and the drop operation will not delete the underlying data. If it's a managed + * table, dropping the table will delete the underlying data. + * + * @return true if the table is external, false otherwise + */ + default boolean external() { + return false; + } + /** * Table method for working with partitions. If the table does not support partition operations, * an {@link UnsupportedOperationException} is thrown. 
diff --git a/catalogs/catalog-generic-lakehouse/build.gradle.kts b/catalogs/catalog-generic-lakehouse/build.gradle.kts index df401dcde41..704dbda7e36 100644 --- a/catalogs/catalog-generic-lakehouse/build.gradle.kts +++ b/catalogs/catalog-generic-lakehouse/build.gradle.kts @@ -43,7 +43,6 @@ dependencies { implementation(libs.commons.lang3) implementation(libs.guava) implementation(libs.hadoop3.client.api) - implementation(libs.hadoop3.client.runtime) implementation(libs.lance) annotationProcessor(libs.lombok) diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java index 358c2dcab5c..89a0ef58eff 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogOperations.java @@ -19,15 +19,16 @@ package org.apache.gravitino.catalog.lakehouse; import static org.apache.gravitino.Entity.EntityType.TABLE; -import static org.apache.gravitino.catalog.lakehouse.GenericLakehouseTablePropertiesMetadata.LOCATION; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Maps; import java.io.IOException; +import java.time.Instant; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; +import java.util.stream.IntStream; import org.apache.commons.collections4.MapUtils; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Catalog; @@ -40,8 +41,10 @@ import org.apache.gravitino.SchemaChange; import org.apache.gravitino.catalog.ManagedSchemaOperations; import org.apache.gravitino.catalog.lakehouse.lance.LanceCatalogOperations; +import 
org.apache.gravitino.catalog.lakehouse.utils.EntityConverter; import org.apache.gravitino.connector.CatalogInfo; import org.apache.gravitino.connector.CatalogOperations; +import org.apache.gravitino.connector.GenericLakehouseTable; import org.apache.gravitino.connector.HasPropertyMetadata; import org.apache.gravitino.connector.SupportsSchemas; import org.apache.gravitino.exceptions.NoSuchCatalogException; @@ -51,8 +54,10 @@ import org.apache.gravitino.exceptions.NonEmptySchemaException; import org.apache.gravitino.exceptions.SchemaAlreadyExistsException; import org.apache.gravitino.exceptions.TableAlreadyExistsException; -import org.apache.gravitino.meta.GenericTableEntity; +import org.apache.gravitino.meta.AuditInfo; +import org.apache.gravitino.meta.ColumnEntity; import org.apache.gravitino.meta.SchemaEntity; +import org.apache.gravitino.meta.TableEntity; import org.apache.gravitino.rel.Column; import org.apache.gravitino.rel.Table; import org.apache.gravitino.rel.TableCatalog; @@ -61,6 +66,8 @@ import org.apache.gravitino.rel.expressions.sorts.SortOrder; import org.apache.gravitino.rel.expressions.transforms.Transform; import org.apache.gravitino.rel.indexes.Index; +import org.apache.gravitino.storage.IdGenerator; +import org.apache.gravitino.utils.PrincipalUtils; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,14 +81,16 @@ public class GenericLakehouseCatalogOperations private static final String SLASH = "/"; private final ManagedSchemaOperations managedSchemaOps; - private static final Map SUPPORTED_FORMATS = - Maps.newHashMap(); - private Optional catalogLakehouseDir; + private Optional catalogLakehouseLocation; + + private static final Map SUPPORTED_FORMATS = + Maps.newConcurrentMap(); + private Map catalogConfig; private CatalogInfo catalogInfo; private HasPropertyMetadata propertiesMetadata; - + private EntityStore store; /** * Initializes the generic lakehouse catalog operations with the provided 
configuration. * @@ -94,15 +103,16 @@ public class GenericLakehouseCatalogOperations public void initialize( Map conf, CatalogInfo info, HasPropertyMetadata propertiesMetadata) throws RuntimeException { - String catalogDir = + String catalogLocation = (String) propertiesMetadata .catalogPropertiesMetadata() - .getOrDefault(conf, GenericLakehouseCatalogPropertiesMetadata.LAKEHOUSE_DIR); - this.catalogLakehouseDir = - StringUtils.isNotBlank(catalogDir) - ? Optional.of(catalogDir).map(this::ensureTrailingSlash).map(Path::new) + .getOrDefault(conf, GenericLakehouseCatalogPropertiesMetadata.LAKEHOUSE_LOCATION); + this.catalogLakehouseLocation = + StringUtils.isNotBlank(catalogLocation) + ? Optional.of(catalogLocation).map(this::ensureTrailingSlash).map(Path::new) : Optional.empty(); + this.store = GravitinoEnv.getInstance().entityStore(); this.catalogConfig = conf; this.catalogInfo = info; this.propertiesMetadata = propertiesMetadata; @@ -165,19 +175,17 @@ public boolean dropSchema(NameIdentifier ident, boolean cascade) throws NonEmpty @Override public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaException { - EntityStore store = GravitinoEnv.getInstance().entityStore(); NameIdentifier identifier = NameIdentifier.of(namespace.levels()); try { store.get(identifier, Entity.EntityType.SCHEMA, SchemaEntity.class); - } catch (NoSuchTableException e) { - throw new NoSuchEntityException(e, "Schema %s does not exist", namespace); + } catch (NoSuchEntityException e) { + throw new NoSuchSchemaException(e, "Schema %s does not exist", namespace); } catch (IOException ioe) { throw new RuntimeException("Failed to get schema " + identifier); } try { - List tableEntityList = - store.list(namespace, GenericTableEntity.class, TABLE); + List tableEntityList = store.list(namespace, TableEntity.class, TABLE); return tableEntityList.stream() .map(e -> NameIdentifier.of(namespace, e.name())) .toArray(NameIdentifier[]::new); @@ -188,7 +196,23 @@ public NameIdentifier[] 
listTables(Namespace namespace) throws NoSuchSchemaExcep @Override public Table loadTable(NameIdentifier ident) throws NoSuchTableException { - throw new UnsupportedOperationException("Not implemented yet."); + try { + TableEntity tableEntity = store.get(ident, Entity.EntityType.TABLE, TableEntity.class); + return GenericLakehouseTable.builder() + .withFormat(tableEntity.getFormat()) + .withProperties(tableEntity.getProperties()) + .withAuditInfo(tableEntity.auditInfo()) + .withSortOrders(tableEntity.getSortOrder()) + .withPartitioning(tableEntity.getPartitions()) + .withDistribution(tableEntity.getDistribution()) + .withColumns(EntityConverter.toColumns(tableEntity.columns())) + .withIndexes(tableEntity.getIndexes()) + .withName(tableEntity.name()) + .withComment(tableEntity.getComment()) + .build(); + } catch (IOException e) { + throw new RuntimeException("Failed to list tables under schema " + ident.namespace(), e); + } } @Override @@ -202,54 +226,92 @@ public Table createTable( SortOrder[] sortOrders, Index[] indexes) throws NoSuchSchemaException, TableAlreadyExistsException { + LakehouseTableFormat format = + (LakehouseTableFormat) + propertiesMetadata + .tablePropertiesMetadata() + .getOrDefault(properties, GenericLakehouseTablePropertiesMetadata.LAKEHOUSE_FORMAT); Schema schema = loadSchema(NameIdentifier.of(ident.namespace().levels())); + String tableLocation = calculateTableLocation(schema, ident, properties); Map tableStorageProps = calculateTableStorageProps(schema, properties); Map newProperties = Maps.newHashMap(properties); - newProperties.put(LOCATION, tableLocation); + newProperties.put(GenericLakehouseTablePropertiesMetadata.LAKEHOUSE_LOCATION, tableLocation); newProperties.putAll(tableStorageProps); - String format = properties.getOrDefault("format", "lance"); - LakehouseCatalogOperations lakehouseCatalogOperations = - SUPPORTED_FORMATS.compute( - format, - (k, v) -> - v == null - ? 
createLakehouseCatalogOperations( - format, properties, catalogInfo, propertiesMetadata) - : v); - - return lakehouseCatalogOperations.createTable( - ident, columns, comment, newProperties, partitions, distribution, sortOrders, indexes); + AuditInfo auditInfo = + AuditInfo.builder() + .withCreator(PrincipalUtils.getCurrentUserName()) + .withCreateTime(Instant.now()) + .build(); + IdGenerator idGenerator = GravitinoEnv.getInstance().idGenerator(); + List columnEntityList = + IntStream.range(0, columns.length) + .mapToObj( + i -> ColumnEntity.toColumnEntity(columns[i], i, idGenerator.nextId(), auditInfo)) + .collect(Collectors.toList()); + + TableEntity entityToStore; + try { + entityToStore = + TableEntity.builder() + .withName(ident.name()) + .withNamespace(ident.namespace()) + .withColumns(columnEntityList) + .withFormat(format.lowerName()) + .withProperties(newProperties) + .withComment(comment) + .withPartitions(partitions) + .withSortOrder(sortOrders) + .withDistribution(distribution) + .withIndexes(indexes) + .withId(idGenerator.nextId()) + .withAuditInfo(auditInfo) + .build(); + store.put(entityToStore); + LakehouseCatalogOperations lanceCatalogOperations = + getLakehouseCatalogOperations(newProperties); + return lanceCatalogOperations.createTable( + ident, columns, comment, newProperties, partitions, distribution, sortOrders, indexes); + } catch (IOException e) { + throw new RuntimeException("Failed to create table " + ident, e); + } } private String calculateTableLocation( Schema schema, NameIdentifier tableIdent, Map tableProperties) { - String tableLocation = tableProperties.get(LOCATION); + String tableLocation = + (String) + propertiesMetadata + .tablePropertiesMetadata() + .getOrDefault( + tableProperties, GenericLakehouseTablePropertiesMetadata.LAKEHOUSE_LOCATION); if (StringUtils.isNotBlank(tableLocation)) { return ensureTrailingSlash(tableLocation); } - String schemaLocation = schema.properties() == null ? 
null : schema.properties().get(LOCATION); + String schemaLocation = + schema.properties() == null + ? null + : schema.properties().get(GenericLakehouseSchemaPropertiesMetadata.LAKEHOUSE_LOCATION); // If we do not set location in table properties, and schema location is set, use schema - // location - // as the base path. + // location as the base path. if (StringUtils.isNotBlank(schemaLocation)) { return ensureTrailingSlash(schemaLocation) + tableIdent.name() + SLASH; } // If the schema location is not set, use catalog lakehouse dir as the base path. Or else, throw // an exception. - if (catalogLakehouseDir.isEmpty()) { + if (catalogLakehouseLocation.isEmpty()) { throw new RuntimeException( String.format( "No location specified for table %s, you need to set location either in catalog, schema, or table properties", tableIdent)); } - String catalogLakehousePath = catalogLakehouseDir.get().toString(); + String catalogLakehousePath = catalogLakehouseLocation.get().toString(); String[] nsLevels = tableIdent.namespace().levels(); String schemaName = nsLevels[nsLevels.length - 1]; return ensureTrailingSlash(catalogLakehousePath) @@ -262,21 +324,12 @@ private String calculateTableLocation( @Override public Table alterTable(NameIdentifier ident, TableChange... changes) throws NoSuchTableException, IllegalArgumentException { - EntityStore store = GravitinoEnv.getInstance().entityStore(); Namespace namespace = ident.namespace(); try { - GenericTableEntity tableEntity = - store.get(ident, Entity.EntityType.TABLE, GenericTableEntity.class); + TableEntity tableEntity = store.get(ident, Entity.EntityType.TABLE, TableEntity.class); Map tableProperties = tableEntity.getProperties(); - String format = tableProperties.getOrDefault("format", "lance"); LakehouseCatalogOperations lakehouseCatalogOperations = - SUPPORTED_FORMATS.compute( - format, - (k, v) -> - v == null - ? 
createLakehouseCatalogOperations( - format, tableProperties, catalogInfo, propertiesMetadata) - : v); + getLakehouseCatalogOperations(tableProperties); return lakehouseCatalogOperations.alterTable(ident, changes); } catch (IOException e) { throw new RuntimeException("Failed to list tables under schema " + namespace, e); @@ -285,28 +338,36 @@ public Table alterTable(NameIdentifier ident, TableChange... changes) @Override public boolean dropTable(NameIdentifier ident) { - EntityStore store = GravitinoEnv.getInstance().entityStore(); - GenericTableEntity tableEntity; + Namespace namespace = ident.namespace(); try { - tableEntity = store.get(ident, Entity.EntityType.TABLE, GenericTableEntity.class); - } catch (NoSuchEntityException e) { - LOG.warn("Table {} does not exist, skip dropping.", ident); + TableEntity tableEntity = store.get(ident, Entity.EntityType.TABLE, TableEntity.class); + LakehouseCatalogOperations lakehouseCatalogOperations = + getLakehouseCatalogOperations(tableEntity.getProperties()); + return lakehouseCatalogOperations.dropTable(ident); + } catch (NoSuchTableException e) { + LOG.warn("Table {} does not exist, skip dropping it.", ident); return false; - } catch (IOException ioe) { - throw new RuntimeException("Failed to get table " + ident); + } catch (IOException e) { + throw new RuntimeException("Failed to list tables under schema " + namespace, e); } + } - Map tableProperties = tableEntity.getProperties(); - String format = tableProperties.getOrDefault("format", "lance"); - LakehouseCatalogOperations lakehouseCatalogOperations = - SUPPORTED_FORMATS.compute( - format, - (k, v) -> - v == null - ? 
createLakehouseCatalogOperations( - format, tableProperties, catalogInfo, propertiesMetadata) - : v); - return lakehouseCatalogOperations.dropTable(ident); + private LakehouseCatalogOperations getLakehouseCatalogOperations( + Map tableProperties) { + LakehouseTableFormat format = + (LakehouseTableFormat) + propertiesMetadata + .tablePropertiesMetadata() + .getOrDefault( + tableProperties, GenericLakehouseTablePropertiesMetadata.LAKEHOUSE_FORMAT); + + return SUPPORTED_FORMATS.compute( + format, + (k, v) -> + v == null + ? createLakehouseCatalogOperations( + format, tableProperties, catalogInfo, propertiesMetadata) + : v); } private String ensureTrailingSlash(String path) { @@ -314,13 +375,13 @@ private String ensureTrailingSlash(String path) { } private LakehouseCatalogOperations createLakehouseCatalogOperations( - String format, + LakehouseTableFormat format, Map properties, CatalogInfo catalogInfo, HasPropertyMetadata propertiesMetadata) { LakehouseCatalogOperations operations; - switch (format.toLowerCase()) { - case "lance": + switch (format) { + case LANCE: operations = new LanceCatalogOperations(); break; default: diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java index e381558c321..b8c3958e9a0 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseCatalogPropertiesMetadata.java @@ -31,7 +31,7 @@ public class GenericLakehouseCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata { - public static final String LAKEHOUSE_DIR = "lakehouse-dir"; + public static final String LAKEHOUSE_LOCATION = "location"; private static final 
Map> PROPERTIES_METADATA; @@ -39,7 +39,7 @@ public class GenericLakehouseCatalogPropertiesMetadata extends BaseCatalogProper List> propertyEntries = ImmutableList.of( stringOptionalPropertyEntry( - LAKEHOUSE_DIR, + LAKEHOUSE_LOCATION, "The root directory of the lakehouse catalog.", false /* immutable */, null, /* defaultValue */ diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java index a6da0ac2ded..3dd0abf81d2 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseSchemaPropertiesMetadata.java @@ -29,8 +29,8 @@ import org.apache.gravitino.connector.PropertyEntry; public class GenericLakehouseSchemaPropertiesMetadata extends BasePropertiesMetadata { - public static final String LAKEHOUSE_DIR = - GenericLakehouseCatalogPropertiesMetadata.LAKEHOUSE_DIR; + public static final String LAKEHOUSE_LOCATION = + GenericLakehouseCatalogPropertiesMetadata.LAKEHOUSE_LOCATION; private static final Map> PROPERTIES_METADATA; @@ -38,7 +38,7 @@ public class GenericLakehouseSchemaPropertiesMetadata extends BasePropertiesMeta List> propertyEntries = ImmutableList.of( stringOptionalPropertyEntry( - LAKEHOUSE_DIR, + LAKEHOUSE_LOCATION, "The root directory of the lakehouse schema.", false /* immutable */, null, /* defaultValue */ diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java index e9a61a6b0fc..f8ca11b0a01 100644 --- 
a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/GenericLakehouseTablePropertiesMetadata.java @@ -18,6 +18,7 @@ */ package org.apache.gravitino.catalog.lakehouse; +import static org.apache.gravitino.connector.PropertyEntry.enumPropertyEntry; import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; import com.google.common.collect.ImmutableList; @@ -28,7 +29,8 @@ import org.apache.gravitino.connector.PropertyEntry; public class GenericLakehouseTablePropertiesMetadata extends BasePropertiesMetadata { - public static final String LOCATION = "location"; + public static final String LAKEHOUSE_LOCATION = "location"; + public static final String LAKEHOUSE_FORMAT = "format"; public static final String LANCE_TABLE_STORAGE_OPTION_PREFIX = "lance.storage."; private static final Map> PROPERTIES_METADATA; @@ -37,11 +39,20 @@ public class GenericLakehouseTablePropertiesMetadata extends BasePropertiesMetad List> propertyEntries = ImmutableList.of( stringOptionalPropertyEntry( - LOCATION, - "The root directory of the lakehouse table.", - true /* immutable */, + LAKEHOUSE_LOCATION, + "The root directory of the lakehouse catalog.", + false /* immutable */, null, /* defaultValue */ false /* hidden */), + enumPropertyEntry( + LAKEHOUSE_FORMAT, + "The table format of the lakehouse table (e.g., iceberg, delta, lance)", + true /* required */, + true /* immutable */, + LakehouseTableFormat.class /* enumClass */, + null /* defaultValue */, + false /* hidden */, + false /* reserved */), PropertyEntry.stringOptionalPropertyPrefixEntry( LANCE_TABLE_STORAGE_OPTION_PREFIX, "The storage options passed to Lance table.", diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseCatalogOperations.java 
b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseCatalogOperations.java index 66c7147626f..d5b95845db0 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseCatalogOperations.java @@ -22,4 +22,18 @@ import org.apache.gravitino.connector.CatalogOperations; import org.apache.gravitino.rel.TableCatalog; +/** + * Interface for detailed lakehouse catalog operations, combining catalog operations and table + * catalog. {@link GenericLakehouseCatalog} will try to use this interface to provide detailed + * lakehouse catalog operations. + * + *

    + *    GenericLakehouseCatalog.createTable()
    + *       -> LakehouseCatalogOperations.createTable()
    + *         -> LanceTableOperations.createTable()
    + *         -> IcebergTableOperations.createTable()
    + *         -> DeltaTableOperations.createTable()
    + *         ...
    + * 
    + */ public interface LakehouseCatalogOperations extends CatalogOperations, TableCatalog {} diff --git a/api/src/main/java/org/apache/gravitino/rel/GenericTable.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseTableFormat.java similarity index 58% rename from api/src/main/java/org/apache/gravitino/rel/GenericTable.java rename to catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseTableFormat.java index 4796421c53c..57d0230f48a 100644 --- a/api/src/main/java/org/apache/gravitino/rel/GenericTable.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/LakehouseTableFormat.java @@ -17,31 +17,25 @@ * under the License. */ -package org.apache.gravitino.rel; +package org.apache.gravitino.catalog.lakehouse; -/** A generic table interface that extends the Table interface. */ -public interface GenericTable extends Table { +public enum LakehouseTableFormat { + LANCE, - /** - * Formats the table as a string representation. - * - * @return the formatted string representation of the table - */ - String format(); + DELTA, - /** - * Gets the location of the table. - * - * @return the location of the table - */ - String location(); + ICEBERG; - /** - * Indicates whether the table is external. 
- * - * @return true if the table is external, false otherwise - */ - default boolean external() { - return false; + public String lowerName() { + return this.name().toLowerCase(); + } + + public static LakehouseTableFormat fromFormatName(String type) { + for (LakehouseTableFormat tableType : LakehouseTableFormat.values()) { + if (tableType.name().equalsIgnoreCase(type)) { + return tableType; + } + } + throw new IllegalArgumentException("Unknown LakehouseTableFormat: " + type); } } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java index 9572c656d23..e27f8032abf 100644 --- a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/lance/LanceCatalogOperations.java @@ -19,8 +19,8 @@ package org.apache.gravitino.catalog.lakehouse.lance; +import static org.apache.gravitino.Entity.EntityType.TABLE; import static org.apache.gravitino.catalog.lakehouse.GenericLakehouseTablePropertiesMetadata.LANCE_TABLE_STORAGE_OPTION_PREFIX; -import static org.apache.gravitino.catalog.lakehouse.GenericLakehouseTablePropertiesMetadata.LOCATION; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; @@ -46,7 +46,10 @@ import org.apache.gravitino.GravitinoEnv; import org.apache.gravitino.NameIdentifier; import org.apache.gravitino.Namespace; +import org.apache.gravitino.catalog.lakehouse.GenericLakehouseTablePropertiesMetadata; import org.apache.gravitino.catalog.lakehouse.LakehouseCatalogOperations; +import org.apache.gravitino.catalog.lakehouse.LakehouseTableFormat; +import org.apache.gravitino.catalog.lakehouse.utils.EntityConverter; import org.apache.gravitino.connector.CatalogInfo; import 
org.apache.gravitino.connector.GenericLakehouseTable; import org.apache.gravitino.connector.HasPropertyMetadata; @@ -55,9 +58,8 @@ import org.apache.gravitino.exceptions.NoSuchTableException; import org.apache.gravitino.exceptions.TableAlreadyExistsException; import org.apache.gravitino.meta.AuditInfo; -import org.apache.gravitino.meta.GenericTableEntity; +import org.apache.gravitino.meta.TableEntity; import org.apache.gravitino.rel.Column; -import org.apache.gravitino.rel.GenericTable; import org.apache.gravitino.rel.Table; import org.apache.gravitino.rel.TableChange; import org.apache.gravitino.rel.expressions.distributions.Distribution; @@ -66,19 +68,16 @@ import org.apache.gravitino.rel.indexes.Index; import org.apache.gravitino.rel.indexes.Indexes.IndexImpl; import org.apache.gravitino.utils.PrincipalUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; public class LanceCatalogOperations implements LakehouseCatalogOperations { - private Map lancePropertiesMap; + private EntityStore store; @Override public void initialize( Map config, CatalogInfo info, HasPropertyMetadata propertiesMetadata) throws RuntimeException { - lancePropertiesMap = ImmutableMap.copyOf(config); + store = GravitinoEnv.getInstance().entityStore(); } @Override @@ -95,13 +94,16 @@ public void close() throws IOException {} @Override public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaException { - return new NameIdentifier[0]; + // No need to do nothing here as GenericLakehouseCatalogOperations will do the work. + throw new UnsupportedOperationException( + "We should not reach here as we could get table info" + "from metastore directly."); } @Override public Table loadTable(NameIdentifier ident) throws NoSuchTableException { - // Should not come here. - return null; + // No need to do nothing here as GenericLakehouseCatalogOperations will do the work. 
+ throw new UnsupportedOperationException( + "We should not reach here as we could get table info" + "from metastore directly."); } @Override @@ -116,7 +118,7 @@ public Table createTable( Index[] indexes) throws NoSuchSchemaException, TableAlreadyExistsException { // Ignore partitions, distributions, sortOrders, and indexes for Lance tables; - String location = properties.get(LOCATION); + String location = properties.get(GenericLakehouseTablePropertiesMetadata.LAKEHOUSE_LOCATION); Map storageProps = properties.entrySet().stream() .filter(e -> e.getKey().startsWith(LANCE_TABLE_STORAGE_OPTION_PREFIX)) @@ -124,7 +126,8 @@ public Table createTable( Collectors.toMap( e -> e.getKey().substring(LANCE_TABLE_STORAGE_OPTION_PREFIX.length()), Map.Entry::getValue)); - try (Dataset dataset = + + try (Dataset ignored = Dataset.create( new RootAllocator(), location, @@ -145,7 +148,7 @@ public Table createTable( .build()) .withPartitioning(partitions) .withSortOrders(sortOrders) - .withFormat("lance") + .withFormat(LakehouseTableFormat.LANCE.lowerName()) .build(); } } @@ -167,6 +170,7 @@ public Table alterTable(NameIdentifier ident, TableChange... changes) // Lance only supports adding indexes for now. List addedIndexes = Lists.newArrayList(); + // Only support for adding index for now. for (TableChange change : changes) { if (change instanceof TableChange.AddIndex addIndexChange) { Index index = @@ -179,17 +183,66 @@ public Table alterTable(NameIdentifier ident, TableChange... 
changes) } } - EntityStore entityStore = GravitinoEnv.getInstance().entityStore(); - GenericTableEntity entity; + TableEntity updatedEntity; try { - entity = entityStore.get(ident, Entity.EntityType.TABLE, GenericTableEntity.class); + TableEntity entity = store.get(ident, Entity.EntityType.TABLE, TableEntity.class); + updatedEntity = + store.update( + ident, + TableEntity.class, + TABLE, + tableEntity -> + TableEntity.builder() + .withId(tableEntity.id()) + .withName(tableEntity.name()) + .withNamespace(tableEntity.namespace()) + .withFormat(entity.getFormat()) + .withAuditInfo( + AuditInfo.builder() + .withCreator(tableEntity.auditInfo().creator()) + .withCreateTime(tableEntity.auditInfo().createTime()) + .withLastModifier(PrincipalUtils.getCurrentPrincipal().getName()) + .withLastModifiedTime(Instant.now()) + .build()) + .withColumns(tableEntity.columns()) + .withIndexes( + ArrayUtils.addAll( + entity.getIndexes(), addedIndexes.toArray(new Index[0]))) + .withDistribution(tableEntity.getDistribution()) + .withPartitions(tableEntity.getPartitions()) + .withSortOrder(tableEntity.getSortOrder()) + .withProperties(tableEntity.getProperties()) + .withComment(tableEntity.getComment()) + .build()); + + // Add indexes to Lance dataset + addLanceIndex(updatedEntity, addedIndexes); + + // return the updated table + return GenericLakehouseTable.builder() + .withFormat(updatedEntity.getFormat()) + .withProperties(updatedEntity.getProperties()) + .withAuditInfo(updatedEntity.auditInfo()) + .withSortOrders(updatedEntity.getSortOrder()) + .withPartitioning(updatedEntity.getPartitions()) + .withDistribution(updatedEntity.getDistribution()) + .withColumns(EntityConverter.toColumns(updatedEntity.columns())) + .withIndexes(updatedEntity.getIndexes()) + .withName(updatedEntity.name()) + .withComment(updatedEntity.getComment()) + .build(); } catch (NoSuchEntityException e) { throw new NoSuchTableException("No such table: %s", ident); } catch (IOException ioe) { throw new 
RuntimeException("Failed to load table entity for: " + ident, ioe); } + } - String location = entity.getProperties().get("location"); + private void addLanceIndex(TableEntity updatedEntity, List addedIndexes) { + String location = + updatedEntity + .getProperties() + .get(GenericLakehouseTablePropertiesMetadata.LAKEHOUSE_LOCATION); try (Dataset dataset = Dataset.open(location, new RootAllocator())) { // For Lance, we only support adding indexes, so in fact, we can't handle drop index here. for (Index index : addedIndexes) { @@ -205,28 +258,7 @@ public Table alterTable(NameIdentifier ident, TableChange... changes) indexParams, true); } - } catch (Exception e) { - throw new RuntimeException("Failed to alter Lance table: " + ident, e); } - - GenericTable oldTable = entity.toGenericTable(); - Index[] newIndexes = oldTable.index(); - for (Index index : addedIndexes) { - newIndexes = ArrayUtils.add(newIndexes, index); - } - - return GenericLakehouseTable.builder() - .withFormat(oldTable.format()) - .withProperties(oldTable.properties()) - .withAuditInfo((AuditInfo) oldTable.auditInfo()) - .withSortOrders(oldTable.sortOrder()) - .withPartitioning(oldTable.partitioning()) - .withDistribution(oldTable.distribution()) - .withColumns(oldTable.columns()) - .withIndexes(newIndexes) - .withName(oldTable.name()) - .withComment(oldTable.comment()) - .build(); } private IndexParams getIndexParamsByIndexType(IndexType indexType) { @@ -249,10 +281,18 @@ private IndexParams getIndexParamsByIndexType(IndexType indexType) { @Override public boolean dropTable(NameIdentifier ident) { try { - String location = lancePropertiesMap.get("location"); - // Remove the directory on storage - FileSystem fs = FileSystem.get(new Configuration()); - return fs.delete(new Path(location), true); + TableEntity tableEntity = store.get(ident, Entity.EntityType.TABLE, TableEntity.class); + Map lancePropertiesMap = tableEntity.getProperties(); + String location = + 
lancePropertiesMap.get(GenericLakehouseTablePropertiesMetadata.LAKEHOUSE_LOCATION); + + if (!store.delete(ident, Entity.EntityType.TABLE)) { + throw new RuntimeException("Failed to drop Lance table: " + ident.name()); + } + + // Drop the Lance dataset from cloud storage. + Dataset.drop(location, ImmutableMap.of()); + return true; } catch (IOException e) { throw new RuntimeException("Failed to drop Lance table: " + ident.name(), e); } diff --git a/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/utils/EntityConverter.java b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/utils/EntityConverter.java new file mode 100644 index 00000000000..734309a444a --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/main/java/org/apache/gravitino/catalog/lakehouse/utils/EntityConverter.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.catalog.lakehouse.utils; + +import java.util.List; +import org.apache.gravitino.connector.GenericLakehouseColumn; +import org.apache.gravitino.meta.ColumnEntity; +import org.apache.gravitino.rel.Column; + +public class EntityConverter { + public static Column[] toColumns(List columnEntities) { + return columnEntities.stream().map(EntityConverter::toColumn).toArray(Column[]::new); + } + + private static Column toColumn(ColumnEntity columnEntity) { + return GenericLakehouseColumn.builder() + .withName(columnEntity.name()) + .withComment(columnEntity.comment()) + .withAutoIncrement(columnEntity.autoIncrement()) + .withNullable(columnEntity.nullable()) + .withType(columnEntity.dataType()) + .withDefaultValue(columnEntity.defaultValue()) + .build(); + } +} diff --git a/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/TestPropertiesMetadata.java b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/TestPropertiesMetadata.java new file mode 100644 index 00000000000..8dfd3b5ce0f --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/TestPropertiesMetadata.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.catalog.lakehouse; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.gravitino.connector.PropertiesMetadata; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +public class TestPropertiesMetadata { + public static GenericLakehouseCatalog genericLakehouseCatalog; + + @BeforeAll + static void init() { + genericLakehouseCatalog = new GenericLakehouseCatalog(); + } + + @Test + void testCatalogPropertiesMetadata() { + PropertiesMetadata catalogPropertiesMetadata = + genericLakehouseCatalog.catalogPropertiesMetadata(); + Assertions.assertNotNull(catalogPropertiesMetadata); + + Map catalogProperties = + ImmutableMap.of( + "storage.type", "s3", + "storage.s3.bucket", "my-bucket", + "storage.s3.region", "us-west-2", + "location", "/tmp/test1"); + + String catalogLocation = + (String) + catalogPropertiesMetadata.getOrDefault( + catalogProperties, GenericLakehouseCatalogPropertiesMetadata.LAKEHOUSE_LOCATION); + Assertions.assertEquals("/tmp/test1", catalogLocation); + } + + @Test + void testSchemaPropertiesMetadata() { + PropertiesMetadata schemaPropertiesMetadata = + genericLakehouseCatalog.schemaPropertiesMetadata(); + Assertions.assertNotNull(schemaPropertiesMetadata); + + Map schemaProperties = + ImmutableMap.of( + "storage.type", "s3", + "storage.s3.bucket", "my-bucket", + "storage.s3.region", "us-west-2", + "location", "/tmp/test_schema"); + + String schemaLocation = + (String) + schemaPropertiesMetadata.getOrDefault( + schemaProperties, GenericLakehouseSchemaPropertiesMetadata.LAKEHOUSE_LOCATION); + Assertions.assertEquals("/tmp/test_schema", schemaLocation); + } + + @Test + void testTablePropertiesMetadata() { + PropertiesMetadata tablePropertiesMetadata = genericLakehouseCatalog.tablePropertiesMetadata(); + 
Assertions.assertNotNull(tablePropertiesMetadata); + + Map tableProperties = + ImmutableMap.of( + "storage.type", "s3", + "storage.s3.bucket", "my-bucket", + "storage.s3.region", "us-west-2", + "location", "/tmp/test_table", + "format", "iceberg"); + + String tableLocation = + (String) + tablePropertiesMetadata.getOrDefault( + tableProperties, GenericLakehouseTablePropertiesMetadata.LAKEHOUSE_LOCATION); + Assertions.assertEquals("/tmp/test_table", tableLocation); + + LakehouseTableFormat tableFormat = + (LakehouseTableFormat) + tablePropertiesMetadata.getOrDefault( + tableProperties, GenericLakehouseTablePropertiesMetadata.LAKEHOUSE_FORMAT); + Assertions.assertEquals(LakehouseTableFormat.ICEBERG, tableFormat); + } +} diff --git a/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/utils/TestEntityConverter.java b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/utils/TestEntityConverter.java new file mode 100644 index 00000000000..9da5ed530ed --- /dev/null +++ b/catalogs/catalog-generic-lakehouse/src/test/java/org/apache/gravitino/catalog/lakehouse/utils/TestEntityConverter.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.catalog.lakehouse.utils; + +import static org.apache.gravitino.rel.Column.DEFAULT_VALUE_NOT_SET; + +import java.util.List; +import org.apache.gravitino.meta.AuditInfo; +import org.apache.gravitino.meta.ColumnEntity; +import org.apache.gravitino.rel.types.Types; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestEntityConverter { + + @Test + void testToColumns() { + AuditInfo auditInfo = AuditInfo.builder().build(); + List columnEntities = + List.of( + ColumnEntity.builder() + .withName("id") + .withId(1L) + .withDataType(Types.IntegerType.get()) + .withComment("Identifier") + .withAutoIncrement(true) + .withNullable(false) + .withDefaultValue(DEFAULT_VALUE_NOT_SET) + .withAuditInfo(auditInfo) + .withPosition(1) + .build(), + ColumnEntity.builder() + .withName("name") + .withId(2L) + .withDataType(Types.StringType.get()) + .withComment("Name of the entity") + .withAutoIncrement(false) + .withNullable(true) + .withDefaultValue(DEFAULT_VALUE_NOT_SET) + .withPosition(2) + .withAuditInfo(auditInfo) + .build()); + var columns = EntityConverter.toColumns(columnEntities); + Assertions.assertEquals(2, columns.length); + for (var column : columns) { + if (column.name().equals("id")) { + Assertions.assertEquals(Types.IntegerType.get(), column.dataType()); + Assertions.assertEquals("Identifier", column.comment()); + Assertions.assertTrue(column.autoIncrement()); + Assertions.assertFalse(column.nullable()); + Assertions.assertEquals(DEFAULT_VALUE_NOT_SET, column.defaultValue()); + } else if (column.name().equals("name")) { + Assertions.assertEquals(Types.StringType.get(), column.dataType()); + Assertions.assertEquals("Name of the entity", column.comment()); + Assertions.assertFalse(column.autoIncrement()); + Assertions.assertTrue(column.nullable()); + 
Assertions.assertEquals(DEFAULT_VALUE_NOT_SET, column.defaultValue()); + } + } + } +} diff --git a/core/src/main/java/org/apache/gravitino/catalog/EntityCombinedTable.java b/core/src/main/java/org/apache/gravitino/catalog/EntityCombinedTable.java index 921b14dcdf5..7a0d90ead75 100644 --- a/core/src/main/java/org/apache/gravitino/catalog/EntityCombinedTable.java +++ b/core/src/main/java/org/apache/gravitino/catalog/EntityCombinedTable.java @@ -129,6 +129,16 @@ public Index[] index() { return table.index(); } + @Override + public String format() { + return table.format(); + } + + @Override + public String location() { + return table.location(); + } + public boolean imported() { return imported; } diff --git a/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java b/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java index 27119f0c999..e549b806d82 100644 --- a/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java +++ b/core/src/main/java/org/apache/gravitino/catalog/TableOperationDispatcher.java @@ -27,7 +27,6 @@ import com.google.common.base.Objects; import com.google.common.collect.Lists; -import java.io.IOException; import java.time.Instant; import java.util.Arrays; import java.util.Collections; @@ -37,15 +36,16 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.commons.lang3.tuple.Pair; -import org.apache.gravitino.Catalog; import org.apache.gravitino.EntityAlreadyExistsException; import org.apache.gravitino.EntityStore; import org.apache.gravitino.GravitinoEnv; import org.apache.gravitino.NameIdentifier; import org.apache.gravitino.Namespace; import org.apache.gravitino.StringIdentifier; +import org.apache.gravitino.catalog.CatalogManager.CatalogWrapper; import org.apache.gravitino.connector.HasPropertyMetadata; import org.apache.gravitino.connector.capability.Capability; +import org.apache.gravitino.connector.capability.CapabilityResult; import 
org.apache.gravitino.exceptions.NoSuchEntityException; import org.apache.gravitino.exceptions.NoSuchSchemaException; import org.apache.gravitino.exceptions.NoSuchTableException; @@ -54,10 +54,8 @@ import org.apache.gravitino.lock.TreeLockUtils; import org.apache.gravitino.meta.AuditInfo; import org.apache.gravitino.meta.ColumnEntity; -import org.apache.gravitino.meta.GenericTableEntity; import org.apache.gravitino.meta.TableEntity; import org.apache.gravitino.rel.Column; -import org.apache.gravitino.rel.GenericTable; import org.apache.gravitino.rel.Table; import org.apache.gravitino.rel.TableChange; import org.apache.gravitino.rel.expressions.distributions.Distribution; @@ -117,6 +115,18 @@ public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaExcep */ @Override public Table loadTable(NameIdentifier ident) throws NoSuchTableException { + NameIdentifier catalogIdent = getCatalogIdentifier(ident); + if (isManagedTable(catalogIdent)) { + return TreeLockUtils.doWithTreeLock( + ident, + LockType.READ, + () -> + doWithCatalog( + catalogIdent, + c -> c.doWithTableOps(t -> t.loadTable(ident)), + NoSuchTableException.class)); + } + EntityCombinedTable entityCombinedTable = TreeLockUtils.doWithTreeLock(ident, LockType.READ, () -> internalLoadTable(ident)); @@ -240,6 +250,17 @@ public Table alterTable(NameIdentifier ident, TableChange... changes) NoSuchTableException.class, IllegalArgumentException.class); + if (isManagedTable(catalogIdent)) { + // For generic lakehouse catalog, all operations will be dispatched to the underlying + // catalog. + return EntityCombinedTable.of(alteredTable) + .withHiddenProperties( + getHiddenPropertyNames( + getCatalogIdentifier(ident), + HasPropertyMetadata::tablePropertiesMetadata, + alteredTable.properties())); + } + StringIdentifier stringId = getStringIdFromProperties(alteredTable.properties()); // Case 1: The table is not created by Gravitino and this table is never imported. 
TableEntity te = null; @@ -262,57 +283,6 @@ public Table alterTable(NameIdentifier ident, TableChange... changes) tableId = te.id(); } - if (isGenericLakehouseCatalog(catalogIdent)) { - // For generic lakehouse catalog, we only update the table entity with basic info. - GenericTableEntity genericTableEntity = - operateOnEntity( - ident, id -> store.get(id, TABLE, GenericTableEntity.class), "GET", tableId); - if (genericTableEntity == null) { - throw new NoSuchTableException("No such table: %s", ident); - } - - GenericTable genericTable = (GenericTable) alteredTable; - GenericTableEntity updatedGenericTableEntity = - operateOnEntity( - ident, - id -> - store.update( - id, - GenericTableEntity.class, - TABLE, - tableEntity -> - GenericTableEntity.getBuilder() - .withId(tableEntity.id()) - .withName(alteredTable.name()) - .withNamespace(getNewNamespace(ident, changes)) - .withFormat(genericTable.format()) - .withAuditInfo( - AuditInfo.builder() - .withCreator(tableEntity.auditInfo().creator()) - .withCreateTime(tableEntity.auditInfo().createTime()) - .withLastModifier( - PrincipalUtils.getCurrentPrincipal().getName()) - .withLastModifiedTime(Instant.now()) - .build()) - .withColumns(tableEntity.columns()) - .withIndexes(genericTable.index()) - .withDistribution(genericTable.distribution()) - .withPartitions(genericTable.partitioning()) - .withSortOrder(genericTable.sortOrder()) - .withProperties(genericTable.properties()) - .withComment(genericTable.comment()) - .build()), - "UPDATE", - tableId); - - return EntityCombinedTable.of(alteredTable, updatedGenericTableEntity) - .withHiddenProperties( - getHiddenPropertyNames( - getCatalogIdentifier(ident), - HasPropertyMetadata::tablePropertiesMetadata, - alteredTable.properties())); - } - TableEntity updatedTableEntity = operateOnEntity( ident, @@ -371,6 +341,13 @@ public boolean dropTable(NameIdentifier ident) { LockType.WRITE, () -> { NameIdentifier catalogIdent = getCatalogIdentifier(ident); + if 
(isManagedTable(catalogIdent)) { + return doWithCatalog( + catalogIdent, + c -> c.doWithTableOps(t -> t.dropTable(ident)), + RuntimeException.class); + } + boolean droppedFromCatalog = doWithCatalog( catalogIdent, @@ -542,19 +519,6 @@ private EntityCombinedTable importTable(NameIdentifier identifier) { } private EntityCombinedTable internalLoadTable(NameIdentifier ident) { - NameIdentifier catalogIdent = getCatalogIdentifier(ident); - if (isGenericLakehouseCatalog(catalogIdent)) { - try { - GenericTableEntity tableEntity = store.get(ident, TABLE, GenericTableEntity.class); - if (tableEntity != null) { - GenericTable genericTable = tableEntity.toGenericTable(); - return EntityCombinedTable.of(genericTable).withImported(true); - } - } catch (IOException ioe) { - throw new RuntimeException("Failed to load table entity " + ident, ioe); - } - } - NameIdentifier catalogIdentifier = getCatalogIdentifier(ident); Table table = doWithCatalog( @@ -627,6 +591,32 @@ private Table internalCreateTable( return null; }), IllegalArgumentException.class); + + if (isManagedTable(catalogIdent)) { + // For generic lakehouse catalog, all operations will be dispatched to the underlying catalog. + Table table = + doWithCatalog( + catalogIdent, + c -> + c.doWithTableOps( + t -> + t.createTable( + ident, + columns, + comment, + properties, + partitions == null ? EMPTY_TRANSFORM : partitions, + distribution == null ? Distributions.NONE : distribution, + sortOrders == null ? new SortOrder[0] : sortOrders, + indexes == null ? 
Indexes.EMPTY_INDEXES : indexes)), + NoSuchSchemaException.class, + TableAlreadyExistsException.class); + return EntityCombinedTable.of(table) + .withHiddenProperties( + getHiddenPropertyNames( + catalogIdent, HasPropertyMetadata::tablePropertiesMetadata, table.properties())); + } + long uid = idGenerator.nextId(); // Add StringIdentifier to the properties, the specific catalog will handle this // StringIdentifier to make sure only when the operation is successful, the related @@ -665,41 +655,19 @@ private Table internalCreateTable( .mapToObj(i -> ColumnEntity.toColumnEntity(columns[i], i, idGenerator.nextId(), audit)) .collect(Collectors.toList()); - TableEntity tableEntity; - if (isGenericLakehouseCatalog(catalogIdent)) { - // For generic lakehouse catalog, we only create the table entity with basic info. - GenericTable genericTable = (GenericTable) table; - tableEntity = - GenericTableEntity.getBuilder() - .withId(uid) - .withName(ident.name()) - .withNamespace(ident.namespace()) - .withFormat(genericTable.format()) - .withAuditInfo(audit) - .withColumns(columnEntityList) - .withIndexes(table.index()) - .withDistribution(table.distribution()) - .withFormat(genericTable.format()) - .withPartitions(table.partitioning()) - .withSortOrder(table.sortOrder()) - .withProperties(genericTable.properties()) - .withComment(genericTable.comment()) - .build(); - } else { - tableEntity = - TableEntity.builder() - .withId(uid) - .withName(ident.name()) - .withNamespace(ident.namespace()) - .withColumns(columnEntityList) - .withAuditInfo(audit) - .build(); - } + TableEntity tableEntity = + TableEntity.builder() + .withId(uid) + .withName(ident.name()) + .withNamespace(ident.namespace()) + .withColumns(columnEntityList) + .withAuditInfo(audit) + .build(); try { store.put(tableEntity, true /* overwrite */); } catch (Exception e) { - if (isGenericLakehouseCatalog(catalogIdent)) { + if (isManagedTable(catalogIdent)) { // Drop table doWithCatalog( catalogIdent, c -> 
c.doWithTableOps(t -> t.dropTable(ident)), RuntimeException.class); @@ -727,16 +695,13 @@ private List toColumnEntities(Column[] columns, AuditInfo audit) { .collect(Collectors.toList()); } - private boolean isGenericLakehouseCatalog(NameIdentifier catalogIdent) { + private boolean isManagedTable(NameIdentifier catalogIdent) { CatalogManager catalogManager = GravitinoEnv.getInstance().catalogManager(); - try { - Catalog catalog = catalogManager.loadCatalog(catalogIdent); - return catalog.type() == Catalog.Type.RELATIONAL - && catalog.provider().equals("generic-lakehouse"); - } catch (NoSuchEntityException e) { - LOG.warn("Catalog not found: {}", catalogIdent, e); - return false; - } + CatalogWrapper wrapper = catalogManager.loadCatalogAndWrap(catalogIdent); + Capability capability = wrapper.catalog().capability(); + + CapabilityResult result = capability.managedStorage(Capability.Scope.TABLE); + return result == CapabilityResult.SUPPORTED; } private boolean isSameColumn(Column left, int columnPosition, ColumnEntity right) { diff --git a/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseColumn.java b/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseColumn.java index b84b2652566..4fb7b5d12d5 100644 --- a/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseColumn.java +++ b/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseColumn.java @@ -50,6 +50,7 @@ protected GenericLakehouseColumn internalBuild() { hiveColumn.dataType = dataType; hiveColumn.nullable = nullable; hiveColumn.defaultValue = defaultValue == null ? 
DEFAULT_VALUE_NOT_SET : defaultValue; + hiveColumn.autoIncrement = autoIncrement; return hiveColumn; } } diff --git a/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseTable.java b/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseTable.java index a9379a5b316..72061253866 100644 --- a/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseTable.java +++ b/core/src/main/java/org/apache/gravitino/connector/GenericLakehouseTable.java @@ -19,12 +19,7 @@ package org.apache.gravitino.connector; -import org.apache.gravitino.rel.GenericTable; - -public class GenericLakehouseTable extends BaseTable implements GenericTable { - @SuppressWarnings("unused") - private String schemaName; - +public class GenericLakehouseTable extends BaseTable { private String format; public static Builder builder() { @@ -53,14 +48,8 @@ protected TableOperations newOps() throws UnsupportedOperationException { public static class Builder extends BaseTableBuilder { - private String schemaName; private String format; - public Builder withSchemaName(String schemaName) { - this.schemaName = schemaName; - return this; - } - public Builder withFormat(String format) { this.format = format; return this; @@ -69,7 +58,6 @@ public Builder withFormat(String format) { @Override protected GenericLakehouseTable internalBuild() { GenericLakehouseTable genericLakehouseTable = new GenericLakehouseTable(); - genericLakehouseTable.schemaName = this.schemaName; genericLakehouseTable.format = this.format; genericLakehouseTable.columns = this.columns; genericLakehouseTable.comment = this.comment; diff --git a/core/src/main/java/org/apache/gravitino/meta/GenericTableEntity.java b/core/src/main/java/org/apache/gravitino/meta/GenericTableEntity.java deleted file mode 100644 index 4b2dd9ad039..00000000000 --- a/core/src/main/java/org/apache/gravitino/meta/GenericTableEntity.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or 
more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.gravitino.meta; - -import com.google.common.collect.Maps; -import java.util.Map; -import lombok.Getter; -import org.apache.gravitino.Field; -import org.apache.gravitino.connector.GenericLakehouseColumn; -import org.apache.gravitino.connector.GenericLakehouseTable; -import org.apache.gravitino.rel.GenericTable; -import org.apache.gravitino.rel.expressions.distributions.Distribution; -import org.apache.gravitino.rel.expressions.sorts.SortOrder; -import org.apache.gravitino.rel.expressions.transforms.Transform; -import org.apache.gravitino.rel.indexes.Index; - -@Getter -public class GenericTableEntity extends TableEntity { - public static final Field FORMAT = Field.required("format", Long.class, "The table's format"); - public static final Field PROPERTIES = - Field.optional("properties", Map.class, "The table's properties"); - - public static final Field PARTITIONS = - Field.optional("partitions", Transform[].class, "The table's partition"); - - public static final Field SORT_ORDER = - Field.optional("sortOrders", SortOrder[].class, "The table's sort order"); - - public static final Field DISTRIBUTION = - Field.optional("distribution", Distribution.class, "The table's distribution"); - - public static final Field 
INDEXES = - Field.optional("indexes", Index[].class, "The table's indexes"); - - public static final Field COMMENT = - Field.optional("comment", String.class, "The table's comment"); - - public GenericTableEntity() { - super(); - } - - @Override - public Map fields() { - Map superFields = super.fields(); - Map result = Maps.newHashMap(superFields); - result.put(FORMAT, format); - result.put(PROPERTIES, properties); - result.put(PARTITIONS, partitions); - result.put(SORT_ORDER, sortOrder); - result.put(DISTRIBUTION, distribution); - result.put(INDEXES, indexes); - result.put(COMMENT, comment); - - return result; - } - - private String format; - @Getter private Map properties; - private Transform[] partitions; - private SortOrder[] sortOrder; - private Distribution distribution; - private Index[] indexes; - private String comment; - - public static class Builder { - private final GenericTableEntity tableEntity; - - public Builder() { - this.tableEntity = new GenericTableEntity(); - } - - public Builder withId(Long id) { - tableEntity.id = id; - return this; - } - - public Builder withName(String name) { - tableEntity.name = name; - return this; - } - - public Builder withAuditInfo(AuditInfo auditInfo) { - tableEntity.auditInfo = auditInfo; - return this; - } - - public Builder withColumns(java.util.List columns) { - tableEntity.columns = columns; - return this; - } - - public Builder withNamespace(org.apache.gravitino.Namespace namespace) { - tableEntity.namespace = namespace; - return this; - } - - public Builder withFormat(String format) { - tableEntity.format = format; - return this; - } - - public Builder withProperties(Map properties) { - tableEntity.properties = properties; - return this; - } - - public Builder withPartitions(Transform[] partitions) { - tableEntity.partitions = partitions; - return this; - } - - public Builder withSortOrder(SortOrder[] sortOrder) { - tableEntity.sortOrder = sortOrder; - return this; - } - - public Builder 
withDistribution(Distribution distribution) { - tableEntity.distribution = distribution; - return this; - } - - public Builder withIndexes(Index[] indexes) { - tableEntity.indexes = indexes; - return this; - } - - public Builder withComment(String comment) { - tableEntity.comment = comment; - return this; - } - - public GenericTableEntity build() { - return tableEntity; - } - } - - public static GenericTableEntity.Builder getBuilder() { - return new GenericTableEntity.Builder(); - } - - public GenericTable toGenericTable() { - return GenericLakehouseTable.builder() - .withFormat(format) - .withProperties(properties) - .withAuditInfo(auditInfo) - .withSortOrders(sortOrder) - .withPartitioning(partitions) - .withDistribution(distribution) - .withColumns( - columns.stream() - .map(this::toGenericLakehouseColumn) - .toArray(GenericLakehouseColumn[]::new)) - .withIndexes(indexes) - .withName(name) - .withComment(comment) - .build(); - } - - private GenericLakehouseColumn toGenericLakehouseColumn(ColumnEntity columnEntity) { - return GenericLakehouseColumn.builder() - .withName(columnEntity.name()) - .withComment(columnEntity.comment()) - .withAutoIncrement(columnEntity.autoIncrement()) - .withNullable(columnEntity.nullable()) - .withType(columnEntity.dataType()) - .withDefaultValue(columnEntity.defaultValue()) - .build(); - } -} diff --git a/core/src/main/java/org/apache/gravitino/meta/TableEntity.java b/core/src/main/java/org/apache/gravitino/meta/TableEntity.java index 595defed086..795db870d96 100644 --- a/core/src/main/java/org/apache/gravitino/meta/TableEntity.java +++ b/core/src/main/java/org/apache/gravitino/meta/TableEntity.java @@ -20,15 +20,21 @@ import com.google.common.base.Objects; import com.google.common.collect.Maps; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; +import lombok.Getter; import lombok.ToString; import org.apache.gravitino.Auditable; import org.apache.gravitino.Entity; import 
org.apache.gravitino.Field; import org.apache.gravitino.HasIdentifier; import org.apache.gravitino.Namespace; +import org.apache.gravitino.rel.expressions.distributions.Distribution; +import org.apache.gravitino.rel.expressions.sorts.SortOrder; +import org.apache.gravitino.rel.expressions.transforms.Transform; +import org.apache.gravitino.rel.indexes.Index; import org.apache.gravitino.utils.CollectionUtils; /** A class representing a table entity in Apache Gravitino. */ @@ -42,15 +48,42 @@ public class TableEntity implements Entity, Auditable, HasIdentifier { public static final Field COLUMNS = Field.optional("columns", List.class, "The columns of the table"); - protected Long id; + public static final Field FORMAT = Field.optional("format", String.class, "The table's format"); + public static final Field PROPERTIES = + Field.optional("properties", Map.class, "The table's properties"); - protected String name; + public static final Field PARTITIONS = + Field.optional("partitions", Transform[].class, "The table's partition"); - protected AuditInfo auditInfo; + public static final Field SORT_ORDER = + Field.optional("sortOrders", SortOrder[].class, "The table's sort order"); - protected Namespace namespace; + public static final Field DISTRIBUTION = + Field.optional("distribution", Distribution.class, "The table's distribution"); - protected List columns; + public static final Field INDEXES = + Field.optional("indexes", Index[].class, "The table's indexes"); + + public static final Field COMMENT = + Field.optional("comment", String.class, "The table's comment"); + + private Long id; + + private String name; + + private AuditInfo auditInfo; + + private Namespace namespace; + + private List columns; + + @Getter private String format; + @Getter private Map properties; + @Getter private Transform[] partitions; + @Getter private SortOrder[] sortOrder; + @Getter private Distribution distribution; + @Getter private Index[] indexes; + @Getter private String comment; /** * 
Returns a map of the fields and their corresponding values for this table. @@ -65,6 +98,14 @@ public Map fields() { fields.put(AUDIT_INFO, auditInfo); fields.put(COLUMNS, columns); + fields.put(FORMAT, format); + fields.put(PROPERTIES, properties); + fields.put(PARTITIONS, partitions); + fields.put(SORT_ORDER, sortOrder); + fields.put(DISTRIBUTION, distribution); + fields.put(INDEXES, indexes); + fields.put(COMMENT, comment); + return fields; } @@ -136,7 +177,15 @@ public boolean equals(Object o) { && Objects.equal(name, baseTable.name) && Objects.equal(namespace, baseTable.namespace) && Objects.equal(auditInfo, baseTable.auditInfo) - && CollectionUtils.isEqualCollection(columns, baseTable.columns); + && CollectionUtils.isEqualCollection(columns, baseTable.columns) + && Objects.equal(format, baseTable.format) + // Array-typed fields must use Arrays.equals; Objects.equal would only compare references. + && Objects.equal(properties, baseTable.properties) + && Arrays.equals(partitions, baseTable.partitions) + && Arrays.equals(sortOrder, baseTable.sortOrder) + && Objects.equal(distribution, baseTable.distribution) + && Arrays.equals(indexes, baseTable.indexes) + && Objects.equal(comment, baseTable.comment); } @Override @@ -177,6 +226,41 @@ public Builder withColumns(List columns) { return this; } + public Builder withFormat(String format) { + tableEntity.format = format; + return this; + } + + public Builder withProperties(Map properties) { + tableEntity.properties = properties; + return this; + } + + public Builder withPartitions(Transform[] partitions) { + tableEntity.partitions = partitions; + return this; + } + + public Builder withSortOrder(SortOrder[] sortOrder) { + tableEntity.sortOrder = sortOrder; + return this; + } + + public Builder withDistribution(Distribution distribution) { + tableEntity.distribution = distribution; + return this; + } + + public Builder withIndexes(Index[] indexes) { + tableEntity.indexes = indexes; + return this; + } + + public Builder withComment(String comment) { 
tableEntity.comment = comment; + return this; + } + public TableEntity build() { tableEntity.validate(); @@ -184,6 +268,22 @@ public TableEntity build() { tableEntity.columns = Collections.emptyList(); } + if (tableEntity.properties == null) { + tableEntity.properties = Collections.emptyMap(); + } + + if (tableEntity.indexes == null) { + tableEntity.indexes = new Index[0]; + } + + if (tableEntity.partitions == null) { + tableEntity.partitions = new Transform[0]; + } + + if (tableEntity.sortOrder == null) { + tableEntity.sortOrder = new SortOrder[0]; + } + return tableEntity; } } diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableMetaMapper.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableMetaMapper.java index e919e539e96..0eb13e4a1ec 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableMetaMapper.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableMetaMapper.java @@ -56,9 +56,6 @@ Long selectTableIdBySchemaIdAndName( TablePO selectTableMetaBySchemaIdAndName( @Param("schemaId") Long schemaId, @Param("tableName") String name); - @SelectProvider(type = TableMetaSQLProviderFactory.class, method = "selectTableMetaById") - TablePO selectTableMetaById(@Param("tableId") Long tableId); - @InsertProvider(type = TableMetaSQLProviderFactory.class, method = "insertTableMeta") void insertTableMeta(@Param("tableMeta") TablePO tablePO); diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionMapper.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionMapper.java index a723c3db4a8..16f1ad7a348 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionMapper.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionMapper.java @@ -20,8 +20,10 @@ package org.apache.gravitino.storage.relational.mapper; import 
org.apache.gravitino.storage.relational.po.TablePO; +import org.apache.ibatis.annotations.DeleteProvider; import org.apache.ibatis.annotations.InsertProvider; import org.apache.ibatis.annotations.Param; +import org.apache.ibatis.annotations.UpdateProvider; public interface TableVersionMapper { String TABLE_NAME = "table_version_info"; @@ -33,4 +35,16 @@ public interface TableVersionMapper { type = TableVersionSQLProviderFactory.class, method = "insertTableVersionOnDuplicateKeyUpdate") void insertTableVersionOnDuplicateKeyUpdate(@Param("tablePO") TablePO tablePO); + + @UpdateProvider( + type = TableVersionSQLProviderFactory.class, + method = "softDeleteTableVersionByTableIdAndVersion") + void softDeleteTableVersionByTableIdAndVersion( + @Param("tableId") Long tableId, @Param("version") Long version); + + @DeleteProvider( + type = TableVersionSQLProviderFactory.class, + method = "deleteTableVersionByLegacyTimeline") + Integer deleteTableVersionByLegacyTimeline( + @Param("legacyTimeline") Long legacyTimeline, @Param("limit") int limit); } diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionSQLProviderFactory.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionSQLProviderFactory.java index ab27353c002..4c518ef4bd0 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionSQLProviderFactory.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/TableVersionSQLProviderFactory.java @@ -59,4 +59,14 @@ public static String insertTableVersion(@Param("tablePO") TablePO tablePO) { public static String insertTableVersionOnDuplicateKeyUpdate(@Param("tablePO") TablePO tablePO) { return getProvider().insertTableVersionOnDuplicateKeyUpdate(tablePO); } + + public static String softDeleteTableVersionByTableIdAndVersion( + @Param("tableId") Long tableId, @Param("version") Long version) { + return getProvider().softDeleteTableVersionByTableIdAndVersion(tableId, 
version); + } + + public static String deleteTableVersionByLegacyTimeline( + @Param("legacyTimeline") Long legacyTimeline, @Param("limit") int limit) { + return getProvider().deleteTableVersionByLegacyTimeline(legacyTimeline, limit); + } } diff --git a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableMetaBaseSQLProvider.java b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableMetaBaseSQLProvider.java index 8065476a613..f8f116c5b30 100644 --- a/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableMetaBaseSQLProvider.java +++ b/core/src/main/java/org/apache/gravitino/storage/relational/mapper/provider/base/TableMetaBaseSQLProvider.java @@ -48,15 +48,23 @@ public String listTablePOsBySchemaId(@Param("schemaId") Long schemaId) { public String listTablePOsByTableIds(List tableIds) { return "