From ea9fc5b0384b9f4d72b82487185518e759abc6ee Mon Sep 17 00:00:00 2001 From: Wenye Zhang Date: Thu, 20 Jan 2022 17:07:41 -0800 Subject: [PATCH 1/5] separate class under legacy to new hiveberg module --- build.gradle | 58 ++ .../org/apache/iceberg/hive/HiveCatalogs.java | 2 +- .../iceberg/hive/legacy/DirectoryInfo.java | 51 -- .../iceberg/hiveberg/DirectoryInfo.java | 46 ++ .../legacy => hiveberg}/FileSystemUtils.java | 25 +- .../legacy => hiveberg}/HiveExpressions.java | 25 +- .../HiveSchemaWithPartnerVisitor.java | 25 +- .../HiveTypeToIcebergType.java | 25 +- .../legacy => hiveberg}/HiveTypeUtil.java | 25 +- .../LegacyHiveCatalog.java | 25 +- .../legacy => hiveberg}/LegacyHiveTable.java | 25 +- .../LegacyHiveTableOperations.java | 25 +- .../LegacyHiveTableScan.java | 25 +- .../LegacyHiveTableUtils.java | 25 +- .../MergeHiveSchemaWithAvro.java | 27 +- .../iceberg/hiveberg/DirectoryInfo.java | 60 ++ .../iceberg/hiveberg/FileSystemUtils.java | 81 ++ .../iceberg/hiveberg/HiveExpressions.java | 349 +++++++++ .../HiveSchemaWithPartnerVisitor.java | 137 ++++ .../hiveberg/HiveTypeToIcebergType.java | 101 +++ .../apache/iceberg/hiveberg/HiveTypeUtil.java | 92 +++ .../iceberg/hiveberg/LegacyHiveCatalog.java | 114 +++ .../iceberg/hiveberg/LegacyHiveTable.java | 247 ++++++ .../hiveberg/LegacyHiveTableOperations.java | 290 +++++++ .../iceberg/hiveberg/LegacyHiveTableScan.java | 106 +++ .../hiveberg/LegacyHiveTableUtils.java | 232 ++++++ .../hiveberg/MergeHiveSchemaWithAvro.java | 268 +++++++ .../iceberg/hiveberg/HiveMetastoreTest.java | 66 ++ .../apache/iceberg/hiveberg/ScriptRunner.java | 249 ++++++ .../hiveberg}/TestHiveExpressions.java | 27 +- .../iceberg/hiveberg/TestHiveMetastore.java | 236 ++++++ .../hiveberg}/TestHiveSchemaConversions.java | 25 +- .../hiveberg}/TestLegacyHiveTableScan.java | 26 +- .../TestMergeHiveSchemaWithAvro.java | 25 +- .../resources/hive-schema-3.1.0.derby.sql | 726 ++++++++++++++++++ settings.gradle | 2 + .../apache/iceberg/spark/source/Reader.java | 2 +- .../iceberg/spark/source/SparkBatchScan.java | 2 +- 38 files changed, 3615 insertions(+), 282 deletions(-) delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/DirectoryInfo.java create mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/FileSystemUtils.java (64%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/HiveExpressions.java (93%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/HiveSchemaWithPartnerVisitor.java (86%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/HiveTypeToIcebergType.java (78%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/HiveTypeUtil.java (76%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/LegacyHiveCatalog.java (80%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/LegacyHiveTable.java (88%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/LegacyHiveTableOperations.java (93%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/LegacyHiveTableScan.java (78%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/LegacyHiveTableUtils.java (91%) rename hive-metastore/src/main/java/org/apache/iceberg/{hive/legacy => hiveberg}/MergeHiveSchemaWithAvro.java (92%) create mode 
100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java create mode 100644 hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java create mode 100644 hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java rename {hive-metastore/src/test/java/org/apache/iceberg/hive/legacy => hiveberg/src/test/java/org/apache/iceberg/hiveberg}/TestHiveExpressions.java (82%) create mode 100644 hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java rename {hive-metastore/src/test/java/org/apache/iceberg/hive/legacy => hiveberg/src/test/java/org/apache/iceberg/hiveberg}/TestHiveSchemaConversions.java (78%) rename {hive-metastore/src/test/java/org/apache/iceberg/hive/legacy => hiveberg/src/test/java/org/apache/iceberg/hiveberg}/TestLegacyHiveTableScan.java (94%) rename {hive-metastore/src/test/java/org/apache/iceberg/hive/legacy => hiveberg/src/test/java/org/apache/iceberg/hiveberg}/TestMergeHiveSchemaWithAvro.java (94%) create mode 100644 hiveberg/src/test/resources/hive-schema-3.1.0.derby.sql diff --git a/build.gradle b/build.gradle index 0d34ecedc6..b1429c36af 100644 --- a/build.gradle +++ b/build.gradle @@ -495,6 +495,64 @@ project(':iceberg-hive-metastore') { } } +project(':iceberg-hiveberg') { + dependencies { + compile project(':iceberg-api') + compile project(':iceberg-core') + compile project(':iceberg-hive-metastore') + + compileOnly "org.apache.avro:avro" + + compileOnly("org.apache.hive:hive-metastore") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.apache.hbase' + exclude group: 'org.apache.logging.log4j' + exclude group: 'co.cask.tephra' + exclude group: 'com.google.code.findbugs', module: 'jsr305' + exclude group: 'org.eclipse.jetty.aggregate', module: 'jetty-all' + exclude group: 'org.eclipse.jetty.orbit', module: 'javax.servlet' + exclude group: 'org.apache.parquet', module: 'parquet-hadoop-bundle' + exclude group: 'com.tdunning', module: 'json' + exclude group: 'javax.transaction', module: 'transaction-api' + exclude group: 'com.zaxxer', module: 'HikariCP' + } + + // By default, hive-exec is a fat/uber jar and it exports a guava library + // that's really old. We use the core classifier to be able to override our guava + // version. 
Luckily, hive-exec seems to work okay so far with this version of guava + // See: https://github.com/apache/hive/blob/master/ql/pom.xml#L911 for more context. + testCompile("org.apache.hive:hive-exec::core") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.apache.hive', module: 'hive-llap-tez' + exclude group: 'org.apache.logging.log4j' + exclude group: 'com.google.protobuf', module: 'protobuf-java' + exclude group: 'org.apache.calcite' + exclude group: 'org.apache.calcite.avatica' + exclude group: 'com.google.code.findbugs', module: 'jsr305' + } + + testCompile("org.apache.hive:hive-metastore") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.apache.hbase' + exclude group: 'org.apache.logging.log4j' + exclude group: 'co.cask.tephra' + exclude group: 'com.google.code.findbugs', module: 'jsr305' + exclude group: 'org.eclipse.jetty.aggregate', module: 'jetty-all' + exclude group: 'org.eclipse.jetty.orbit', module: 'javax.servlet' + exclude group: 'org.apache.parquet', module: 'parquet-hadoop-bundle' + exclude group: 'com.tdunning', module: 'json' + exclude group: 'javax.transaction', module: 'transaction-api' + exclude group: 'com.zaxxer', module: 'HikariCP' + } + } +} + project(':iceberg-mr') { configurations { testCompile { diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java index b03f7d8768..cb2859bea4 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java @@ -23,7 +23,7 @@ import com.github.benmanes.caffeine.cache.Caffeine; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.iceberg.hive.legacy.LegacyHiveCatalog; +import org.apache.iceberg.hiveberg.LegacyHiveCatalog; public final class HiveCatalogs { diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/DirectoryInfo.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/DirectoryInfo.java deleted file mode 100644 index 50de7957ce..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/DirectoryInfo.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.iceberg.hive.legacy; - -import org.apache.iceberg.FileFormat; -import org.apache.iceberg.StructLike; - - -/** - * Metadata for a data directory referenced by either a Hive table or a partition - */ -class DirectoryInfo { - private final String location; - private final FileFormat format; - private final StructLike partitionData; - - DirectoryInfo(String location, FileFormat format, StructLike partitionData) { - this.location = location; - this.format = format; - this.partitionData = partitionData; - } - - public String location() { - return location; - } - - public FileFormat format() { - return format; - } - - public StructLike partitionData() { - return partitionData; - } -} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java new file mode 100644 index 0000000000..20d499535d --- /dev/null +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java @@ -0,0 +1,46 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.hiveberg; + +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.StructLike; + + +/** + * Metadata for a data directory referenced by either a Hive table or a partition + */ +class DirectoryInfo { + private final String location; + private final FileFormat format; + private final StructLike partitionData; + + DirectoryInfo(String location, FileFormat format, StructLike partitionData) { + this.location = location; + this.format = format; + this.partitionData = partitionData; + } + + public String location() { + return location; + } + + public FileFormat format() { + return format; + } + + public StructLike partitionData() { + return partitionData; + } +} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/FileSystemUtils.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java similarity index 64% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/FileSystemUtils.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java index 9a7f8138a4..51f8ac81fd 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/FileSystemUtils.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.io.IOException; import java.io.UncheckedIOException; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveExpressions.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java similarity index 93% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveExpressions.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java index 3ce14f37fc..419d48f418 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveExpressions.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.time.Instant; import java.time.OffsetDateTime; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveSchemaWithPartnerVisitor.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java similarity index 86% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveSchemaWithPartnerVisitor.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java index f3d702b4f9..f77f19feaa 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveSchemaWithPartnerVisitor.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.util.List; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveTypeToIcebergType.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java similarity index 78% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveTypeToIcebergType.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java index 5871b3ed83..a5f043fe26 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveTypeToIcebergType.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.util.List; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveTypeUtil.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java similarity index 76% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveTypeUtil.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java index 9d58ff4b6f..5603d6d46e 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/HiveTypeUtil.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.util.List; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java similarity index 80% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveCatalog.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java index c97d29a94c..e4af44917d 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.util.Map; import org.apache.hadoop.conf.Configuration; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTable.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java similarity index 88% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTable.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java index 84d5f24977..69e8794790 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTable.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.util.List; import java.util.Map; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java similarity index 93% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableOperations.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java index dbd1773114..fdd540ef65 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.time.LocalDate; import java.time.LocalDateTime; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableScan.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java similarity index 78% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableScan.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java index 890faea7d5..b206f9a5c4 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableScan.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import org.apache.iceberg.BaseFileScanTask; import org.apache.iceberg.DataTableScan; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableUtils.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java similarity index 91% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableUtils.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java index 0256c97436..67dbc51a72 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/LegacyHiveTableUtils.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.util.HashMap; import java.util.List; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/MergeHiveSchemaWithAvro.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java similarity index 92% rename from hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/MergeHiveSchemaWithAvro.java rename to hive-metastore/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java index 0dec45757f..4edd620502 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/legacy/MergeHiveSchemaWithAvro.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.util.ArrayList; import java.util.List; @@ -52,7 +47,7 @@ class MergeHiveSchemaWithAvro extends HiveSchemaWithPartnerVisitor { static Schema visit(StructTypeInfo typeInfo, Schema schema) { - return HiveSchemaWithPartnerVisitor.visit(typeInfo, schema, new MergeHiveSchemaWithAvro(), + return visit(typeInfo, schema, new MergeHiveSchemaWithAvro(), AvroPartnerAccessor.INSTANCE); } diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java new file mode 100644 index 0000000000..fd16c18906 --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java @@ -0,0 +1,60 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.hiveberg; + +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.StructLike; + + +/** + * Metadata for a data directory referenced by either a Hive table or a partition + */ +class DirectoryInfo { + private final String location; + private final FileFormat format; + private final StructLike partitionData; + + DirectoryInfo(String location, FileFormat format, StructLike partitionData) { + this.location = location; + this.format = format; + this.partitionData = partitionData; + } + + public String location() { + return location; + } + + public FileFormat format() { + return format; + } + + public StructLike partitionData() { + return partitionData; + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java new file mode 100644 index 0000000000..2db783f81e --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java @@ -0,0 +1,81 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.hiveberg; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.iceberg.exceptions.RuntimeIOException; + +class FileSystemUtils { + + private FileSystemUtils() { + } + + /** + * Lists all non-hidden files for the given directory + */ + static List<FileStatus> listFiles(String directory, Configuration conf) { + + final Path directoryPath = new Path(directory); + final FileStatus[] files; + try { + FileSystem fs = directoryPath.getFileSystem(conf); + files = fs.listStatus(directoryPath, HiddenPathFilter.INSTANCE); + } catch (IOException e) { + throw new UncheckedIOException("Error listing files for directory: " + directory, e); + } + return Arrays.asList(files); + } + + static boolean exists(String file, Configuration conf) { + final Path filePath = new Path(file); + try { + FileSystem fs = filePath.getFileSystem(conf); + return fs.exists(filePath); + } catch (IOException e) { + throw new RuntimeIOException(e, "Error determining if file or directory exists: %s", file); + } + } + + private enum HiddenPathFilter implements PathFilter { + INSTANCE; + + @Override + public boolean accept(Path path) { + return !path.getName().startsWith("_") && !path.getName().startsWith("."); + } + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java new file mode 100644 index 0000000000..e931ba015f --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java @@ -0,0 +1,349 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.iceberg.hiveberg; + +import java.time.Instant; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.temporal.ChronoUnit; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.iceberg.expressions.BoundPredicate; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.ExpressionVisitors; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.Literal; +import org.apache.iceberg.expressions.UnboundPredicate; +import org.apache.iceberg.expressions.UnboundTerm; +import org.apache.iceberg.types.Type; + + +class HiveExpressions { + + private static final Expression REMOVED = (Expression) () -> null; + + private HiveExpressions() { + } + + /** + * Simplifies the {@link Expression} so that it fits the restrictions of the expression that can be passed + * to the Hive metastore. For details about the simplification, please see {@link RemoveNonPartitionPredicates} and + * {@link RewriteUnsupportedOperators} + * @param expr The {@link Expression} to be simplified + * @param partitionColumnNames The set of partition column names + * @return TRUE if the simplified expression results in an always true expression or if there are no predicates on + * partition columns in the simplified expression, + * FALSE if the simplified expression results in an always false expression, + * otherwise returns the simplified expression + */ + static Expression simplifyPartitionFilter(Expression expr, Set<String> partitionColumnNames) { + try { + // Pushing down NOTs is critical for the correctness of RemoveNonPartitionPredicates + // e.g. consider a predicate on a partition field (P) and a predicate on a non-partition field (NP) + // By simply ignoring NP, NOT(P and NP) would be rewritten to NOT(P) + // However the correct behaviour is NOT(P and NP) => NOT(P) OR NOT(NP) => True + Expression notPushedDown = Expressions.rewriteNot(expr); + Expression partitionPredicatesOnly = ExpressionVisitors.visit(notPushedDown, + new RemoveNonPartitionPredicates(partitionColumnNames)); + if (partitionPredicatesOnly == REMOVED) { + return Expressions.alwaysTrue(); + } else { + return ExpressionVisitors.visit(partitionPredicatesOnly, new RewriteUnsupportedOperators()); + } + } catch (Exception e) { + throw new RuntimeException("Error while processing expression: " + expr, e); + } + } + + /** + * Converts an {@link Expression} into a filter string which can be passed to the Hive metastore + * + * It is expected that the caller handles TRUE and FALSE expressions before calling this method. The given + * {@link Expression} must also be passed through {@link #simplifyPartitionFilter(Expression, Set)} first to + * remove any unsupported predicates.
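+ * For example, for a table partitioned by columns {@code ds} (string) and {@code hr} (int) (column names chosen
+ * purely for illustration), the bound expression {@code ds = '2022-01-20' AND hr > 10} converts to the filter
+ * string {@code (( ds = '2022-01-20' ) AND ( hr > 10 ))}.
+ *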
* @param expr The {@link Expression} to be converted into a filter string + * @return a filter string equivalent to the given {@link Expression} which can be passed to the Hive metastore + */ + static String toPartitionFilterString(Expression expr) { + return ExpressionVisitors.visit(expr, ExpressionToPartitionFilterString.get()); + } + + /** + * Removes any predicates on non-partition columns from the given {@link Expression}
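+ * e.g. {@code pCol = 1 AND dataCol = 2} reduces to {@code pCol = 1}, while {@code pCol = 1 OR dataCol = 2} is
+ * removed entirely ({@code pCol} standing for a partition column and {@code dataCol} for a non-partition column;
+ * predicates removed here are applied as a post-scan filter instead)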
+ */ + private static class RemoveNonPartitionPredicates extends ExpressionVisitors.ExpressionVisitor<Expression> { + + private final Set<String> partitionColumnNamesLowerCase; + + RemoveNonPartitionPredicates(Set<String> partitionColumnNames) { + this.partitionColumnNamesLowerCase = + partitionColumnNames.stream().map(String::toLowerCase).collect(Collectors.toSet()); + } + + @Override + public Expression alwaysTrue() { + return Expressions.alwaysTrue(); + } + + @Override + public Expression alwaysFalse() { + return Expressions.alwaysFalse(); + } + + @Override + public Expression not(Expression result) { + return (result == REMOVED) ? REMOVED : Expressions.not(result); + } + + @Override + public Expression and(Expression leftResult, Expression rightResult) { + // if one of the children is a non-partition predicate, we can ignore it as it will be applied as a post-scan + // filter + if (leftResult == REMOVED && rightResult == REMOVED) { + return REMOVED; + } else if (leftResult == REMOVED) { + return rightResult; + } else if (rightResult == REMOVED) { + return leftResult; + } else { + return Expressions.and(leftResult, rightResult); + } + } + + @Override + public Expression or(Expression leftResult, Expression rightResult) { + return (leftResult == REMOVED || rightResult == REMOVED) ? REMOVED : Expressions.or(leftResult, rightResult); + } + + @Override + public <T> Expression predicate(BoundPredicate<T> pred) { + throw new IllegalStateException("Bound predicate not expected: " + pred.getClass().getName()); + } + + @Override + public <T> Expression predicate(UnboundPredicate<T> pred) { + return (partitionColumnNamesLowerCase.contains(pred.ref().name().toLowerCase())) ? pred : REMOVED; + } + } + + /** + * Rewrites the {@link Expression} so that it fits the restrictions of the expression that can be passed + * to the Hive metastore. + * + * This visitor assumes that all predicates are on partition columns. Predicates on non-partition columns should be + * removed using {@link RemoveNonPartitionPredicates} before calling this visitor. It performs the following changes: + * 1. Rewrites NOT operators by inverting binary operators, negating unary literals and using De Morgan's laws + * e.g. NOT(value > 0 AND TRUE) => value <= 0 OR FALSE + * NOT(value < 0 OR value > 10) => value >= 0 AND value <= 10 + * 2. Removes IS NULL and IS NOT NULL predicates (replaced with FALSE and TRUE respectively, as partition column + * values are always non-null for Hive) + * e.g. partitionColumn IS NULL => FALSE + * partitionColumn IS NOT NULL => TRUE + * 3. Expands IN and NOT IN operators into ORs of EQUAL operations and ANDs of NOT EQUAL operations respectively + * e.g. value IN (1, 2, 3) => value = 1 OR value = 2 OR value = 3 + * value NOT IN (1, 2, 3) => value != 1 AND value != 2 AND value != 3 + * 4. Removes any children TRUE and FALSE expressions. The checks to remove these are happening inside + * {@link Expressions#and(Expression, Expression)} and {@link Expressions#or(Expression, Expression)} + * (Note that the rewritten expression still can be TRUE and FALSE at the root and will have to be handled + * appropriately by the caller) + * + * For examples take a look at the tests in {@code TestHiveExpressions} + */ + private static class RewriteUnsupportedOperators extends ExpressionVisitors.ExpressionVisitor<Expression> { + + @Override + public Expression alwaysTrue() { + return Expressions.alwaysTrue(); + } + + @Override + public Expression alwaysFalse() { + return Expressions.alwaysFalse(); + } + + @Override + public Expression not(Expression result) { + return result.negate(); + } + + @Override + public Expression and(Expression leftResult, Expression rightResult) { + return Expressions.and(leftResult, rightResult); + } + + @Override + public Expression or(Expression leftResult, Expression rightResult) { + return Expressions.or(leftResult, rightResult); + } + + <T> Expression in(UnboundTerm<T> term, List<Literal<T>> literals) { + Expression in = alwaysFalse(); + for (Literal<T> literal : literals) { + in = Expressions.or(in, Expressions.equal(term, literal.value())); + } + return in; + } + + <T> Expression notIn(UnboundTerm<T> term, List<Literal<T>> literals) { + Expression notIn = alwaysTrue(); + for (Literal<T> literal : literals) { + notIn = Expressions.and(notIn, Expressions.notEqual(term, literal.value())); + } + return notIn; + } + + @Override + public <T> Expression predicate(BoundPredicate<T> pred) { + throw new IllegalStateException("Bound predicate not expected: " + pred.getClass().getName()); + } + + @Override + public <T> Expression predicate(UnboundPredicate<T> pred) { + switch (pred.op()) { + case LT: + case LT_EQ: + case GT: + case GT_EQ: + case EQ: + case NOT_EQ: + return pred; + case IS_NULL: + return Expressions.alwaysFalse(); + case NOT_NULL: + return Expressions.alwaysTrue(); + case IN: + return in(pred.term(), pred.literals()); + case NOT_IN: + return notIn(pred.term(), pred.literals()); + case STARTS_WITH: + throw new UnsupportedOperationException("STARTS_WITH predicate not supported in partition filter " + "expression. Please use a combination of greater than AND less than predicates instead."); + default: + throw new IllegalStateException("Unexpected predicate: " + pred.op()); + } + } + } + + private static class ExpressionToPartitionFilterString extends ExpressionVisitors.ExpressionVisitor<String> { + private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); + private static final ExpressionToPartitionFilterString INSTANCE = new ExpressionToPartitionFilterString(); + + private ExpressionToPartitionFilterString() { + } + + static ExpressionToPartitionFilterString get() { + return INSTANCE; + } + + @Override + public String alwaysTrue() { + throw new IllegalStateException("TRUE literal not allowed in Hive partition filter string"); + } + + @Override + public String alwaysFalse() { + throw new IllegalStateException("FALSE literal not allowed in Hive partition filter string"); + } + + @Override + public String not(String result) { + throw new IllegalStateException("NOT operator not allowed in Hive partition filter string"); + } + + @Override + public String and(String leftResult, String rightResult) { + return String.format("((%s) AND (%s))", leftResult, rightResult); + } + + @Override + public String or(String leftResult, String rightResult) { + return String.format("((%s) OR (%s))", leftResult, rightResult); + } + + @Override + public <T> String predicate(BoundPredicate<T> pred) { + switch (pred.op()) { + case LT: + case LT_EQ: + case GT: + case GT_EQ: + case EQ: + case NOT_EQ: + return getBinaryExpressionString(pred); + default: + throw new IllegalStateException("Unexpected operator in Hive partition filter string: " + pred.op()); + } + } + + @Override + public <T> String predicate(UnboundPredicate<T> pred) { + throw new IllegalStateException("Unbound predicate not expected: " + pred.getClass().getName()); + } + + private <T> String getBinaryExpressionString(BoundPredicate<T> pred) { + String columnName = pred.ref().field().name(); + String opName = getOperationString(pred.op()); + String litValue = getLiteralValue(pred.asLiteralPredicate().literal(), pred.ref().type()); + return String.format("( %s %s %s )", columnName, opName, litValue); + } + + private String getOperationString(Expression.Operation op) { + switch (op) { + case LT: + return "<"; + case LT_EQ: + return "<="; + case GT: + return ">"; + case GT_EQ: + return ">="; + case EQ: + return "="; + case NOT_EQ: + return "!="; + default: + throw new IllegalStateException("Unexpected operator in Hive partition filter string: " + op); + } + } + + private String getLiteralValue(Literal<?> lit, Type type) { + Object value = lit.value(); + if (type.typeId() == Type.TypeID.DATE) { + value = EPOCH.plus((Integer) value, ChronoUnit.DAYS).toLocalDate().toString(); + } + if (value instanceof String) { + String escapedString = ((String) value).replace("'", "\\'"); + return String.format("'%s'", escapedString); + } else { + return String.valueOf(value); + } + } + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java new file mode 100644 index 0000000000..f77f19feaa --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java @@ -0,0 +1,137 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.hiveberg; + +import java.util.List; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; + +/** + * A Hive {@link TypeInfo} visitor with an accompanying partner schema + * + * This visitor traverses the Hive {@link TypeInfo} tree while simultaneously accessing the corresponding parts of + * the partner schema using {@link PartnerAccessor}. When visiting each type in the Hive tree, the implementation is + * also presented with the corresponding type from the partner schema, or else a {@code null} if no match was found. + * Matching behavior can be controlled by implementing the methods in {@link PartnerAccessor} + * + * @param <P> type of partner schema + * @param <FP> type of the field representation in the partner schema + * @param <R> type of the resultant schema generated by the visitor + * @param <FR> type of the field representation in the resultant schema
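+ *
+ * For example, {@code MergeHiveSchemaWithAvro} in this module implements this visitor with an Avro {@code Schema}
+ * as the partner type, pairing each Hive field with the matching Avro field while merging the two schemas.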
+ */ +@SuppressWarnings("ClassTypeParameterName") +public abstract class HiveSchemaWithPartnerVisitor<P, FP, R, FR> { + + /** + * Methods to access types in the partner schema corresponding to types in the Hive schema being traversed + * + * @param <P> type of partner schema + * @param <FP> type of the field representation in the partner schema + */ + public interface PartnerAccessor<P, FP> { + + FP fieldPartner(P partnerStruct, String fieldName); + + P fieldType(FP partnerField); + + P mapKeyPartner(P partnerMap); + + P mapValuePartner(P partnerMap); + + P listElementPartner(P partnerList); + + P unionObjectPartner(P partnerUnion, int ordinal); + } + + @SuppressWarnings("MethodTypeParameterName") + public static <P, FP, R, FR> R visit(TypeInfo typeInfo, P partner, HiveSchemaWithPartnerVisitor<P, FP, R, FR> visitor, + PartnerAccessor<P, FP> accessor) { + switch (typeInfo.getCategory()) { + case STRUCT: + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List<String> names = structTypeInfo.getAllStructFieldNames(); + List<FR> results = Lists.newArrayListWithExpectedSize(names.size()); + for (String name : names) { + TypeInfo fieldTypeInfo = structTypeInfo.getStructFieldTypeInfo(name); + FP fieldPartner = partner != null ? accessor.fieldPartner(partner, name) : null; + P fieldPartnerType = fieldPartner != null ? accessor.fieldType(fieldPartner) : null; + R result = visit(fieldTypeInfo, fieldPartnerType, visitor, accessor); + results.add(visitor.field(name, fieldTypeInfo, fieldPartner, result)); + } + return visitor.struct(structTypeInfo, partner, results); + + case LIST: + ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo(); + P elementPartner = partner != null ? accessor.listElementPartner(partner) : null; + R elementResult = visit(elementTypeInfo, elementPartner, visitor, accessor); + return visitor.list(listTypeInfo, partner, elementResult); + + case MAP: + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + P keyPartner = partner != null ? accessor.mapKeyPartner(partner) : null; + R keyResult = visit(mapTypeInfo.getMapKeyTypeInfo(), keyPartner, visitor, accessor); + P valuePartner = partner != null ? accessor.mapValuePartner(partner) : null; + R valueResult = visit(mapTypeInfo.getMapValueTypeInfo(), valuePartner, visitor, accessor); + return visitor.map(mapTypeInfo, partner, keyResult, valueResult); + + case PRIMITIVE: + return visitor.primitive((PrimitiveTypeInfo) typeInfo, partner); + + case UNION: + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + List<TypeInfo> allAlternatives = unionTypeInfo.getAllUnionObjectTypeInfos(); + List<R> unionResults = Lists.newArrayListWithExpectedSize(allAlternatives.size()); + for (int i = 0; i < allAlternatives.size(); i++) { + P unionObjectPartner = partner != null ? accessor.unionObjectPartner(partner, i) : null; + R result = visit(allAlternatives.get(i), unionObjectPartner, visitor, accessor); + unionResults.add(result); + } + return visitor.union(unionTypeInfo, partner, unionResults); + + default: + throw new UnsupportedOperationException(typeInfo + " not supported"); + } + } + + public R struct(StructTypeInfo struct, P partner, List<FR> fieldResults) { + return null; + } + + public FR field(String name, TypeInfo field, FP partner, R fieldResult) { + return null; + } + + public R list(ListTypeInfo list, P partner, R elementResult) { + return null; + } + + public R map(MapTypeInfo map, P partner, R keyResult, R valueResult) { + return null; + } + + public R union(UnionTypeInfo union, P partner, List<R> results) { + return null; + } + + public R primitive(PrimitiveTypeInfo primitive, P partner) { + return null; + } +}
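For illustration, a minimal concrete visitor (hypothetical, using the reconstructed signatures above) that renders a Hive TypeInfo as a readable string and ignores the partner schema; with a null partner the accessor is never consulted, so null can be passed for it:

class HiveTypeToString extends HiveSchemaWithPartnerVisitor<Void, Void, String, String> {
  // Combine the already-rendered field strings into a struct rendering
  @Override
  public String struct(StructTypeInfo struct, Void partner, List<String> fieldResults) {
    return "struct<" + String.join(", ", fieldResults) + ">";
  }

  // Pair each field name with its rendered type
  @Override
  public String field(String name, TypeInfo field, Void partner, String fieldResult) {
    return name + ": " + fieldResult;
  }

  @Override
  public String list(ListTypeInfo list, Void partner, String elementResult) {
    return "list<" + elementResult + ">";
  }

  @Override
  public String map(MapTypeInfo map, Void partner, String keyResult, String valueResult) {
    return "map<" + keyResult + ", " + valueResult + ">";
  }

  @Override
  public String primitive(PrimitiveTypeInfo primitive, Void partner) {
    return primitive.getTypeName();
  }
}

// Usage sketch: HiveSchemaWithPartnerVisitor.visit(typeInfo, null, new HiveTypeToString(), null)
// e.g. a Hive struct<a:int> renders as "struct<a: int>"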
accessor.unionObjectPartner(partner, i) : null; + R result = visit(allAlternatives.get(i), unionObjectPartner, visitor, accessor); + unionResults.add(result); + } + return visitor.union(unionTypeInfo, partner, unionResults); + + default: + throw new UnsupportedOperationException(typeInfo + " not supported"); + } + } + + public R struct(StructTypeInfo struct, P partner, List fieldResults) { + return null; + } + + public FR field(String name, TypeInfo field, FP partner, R fieldResult) { + return null; + } + + public R list(ListTypeInfo list, P partner, R elementResult) { + return null; + } + + public R map(MapTypeInfo map, P partner, R keyResult, R valueResult) { + return null; + } + + public R union(UnionTypeInfo union, P partner, List results) { + return null; + } + + public R primitive(PrimitiveTypeInfo primitive, P partner) { + return null; + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java new file mode 100644 index 0000000000..a5f043fe26 --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java @@ -0,0 +1,101 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.hiveberg; + +import java.util.List; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; + + +public class HiveTypeToIcebergType extends HiveTypeUtil.HiveSchemaVisitor { + private static final String UNION_TO_STRUCT_CONVERSION_PREFIX = "field"; + private int nextId = 1; + + @Override + public Type struct(StructTypeInfo struct, List names, List fieldResults) { + List fields = Lists.newArrayListWithExpectedSize(fieldResults.size()); + for (int i = 0; i < names.size(); i++) { + fields.add(Types.NestedField.optional(allocateId(), names.get(i), fieldResults.get(i))); + } + return Types.StructType.of(fields); + } + + @Override + public Type map(MapTypeInfo map, Type keyResult, Type valueResult) { + return Types.MapType.ofOptional(allocateId(), allocateId(), keyResult, valueResult); + } + + @Override + public Type list(ListTypeInfo list, Type elementResult) { + return Types.ListType.ofOptional(allocateId(), elementResult); + } + + // Mimic the struct call behavior to construct a union converted struct type + @Override + public Type union(UnionTypeInfo union, List unionResults) { + List fields = Lists.newArrayListWithExpectedSize(unionResults.size() + 1); + fields.add(Types.NestedField.required(allocateId(), "tag", Types.IntegerType.get())); + for (int i = 0; i < 
unionResults.size(); i++) { + fields.add(Types.NestedField.optional(allocateId(), UNION_TO_STRUCT_CONVERSION_PREFIX + i, unionResults.get(i))); + } + return Types.StructType.of(fields); + } + + @Override + public Type primitive(PrimitiveTypeInfo primitive) { + switch (primitive.getPrimitiveCategory()) { + case FLOAT: + return Types.FloatType.get(); + case DOUBLE: + return Types.DoubleType.get(); + case BOOLEAN: + return Types.BooleanType.get(); + case BYTE: + case SHORT: + case INT: + return Types.IntegerType.get(); + case LONG: + return Types.LongType.get(); + case CHAR: + case VARCHAR: + case STRING: + return Types.StringType.get(); + case BINARY: + return Types.BinaryType.get(); + case DECIMAL: + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitive; + return Types.DecimalType.of(decimalTypeInfo.precision(), decimalTypeInfo.scale()); + case TIMESTAMP: + return Types.TimestampType.withoutZone(); + case DATE: + return Types.DateType.get(); + default: + throw new UnsupportedOperationException("Unsupported primitive type " + primitive); + } + } + + private int allocateId() { + int current = nextId; + nextId += 1; + return current; + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java new file mode 100644 index 0000000000..5603d6d46e --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java @@ -0,0 +1,92 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.iceberg.hiveberg; + +import java.util.List; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Type; + + +public class HiveTypeUtil { + private HiveTypeUtil() { + } + + public static Type convert(TypeInfo typeInfo) { + return HiveTypeUtil.visit(typeInfo, new HiveTypeToIcebergType()); + } + + public static T visit(TypeInfo typeInfo, HiveSchemaVisitor visitor) { + switch (typeInfo.getCategory()) { + case STRUCT: + final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List names = structTypeInfo.getAllStructFieldNames(); + List results = Lists.newArrayListWithExpectedSize(names.size()); + for (String name : names) { + results.add(visit(structTypeInfo.getStructFieldTypeInfo(name), visitor)); + } + return visitor.struct(structTypeInfo, names, results); + + case UNION: + final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + List unionResults = Lists.newArrayListWithExpectedSize(unionTypeInfo.getAllUnionObjectTypeInfos().size()); + for (TypeInfo unionObjectTypeInfo : unionTypeInfo.getAllUnionObjectTypeInfos()) { + unionResults.add(visit(unionObjectTypeInfo, visitor)); + } + return visitor.union(unionTypeInfo, unionResults); + + case LIST: + ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + return visitor.list(listTypeInfo, visit(listTypeInfo.getListElementTypeInfo(), visitor)); + + case MAP: + final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + return visitor.map(mapTypeInfo, + visit(mapTypeInfo.getMapKeyTypeInfo(), visitor), + visit(mapTypeInfo.getMapValueTypeInfo(), visitor)); + + default: + final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + return visitor.primitive(primitiveTypeInfo); + } + } + + public static class HiveSchemaVisitor { + public T struct(StructTypeInfo struct, List names, List fieldResults) { + return null; + } + + public T list(ListTypeInfo list, T elementResult) { + return null; + } + + public T map(MapTypeInfo map, T keyResult, T valueResult) { + return null; + } + + public T union(UnionTypeInfo union, List unionResults) { + return null; + } + + public T primitive(PrimitiveTypeInfo primitive) { + return null; + } + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java new file mode 100644 index 0000000000..e3be31e8ac --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java @@ -0,0 +1,114 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.iceberg.hiveberg;
+
+import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
+import org.apache.iceberg.Transaction;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.iceberg.hive.HiveCatalog;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * A {@link HiveCatalog} which uses Hive metadata to read tables. Features such as time travel, snapshot isolation,
+ * and incremental computation are not supported, nor are any write operations on either the data or the metadata.
+ */
+public class LegacyHiveCatalog extends HiveCatalog {
+
+  private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveCatalog.class);
+
+  public LegacyHiveCatalog(Configuration conf) {
+    super(conf);
+  }
+
+  @Override
+  @SuppressWarnings("CatchBlockLogException")
+  public Table loadTable(TableIdentifier identifier) {
+    if (isValidIdentifier(identifier)) {
+      TableOperations ops = newTableOps(identifier);
+      if (ops.current() == null) {
+        throw new NoSuchTableException("Table does not exist: %s", identifier);
+      }
+
+      return new LegacyHiveTable(ops, fullTableName(name(), identifier));
+    } else if (isValidMetadataIdentifier(identifier)) {
+      throw new UnsupportedOperationException(
+          "Metadata views not supported for Hive tables without Iceberg metadata. Table: " + identifier);
+    } else {
+      throw new NoSuchTableException("Invalid table identifier: %s", identifier);
+    }
+  }
+
+  @Override
+  public TableOperations newTableOps(TableIdentifier tableIdentifier) {
+    String dbName = tableIdentifier.namespace().level(0);
+    String tableName = tableIdentifier.name();
+    return new LegacyHiveTableOperations(conf(), clientPool(), dbName, tableName);
+  }
+
+  @Override
+  public boolean dropTable(TableIdentifier identifier, boolean purge) {
+    throw new UnsupportedOperationException(
+        "Dropping tables not supported through legacy Hive catalog. Table: " + identifier);
+  }
+
+  @Override
+  public void renameTable(TableIdentifier from, TableIdentifier to) {
+    throw new UnsupportedOperationException(
+        "Renaming tables not supported through legacy Hive catalog. From: " + from + " To: " + to);
+  }
+
+  @Override
+  public Table createTable(TableIdentifier identifier, Schema schema, PartitionSpec spec, String location,
+      Map<String, String> properties) {
+    throw new UnsupportedOperationException(
+        "Creating tables not supported through legacy Hive catalog. Table: " + identifier);
+  }
+
+  @Override
+  public Transaction newCreateTableTransaction(TableIdentifier identifier, Schema schema, PartitionSpec spec,
+      String location, Map<String, String> properties) {
+    throw new UnsupportedOperationException(
+        "Creating tables not supported through legacy Hive catalog. Table: " + identifier);
+  }
+
+  @Override
+  public Transaction newReplaceTableTransaction(TableIdentifier identifier, Schema schema, PartitionSpec spec,
+      String location, Map<String, String> properties, boolean orCreate) {
+    throw new UnsupportedOperationException(
+        "Replacing tables not supported through legacy Hive catalog. Table: " + identifier);
+  }
+}
diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java
new file mode 100644
index 0000000000..66b8035f25
--- /dev/null
+++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.iceberg.hiveberg;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.AppendFiles;
+import org.apache.iceberg.DeleteFiles;
+import org.apache.iceberg.ExpireSnapshots;
+import org.apache.iceberg.HasTableOperations;
+import org.apache.iceberg.HistoryEntry;
+import org.apache.iceberg.ManageSnapshots;
+import org.apache.iceberg.OverwriteFiles;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.ReplacePartitions;
+import org.apache.iceberg.ReplaceSortOrder;
+import org.apache.iceberg.RewriteFiles;
+import org.apache.iceberg.RewriteManifests;
+import org.apache.iceberg.Rollback;
+import org.apache.iceberg.RowDelta;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.SortOrder;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
+import org.apache.iceberg.TableScan;
+import org.apache.iceberg.Transaction;
+import org.apache.iceberg.UpdateLocation;
+import org.apache.iceberg.UpdatePartitionSpec;
+import org.apache.iceberg.UpdateProperties;
+import org.apache.iceberg.UpdateSchema;
+import org.apache.iceberg.encryption.EncryptionManager;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.LocationProvider;
+
+
+/**
+ * A {@link Table} which uses Hive table/partition metadata to perform scans using {@link LegacyHiveTableScan}.
+ * This table does not provide any time travel, snapshot isolation, or incremental computation benefits.
+ * It also does not allow any write operations to either the data or the metadata.
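+ *
+ * For illustration only, a read-only usage sketch (the identifier and partition filter below are hypothetical):
+ * <pre>{@code
+ *   Table table = new LegacyHiveCatalog(conf).loadTable(TableIdentifier.of("db", "tbl"));
+ *   CloseableIterable<FileScanTask> tasks = table.newScan()
+ *       .filter(Expressions.equal("datepartition", "2020-01-01"))
+ *       .planFiles();
+ * }</pre>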
+ */ +public class LegacyHiveTable implements Table, HasTableOperations { + private final TableOperations ops; + private final String name; + + protected LegacyHiveTable(TableOperations ops, String name) { + this.ops = ops; + this.name = name; + } + + @Override + public TableOperations operations() { + return ops; + } + + @Override + public void refresh() { + ops.refresh(); + } + + @Override + public TableScan newScan() { + return new LegacyHiveTableScan(ops, this); + } + + @Override + public Schema schema() { + return ops.current().schema(); + } + + @Override + public PartitionSpec spec() { + return ops.current().spec(); + } + + @Override + public Map specs() { + throw new UnsupportedOperationException( + "Multiple partition specs not supported for Hive tables without Iceberg metadata"); + } + + @Override + public SortOrder sortOrder() { + throw new UnsupportedOperationException("Sort order not supported for Hive tables without Iceberg metadata"); + } + + @Override + public Map sortOrders() { + throw new UnsupportedOperationException("Sort orders not supported for Hive tables without Iceberg metadata"); + } + + @Override + public Map properties() { + return ops.current().properties(); + } + + @Override + public String location() { + return ops.current().location(); + } + + @Override + public Snapshot currentSnapshot() { + throw new UnsupportedOperationException("Snapshots not supported for Hive tables without Iceberg metadata"); + } + + @Override + public Snapshot snapshot(long snapshotId) { + throw new UnsupportedOperationException("Snapshots not supported for Hive tables without Iceberg metadata"); + } + + @Override + public Iterable snapshots() { + throw new UnsupportedOperationException("Snapshots not supported for Hive tables without Iceberg metadata"); + } + + @Override + public List history() { + throw new UnsupportedOperationException("History not available for Hive tables without Iceberg metadata"); + } + + @Override + public UpdateSchema updateSchema() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public UpdatePartitionSpec updateSpec() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public UpdateProperties updateProperties() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public ReplaceSortOrder replaceSortOrder() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public UpdateLocation updateLocation() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public AppendFiles newAppend() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public RewriteFiles newRewrite() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public RewriteManifests rewriteManifests() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public OverwriteFiles newOverwrite() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public RowDelta newRowDelta() { + throw new 
UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public ReplacePartitions newReplacePartitions() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public DeleteFiles newDelete() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public ExpireSnapshots expireSnapshots() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public Rollback rollback() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public ManageSnapshots manageSnapshots() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public Transaction newTransaction() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public FileIO io() { + return ops.io(); + } + + @Override + public EncryptionManager encryption() { + return ops.encryption(); + } + + @Override + public LocationProvider locationProvider() { + return ops.locationProvider(); + } + + @Override + public String toString() { + return name; + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java new file mode 100644 index 0000000000..6dcbe0fee5 --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java @@ -0,0 +1,290 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.iceberg.hiveberg; + +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.DataFiles; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.Metrics; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.expressions.Binder; +import org.apache.iceberg.expressions.Bound; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.hadoop.HadoopFileIO; +import org.apache.iceberg.hive.HiveClientPool; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.LocationProvider; +import org.apache.iceberg.mapping.MappingUtil; +import org.apache.iceberg.mapping.NameMappingParser; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.Types; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class LegacyHiveTableOperations extends BaseMetastoreTableOperations { + + private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveTableOperations.class); + + private final HiveClientPool metaClients; + private final String databaseName; + private final String tableName; + private final Configuration conf; + + private FileIO fileIO; + + protected LegacyHiveTableOperations(Configuration conf, HiveClientPool metaClients, String database, String table) { + this.conf = conf; + this.metaClients = metaClients; + this.databaseName = database; + this.tableName = table; + } + + @Override + public FileIO io() { + if (fileIO == null) { + fileIO = new HadoopFileIO(conf); + } + + return fileIO; + } + + @Override + protected void doRefresh() { + try { + org.apache.hadoop.hive.metastore.api.Table hiveTable = + metaClients.run(client -> client.getTable(databaseName, tableName)); + + Schema schema = LegacyHiveTableUtils.getSchema(hiveTable); + PartitionSpec spec = LegacyHiveTableUtils.getPartitionSpec(hiveTable, schema); + + Map tableProperties = Maps.newHashMap(LegacyHiveTableUtils.getTableProperties(hiveTable)); + // Provide a case insensitive name mapping for Hive tables + tableProperties.put(TableProperties.DEFAULT_NAME_MAPPING, + NameMappingParser.toJson(MappingUtil.create(schema, false))); + TableMetadata metadata = TableMetadata.newTableMetadataWithoutFreshIds(schema, spec, + hiveTable.getSd().getLocation(), tableProperties); + setCurrentMetadata(metadata); + } catch (TException e) { + String errMsg = String.format("Failed to get table info from metastore 
%s.%s", databaseName, tableName); + throw new RuntimeException(errMsg, e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted during refresh", e); + } + setShouldRefresh(false); + } + + /** + * Returns an {@link Iterable} of {@link Iterable}s of {@link DataFile}s which belong to the current table and + * match the partition predicates from the given expression. + *

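+ * For example (the column name is illustrative), for a table partitioned by {@code datepartition}, a filter like
+ * {@code datepartition = '2020-01-01'} yields one inner {@link Iterable} per matching partition directory.
+ *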
+ * Each element in the outer {@link Iterable} maps to an {@link Iterable} of {@link DataFile}s originating from the + * same directory + */ + Iterable> getFilesByFilter(Expression expression) { + Iterable matchingDirectories; + if (current().spec().fields().isEmpty()) { + matchingDirectories = ImmutableList.of(getDirectoryInfo()); + } else { + matchingDirectories = getDirectoryInfosByFilter(expression); + } + + Iterable> filesPerDirectory = Iterables.transform( + matchingDirectories, + directory -> { + List files; + if (FileSystemUtils.exists(directory.location(), conf)) { + files = FileSystemUtils.listFiles(directory.location(), conf); + } else { + LOG.warn("Cannot find directory: {}. Skipping.", directory.location()); + files = ImmutableList.of(); + } + return Iterables.transform( + files, + file -> createDataFile(file, current().spec(), directory.partitionData(), directory.format()) + ); + }); + + // Note that we return an Iterable of Iterables here so that the TableScan can process iterables of individual + // directories in parallel hence resulting in a parallel file listing + return filesPerDirectory; + } + + private DirectoryInfo getDirectoryInfo() { + Preconditions.checkArgument(current().spec().fields().isEmpty(), + "getDirectoryInfo only allowed for unpartitioned tables"); + try { + org.apache.hadoop.hive.metastore.api.Table hiveTable = + metaClients.run(client -> client.getTable(databaseName, tableName)); + + return LegacyHiveTableUtils.toDirectoryInfo(hiveTable); + } catch (TException e) { + String errMsg = String.format("Failed to get table info for %s.%s from metastore", databaseName, tableName); + throw new RuntimeException(errMsg, e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted in call to getDirectoryInfo", e); + } + } + + private List getDirectoryInfosByFilter(Expression expression) { + Preconditions.checkArgument(!current().spec().fields().isEmpty(), + "getDirectoryInfosByFilter only allowed for partitioned tables"); + try { + LOG.info("Fetching partitions for {}.{} with expression: {}", databaseName, tableName, expression); + Set partitionColumnNames = current().spec() + .identitySourceIds() + .stream() + .map(id -> current().schema().findColumnName(id)) + .collect(Collectors.toSet()); + Expression simplified = HiveExpressions.simplifyPartitionFilter(expression, partitionColumnNames); + Types.StructType partitionSchema = current().spec().partitionType(); + LOG.info("Simplified expression for {}.{} to {}", databaseName, tableName, simplified); + + List partitions; + Expression boundExpression; + if (simplified.equals(Expressions.alwaysFalse())) { + // If simplifyPartitionFilter returns FALSE, no partitions are going to match the filter expression + partitions = ImmutableList.of(); + } else if (simplified.equals(Expressions.alwaysTrue())) { + // If simplifyPartitionFilter returns TRUE, all partitions are going to match the filter expression + partitions = metaClients.run(client -> client.listPartitionsByFilter( + databaseName, tableName, null, (short) -1)); + } else { + boundExpression = Binder.bind(partitionSchema, simplified, false); + String partitionFilterString = HiveExpressions.toPartitionFilterString(boundExpression); + LOG.info("Listing partitions for {}.{} with filter string: {}", databaseName, tableName, partitionFilterString); + try { + // We first try to use HMS API call to get the filtered partitions. 
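+          // NOTE: pushdown support varies with the metastore version and configuration; some deployments can
+          // only evaluate filters over string-typed partition columns and throw MetaException for other types,
+          // which is handled by the client-side fallback in the catch block below.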
+ partitions = metaClients.run( + client -> client.listPartitionsByFilter(databaseName, tableName, partitionFilterString, (short) -1)); + } catch (MetaException e) { + // If the above HMS call fails, we here try to do the partition filtering ourselves, + // by evaluating all the partitions we got back from HMS against the boundExpression, + // if the evaluation results in true, we include such partition, if false, we filter. + List allPartitions = metaClients.run( + client -> client.listPartitionsByFilter(databaseName, tableName, null, (short) -1)); + partitions = allPartitions.stream().filter(partition -> { + GenericRecord record = GenericRecord.create(partitionSchema); + for (int i = 0; i < record.size(); i++) { + String value = partition.getValues().get(i); + switch (partitionSchema.fields().get(i).type().typeId()) { + case DATE: + record.set(i, + (int) LocalDate.parse(value).toEpochDay()); + break; + case TIMESTAMP: + // This format seems to be matching the hive timestamp column partition string literal value + record.set(i, + LocalDateTime.parse(value, + new DateTimeFormatterBuilder() + .parseLenient() + .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true) + .toFormatter()) + .toInstant(ZoneOffset.UTC).toEpochMilli() * 1000); + break; + default: + record.set(i, partition.getValues().get(i)); + break; + } + } + return ((Bound) boundExpression).eval(record); + }).collect(Collectors.toList()); + } + } + + return LegacyHiveTableUtils.toDirectoryInfos(partitions, current().spec()); + } catch (TException e) { + String errMsg = String.format("Failed to get partition info for %s.%s + expression %s from metastore", + databaseName, tableName, expression); + throw new RuntimeException(errMsg, e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted in call to getPartitionsByFilter", e); + } + } + + private static DataFile createDataFile(FileStatus fileStatus, PartitionSpec partitionSpec, StructLike partitionData, + FileFormat format) { + DataFiles.Builder builder = DataFiles.builder(partitionSpec) + .withPath(fileStatus.getPath().toString()) + .withFormat(format) + .withFileSizeInBytes(fileStatus.getLen()) + .withMetrics(new Metrics(10000L, null, null, null, null, null)); + + if (partitionSpec.fields().isEmpty()) { + return builder.build(); + } else { + return builder.withPartition(partitionData).build(); + } + } + + @Override + public void commit(TableMetadata base, TableMetadata metadata) { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } + + @Override + public String metadataFileLocation(String filename) { + throw new UnsupportedOperationException( + "Metadata file location not available for Hive tables without Iceberg metadata"); + } + + @Override + public LocationProvider locationProvider() { + throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java new file mode 100644 index 0000000000..87a4d4e2e1 --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java @@ -0,0 +1,106 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.iceberg.hiveberg;
+
+import org.apache.iceberg.BaseFileScanTask;
+import org.apache.iceberg.DataTableScan;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.FileScanTask;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.PartitionSpecParser;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SchemaParser;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableOperations;
+import org.apache.iceberg.TableScan;
+import org.apache.iceberg.TableScanContext;
+import org.apache.iceberg.events.Listeners;
+import org.apache.iceberg.events.ScanEvent;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.ResidualEvaluator;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
+import org.apache.iceberg.util.ParallelIterable;
+import org.apache.iceberg.util.ThreadPools;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * A {@link DataTableScan} which uses Hive table and partition metadata to read tables.
+ * This scan does not provide any time travel, snapshot isolation, or incremental computation benefits.
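+ *
+ * File listing is parallelized across the matching partition directories: {@link #planFiles()} wraps the
+ * per-directory file iterables from {@link LegacyHiveTableOperations} in a {@link ParallelIterable} backed by
+ * the shared worker pool.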
+ */ +public class LegacyHiveTableScan extends DataTableScan { + private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveTableScan.class); + + protected LegacyHiveTableScan(TableOperations ops, Table table) { + super(ops, table); + } + + protected LegacyHiveTableScan(TableOperations ops, Table table, Schema schema, TableScanContext context) { + super(ops, table, schema, context); + } + + @Override + @SuppressWarnings("checkstyle:HiddenField") + protected TableScan newRefinedScan(TableOperations ops, Table table, Schema schema, TableScanContext context) { + return new LegacyHiveTableScan(ops, table, schema, context); + } + + @Override + public CloseableIterable planFiles() { + LOG.info("Scanning table {} with filter {}", table().toString(), filter()); + + Listeners.notifyAll( + new ScanEvent(table().toString(), -1, filter(), schema())); + + LegacyHiveTableOperations hiveOps = (LegacyHiveTableOperations) tableOps(); + PartitionSpec spec = hiveOps.current().spec(); + String schemaString = SchemaParser.toJson(spec.schema()); + String specString = PartitionSpecParser.toJson(spec); + ResidualEvaluator residuals = ResidualEvaluator.of(spec, filter(), isCaseSensitive()); + + Iterable> tasks = Iterables.transform( + hiveOps.getFilesByFilter(filter()), + fileIterable -> + Iterables.transform( + fileIterable, + file -> new BaseFileScanTask(file, new DeleteFile[0], schemaString, specString, residuals))); + + return new ParallelIterable<>(tasks, ThreadPools.getWorkerPool()); + } + + @Override + public CloseableIterable planFiles(TableOperations ops, Snapshot snapshot, + Expression rowFilter, boolean ignoreResiduals, + boolean caseSensitive, boolean colStats) { + throw new IllegalStateException("Control flow should never reach here"); + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java new file mode 100644 index 0000000000..4a33ced42b --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java @@ -0,0 +1,232 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.iceberg.hiveberg; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.StructLike; +import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.avro.AvroSchemaVisitor; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +class LegacyHiveTableUtils { + + private LegacyHiveTableUtils() { + } + + private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveTableUtils.class); + + static Schema getSchema(org.apache.hadoop.hive.metastore.api.Table table) { + Map props = getTableProperties(table); + String schemaStr = props.get("avro.schema.literal"); + // Disable default value validation for backward compatibility with Avro 1.7 + org.apache.avro.Schema avroSchema = + schemaStr != null ? new org.apache.avro.Schema.Parser().setValidateDefaults(false).parse(schemaStr) : null; + Schema schema; + if (avroSchema != null) { + String serde = table.getSd().getSerdeInfo().getSerializationLib(); + org.apache.avro.Schema finalAvroSchema; + if (serde.equals("org.apache.hadoop.hive.serde2.avro.AvroSerDe") || + HasDuplicateLowercaseColumnNames.visit(avroSchema)) { + // Case 1: If serde == AVRO, early escape; Hive column info is not reliable and can be empty for these tables + // Hive itself uses avro.schema.literal as source of truth for these tables, so this should be fine + // Case 2: If avro.schema.literal has duplicate column names when lowercased, that means we cannot do reliable + // matching with Hive schema as multiple Avro fields can map to the same Hive field + finalAvroSchema = avroSchema; + } else { + finalAvroSchema = MergeHiveSchemaWithAvro.visit(structTypeInfoFromCols(table.getSd().getCols()), avroSchema); + } + schema = AvroSchemaUtil.toIceberg(finalAvroSchema); + } else { + // TODO: Do we need to support column and column.types properties for ORC tables? + LOG.info("Table {}.{} does not have an avro.schema.literal set; using Hive schema instead. 
" + + "The schema will not have case sensitivity and nullability information", + table.getDbName(), table.getTableName()); + Type icebergType = HiveTypeUtil.convert(structTypeInfoFromCols(table.getSd().getCols())); + schema = new Schema(icebergType.asNestedType().asStructType().fields()); + } + Types.StructType dataStructType = schema.asStruct(); + List fields = Lists.newArrayList(dataStructType.fields()); + + Schema partitionSchema = partitionSchema(table.getPartitionKeys(), schema); + Types.StructType partitionStructType = partitionSchema.asStruct(); + fields.addAll(partitionStructType.fields()); + return new Schema(fields); + } + + static StructTypeInfo structTypeInfoFromCols(List cols) { + Preconditions.checkArgument(cols != null && cols.size() > 0, "No Hive schema present"); + List fieldNames = cols + .stream() + .map(FieldSchema::getName) + .collect(Collectors.toList()); + List fieldTypeInfos = cols + .stream() + .map(f -> TypeInfoUtils.getTypeInfoFromTypeString(f.getType())) + .collect(Collectors.toList()); + return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos); + } + + private static Schema partitionSchema(List partitionKeys, Schema dataSchema) { + AtomicInteger fieldId = new AtomicInteger(10000); + List partitionFields = Lists.newArrayList(); + partitionKeys.forEach(f -> { + Types.NestedField field = dataSchema.findField(f.getName()); + if (field != null) { + throw new IllegalStateException(String.format("Partition field %s also present in data", field.name())); + } + partitionFields.add( + Types.NestedField.optional( + fieldId.incrementAndGet(), f.getName(), primitiveIcebergType(f.getType()), f.getComment())); + }); + return new Schema(partitionFields); + } + + private static Type primitiveIcebergType(String hiveTypeString) { + PrimitiveTypeInfo primitiveTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(hiveTypeString); + return HiveTypeUtil.convert(primitiveTypeInfo); + } + + static Map getTableProperties(org.apache.hadoop.hive.metastore.api.Table table) { + Map props = new HashMap<>(); + props.putAll(table.getSd().getParameters()); + props.putAll(table.getParameters()); + props.putAll(table.getSd().getSerdeInfo().getParameters()); + return props; + } + + static PartitionSpec getPartitionSpec(org.apache.hadoop.hive.metastore.api.Table table, Schema schema) { + PartitionSpec.Builder builder = PartitionSpec.builderFor(schema); + table.getPartitionKeys().forEach(fieldSchema -> builder.identity(fieldSchema.getName())); + return builder.build(); + } + + static DirectoryInfo toDirectoryInfo(org.apache.hadoop.hive.metastore.api.Table table) { + return new DirectoryInfo(table.getSd().getLocation(), + serdeToFileFormat(table.getSd().getSerdeInfo().getSerializationLib()), null); + } + + static List toDirectoryInfos(List partitions, PartitionSpec spec) { + return partitions.stream().map( + p -> new DirectoryInfo( + p.getSd().getLocation(), + serdeToFileFormat( + p.getSd().getSerdeInfo().getSerializationLib()), + buildPartitionStructLike(p.getValues(), spec)) + ).collect(Collectors.toList()); + } + + private static StructLike buildPartitionStructLike(List partitionValues, PartitionSpec spec) { + List fields = spec.partitionType().fields(); + return new StructLike() { + @Override + public int size() { + return partitionValues.size(); + } + + @Override + public T get(int pos, Class javaClass) { + final Object partitionValue = Conversions.fromPartitionString( + fields.get(pos).type(), + partitionValues.get(pos)); + return javaClass.cast(partitionValue); + } + + 
@Override + public void set(int pos, T value) { + throw new IllegalStateException("Read-only"); + } + }; + } + + private static FileFormat serdeToFileFormat(String serde) { + switch (serde) { + case "org.apache.hadoop.hive.serde2.avro.AvroSerDe": + return FileFormat.AVRO; + case "org.apache.hadoop.hive.ql.io.orc.OrcSerde": + return FileFormat.ORC; + default: + throw new IllegalArgumentException("Unrecognized serde: " + serde); + } + } + + private static class HasDuplicateLowercaseColumnNames extends AvroSchemaVisitor { + + private static boolean visit(org.apache.avro.Schema schema) { + return AvroSchemaVisitor.visit(schema, new HasDuplicateLowercaseColumnNames()); + } + + @Override + public Boolean record(org.apache.avro.Schema record, List names, List fieldResults) { + return fieldResults.stream().anyMatch(x -> x) || + names.stream().collect(Collectors.groupingBy(String::toLowerCase)) + .values().stream().anyMatch(x -> x.size() > 1); + } + + @Override + public Boolean union(org.apache.avro.Schema union, List optionResults) { + return optionResults.stream().anyMatch(x -> x); + } + + @Override + public Boolean array(org.apache.avro.Schema array, Boolean elementResult) { + return elementResult; + } + + @Override + public Boolean map(org.apache.avro.Schema map, Boolean valueResult) { + return valueResult; + } + + @Override + public Boolean primitive(org.apache.avro.Schema primitive) { + return false; + } + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java new file mode 100644 index 0000000000..4edd620502 --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java @@ -0,0 +1,268 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.hiveberg; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.avro.JsonProperties; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; +import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + + +/** + * A {@link HiveSchemaWithPartnerVisitor} which augments a Hive schema with extra metadata from a partner Avro schema + * and generates a resultant "merged" Avro schema + * + * 1. Fields are matched between Hive and Avro schemas using a case insensitive search by field name + * 2. Copies field names, nullability, default value, field props from the Avro schema + * 3. 
Copies field type from the Hive schema.
+ *    TODO: We should also handle some cases of type promotion where the types in Avro are potentially more correct
+ *    e.g. BINARY in Hive -> FIXED in Avro, STRING in Hive -> ENUM in Avro, etc.
+ * 4. Retains fields found only in the Hive schema; ignores fields found only in the Avro schema
+ * 5. Fields found only in the Hive schema are represented as optional fields in the resultant Avro schema
+ * 6. For fields found only in the Hive schema, field names are sanitized to make them compatible with the Avro
+ *    identifier spec
+ */
+class MergeHiveSchemaWithAvro extends HiveSchemaWithPartnerVisitor<Schema, Schema.Field, Schema, Schema.Field> {
+
+  static Schema visit(StructTypeInfo typeInfo, Schema schema) {
+    return visit(typeInfo, schema, new MergeHiveSchemaWithAvro(),
+        AvroPartnerAccessor.INSTANCE);
+  }
+
+  private final AtomicInteger recordCounter = new AtomicInteger(0);
+
+  @Override
+  public Schema struct(StructTypeInfo struct, Schema partner, List<Schema.Field> fieldResults) {
+    boolean shouldResultBeOptional = partner == null || AvroSchemaUtil.isOptionSchema(partner);
+    Schema result;
+    if (partner == null || extractIfOption(partner).getType() != Schema.Type.RECORD) {
+      // if there was no matching Avro struct, return a struct with a new record name/namespace
+      int recordNum = recordCounter.incrementAndGet();
+      result = Schema.createRecord("record" + recordNum, null, "namespace" + recordNum, false, fieldResults);
+    } else {
+      result = AvroSchemaUtil.copyRecord(extractIfOption(partner), fieldResults, null);
+    }
+    return shouldResultBeOptional ? AvroSchemaUtil.toOption(result) : result;
+  }
+
+  @Override
+  public Schema.Field field(String name, TypeInfo field, Schema.Field partner, Schema fieldResult) {
+    // No need to infer `shouldResultBeOptional`. We expect other visitor methods to return optional schemas
+    // in their field results if required
+    if (partner == null) {
+      // if there was no matching Avro field, use the name from the Hive schema and set a null default
+      return new Schema.Field(
+          AvroSchemaUtil.makeCompatibleName(name), fieldResult, null, Schema.Field.NULL_DEFAULT_VALUE);
+    } else {
+      // TODO: How to ensure that field default value is compatible with new field type generated from Hive?
+      // Copy field type from the visitor result, copy everything else from the partner
+      // Avro requires the default value to match the first type in the option, reorder option if required
+      Schema reordered = reorderOptionIfRequired(fieldResult, partner.defaultVal());
+      return AvroSchemaUtil.copyField(partner, reordered, partner.name());
+    }
+  }
+
+  /**
+   * Reorders an option schema so that the type of the provided default value is the first type in the option schema
+   *
+   * e.g.
If the schema is (NULL, INT) and the default value is 1, the returned schema is (INT, NULL) + * If the schema is not an option schema or if there is no default value, schema is returned as-is + */ + private Schema reorderOptionIfRequired(Schema schema, Object defaultValue) { + if (AvroSchemaUtil.isOptionSchema(schema) && defaultValue != null) { + boolean isNullFirstOption = schema.getTypes().get(0).getType() == Schema.Type.NULL; + if (isNullFirstOption && defaultValue.equals(JsonProperties.NULL_VALUE)) { + return schema; + } else { + return Schema.createUnion(schema.getTypes().get(1), schema.getTypes().get(0)); + } + } else { + return schema; + } + } + + @Override + public Schema list(ListTypeInfo list, Schema partner, Schema elementResult) { + // if there was no matching Avro list, or if matching Avro list was an option, return an optional list + boolean shouldResultBeOptional = partner == null || AvroSchemaUtil.isOptionSchema(partner); + Schema result = Schema.createArray(elementResult); + return shouldResultBeOptional ? AvroSchemaUtil.toOption(result) : result; + } + + @Override + public Schema map(MapTypeInfo map, Schema partner, Schema keyResult, Schema valueResult) { + Preconditions.checkArgument(extractIfOption(keyResult).getType() == Schema.Type.STRING, + "Map keys should always be non-nullable strings. Found: %s", keyResult); + // if there was no matching Avro map, or if matching Avro map was an option, return an optional map + boolean shouldResultBeOptional = partner == null || AvroSchemaUtil.isOptionSchema(partner); + Schema result = Schema.createMap(valueResult); + return shouldResultBeOptional ? AvroSchemaUtil.toOption(result) : result; + } + + @Override + public Schema union(UnionTypeInfo union, Schema partner, List results) { + if (AvroSchemaUtil.nullExistInUnion(partner)) { + List toAddNull = new ArrayList<>(); + toAddNull.add(Schema.create(Schema.Type.NULL)); + toAddNull.addAll(results); + return Schema.createUnion(toAddNull); + } + return Schema.createUnion(results); + } + + @Override + public Schema primitive(PrimitiveTypeInfo primitive, Schema partner) { + boolean shouldResultBeOptional = partner == null || AvroSchemaUtil.isOptionSchema(partner); + Schema hivePrimitive = hivePrimitiveToAvro(primitive); + // if there was no matching Avro primitive, use the Hive primitive + Schema result = partner == null ? hivePrimitive : checkCompatibilityAndPromote(hivePrimitive, partner); + return shouldResultBeOptional ? AvroSchemaUtil.toOption(result) : result; + } + + private Schema checkCompatibilityAndPromote(Schema schema, Schema partner) { + // TODO: Check if schema is compatible with partner + // Also do type promotion if required, schema = string & partner = enum, schema = bytes & partner = fixed, etc + return schema; + } + + /** + * A {@link PartnerAccessor} which matches the requested field from a partner Avro struct by case insensitive + * field name match + */ + private static class AvroPartnerAccessor implements PartnerAccessor { + private static final AvroPartnerAccessor INSTANCE = new AvroPartnerAccessor(); + + private static final Schema MAP_KEY = Schema.create(Schema.Type.STRING); + + @Override + public Schema.Field fieldPartner(Schema partner, String fieldName) { + Schema schema = extractIfOption(partner); + return (schema.getType() == Schema.Type.RECORD) ? 
findCaseInsensitive(schema, fieldName) : null; + } + + @Override + public Schema fieldType(Schema.Field partnerField) { + return partnerField.schema(); + } + + @Override + public Schema mapKeyPartner(Schema partner) { + Schema schema = extractIfOption(partner); + return (schema.getType() == Schema.Type.MAP) ? MAP_KEY : null; + } + + @Override + public Schema mapValuePartner(Schema partner) { + Schema schema = extractIfOption(partner); + return (schema.getType() == Schema.Type.MAP) ? schema.getValueType() : null; + } + + @Override + public Schema listElementPartner(Schema partner) { + Schema schema = extractIfOption(partner); + return (schema.getType() == Schema.Type.ARRAY) ? schema.getElementType() : null; + } + + @Override + public Schema unionObjectPartner(Schema partner, int ordinal) { + if (partner.getType() != Schema.Type.UNION) { + return null; + } + Schema schema = AvroSchemaUtil.discardNullFromUnionIfExist(partner); + return schema.getTypes().get(ordinal); + } + + private Schema.Field findCaseInsensitive(Schema struct, String fieldName) { + Preconditions.checkArgument(struct.getType() == Schema.Type.RECORD); + // TODO: Optimize? This will be called for every struct field, we will run the for loop for every struct field + for (Schema.Field field : struct.getFields()) { + if (field.name().equalsIgnoreCase(fieldName)) { + return field; + } + } + return null; + } + } + + private static Schema extractIfOption(Schema schema) { + if (AvroSchemaUtil.isOptionSchema(schema)) { + return AvroSchemaUtil.fromOption(schema); + } else { + return schema; + } + } + + // Additional numeric type, similar to other logical type names in AvroSerde + private static final String SHORT_TYPE_NAME = "short"; + private static final String BYTE_TYPE_NAME = "byte"; + + // TODO: This should be refactored into a visitor if we ever require conversion of complex types + public Schema hivePrimitiveToAvro(PrimitiveTypeInfo primitive) { + switch (primitive.getPrimitiveCategory()) { + case INT: + case BYTE: + case SHORT: + return Schema.create(Schema.Type.INT); + + case LONG: + return Schema.create(Schema.Type.LONG); + + case FLOAT: + return Schema.create(Schema.Type.FLOAT); + + case DOUBLE: + return Schema.create(Schema.Type.DOUBLE); + + case BOOLEAN: + return Schema.create(Schema.Type.BOOLEAN); + + case CHAR: + case STRING: + case VARCHAR: + return Schema.create(Schema.Type.STRING); + + case BINARY: + return Schema.create(Schema.Type.BYTES); + + case VOID: + return Schema.create(Schema.Type.NULL); + + case DATE: + return LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)); + + case TIMESTAMP: + Schema schema = Schema.create(Schema.Type.LONG); + schema.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, false); + return LogicalTypes.timestampMillis().addToSchema(schema); + + case DECIMAL: + DecimalTypeInfo dti = (DecimalTypeInfo) primitive; + return LogicalTypes.decimal(dti.getPrecision(), dti.getScale()).addToSchema(Schema.create(Schema.Type.BYTES)); + + default: + throw new UnsupportedOperationException(primitive + " is not supported."); + } + } +} diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java new file mode 100644 index 0000000000..05d5ac648f --- /dev/null +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hiveberg; + +import java.util.HashMap; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.iceberg.hive.HiveCatalog; +import org.junit.AfterClass; +import org.junit.BeforeClass; + + +/** + * This class is copied from iceberg-hive-metastore module test code + */ +public abstract class HiveMetastoreTest { + + protected static final String DB_NAME = "hivedb"; + + protected static HiveMetaStoreClient metastoreClient; + protected static HiveCatalog catalog; + protected static HiveConf hiveConf; + protected static TestHiveMetastore metastore; + + @BeforeClass + public static void startMetastore() throws Exception { + HiveMetastoreTest.metastore = new TestHiveMetastore(); + metastore.start(); + HiveMetastoreTest.hiveConf = metastore.hiveConf(); + HiveMetastoreTest.metastoreClient = new HiveMetaStoreClient(hiveConf); + String dbPath = metastore.getDatabasePath(DB_NAME); + Database db = new Database(DB_NAME, "description", dbPath, new HashMap<>()); + metastoreClient.createDatabase(db); + HiveMetastoreTest.catalog = new HiveCatalog(hiveConf); + } + + @AfterClass + public static void stopMetastore() { + catalog.close(); + HiveMetastoreTest.catalog = null; + + metastoreClient.close(); + HiveMetastoreTest.metastoreClient = null; + + metastore.stop(); + HiveMetastoreTest.metastore = null; + } +} diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java new file mode 100644 index 0000000000..701ea98ae7 --- /dev/null +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java @@ -0,0 +1,249 @@ +/* + * + * Copyright 2004 Clinton Begin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Slightly modified version of the com.ibatis.common.jdbc.ScriptRunner class + * from the iBATIS Apache project. Only removed dependency on Resource class + * and a constructor. 
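+ * In this module it is used by TestHiveMetastore#setupMetastoreDB to execute the bundled hive-schema-3.1.0.derby.sql script.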
+ */ + +package org.apache.iceberg.hiveberg; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.PrintWriter; +import java.io.Reader; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; + + +/** + * This class is copied from iceberg-hive-metastore module test code + */ +public class ScriptRunner { + + private static final String DEFAULT_DELIMITER = ";"; + + private Connection connection; + + private boolean stopOnError; + private boolean autoCommit; + + private PrintWriter logWriter = new PrintWriter(System.out); + private PrintWriter errorLogWriter = new PrintWriter(System.err); + + private String delimiter = DEFAULT_DELIMITER; + private boolean fullLineDelimiter = false; + + /** + * Creates a runner for the given connection with the requested auto-commit and stop-on-error behavior + */ + public ScriptRunner(Connection connection, boolean autoCommit, + boolean stopOnError) { + this.connection = connection; + this.autoCommit = autoCommit; + this.stopOnError = stopOnError; + } + + public void setDelimiter(String newDelimiter, boolean newFullLineDelimiter) { + this.delimiter = newDelimiter; + this.fullLineDelimiter = newFullLineDelimiter; + } + + /** + * Setter for logWriter property + * + * @param logWriter + * - the new value of the logWriter property + */ + public void setLogWriter(PrintWriter logWriter) { + this.logWriter = logWriter; + } + + /** + * Setter for errorLogWriter property + * + * @param errorLogWriter + * - the new value of the errorLogWriter property + */ + public void setErrorLogWriter(PrintWriter errorLogWriter) { + this.errorLogWriter = errorLogWriter; + } + + /** + * Runs an SQL script (read in using the Reader parameter) + * + * @param reader + * - the source of the script + */ + public void runScript(Reader reader) throws IOException, SQLException { + try { + boolean originalAutoCommit = connection.getAutoCommit(); + try { + if (originalAutoCommit != this.autoCommit) { + connection.setAutoCommit(this.autoCommit); + } + runScript(connection, reader); + } finally { + connection.setAutoCommit(originalAutoCommit); + } + } catch (IOException e) { + throw e; + } catch (SQLException e) { + throw e; + } catch (Exception e) { + throw new RuntimeException("Error running script.
Cause: " + e, e); + } + } + + /** + * Runs an SQL script (read in using the Reader parameter) using the + * connection passed in + * + * @param conn + * - the connection to use for the script + * @param reader + * - the source of the script + * @throws SQLException + * if any SQL errors occur + * @throws IOException + * if there is an error reading from the Reader + */ + @SuppressWarnings("checkstyle:CyclomaticComplexity") + private void runScript(Connection conn, Reader reader) throws IOException, SQLException { + StringBuilder command = null; + try { + LineNumberReader lineReader = new LineNumberReader(reader); + String line = null; + while ((line = lineReader.readLine()) != null) { + if (command == null) { + command = new StringBuilder(); + } + String trimmedLine = line.trim(); + if (trimmedLine.startsWith("--")) { + println(trimmedLine); + } else if (trimmedLine.length() < 1 || trimmedLine.startsWith("//")) { + // Do nothing + } else if (trimmedLine.length() < 1 || trimmedLine.startsWith("--")) { + // Do nothing + } else if (!fullLineDelimiter && trimmedLine.endsWith(getDelimiter()) || + fullLineDelimiter && trimmedLine.equals(getDelimiter())) { + command.append(line.substring(0, line + .lastIndexOf(getDelimiter()))); + command.append(" "); + Statement statement = conn.createStatement(); + + println(command); + + boolean hasResults = false; + if (stopOnError) { + hasResults = statement.execute(command.toString()); + } else { + try { + statement.execute(command.toString()); + } catch (SQLException e) { + e.fillInStackTrace(); + printlnError("Error executing: " + command); + printlnError(e); + } + } + + if (autoCommit && !conn.getAutoCommit()) { + conn.commit(); + } + + ResultSet rs = statement.getResultSet(); + if (hasResults && rs != null) { + ResultSetMetaData md = rs.getMetaData(); + int cols = md.getColumnCount(); + for (int i = 0; i < cols; i++) { + String name = md.getColumnLabel(i); + print(name + "\t"); + } + println(""); + while (rs.next()) { + for (int i = 0; i < cols; i++) { + String value = rs.getString(i); + print(value + "\t"); + } + println(""); + } + } + + command = null; + try { + statement.close(); + } catch (Exception e) { + // Ignore to workaround a bug in Jakarta DBCP + } + Thread.yield(); + } else { + command.append(line); + command.append(" "); + } + } + if (!autoCommit) { + conn.commit(); + } + } catch (SQLException e) { + e.fillInStackTrace(); + printlnError("Error executing: " + command); + printlnError(e); + throw e; + } catch (IOException e) { + e.fillInStackTrace(); + printlnError("Error executing: " + command); + printlnError(e); + throw e; + } finally { + conn.rollback(); + flush(); + } + } + + private String getDelimiter() { + return delimiter; + } + + private void print(Object obj) { + if (logWriter != null) { + System.out.print(obj); + } + } + + private void println(Object obj) { + if (logWriter != null) { + logWriter.println(obj); + } + } + + private void printlnError(Object obj) { + if (errorLogWriter != null) { + errorLogWriter.println(obj); + } + } + + private void flush() { + if (logWriter != null) { + logWriter.flush(); + } + if (errorLogWriter != null) { + errorLogWriter.flush(); + } + } +} diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestHiveExpressions.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java similarity index 82% rename from hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestHiveExpressions.java rename to 
hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java index 9acfc3d41d..0c75c251b3 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestHiveExpressions.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; @@ -35,7 +30,7 @@ import static org.apache.iceberg.expressions.Expressions.notIn; import static org.apache.iceberg.expressions.Expressions.notNull; import static org.apache.iceberg.expressions.Expressions.or; -import static org.apache.iceberg.hive.legacy.HiveExpressions.simplifyPartitionFilter; +import static org.apache.iceberg.hiveberg.HiveExpressions.simplifyPartitionFilter; public class TestHiveExpressions { diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java new file mode 100644 index 0000000000..991fe61013 --- /dev/null +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.hiveberg; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.IHMSHandler; +import org.apache.hadoop.hive.metastore.RetryingHMSHandler; +import org.apache.hadoop.hive.metastore.TSetIpAddressProcessor; +import org.apache.iceberg.common.DynConstructors; +import org.apache.iceberg.common.DynMethods; +import org.apache.iceberg.hadoop.Util; +import org.apache.iceberg.hive.HiveClientPool; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.thrift.server.TServer; +import org.apache.thrift.server.TThreadPoolServer; +import org.apache.thrift.transport.TServerSocket; +import org.apache.thrift.transport.TTransportFactory; + +import static java.nio.file.Files.*; +import static java.nio.file.attribute.PosixFilePermissions.*; + +/** + * This class is copied from iceberg-hive-metastore module test code + */ +public class TestHiveMetastore { + + private static final String DEFAULT_DATABASE_NAME = "default"; + private static final int DEFAULT_POOL_SIZE = 5; + + // create the metastore handlers based on whether we're working with Hive2 or Hive3 dependencies + // we need to do this because there is a breaking API change between Hive2 and Hive3 + private static final DynConstructors.Ctor HMS_HANDLER_CTOR = DynConstructors.builder() + .impl(HiveMetaStore.HMSHandler.class, String.class, Configuration.class) + .impl(HiveMetaStore.HMSHandler.class, String.class, HiveConf.class) + .build(); + + private static final DynMethods.StaticMethod GET_BASE_HMS_HANDLER = DynMethods.builder("getProxy") + .impl(RetryingHMSHandler.class, Configuration.class, IHMSHandler.class, boolean.class) + .impl(RetryingHMSHandler.class, HiveConf.class, IHMSHandler.class, boolean.class) + .buildStatic(); + + // Hive3 introduces background metastore tasks (MetastoreTaskThread) for performing various cleanup duties. These + // threads are scheduled and executed in a static thread pool (org.apache.hadoop.hive.metastore.ThreadPool). + // This thread pool is shut down normally as part of the JVM shutdown hook, but since we're creating and tearing down + // multiple metastore instances within the same JVM, we have to call this cleanup method manually, otherwise + // threads from our previous test suite will be stuck in the pool with stale config, and keep on being scheduled. + // This can lead to issues, e.g. accidental Persistence Manager closure by ScheduledQueryExecutionsMaintTask. + private static final DynMethods.StaticMethod METASTORE_THREADS_SHUTDOWN = DynMethods.builder("shutdown") + .impl("org.apache.hadoop.hive.metastore.ThreadPool") + .orNoop() + .buildStatic(); + + private File hiveLocalDir; + private HiveConf hiveConf; + private ExecutorService executorService; + private TServer server; + private HiveMetaStore.HMSHandler baseHandler; + private HiveClientPool clientPool; + + /** + * Starts a TestHiveMetastore with the default connection pool size (5) and the default HiveConf. 
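+ * Equivalent to {@code start(new HiveConf(new Configuration(), TestHiveMetastore.class), DEFAULT_POOL_SIZE)}.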
+ */ + public void start() { + start(new HiveConf(new Configuration(), TestHiveMetastore.class), DEFAULT_POOL_SIZE); + } + + /** + * Starts a TestHiveMetastore with the default connection pool size (5) and the provided HiveConf. + * @param conf The hive configuration to use + */ + public void start(HiveConf conf) { + start(conf, DEFAULT_POOL_SIZE); + } + + /** + * Starts a TestHiveMetastore with a provided connection pool size and HiveConf. + * @param conf The hive configuration to use + * @param poolSize The number of threads in the executor pool + */ + public void start(HiveConf conf, int poolSize) { + try { + this.hiveLocalDir = createTempDirectory("hive", asFileAttribute(fromString("rwxrwxrwx"))).toFile(); + File derbyLogFile = new File(hiveLocalDir, "derby.log"); + System.setProperty("derby.stream.error.file", derbyLogFile.getAbsolutePath()); + setupMetastoreDB("jdbc:derby:" + getDerbyPath() + ";create=true"); + + TServerSocket socket = new TServerSocket(0); + int port = socket.getServerSocket().getLocalPort(); + initConf(conf, port); + + this.hiveConf = conf; + this.server = newThriftServer(socket, poolSize, hiveConf); + this.executorService = Executors.newSingleThreadExecutor(); + this.executorService.submit(() -> server.serve()); + + // in Hive3, setting this as a system prop ensures that it will be picked up whenever a new HiveConf is created + System.setProperty(HiveConf.ConfVars.METASTOREURIS.varname, hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS)); + + this.clientPool = new HiveClientPool(1, hiveConf); + } catch (Exception e) { + throw new RuntimeException("Cannot start TestHiveMetastore", e); + } + } + + public void stop() { + if (clientPool != null) { + clientPool.close(); + } + if (server != null) { + server.stop(); + } + if (executorService != null) { + executorService.shutdown(); + } + if (hiveLocalDir != null) { + hiveLocalDir.delete(); + } + if (baseHandler != null) { + baseHandler.shutdown(); + } + METASTORE_THREADS_SHUTDOWN.invoke(); + } + + public HiveConf hiveConf() { + return hiveConf; + } + + public HiveClientPool clientPool() { + return clientPool; + } + + public String getDatabasePath(String dbName) { + File dbDir = new File(hiveLocalDir, dbName + ".db"); + return dbDir.getPath(); + } + + public void reset() throws Exception { + for (String dbName : clientPool.run(client -> client.getAllDatabases())) { + for (String tblName : clientPool.run(client -> client.getAllTables(dbName))) { + clientPool.run(client -> { + client.dropTable(dbName, tblName, true, true, true); + return null; + }); + } + + if (!DEFAULT_DATABASE_NAME.equals(dbName)) { + // Drop cascade, functions dropped by cascade + clientPool.run(client -> { + client.dropDatabase(dbName, true, true, true); + return null; + }); + } + } + + Path warehouseRoot = new Path(hiveLocalDir.getAbsolutePath()); + FileSystem fs = Util.getFs(warehouseRoot, hiveConf); + for (FileStatus fileStatus : fs.listStatus(warehouseRoot)) { + if (!fileStatus.getPath().getName().equals("derby.log") && + !fileStatus.getPath().getName().equals("metastore_db")) { + fs.delete(fileStatus.getPath(), true); + } + } + } + + private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf conf) throws Exception { + HiveConf serverConf = new HiveConf(conf); + serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + getDerbyPath() + ";create=true"); + baseHandler = HMS_HANDLER_CTOR.newInstance("new db based metaserver", serverConf); + IHMSHandler handler = GET_BASE_HMS_HANDLER.invoke(serverConf,
baseHandler, false); + + TThreadPoolServer.Args args = new TThreadPoolServer.Args(socket) + .processor(new TSetIpAddressProcessor<>(handler)) + .transportFactory(new TTransportFactory()) + .protocolFactory(new TBinaryProtocol.Factory()) + .minWorkerThreads(poolSize) + .maxWorkerThreads(poolSize); + + return new TThreadPoolServer(args); + } + + private void initConf(HiveConf conf, int port) { + conf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://localhost:" + port); + conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + hiveLocalDir.getAbsolutePath()); + conf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "true"); + conf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false"); + conf.set("iceberg.hive.client-pool-size", "2"); + } + + private void setupMetastoreDB(String dbURL) throws SQLException, IOException { + Connection connection = DriverManager.getConnection(dbURL); + ScriptRunner scriptRunner = new ScriptRunner(connection, true, true); + + ClassLoader classLoader = ClassLoader.getSystemClassLoader(); + InputStream inputStream = classLoader.getResourceAsStream("hive-schema-3.1.0.derby.sql"); + try (Reader reader = new InputStreamReader(inputStream)) { + scriptRunner.runScript(reader); + } + } + + private String getDerbyPath() { + File metastoreDB = new File(hiveLocalDir, "metastore_db"); + return metastoreDB.getPath(); + } +} diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestHiveSchemaConversions.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java similarity index 78% rename from hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestHiveSchemaConversions.java rename to hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java index 8c770437b3..db4bfa094a 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestHiveSchemaConversions.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.util.List; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestLegacyHiveTableScan.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java similarity index 94% rename from hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestLegacyHiveTableScan.java rename to hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java index 9baa05344f..49e8818fdc 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestLegacyHiveTableScan.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import java.io.File; import java.io.IOException; @@ -51,7 +46,6 @@ import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.hive.HiveCatalog; -import org.apache.iceberg.hive.HiveMetastoreTest; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestMergeHiveSchemaWithAvro.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java similarity index 94% rename from hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestMergeHiveSchemaWithAvro.java rename to hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java index 4310a1e9df..4298069e7b 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/legacy/TestMergeHiveSchemaWithAvro.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java @@ -1,23 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.hive.legacy; +package org.apache.iceberg.hiveberg; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; diff --git a/hiveberg/src/test/resources/hive-schema-3.1.0.derby.sql b/hiveberg/src/test/resources/hive-schema-3.1.0.derby.sql new file mode 100644 index 0000000000..55097d6639 --- /dev/null +++ b/hiveberg/src/test/resources/hive-schema-3.1.0.derby.sql @@ -0,0 +1,726 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +-- This file was copied from Apache Hive, at: +-- https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/sql/derby/hive-schema-3.1.0.derby.sql +-- +-- This has been modified slightly for compatibility with older Hive versions. 
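+-- TestHiveMetastore executes this script via ScriptRunner to bootstrap the embedded Derby metastore database.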
+-- +-- Timestamp: 2011-09-22 15:32:02.024 +-- Source database is: /home/carl/Work/repos/hive1/metastore/scripts/upgrade/derby/mdb +-- Connection URL is: jdbc:derby:/home/carl/Work/repos/hive1/metastore/scripts/upgrade/derby/mdb +-- Specified schema is: APP +-- appendLogs: false + +-- ---------------------------------------------- +-- DDL Statements for functions +-- ---------------------------------------------- + +CREATE FUNCTION "APP"."NUCLEUS_ASCII" (C CHAR(1)) RETURNS INTEGER LANGUAGE JAVA PARAMETER STYLE JAVA READS SQL DATA CALLED ON NULL INPUT EXTERNAL NAME 'org.datanucleus.store.rdbms.adapter.DerbySQLFunction.ascii' ; + +CREATE FUNCTION "APP"."NUCLEUS_MATCHES" (TEXT VARCHAR(8000),PATTERN VARCHAR(8000)) RETURNS INTEGER LANGUAGE JAVA PARAMETER STYLE JAVA READS SQL DATA CALLED ON NULL INPUT EXTERNAL NAME 'org.datanucleus.store.rdbms.adapter.DerbySQLFunction.matches' ; + +-- ---------------------------------------------- +-- DDL Statements for tables +-- ---------------------------------------------- +CREATE TABLE "APP"."DBS" ( + "DB_ID" BIGINT NOT NULL, + "DESC" VARCHAR(4000), + "DB_LOCATION_URI" VARCHAR(4000) NOT NULL, + "NAME" VARCHAR(128), + "OWNER_NAME" VARCHAR(128), + "OWNER_TYPE" VARCHAR(10), + "CTLG_NAME" VARCHAR(256) +); + +CREATE TABLE "APP"."TBL_PRIVS" ("TBL_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_PRIV" VARCHAR(128), "TBL_ID" BIGINT, "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."DATABASE_PARAMS" ("DB_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(180) NOT NULL, "PARAM_VALUE" VARCHAR(4000)); + +CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(767), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT, "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."SERDE_PARAMS" ("SERDE_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" CLOB); + +CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(767) NOT NULL, "TYPE_NAME" CLOB, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(767), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."CDS" ("CD_ID" BIGINT NOT NULL); + +CREATE TABLE "APP"."PARTITION_KEY_VALS" ("PART_ID" BIGINT NOT NULL, "PART_KEY_VAL" VARCHAR(256), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."DB_PRIVS" ("DB_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "DB_ID" BIGINT, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "DB_PRIV" VARCHAR(128), "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."IDXS" ("INDEX_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "DEFERRED_REBUILD" CHAR(1) NOT NULL, "INDEX_HANDLER_CLASS" VARCHAR(4000), "INDEX_NAME" VARCHAR(128), "INDEX_TBL_ID" BIGINT, "LAST_ACCESS_TIME" INTEGER NOT NULL, "ORIG_TBL_ID" BIGINT, "SD_ID" BIGINT); + +CREATE TABLE "APP"."INDEX_PARAMS" ("INDEX_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000)); + +CREATE TABLE "APP"."PARTITIONS" ("PART_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "LAST_ACCESS_TIME" INTEGER 
NOT NULL, "PART_NAME" VARCHAR(767), "SD_ID" BIGINT, "TBL_ID" BIGINT); + +CREATE TABLE "APP"."SERDES" ("SERDE_ID" BIGINT NOT NULL, "NAME" VARCHAR(128), "SLIB" VARCHAR(4000), "DESCRIPTION" VARCHAR(4000), "SERIALIZER_CLASS" VARCHAR(4000), "DESERIALIZER_CLASS" VARCHAR(4000), SERDE_TYPE INTEGER); + +CREATE TABLE "APP"."PART_PRIVS" ("PART_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_PRIV" VARCHAR(128), "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."ROLE_MAP" ("ROLE_GRANT_ID" BIGINT NOT NULL, "ADD_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "ROLE_ID" BIGINT); + +CREATE TABLE "APP"."TYPES" ("TYPES_ID" BIGINT NOT NULL, "TYPE_NAME" VARCHAR(128), "TYPE1" VARCHAR(767), "TYPE2" VARCHAR(767)); + +CREATE TABLE "APP"."GLOBAL_PRIVS" ("USER_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "USER_PRIV" VARCHAR(128), "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."PARTITION_PARAMS" ("PART_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000)); + +CREATE TABLE "APP"."PARTITION_EVENTS" ( + "PART_NAME_ID" BIGINT NOT NULL, + "CAT_NAME" VARCHAR(256), + "DB_NAME" VARCHAR(128), + "EVENT_TIME" BIGINT NOT NULL, + "EVENT_TYPE" INTEGER NOT NULL, + "PARTITION_NAME" VARCHAR(767), + "TBL_NAME" VARCHAR(256) +); + +CREATE TABLE "APP"."COLUMNS" ("SD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(256), "COLUMN_NAME" VARCHAR(128) NOT NULL, "TYPE_NAME" VARCHAR(4000) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."ROLES" ("ROLE_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "OWNER_NAME" VARCHAR(128), "ROLE_NAME" VARCHAR(128)); + +CREATE TABLE "APP"."TBLS" ("TBL_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT NULL, "DB_ID" BIGINT, "LAST_ACCESS_TIME" INTEGER NOT NULL, "OWNER" VARCHAR(767), "OWNER_TYPE" VARCHAR(10), "RETENTION" INTEGER NOT NULL, "SD_ID" BIGINT, "TBL_NAME" VARCHAR(256), "TBL_TYPE" VARCHAR(128), "VIEW_EXPANDED_TEXT" LONG VARCHAR, "VIEW_ORIGINAL_TEXT" LONG VARCHAR, "IS_REWRITE_ENABLED" CHAR(1) NOT NULL DEFAULT 'N'); + +CREATE TABLE "APP"."PARTITION_KEYS" ("TBL_ID" BIGINT NOT NULL, "PKEY_COMMENT" VARCHAR(4000), "PKEY_NAME" VARCHAR(128) NOT NULL, "PKEY_TYPE" VARCHAR(767) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(767), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128), "AUTHORIZER" VARCHAR(128)); + +CREATE TABLE "APP"."SDS" ("SD_ID" BIGINT NOT NULL, "INPUT_FORMAT" VARCHAR(4000), "IS_COMPRESSED" CHAR(1) NOT NULL, "LOCATION" VARCHAR(4000), "NUM_BUCKETS" INTEGER NOT NULL, "OUTPUT_FORMAT" VARCHAR(4000), "SERDE_ID" BIGINT, "CD_ID" BIGINT, "IS_STOREDASSUBDIRECTORIES" CHAR(1) NOT NULL); + +CREATE TABLE "APP"."SEQUENCE_TABLE" ("SEQUENCE_NAME" VARCHAR(256) NOT NULL, "NEXT_VAL" BIGINT NOT NULL); + +CREATE TABLE "APP"."TAB_COL_STATS"( + "CAT_NAME" VARCHAR(256) NOT NULL, + "DB_NAME" VARCHAR(128) NOT NULL, + "TABLE_NAME" VARCHAR(256) NOT NULL, 
+ "COLUMN_NAME" VARCHAR(767) NOT NULL, + "COLUMN_TYPE" VARCHAR(128) NOT NULL, + "LONG_LOW_VALUE" BIGINT, + "LONG_HIGH_VALUE" BIGINT, + "DOUBLE_LOW_VALUE" DOUBLE, + "DOUBLE_HIGH_VALUE" DOUBLE, + "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), + "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000), + "NUM_DISTINCTS" BIGINT, + "NUM_NULLS" BIGINT NOT NULL, + "AVG_COL_LEN" DOUBLE, + "MAX_COL_LEN" BIGINT, + "NUM_TRUES" BIGINT, + "NUM_FALSES" BIGINT, + "LAST_ANALYZED" BIGINT, + "CS_ID" BIGINT NOT NULL, + "TBL_ID" BIGINT NOT NULL, + "BIT_VECTOR" BLOB +); + +CREATE TABLE "APP"."TABLE_PARAMS" ("TBL_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" CLOB); + +CREATE TABLE "APP"."BUCKETING_COLS" ("SD_ID" BIGINT NOT NULL, "BUCKET_COL_NAME" VARCHAR(256), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."TYPE_FIELDS" ("TYPE_NAME" BIGINT NOT NULL, "COMMENT" VARCHAR(256), "FIELD_NAME" VARCHAR(128) NOT NULL, "FIELD_TYPE" VARCHAR(767) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."NUCLEUS_TABLES" ("CLASS_NAME" VARCHAR(128) NOT NULL, "TABLE_NAME" VARCHAR(128) NOT NULL, "TYPE" VARCHAR(4) NOT NULL, "OWNER" VARCHAR(2) NOT NULL, "VERSION" VARCHAR(20) NOT NULL, "INTERFACE_NAME" VARCHAR(256) DEFAULT NULL); + +CREATE TABLE "APP"."SD_PARAMS" ("SD_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" CLOB); + +CREATE TABLE "APP"."SKEWED_STRING_LIST" ("STRING_LIST_ID" BIGINT NOT NULL); + +CREATE TABLE "APP"."SKEWED_STRING_LIST_VALUES" ("STRING_LIST_ID" BIGINT NOT NULL, "STRING_LIST_VALUE" VARCHAR(256), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."SKEWED_COL_NAMES" ("SD_ID" BIGINT NOT NULL, "SKEWED_COL_NAME" VARCHAR(256), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."SKEWED_COL_VALUE_LOC_MAP" ("SD_ID" BIGINT NOT NULL, "STRING_LIST_ID_KID" BIGINT NOT NULL, "LOCATION" VARCHAR(4000)); + +CREATE TABLE "APP"."SKEWED_VALUES" ("SD_ID_OID" BIGINT NOT NULL, "STRING_LIST_ID_EID" BIGINT NOT NULL, "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as identity (start with 1), "MASTER_KEY" VARCHAR(767)); + +CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767)); + +CREATE TABLE "APP"."PART_COL_STATS"( + "CAT_NAME" VARCHAR(256) NOT NULL, + "DB_NAME" VARCHAR(128) NOT NULL, + "TABLE_NAME" VARCHAR(256) NOT NULL, + "PARTITION_NAME" VARCHAR(767) NOT NULL, + "COLUMN_NAME" VARCHAR(767) NOT NULL, + "COLUMN_TYPE" VARCHAR(128) NOT NULL, + "LONG_LOW_VALUE" BIGINT, + "LONG_HIGH_VALUE" BIGINT, + "DOUBLE_LOW_VALUE" DOUBLE, + "DOUBLE_HIGH_VALUE" DOUBLE, + "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), + "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000), + "NUM_DISTINCTS" BIGINT, + "BIT_VECTOR" BLOB, + "NUM_NULLS" BIGINT NOT NULL, + "AVG_COL_LEN" DOUBLE, + "MAX_COL_LEN" BIGINT, + "NUM_TRUES" BIGINT, + "NUM_FALSES" BIGINT, + "LAST_ANALYZED" BIGINT, + "CS_ID" BIGINT NOT NULL, + "PART_ID" BIGINT NOT NULL +); + +CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255)); + +CREATE TABLE "APP"."FUNCS" ("FUNC_ID" BIGINT NOT NULL, "CLASS_NAME" VARCHAR(4000), "CREATE_TIME" INTEGER NOT NULL, "DB_ID" BIGINT, "FUNC_NAME" VARCHAR(128), "FUNC_TYPE" INTEGER NOT NULL, "OWNER_NAME" VARCHAR(128), "OWNER_TYPE" VARCHAR(10)); + +CREATE TABLE "APP"."FUNC_RU" ("FUNC_ID" BIGINT NOT NULL, "RESOURCE_TYPE" INTEGER NOT NULL, "RESOURCE_URI" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL); + +CREATE TABLE "APP"."NOTIFICATION_LOG" ( + "NL_ID" BIGINT NOT NULL, + 
"CAT_NAME" VARCHAR(256), + "DB_NAME" VARCHAR(128), + "EVENT_ID" BIGINT NOT NULL, + "EVENT_TIME" INTEGER NOT NULL, + "EVENT_TYPE" VARCHAR(32) NOT NULL, + "MESSAGE" CLOB, + "TBL_NAME" VARCHAR(256), + "MESSAGE_FORMAT" VARCHAR(16) +); + +CREATE TABLE "APP"."NOTIFICATION_SEQUENCE" ("NNI_ID" BIGINT NOT NULL, "NEXT_EVENT_ID" BIGINT NOT NULL); + +CREATE TABLE "APP"."KEY_CONSTRAINTS" ("CHILD_CD_ID" BIGINT, "CHILD_INTEGER_IDX" INTEGER, "CHILD_TBL_ID" BIGINT, "PARENT_CD_ID" BIGINT , "PARENT_INTEGER_IDX" INTEGER, "PARENT_TBL_ID" BIGINT NOT NULL, "POSITION" BIGINT NOT NULL, "CONSTRAINT_NAME" VARCHAR(400) NOT NULL, "CONSTRAINT_TYPE" SMALLINT NOT NULL, "UPDATE_RULE" SMALLINT, "DELETE_RULE" SMALLINT, "ENABLE_VALIDATE_RELY" SMALLINT NOT NULL, "DEFAULT_VALUE" VARCHAR(400)); + +CREATE TABLE "APP"."METASTORE_DB_PROPERTIES" ("PROPERTY_KEY" VARCHAR(255) NOT NULL, "PROPERTY_VALUE" VARCHAR(1000) NOT NULL, "DESCRIPTION" VARCHAR(1000)); + +CREATE TABLE "APP"."WM_RESOURCEPLAN" (RP_ID BIGINT NOT NULL, NAME VARCHAR(128) NOT NULL, QUERY_PARALLELISM INTEGER, STATUS VARCHAR(20) NOT NULL, DEFAULT_POOL_ID BIGINT); + +CREATE TABLE "APP"."WM_POOL" (POOL_ID BIGINT NOT NULL, RP_ID BIGINT NOT NULL, PATH VARCHAR(1024) NOT NULL, ALLOC_FRACTION DOUBLE, QUERY_PARALLELISM INTEGER, SCHEDULING_POLICY VARCHAR(1024)); + +CREATE TABLE "APP"."WM_TRIGGER" (TRIGGER_ID BIGINT NOT NULL, RP_ID BIGINT NOT NULL, NAME VARCHAR(128) NOT NULL, TRIGGER_EXPRESSION VARCHAR(1024), ACTION_EXPRESSION VARCHAR(1024), IS_IN_UNMANAGED INTEGER NOT NULL DEFAULT 0); + +CREATE TABLE "APP"."WM_POOL_TO_TRIGGER" (POOL_ID BIGINT NOT NULL, TRIGGER_ID BIGINT NOT NULL); + +CREATE TABLE "APP"."WM_MAPPING" (MAPPING_ID BIGINT NOT NULL, RP_ID BIGINT NOT NULL, ENTITY_TYPE VARCHAR(128) NOT NULL, ENTITY_NAME VARCHAR(128) NOT NULL, POOL_ID BIGINT, ORDERING INTEGER); + +CREATE TABLE "APP"."MV_CREATION_METADATA" ( + "MV_CREATION_METADATA_ID" BIGINT NOT NULL, + "CAT_NAME" VARCHAR(256) NOT NULL, + "DB_NAME" VARCHAR(128) NOT NULL, + "TBL_NAME" VARCHAR(256) NOT NULL, + "TXN_LIST" CLOB, + "MATERIALIZATION_TIME" BIGINT NOT NULL +); + +CREATE TABLE "APP"."MV_TABLES_USED" ( + "MV_CREATION_METADATA_ID" BIGINT NOT NULL, + "TBL_ID" BIGINT NOT NULL +); + +CREATE TABLE "APP"."CTLGS" ( + "CTLG_ID" BIGINT NOT NULL, + "NAME" VARCHAR(256) UNIQUE, + "DESC" VARCHAR(4000), + "LOCATION_URI" VARCHAR(4000) NOT NULL); + +-- ---------------------------------------------- +-- DML Statements +-- ---------------------------------------------- + +INSERT INTO "APP"."NOTIFICATION_SEQUENCE" ("NNI_ID", "NEXT_EVENT_ID") SELECT * FROM (VALUES (1,1)) tmp_table WHERE NOT EXISTS ( SELECT "NEXT_EVENT_ID" FROM "APP"."NOTIFICATION_SEQUENCE"); + +INSERT INTO "APP"."SEQUENCE_TABLE" ("SEQUENCE_NAME", "NEXT_VAL") SELECT * FROM (VALUES ('org.apache.hadoop.hive.metastore.model.MNotificationLog', 1)) tmp_table WHERE NOT EXISTS ( SELECT "NEXT_VAL" FROM "APP"."SEQUENCE_TABLE" WHERE "SEQUENCE_NAME" = 'org.apache.hadoop.hive.metastore.model.MNotificationLog'); + +-- ---------------------------------------------- +-- DDL Statements for indexes +-- ---------------------------------------------- + +CREATE UNIQUE INDEX "APP"."UNIQUEINDEX" ON "APP"."IDXS" ("INDEX_NAME", "ORIG_TBL_ID"); + +CREATE INDEX "APP"."TABLECOLUMNPRIVILEGEINDEX" ON "APP"."TBL_COL_PRIVS" ("AUTHORIZER", "TBL_ID", "COLUMN_NAME", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "TBL_COL_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."DBPRIVILEGEINDEX" ON "APP"."DB_PRIVS" ("AUTHORIZER", "DB_ID", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "DB_PRIV", "GRANTOR", 
"GRANTOR_TYPE"); + +CREATE INDEX "APP"."PCS_STATS_IDX" ON "APP"."PART_COL_STATS" ("CAT_NAME", "DB_NAME","TABLE_NAME","COLUMN_NAME","PARTITION_NAME"); + +CREATE INDEX "APP"."TAB_COL_STATS_IDX" ON "APP"."TAB_COL_STATS" ("CAT_NAME", "DB_NAME", "TABLE_NAME", "COLUMN_NAME"); + +CREATE INDEX "APP"."PARTPRIVILEGEINDEX" ON "APP"."PART_PRIVS" ("AUTHORIZER", "PART_ID", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "PART_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."ROLEENTITYINDEX" ON "APP"."ROLES" ("ROLE_NAME"); + +CREATE INDEX "APP"."TABLEPRIVILEGEINDEX" ON "APP"."TBL_PRIVS" ("AUTHORIZER", "TBL_ID", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "TBL_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."UNIQUETABLE" ON "APP"."TBLS" ("TBL_NAME", "DB_ID"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_DATABASE" ON "APP"."DBS" ("NAME", "CTLG_NAME"); + +CREATE UNIQUE INDEX "APP"."USERROLEMAPINDEX" ON "APP"."ROLE_MAP" ("PRINCIPAL_NAME", "ROLE_ID", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."GLOBALPRIVILEGEINDEX" ON "APP"."GLOBAL_PRIVS" ("AUTHORIZER", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "USER_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_TYPE" ON "APP"."TYPES" ("TYPE_NAME"); + +CREATE INDEX "APP"."PARTITIONCOLUMNPRIVILEGEINDEX" ON "APP"."PART_COL_PRIVS" ("AUTHORIZER", "PART_ID", "COLUMN_NAME", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "PART_COL_PRIV", "GRANTOR", "GRANTOR_TYPE"); + +CREATE UNIQUE INDEX "APP"."UNIQUEPARTITION" ON "APP"."PARTITIONS" ("PART_NAME", "TBL_ID"); + +CREATE UNIQUE INDEX "APP"."UNIQUEFUNCTION" ON "APP"."FUNCS" ("FUNC_NAME", "DB_ID"); + +CREATE INDEX "APP"."FUNCS_N49" ON "APP"."FUNCS" ("DB_ID"); + +CREATE INDEX "APP"."FUNC_RU_N49" ON "APP"."FUNC_RU" ("FUNC_ID"); + +CREATE INDEX "APP"."CONSTRAINTS_PARENT_TBL_ID_INDEX" ON "APP"."KEY_CONSTRAINTS"("PARENT_TBL_ID"); + +CREATE INDEX "APP"."CONSTRAINTS_CONSTRAINT_TYPE_INDEX" ON "APP"."KEY_CONSTRAINTS"("CONSTRAINT_TYPE"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_WM_RESOURCEPLAN" ON "APP"."WM_RESOURCEPLAN" ("NAME"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_WM_POOL" ON "APP"."WM_POOL" ("RP_ID", "PATH"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_WM_TRIGGER" ON "APP"."WM_TRIGGER" ("RP_ID", "NAME"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_WM_MAPPING" ON "APP"."WM_MAPPING" ("RP_ID", "ENTITY_TYPE", "ENTITY_NAME"); + +CREATE UNIQUE INDEX "APP"."MV_UNIQUE_TABLE" ON "APP"."MV_CREATION_METADATA" ("TBL_NAME", "DB_NAME"); + +CREATE UNIQUE INDEX "APP"."UNIQUE_CATALOG" ON "APP"."CTLGS" ("NAME"); + + +-- ---------------------------------------------- +-- DDL Statements for keys +-- ---------------------------------------------- + +-- primary/unique +ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_PK" PRIMARY KEY ("INDEX_ID"); + +ALTER TABLE "APP"."TBL_COL_PRIVS" ADD CONSTRAINT "TBL_COL_PRIVS_PK" PRIMARY KEY ("TBL_COLUMN_GRANT_ID"); + +ALTER TABLE "APP"."CDS" ADD CONSTRAINT "SQL110922153006460" PRIMARY KEY ("CD_ID"); + +ALTER TABLE "APP"."DB_PRIVS" ADD CONSTRAINT "DB_PRIVS_PK" PRIMARY KEY ("DB_GRANT_ID"); + +ALTER TABLE "APP"."INDEX_PARAMS" ADD CONSTRAINT "INDEX_PARAMS_PK" PRIMARY KEY ("INDEX_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."PARTITION_KEYS" ADD CONSTRAINT "PARTITION_KEY_PK" PRIMARY KEY ("TBL_ID", "PKEY_NAME"); + +ALTER TABLE "APP"."SEQUENCE_TABLE" ADD CONSTRAINT "SEQUENCE_TABLE_PK" PRIMARY KEY ("SEQUENCE_NAME"); + +ALTER TABLE "APP"."PART_PRIVS" ADD CONSTRAINT "PART_PRIVS_PK" PRIMARY KEY ("PART_GRANT_ID"); + +ALTER TABLE "APP"."SDS" ADD CONSTRAINT "SDS_PK" PRIMARY KEY ("SD_ID"); + +ALTER TABLE "APP"."SERDES" ADD CONSTRAINT 
"SERDES_PK" PRIMARY KEY ("SERDE_ID"); + +ALTER TABLE "APP"."COLUMNS" ADD CONSTRAINT "COLUMNS_PK" PRIMARY KEY ("SD_ID", "COLUMN_NAME"); + +ALTER TABLE "APP"."PARTITION_EVENTS" ADD CONSTRAINT "PARTITION_EVENTS_PK" PRIMARY KEY ("PART_NAME_ID"); + +ALTER TABLE "APP"."TYPE_FIELDS" ADD CONSTRAINT "TYPE_FIELDS_PK" PRIMARY KEY ("TYPE_NAME", "FIELD_NAME"); + +ALTER TABLE "APP"."ROLES" ADD CONSTRAINT "ROLES_PK" PRIMARY KEY ("ROLE_ID"); + +ALTER TABLE "APP"."TBL_PRIVS" ADD CONSTRAINT "TBL_PRIVS_PK" PRIMARY KEY ("TBL_GRANT_ID"); + +ALTER TABLE "APP"."SERDE_PARAMS" ADD CONSTRAINT "SERDE_PARAMS_PK" PRIMARY KEY ("SERDE_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."NUCLEUS_TABLES" ADD CONSTRAINT "NUCLEUS_TABLES_PK" PRIMARY KEY ("CLASS_NAME"); + +ALTER TABLE "APP"."TBLS" ADD CONSTRAINT "TBLS_PK" PRIMARY KEY ("TBL_ID"); + +ALTER TABLE "APP"."SD_PARAMS" ADD CONSTRAINT "SD_PARAMS_PK" PRIMARY KEY ("SD_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."DATABASE_PARAMS" ADD CONSTRAINT "DATABASE_PARAMS_PK" PRIMARY KEY ("DB_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."DBS" ADD CONSTRAINT "DBS_PK" PRIMARY KEY ("DB_ID"); + +ALTER TABLE "APP"."ROLE_MAP" ADD CONSTRAINT "ROLE_MAP_PK" PRIMARY KEY ("ROLE_GRANT_ID"); + +ALTER TABLE "APP"."GLOBAL_PRIVS" ADD CONSTRAINT "GLOBAL_PRIVS_PK" PRIMARY KEY ("USER_GRANT_ID"); + +ALTER TABLE "APP"."BUCKETING_COLS" ADD CONSTRAINT "BUCKETING_COLS_PK" PRIMARY KEY ("SD_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."SORT_COLS" ADD CONSTRAINT "SORT_COLS_PK" PRIMARY KEY ("SD_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."PARTITION_KEY_VALS" ADD CONSTRAINT "PARTITION_KEY_VALS_PK" PRIMARY KEY ("PART_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."TYPES" ADD CONSTRAINT "TYPES_PK" PRIMARY KEY ("TYPES_ID"); + +ALTER TABLE "APP"."COLUMNS_V2" ADD CONSTRAINT "SQL110922153006740" PRIMARY KEY ("CD_ID", "COLUMN_NAME"); + +ALTER TABLE "APP"."PART_COL_PRIVS" ADD CONSTRAINT "PART_COL_PRIVS_PK" PRIMARY KEY ("PART_COLUMN_GRANT_ID"); + +ALTER TABLE "APP"."PARTITION_PARAMS" ADD CONSTRAINT "PARTITION_PARAMS_PK" PRIMARY KEY ("PART_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."PARTITIONS" ADD CONSTRAINT "PARTITIONS_PK" PRIMARY KEY ("PART_ID"); + +ALTER TABLE "APP"."TABLE_PARAMS" ADD CONSTRAINT "TABLE_PARAMS_PK" PRIMARY KEY ("TBL_ID", "PARAM_KEY"); + +ALTER TABLE "APP"."SKEWED_STRING_LIST" ADD CONSTRAINT "SKEWED_STRING_LIST_PK" PRIMARY KEY ("STRING_LIST_ID"); + +ALTER TABLE "APP"."SKEWED_STRING_LIST_VALUES" ADD CONSTRAINT "SKEWED_STRING_LIST_VALUES_PK" PRIMARY KEY ("STRING_LIST_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."SKEWED_COL_NAMES" ADD CONSTRAINT "SKEWED_COL_NAMES_PK" PRIMARY KEY ("SD_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."SKEWED_COL_VALUE_LOC_MAP" ADD CONSTRAINT "SKEWED_COL_VALUE_LOC_MAP_PK" PRIMARY KEY ("SD_ID", "STRING_LIST_ID_KID"); + +ALTER TABLE "APP"."SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_PK" PRIMARY KEY ("SD_ID_OID", "INTEGER_IDX"); + +ALTER TABLE "APP"."TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_PK" PRIMARY KEY ("CS_ID"); + +ALTER TABLE "APP"."PART_COL_STATS" ADD CONSTRAINT "PART_COL_STATS_PK" PRIMARY KEY ("CS_ID"); + +ALTER TABLE "APP"."FUNCS" ADD CONSTRAINT "FUNCS_PK" PRIMARY KEY ("FUNC_ID"); + +ALTER TABLE "APP"."FUNC_RU" ADD CONSTRAINT "FUNC_RU_PK" PRIMARY KEY ("FUNC_ID", "INTEGER_IDX"); + +ALTER TABLE "APP"."NOTIFICATION_LOG" ADD CONSTRAINT "NOTIFICATION_LOG_PK" PRIMARY KEY ("NL_ID"); + +ALTER TABLE "APP"."NOTIFICATION_SEQUENCE" ADD CONSTRAINT "NOTIFICATION_SEQUENCE_PK" PRIMARY KEY ("NNI_ID"); + +ALTER TABLE "APP"."KEY_CONSTRAINTS" ADD CONSTRAINT "CONSTRAINTS_PK" PRIMARY KEY ("CONSTRAINT_NAME", "POSITION"); + +ALTER 
TABLE "APP"."METASTORE_DB_PROPERTIES" ADD CONSTRAINT "PROPERTY_KEY_PK" PRIMARY KEY ("PROPERTY_KEY"); + +ALTER TABLE "APP"."MV_CREATION_METADATA" ADD CONSTRAINT "MV_CREATION_METADATA_PK" PRIMARY KEY ("MV_CREATION_METADATA_ID"); + +ALTER TABLE "APP"."CTLGS" ADD CONSTRAINT "CTLG_PK" PRIMARY KEY ("CTLG_ID"); + + +-- foreign +ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_FK1" FOREIGN KEY ("ORIG_TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_FK2" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "IDXS_FK3" FOREIGN KEY ("INDEX_TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TBL_COL_PRIVS" ADD CONSTRAINT "TBL_COL_PRIVS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."DB_PRIVS" ADD CONSTRAINT "DB_PRIVS_FK1" FOREIGN KEY ("DB_ID") REFERENCES "APP"."DBS" ("DB_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."INDEX_PARAMS" ADD CONSTRAINT "INDEX_PARAMS_FK1" FOREIGN KEY ("INDEX_ID") REFERENCES "APP"."IDXS" ("INDEX_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITION_KEYS" ADD CONSTRAINT "PARTITION_KEYS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PART_PRIVS" ADD CONSTRAINT "PART_PRIVS_FK1" FOREIGN KEY ("PART_ID") REFERENCES "APP"."PARTITIONS" ("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SDS" ADD CONSTRAINT "SDS_FK1" FOREIGN KEY ("SERDE_ID") REFERENCES "APP"."SERDES" ("SERDE_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SDS" ADD CONSTRAINT "SDS_FK2" FOREIGN KEY ("CD_ID") REFERENCES "APP"."CDS" ("CD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."COLUMNS" ADD CONSTRAINT "COLUMNS_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TYPE_FIELDS" ADD CONSTRAINT "TYPE_FIELDS_FK1" FOREIGN KEY ("TYPE_NAME") REFERENCES "APP"."TYPES" ("TYPES_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TBL_PRIVS" ADD CONSTRAINT "TBL_PRIVS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SERDE_PARAMS" ADD CONSTRAINT "SERDE_PARAMS_FK1" FOREIGN KEY ("SERDE_ID") REFERENCES "APP"."SERDES" ("SERDE_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TBLS" ADD CONSTRAINT "TBLS_FK2" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TBLS" ADD CONSTRAINT "TBLS_FK1" FOREIGN KEY ("DB_ID") REFERENCES "APP"."DBS" ("DB_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."DBS" ADD CONSTRAINT "DBS_FK1" FOREIGN KEY ("CTLG_NAME") REFERENCES "APP"."CTLGS" ("NAME") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SD_PARAMS" ADD CONSTRAINT "SD_PARAMS_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."DATABASE_PARAMS" ADD CONSTRAINT "DATABASE_PARAMS_FK1" FOREIGN KEY ("DB_ID") REFERENCES "APP"."DBS" ("DB_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."ROLE_MAP" ADD CONSTRAINT "ROLE_MAP_FK1" FOREIGN KEY ("ROLE_ID") REFERENCES "APP"."ROLES" ("ROLE_ID") ON DELETE NO ACTION ON UPDATE NO 
ACTION; + +ALTER TABLE "APP"."BUCKETING_COLS" ADD CONSTRAINT "BUCKETING_COLS_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SORT_COLS" ADD CONSTRAINT "SORT_COLS_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITION_KEY_VALS" ADD CONSTRAINT "PARTITION_KEY_VALS_FK1" FOREIGN KEY ("PART_ID") REFERENCES "APP"."PARTITIONS" ("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."COLUMNS_V2" ADD CONSTRAINT "COLUMNS_V2_FK1" FOREIGN KEY ("CD_ID") REFERENCES "APP"."CDS" ("CD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PART_COL_PRIVS" ADD CONSTRAINT "PART_COL_PRIVS_FK1" FOREIGN KEY ("PART_ID") REFERENCES "APP"."PARTITIONS" ("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITION_PARAMS" ADD CONSTRAINT "PARTITION_PARAMS_FK1" FOREIGN KEY ("PART_ID") REFERENCES "APP"."PARTITIONS" ("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITIONS" ADD CONSTRAINT "PARTITIONS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PARTITIONS" ADD CONSTRAINT "PARTITIONS_FK2" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TABLE_PARAMS" ADD CONSTRAINT "TABLE_PARAMS_FK1" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_STRING_LIST_VALUES" ADD CONSTRAINT "SKEWED_STRING_LIST_VALUES_FK1" FOREIGN KEY ("STRING_LIST_ID") REFERENCES "APP"."SKEWED_STRING_LIST" ("STRING_LIST_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_COL_NAMES" ADD CONSTRAINT "SKEWED_COL_NAMES_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_COL_VALUE_LOC_MAP" ADD CONSTRAINT "SKEWED_COL_VALUE_LOC_MAP_FK1" FOREIGN KEY ("SD_ID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_COL_VALUE_LOC_MAP" ADD CONSTRAINT "SKEWED_COL_VALUE_LOC_MAP_FK2" FOREIGN KEY ("STRING_LIST_ID_KID") REFERENCES "APP"."SKEWED_STRING_LIST" ("STRING_LIST_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_FK1" FOREIGN KEY ("SD_ID_OID") REFERENCES "APP"."SDS" ("SD_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."SKEWED_VALUES" ADD CONSTRAINT "SKEWED_VALUES_FK2" FOREIGN KEY ("STRING_LIST_ID_EID") REFERENCES "APP"."SKEWED_STRING_LIST" ("STRING_LIST_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."TAB_COL_STATS" ADD CONSTRAINT "TAB_COL_STATS_FK" FOREIGN KEY ("TBL_ID") REFERENCES TBLS("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."PART_COL_STATS" ADD CONSTRAINT "PART_COL_STATS_FK" FOREIGN KEY ("PART_ID") REFERENCES PARTITIONS("PART_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."VERSION" ADD CONSTRAINT "VERSION_PK" PRIMARY KEY ("VER_ID"); + +ALTER TABLE "APP"."FUNCS" ADD CONSTRAINT "FUNCS_FK1" FOREIGN KEY ("DB_ID") REFERENCES "APP"."DBS" ("DB_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."FUNC_RU" ADD CONSTRAINT "FUNC_RU_FK1" FOREIGN KEY ("FUNC_ID") REFERENCES "APP"."FUNCS" ("FUNC_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_RESOURCEPLAN" ADD CONSTRAINT "WM_RESOURCEPLAN_PK" 
PRIMARY KEY ("RP_ID"); + +ALTER TABLE "APP"."WM_POOL" ADD CONSTRAINT "WM_POOL_PK" PRIMARY KEY ("POOL_ID"); + +ALTER TABLE "APP"."WM_POOL" ADD CONSTRAINT "WM_POOL_FK1" FOREIGN KEY ("RP_ID") REFERENCES "APP"."WM_RESOURCEPLAN" ("RP_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_RESOURCEPLAN" ADD CONSTRAINT "WM_RESOURCEPLAN_FK1" FOREIGN KEY ("DEFAULT_POOL_ID") REFERENCES "APP"."WM_POOL" ("POOL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_TRIGGER" ADD CONSTRAINT "WM_TRIGGER_PK" PRIMARY KEY ("TRIGGER_ID"); + +ALTER TABLE "APP"."WM_TRIGGER" ADD CONSTRAINT "WM_TRIGGER_FK1" FOREIGN KEY ("RP_ID") REFERENCES "APP"."WM_RESOURCEPLAN" ("RP_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_POOL_TO_TRIGGER" ADD CONSTRAINT "WM_POOL_TO_TRIGGER_FK1" FOREIGN KEY ("POOL_ID") REFERENCES "APP"."WM_POOL" ("POOL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_POOL_TO_TRIGGER" ADD CONSTRAINT "WM_POOL_TO_TRIGGER_FK2" FOREIGN KEY ("TRIGGER_ID") REFERENCES "APP"."WM_TRIGGER" ("TRIGGER_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_MAPPING" ADD CONSTRAINT "WM_MAPPING_PK" PRIMARY KEY ("MAPPING_ID"); + +ALTER TABLE "APP"."WM_MAPPING" ADD CONSTRAINT "WM_MAPPING_FK1" FOREIGN KEY ("RP_ID") REFERENCES "APP"."WM_RESOURCEPLAN" ("RP_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."WM_MAPPING" ADD CONSTRAINT "WM_MAPPING_FK2" FOREIGN KEY ("POOL_ID") REFERENCES "APP"."WM_POOL" ("POOL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."MV_TABLES_USED" ADD CONSTRAINT "MV_TABLES_USED_FK1" FOREIGN KEY ("MV_CREATION_METADATA_ID") REFERENCES "APP"."MV_CREATION_METADATA" ("MV_CREATION_METADATA_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."MV_TABLES_USED" ADD CONSTRAINT "MV_TABLES_USED_FK2" FOREIGN KEY ("TBL_ID") REFERENCES "APP"."TBLS" ("TBL_ID") ON DELETE NO ACTION ON UPDATE NO ACTION; + +ALTER TABLE "APP"."DBS" ADD CONSTRAINT "DBS_CTLG_FK" FOREIGN KEY ("CTLG_NAME") REFERENCES "APP"."CTLGS" ("NAME") ON DELETE NO ACTION ON UPDATE NO ACTION; + +-- ---------------------------------------------- +-- DDL Statements for checks +-- ---------------------------------------------- + +ALTER TABLE "APP"."IDXS" ADD CONSTRAINT "SQL110318025504980" CHECK (DEFERRED_REBUILD IN ('Y','N')); + +ALTER TABLE "APP"."SDS" ADD CONSTRAINT "SQL110318025505550" CHECK (IS_COMPRESSED IN ('Y','N')); + +-- ---------------------------- +-- Transaction and Lock Tables +-- ---------------------------- +CREATE TABLE TXNS ( + TXN_ID bigint PRIMARY KEY, + TXN_STATE char(1) NOT NULL, + TXN_STARTED bigint NOT NULL, + TXN_LAST_HEARTBEAT bigint NOT NULL, + TXN_USER varchar(128) NOT NULL, + TXN_HOST varchar(128) NOT NULL, + TXN_AGENT_INFO varchar(128), + TXN_META_INFO varchar(128), + TXN_HEARTBEAT_COUNT integer, + TXN_TYPE integer +); + +CREATE TABLE TXN_COMPONENTS ( + TC_TXNID bigint NOT NULL REFERENCES TXNS (TXN_ID), + TC_DATABASE varchar(128) NOT NULL, + TC_TABLE varchar(128), + TC_PARTITION varchar(767), + TC_OPERATION_TYPE char(1) NOT NULL, + TC_WRITEID bigint +); + +CREATE INDEX TC_TXNID_INDEX ON TXN_COMPONENTS (TC_TXNID); + +CREATE TABLE COMPLETED_TXN_COMPONENTS ( + CTC_TXNID bigint NOT NULL, + CTC_DATABASE varchar(128) NOT NULL, + CTC_TABLE varchar(256), + CTC_PARTITION varchar(767), + CTC_TIMESTAMP timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL, + CTC_WRITEID bigint, + CTC_UPDATE_DELETE char(1) NOT NULL +); + +CREATE INDEX COMPLETED_TXN_COMPONENTS_IDX ON COMPLETED_TXN_COMPONENTS (CTC_DATABASE, 
CTC_TABLE, CTC_PARTITION); + +CREATE TABLE NEXT_TXN_ID ( + NTXN_NEXT bigint NOT NULL +); +INSERT INTO NEXT_TXN_ID VALUES(1); + +CREATE TABLE HIVE_LOCKS ( + HL_LOCK_EXT_ID bigint NOT NULL, + HL_LOCK_INT_ID bigint NOT NULL, + HL_TXNID bigint NOT NULL, + HL_DB varchar(128) NOT NULL, + HL_TABLE varchar(128), + HL_PARTITION varchar(767), + HL_LOCK_STATE char(1) NOT NULL, + HL_LOCK_TYPE char(1) NOT NULL, + HL_LAST_HEARTBEAT bigint NOT NULL, + HL_ACQUIRED_AT bigint, + HL_USER varchar(128) NOT NULL, + HL_HOST varchar(128) NOT NULL, + HL_HEARTBEAT_COUNT integer, + HL_AGENT_INFO varchar(128), + HL_BLOCKEDBY_EXT_ID bigint, + HL_BLOCKEDBY_INT_ID bigint, + PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID) +); + +CREATE INDEX HL_TXNID_INDEX ON HIVE_LOCKS (HL_TXNID); + +CREATE TABLE NEXT_LOCK_ID ( + NL_NEXT bigint NOT NULL +); +INSERT INTO NEXT_LOCK_ID VALUES(1); + +CREATE TABLE COMPACTION_QUEUE ( + CQ_ID bigint PRIMARY KEY, + CQ_DATABASE varchar(128) NOT NULL, + CQ_TABLE varchar(128) NOT NULL, + CQ_PARTITION varchar(767), + CQ_STATE char(1) NOT NULL, + CQ_TYPE char(1) NOT NULL, + CQ_TBLPROPERTIES varchar(2048), + CQ_WORKER_ID varchar(128), + CQ_START bigint, + CQ_RUN_AS varchar(128), + CQ_HIGHEST_WRITE_ID bigint, + CQ_META_INFO varchar(2048) for bit data, + CQ_HADOOP_JOB_ID varchar(32) +); + +CREATE TABLE NEXT_COMPACTION_QUEUE_ID ( + NCQ_NEXT bigint NOT NULL +); +INSERT INTO NEXT_COMPACTION_QUEUE_ID VALUES(1); + +CREATE TABLE COMPLETED_COMPACTIONS ( + CC_ID bigint PRIMARY KEY, + CC_DATABASE varchar(128) NOT NULL, + CC_TABLE varchar(128) NOT NULL, + CC_PARTITION varchar(767), + CC_STATE char(1) NOT NULL, + CC_TYPE char(1) NOT NULL, + CC_TBLPROPERTIES varchar(2048), + CC_WORKER_ID varchar(128), + CC_START bigint, + CC_END bigint, + CC_RUN_AS varchar(128), + CC_HIGHEST_WRITE_ID bigint, + CC_META_INFO varchar(2048) for bit data, + CC_HADOOP_JOB_ID varchar(32) +); + +CREATE TABLE AUX_TABLE ( + MT_KEY1 varchar(128) NOT NULL, + MT_KEY2 bigint NOT NULL, + MT_COMMENT varchar(255), + PRIMARY KEY(MT_KEY1, MT_KEY2) +); + +--The first 4 cols make up a PK, but since WS_PARTITION is nullable we can't declare such a PK --This is a good candidate for an index-organized table +CREATE TABLE WRITE_SET ( + WS_DATABASE varchar(128) NOT NULL, + WS_TABLE varchar(128) NOT NULL, + WS_PARTITION varchar(767), + WS_TXNID bigint NOT NULL, + WS_COMMIT_ID bigint NOT NULL, + WS_OPERATION_TYPE char(1) NOT NULL +); + +CREATE TABLE TXN_TO_WRITE_ID ( + T2W_TXNID bigint NOT NULL, + T2W_DATABASE varchar(128) NOT NULL, + T2W_TABLE varchar(256) NOT NULL, + T2W_WRITEID bigint NOT NULL +); + +CREATE UNIQUE INDEX TBL_TO_TXN_ID_IDX ON TXN_TO_WRITE_ID (T2W_DATABASE, T2W_TABLE, T2W_TXNID); +CREATE UNIQUE INDEX TBL_TO_WRITE_ID_IDX ON TXN_TO_WRITE_ID (T2W_DATABASE, T2W_TABLE, T2W_WRITEID); + +CREATE TABLE NEXT_WRITE_ID ( + NWI_DATABASE varchar(128) NOT NULL, + NWI_TABLE varchar(256) NOT NULL, + NWI_NEXT bigint NOT NULL +); + +CREATE UNIQUE INDEX NEXT_WRITE_ID_IDX ON NEXT_WRITE_ID (NWI_DATABASE, NWI_TABLE); + +CREATE TABLE MIN_HISTORY_LEVEL ( + MHL_TXNID bigint NOT NULL, + MHL_MIN_OPEN_TXNID bigint NOT NULL, + PRIMARY KEY(MHL_TXNID) +); + +CREATE INDEX MIN_HISTORY_LEVEL_IDX ON MIN_HISTORY_LEVEL (MHL_MIN_OPEN_TXNID); + +CREATE TABLE MATERIALIZATION_REBUILD_LOCKS ( + MRL_TXN_ID BIGINT NOT NULL, + MRL_DB_NAME VARCHAR(128) NOT NULL, + MRL_TBL_NAME VARCHAR(256) NOT NULL, + MRL_LAST_HEARTBEAT BIGINT NOT NULL, + PRIMARY KEY(MRL_TXN_ID) +); + +CREATE TABLE "APP"."I_SCHEMA" ( + "SCHEMA_ID" bigint primary key, + "SCHEMA_TYPE" integer not null, + "NAME" varchar(256) unique, + 
"DB_ID" bigint references "APP"."DBS" ("DB_ID"), + "COMPATIBILITY" integer not null, + "VALIDATION_LEVEL" integer not null, + "CAN_EVOLVE" char(1) not null, + "SCHEMA_GROUP" varchar(256), + "DESCRIPTION" varchar(4000) +); + +CREATE TABLE "APP"."SCHEMA_VERSION" ( + "SCHEMA_VERSION_ID" bigint primary key, + "SCHEMA_ID" bigint references "APP"."I_SCHEMA" ("SCHEMA_ID"), + "VERSION" integer not null, + "CREATED_AT" bigint not null, + "CD_ID" bigint references "APP"."CDS" ("CD_ID"), + "STATE" integer not null, + "DESCRIPTION" varchar(4000), + "SCHEMA_TEXT" clob, + "FINGERPRINT" varchar(256), + "SCHEMA_VERSION_NAME" varchar(256), + "SERDE_ID" bigint references "APP"."SERDES" ("SERDE_ID") +); + +CREATE UNIQUE INDEX "APP"."UNIQUE_SCHEMA_VERSION" ON "APP"."SCHEMA_VERSION" ("SCHEMA_ID", "VERSION"); + +CREATE TABLE REPL_TXN_MAP ( + RTM_REPL_POLICY varchar(256) NOT NULL, + RTM_SRC_TXN_ID bigint NOT NULL, + RTM_TARGET_TXN_ID bigint NOT NULL, + PRIMARY KEY (RTM_REPL_POLICY, RTM_SRC_TXN_ID) +); + +CREATE TABLE "APP"."RUNTIME_STATS" ( + "RS_ID" bigint primary key, + "CREATE_TIME" integer not null, + "WEIGHT" integer not null, + "PAYLOAD" BLOB +); + +CREATE INDEX IDX_RUNTIME_STATS_CREATE_TIME ON RUNTIME_STATS(CREATE_TIME); + +-- ----------------------------------------------------------------- +-- Record schema version. Should be the last step in the init script +-- ----------------------------------------------------------------- +INSERT INTO "APP"."VERSION" (VER_ID, SCHEMA_VERSION, VERSION_COMMENT) VALUES (1, '3.1.0', 'Hive release version 3.1.0'); diff --git a/settings.gradle b/settings.gradle index e5023ce308..bbae341730 100644 --- a/settings.gradle +++ b/settings.gradle @@ -37,6 +37,7 @@ include 'spark3-extensions' include 'spark3-runtime' include 'pig' include 'hive-metastore' +include 'hiveberg' include 'nessie' include 'runtime' @@ -59,6 +60,7 @@ project(':spark3-extensions').name = 'iceberg-spark3-extensions' project(':spark3-runtime').name = 'iceberg-spark3-runtime' project(':pig').name = 'iceberg-pig' project(':hive-metastore').name = 'iceberg-hive-metastore' +project(':hiveberg').name = 'iceberg-hiveberg' project(':nessie').name = 'iceberg-nessie' project(':runtime').name = 'iceberg-runtime' diff --git a/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java b/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java index 5d90559486..b292aa6201 100644 --- a/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java +++ b/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java @@ -44,7 +44,7 @@ import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.hadoop.HadoopFileIO; import org.apache.iceberg.hadoop.Util; -import org.apache.iceberg.hive.legacy.LegacyHiveTable; +import org.apache.iceberg.hiveberg.LegacyHiveTable; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.orc.OrcRowFilterUtils; diff --git a/spark3/src/main/java/org/apache/iceberg/spark/source/SparkBatchScan.java b/spark3/src/main/java/org/apache/iceberg/spark/source/SparkBatchScan.java index df5e10dbf9..1cf16886eb 100644 --- a/spark3/src/main/java/org/apache/iceberg/spark/source/SparkBatchScan.java +++ b/spark3/src/main/java/org/apache/iceberg/spark/source/SparkBatchScan.java @@ -37,7 +37,7 @@ import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.hadoop.HadoopInputFile; import org.apache.iceberg.hadoop.Util; -import org.apache.iceberg.hive.legacy.LegacyHiveTable; +import 
org.apache.iceberg.hiveberg.LegacyHiveTable; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.orc.OrcRowFilterUtils; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; From 76bda7bab2d0b385e9238c80d0c2e41ad8449e8b Mon Sep 17 00:00:00 2001 From: Wenye Zhang Date: Fri, 21 Jan 2022 15:50:02 -0800 Subject: [PATCH 2/5] fix build --- build.gradle | 8 +- .../org/apache/iceberg/hive/HiveCatalogs.java | 9 - .../iceberg/hiveberg/DirectoryInfo.java | 46 --- .../iceberg/hiveberg/FileSystemUtils.java | 67 ---- .../iceberg/hiveberg/HiveExpressions.java | 335 ------------------ .../HiveSchemaWithPartnerVisitor.java | 137 ------- .../hiveberg/HiveTypeToIcebergType.java | 101 ------ .../apache/iceberg/hiveberg/HiveTypeUtil.java | 92 ----- .../iceberg/hiveberg/LegacyHiveCatalog.java | 100 ------ .../iceberg/hiveberg/LegacyHiveTable.java | 233 ------------ .../hiveberg/LegacyHiveTableOperations.java | 276 --------------- .../iceberg/hiveberg/LegacyHiveTableScan.java | 92 ----- .../hiveberg/LegacyHiveTableUtils.java | 218 ------------ .../hiveberg/MergeHiveSchemaWithAvro.java | 268 -------------- .../iceberg/hiveberg/DirectoryInfo.java | 37 +- .../iceberg/hiveberg/FileSystemUtils.java | 37 +- .../iceberg/hiveberg/HiveExpressions.java | 37 +- .../HiveSchemaWithPartnerVisitor.java | 23 +- .../hiveberg/HiveTypeToIcebergType.java | 23 +- .../apache/iceberg/hiveberg/HiveTypeUtil.java | 23 +- .../iceberg/hiveberg/LegacyHiveCatalog.java | 50 +-- .../iceberg/hiveberg/LegacyHiveTable.java | 37 +- .../hiveberg/LegacyHiveTableOperations.java | 37 +- .../iceberg/hiveberg/LegacyHiveTableScan.java | 37 +- .../hiveberg/LegacyHiveTableUtils.java | 37 +- .../hiveberg/MergeHiveSchemaWithAvro.java | 23 +- .../iceberg/hiveberg/HiveMetastoreTest.java | 1 + .../apache/iceberg/hiveberg/ScriptRunner.java | 31 +- .../iceberg/hiveberg/TestHiveExpressions.java | 23 +- .../iceberg/hiveberg/TestHiveMetastore.java | 6 +- .../hiveberg/TestHiveSchemaConversions.java | 23 +- .../hiveberg/TestLegacyHiveTableScan.java | 23 +- .../hiveberg/TestMergeHiveSchemaWithAvro.java | 23 +- 33 files changed, 263 insertions(+), 2250 deletions(-) delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java delete mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java diff --git a/build.gradle b/build.gradle index b1429c36af..577627d7df 100644 --- a/build.gradle +++ b/build.gradle @@ -497,11 +497,13 @@ project(':iceberg-hive-metastore') { 
project(':iceberg-hiveberg') { dependencies { - compile project(':iceberg-api') - compile project(':iceberg-core') compile project(':iceberg-hive-metastore') compileOnly "org.apache.avro:avro" + compileOnly("org.apache.hadoop:hadoop-client") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + } compileOnly("org.apache.hive:hive-metastore") { exclude group: 'org.apache.avro', module: 'avro' @@ -870,6 +872,7 @@ if (jdkVersion == '8') { compile project(':iceberg-arrow') compile project(':iceberg-hive-metastore') compile project(':iceberg-spark') + compile project(':iceberg-hiveberg') compileOnly "org.apache.avro:avro" compileOnly("org.apache.spark:spark-hive_2.11") { @@ -979,6 +982,7 @@ project(':iceberg-spark3') { compile project(':iceberg-arrow') compile project(':iceberg-hive-metastore') compile project(':iceberg-spark') + compile project(':iceberg-hiveberg') compileOnly "org.apache.avro:avro" compileOnly("org.apache.spark:spark-hive_2.12") { diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java index cb2859bea4..1aadecd835 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalogs.java @@ -23,7 +23,6 @@ import com.github.benmanes.caffeine.cache.Caffeine; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.iceberg.hiveberg.LegacyHiveCatalog; public final class HiveCatalogs { @@ -35,20 +34,12 @@ private HiveCatalogs() { private static final Cache HIVE_METADATA_PRESERVING_CATALOG_CACHE = Caffeine.newBuilder().build(); - private static final Cache LEGACY_CATALOG_CACHE = Caffeine.newBuilder().build(); - public static HiveCatalog loadCatalog(Configuration conf) { // metastore URI can be null in local mode String metastoreUri = conf.get(HiveConf.ConfVars.METASTOREURIS.varname, ""); return CATALOG_CACHE.get(metastoreUri, uri -> new HiveCatalog(conf)); } - public static HiveCatalog loadLegacyCatalog(Configuration conf) { - // metastore URI can be null in local mode - String metastoreUri = conf.get(HiveConf.ConfVars.METASTOREURIS.varname, ""); - return LEGACY_CATALOG_CACHE.get(metastoreUri, uri -> new LegacyHiveCatalog(conf)); - } - /** * @deprecated Use {@link #loadHiveMetadataPreservingCatalog(Configuration)} instead */ diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java deleted file mode 100644 index 20d499535d..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.iceberg.hiveberg; - -import org.apache.iceberg.FileFormat; -import org.apache.iceberg.StructLike; - - -/** - * Metadata for a data directory referenced by either a Hive table or a partition - */ -class DirectoryInfo { - private final String location; - private final FileFormat format; - private final StructLike partitionData; - - DirectoryInfo(String location, FileFormat format, StructLike partitionData) { - this.location = location; - this.format = format; - this.partitionData = partitionData; - } - - public String location() { - return location; - } - - public FileFormat format() { - return format; - } - - public StructLike partitionData() { - return partitionData; - } -} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java deleted file mode 100644 index 51f8ac81fd..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.iceberg.hiveberg; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.Arrays; -import java.util.List; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.iceberg.exceptions.RuntimeIOException; - -class FileSystemUtils { - - private FileSystemUtils() { - } - - /** - * Lists all non-hidden files for the given directory - */ - static List listFiles(String directory, Configuration conf) { - - final Path directoryPath = new Path(directory); - final FileStatus[] files; - try { - FileSystem fs = directoryPath.getFileSystem(conf); - files = fs.listStatus(directoryPath, HiddenPathFilter.INSTANCE); - } catch (IOException e) { - throw new UncheckedIOException("Error listing files for directory: " + directory, e); - } - return Arrays.asList(files); - } - - static boolean exists(String file, Configuration conf) { - final Path filePath = new Path(file); - try { - FileSystem fs = filePath.getFileSystem(conf); - return fs.exists(filePath); - } catch (IOException e) { - throw new RuntimeIOException(e, "Error determining if file or directory exists: %s", file); - } - } - - private enum HiddenPathFilter implements PathFilter { - INSTANCE; - - @Override - public boolean accept(Path path) { - return !path.getName().startsWith("_") && !path.getName().startsWith("."); - } - } -} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java deleted file mode 100644 index 419d48f418..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not 
use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.iceberg.hiveberg; - -import java.time.Instant; -import java.time.OffsetDateTime; -import java.time.ZoneOffset; -import java.time.temporal.ChronoUnit; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.iceberg.expressions.BoundPredicate; -import org.apache.iceberg.expressions.Expression; -import org.apache.iceberg.expressions.ExpressionVisitors; -import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.expressions.Literal; -import org.apache.iceberg.expressions.UnboundPredicate; -import org.apache.iceberg.expressions.UnboundTerm; -import org.apache.iceberg.types.Type; - - -class HiveExpressions { - - private static final Expression REMOVED = (Expression) () -> null; - - private HiveExpressions() { - } - - /** - * Simplifies the {@link Expression} so that it fits the restrictions of the expression that can be passed - * to the Hive metastore. For details about the simplification, please see {@link RemoveNonPartitionPredicates} and - * {@link RewriteUnsupportedOperators} - * @param expr The {@link Expression} to be simplified - * @param partitionColumnNames The set of partition column names - * @return TRUE if the simplified expression results in an always true expression or if there are no predicates on - * partition columns in the simplified expression, - * FALSE if the simplified expression results in an always false expression, - * otherwise returns the simplified expression - */ - static Expression simplifyPartitionFilter(Expression expr, Set partitionColumnNames) { - try { - // Pushing down NOTs is critical for the correctness of RemoveNonPartitionPredicates - // e.g. consider a predicate on a partition field (P) and a predicate on a non-partition field (NP) - // With simply ignoring NP, NOT(P and NP) will be written to NOT(P) - // However the correct behaviour is NOT(P and NP) => NOT(P) OR NOT(NP) => True - Expression notPushedDown = Expressions.rewriteNot(expr); - Expression partitionPredicatesOnly = ExpressionVisitors.visit(notPushedDown, - new RemoveNonPartitionPredicates(partitionColumnNames)); - if (partitionPredicatesOnly == REMOVED) { - return Expressions.alwaysTrue(); - } else { - return ExpressionVisitors.visit(partitionPredicatesOnly, new RewriteUnsupportedOperators()); - } - } catch (Exception e) { - throw new RuntimeException("Error while processing expression: " + expr, e); - } - } - - /** - * Converts an {@link Expression} into a filter string which can be passed to the Hive metastore - * - * It is expected that caller handles TRUE and FALSE expressions before calling this method. The given - * {@link Expressions} must also be passed through {@link #simplifyPartitionFilter(Expression, Set)} first to - * remove any unsupported predicates. 
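- * For example (an illustrative sketch; {@code datepartition} and {@code hour} are hypothetical partition columns): - * the expression {@code datepartition = '2020-01-01' AND hour > 10} is converted to the filter string - * {@code (( datepartition = '2020-01-01' ) AND ( hour > 10 ))}. - *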
- * @param expr The {@link Expression} to be converted into a filter string - * @return a filter string equivalent to the given {@link Expression} which can be passed to the Hive metastore - */ - static String toPartitionFilterString(Expression expr) { - return ExpressionVisitors.visit(expr, ExpressionToPartitionFilterString.get()); - } - - /** - * Removes any predicates on non-partition columns from the given {@link Expression} - */ - private static class RemoveNonPartitionPredicates extends ExpressionVisitors.ExpressionVisitor { - - private final Set partitionColumnNamesLowerCase; - - RemoveNonPartitionPredicates(Set partitionColumnNames) { - this.partitionColumnNamesLowerCase = - partitionColumnNames.stream().map(String::toLowerCase).collect(Collectors.toSet()); - } - - @Override - public Expression alwaysTrue() { - return Expressions.alwaysTrue(); - } - - @Override - public Expression alwaysFalse() { - return Expressions.alwaysFalse(); - } - - @Override - public Expression not(Expression result) { - return (result == REMOVED) ? REMOVED : Expressions.not(result); - } - - @Override - public Expression and(Expression leftResult, Expression rightResult) { - // if one of the children is a non partition predicate, we can ignore it as it will be applied as a post-scan - // filter - if (leftResult == REMOVED && rightResult == REMOVED) { - return REMOVED; - } else if (leftResult == REMOVED) { - return rightResult; - } else if (rightResult == REMOVED) { - return leftResult; - } else { - return Expressions.and(leftResult, rightResult); - } - } - - @Override - public Expression or(Expression leftResult, Expression rightResult) { - return (leftResult == REMOVED || rightResult == REMOVED) ? REMOVED : Expressions.or(leftResult, rightResult); - } - - @Override - public Expression predicate(BoundPredicate pred) { - throw new IllegalStateException("Bound predicate not expected: " + pred.getClass().getName()); - } - - @Override - public Expression predicate(UnboundPredicate pred) { - return (partitionColumnNamesLowerCase.contains(pred.ref().name().toLowerCase())) ? pred : REMOVED; - } - } - - /** - * Rewrites the {@link Expression} so that it fits the restrictions of the expression that can be passed - * to the Hive metastore. - * - * This visitor assumes that all predicates are on partition columns. Predicates on non-partition columns should be - * removed using {@link RemoveNonPartitionPredicates} before calling this visitor. It performs the following changes: - * 1. Rewrites NOT operators by inverting binary operators, negating unary literals and using De Morgan's laws - * e.g. NOT(value > 0 AND TRUE) => value <= 0 OR FALSE - * NOT(value < 0 OR value > 10) => value >= 0 AND value <= 10 - * 2. Removes IS NULL and IS NOT NULL predicates (Replaced with FALSE and TRUE respectively as partition column values - * are always non null for Hive) - * e.g. partitionColumn IS NULL => FALSE - * partitionColumn IS NOT NULL => TRUE - * 3. Expands IN and NOT IN operators into ORs of EQUAL operations and ANDs of NOT EQUAL operations respectively - * e.g. value IN (1, 2, 3) => value = 1 OR value = 2 OR value = 3 - * value NOT IN (1, 2, 3) => value != 1 AND value != 2 AND value != 3 - * 4. Removes any children TRUE and FALSE expressions. 
The checks to remove these are happening inside - * {@link Expressions#and(Expression, Expression)} and {@link Expressions#or(Expression, Expression)} - * (Note that the rewritten expression can still be TRUE or FALSE at the root and will have to be handled - * appropriately by the caller) - * - * For examples, take a look at the tests in {@code TestHiveExpressions} - */ - private static class RewriteUnsupportedOperators extends ExpressionVisitors.ExpressionVisitor<Expression> { - - @Override - public Expression alwaysTrue() { - return Expressions.alwaysTrue(); - } - - @Override - public Expression alwaysFalse() { - return Expressions.alwaysFalse(); - } - - @Override - public Expression not(Expression result) { - return result.negate(); - } - - @Override - public Expression and(Expression leftResult, Expression rightResult) { - return Expressions.and(leftResult, rightResult); - } - - @Override - public Expression or(Expression leftResult, Expression rightResult) { - return Expressions.or(leftResult, rightResult); - } - - <T> Expression in(UnboundTerm<T> term, List<Literal<T>> literals) { - Expression in = alwaysFalse(); - for (Literal<T> literal : literals) { - in = Expressions.or(in, Expressions.equal(term, literal.value())); - } - return in; - } - - <T> Expression notIn(UnboundTerm<T> term, List<Literal<T>> literals) { - Expression notIn = alwaysTrue(); - for (Literal<T> literal : literals) { - notIn = Expressions.and(notIn, Expressions.notEqual(term, literal.value())); - } - return notIn; - } - - @Override - public <T> Expression predicate(BoundPredicate<T> pred) { - throw new IllegalStateException("Bound predicate not expected: " + pred.getClass().getName()); - } - - @Override - public <T> Expression predicate(UnboundPredicate<T> pred) { - switch (pred.op()) { - case LT: - case LT_EQ: - case GT: - case GT_EQ: - case EQ: - case NOT_EQ: - return pred; - case IS_NULL: - return Expressions.alwaysFalse(); - case NOT_NULL: - return Expressions.alwaysTrue(); - case IN: - return in(pred.term(), pred.literals()); - case NOT_IN: - return notIn(pred.term(), pred.literals()); - case STARTS_WITH: - throw new UnsupportedOperationException("STARTS_WITH predicate not supported in partition filter " + - "expression. 
Please use a combination of greater than AND less than predicates instead."); - default: - throw new IllegalStateException("Unexpected predicate: " + pred.op()); - } - } - } - - private static class ExpressionToPartitionFilterString extends ExpressionVisitors.ExpressionVisitor { - private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); - private static final ExpressionToPartitionFilterString INSTANCE = new ExpressionToPartitionFilterString(); - - private ExpressionToPartitionFilterString() { - } - - static ExpressionToPartitionFilterString get() { - return INSTANCE; - } - - @Override - public String alwaysTrue() { - throw new IllegalStateException("TRUE literal not allowed in Hive partition filter string"); - } - - @Override - public String alwaysFalse() { - throw new IllegalStateException("FALSE literal not allowed in Hive partition filter string"); - } - - @Override - public String not(String result) { - throw new IllegalStateException("NOT operator not allowed in Hive partition filter string"); - } - - @Override - public String and(String leftResult, String rightResult) { - return String.format("((%s) AND (%s))", leftResult, rightResult); - } - - @Override - public String or(String leftResult, String rightResult) { - return String.format("((%s) OR (%s))", leftResult, rightResult); - } - - @Override - public String predicate(BoundPredicate pred) { - switch (pred.op()) { - case LT: - case LT_EQ: - case GT: - case GT_EQ: - case EQ: - case NOT_EQ: - return getBinaryExpressionString(pred); - default: - throw new IllegalStateException("Unexpected operator in Hive partition filter string: " + pred.op()); - } - } - - @Override - public String predicate(UnboundPredicate pred) { - throw new IllegalStateException("Unbound predicate not expected: " + pred.getClass().getName()); - } - - private String getBinaryExpressionString(BoundPredicate pred) { - String columnName = pred.ref().field().name(); - String opName = getOperationString(pred.op()); - String litValue = getLiteralValue(pred.asLiteralPredicate().literal(), pred.ref().type()); - return String.format("( %s %s %s )", columnName, opName, litValue); - } - - private String getOperationString(Expression.Operation op) { - switch (op) { - case LT: - return "<"; - case LT_EQ: - return "<="; - case GT: - return ">"; - case GT_EQ: - return ">="; - case EQ: - return "="; - case NOT_EQ: - return "!="; - default: - throw new IllegalStateException("Unexpected operator in Hive partition filter string: " + op); - } - } - - private String getLiteralValue(Literal lit, Type type) { - Object value = lit.value(); - if (type.typeId() == Type.TypeID.DATE) { - value = EPOCH.plus((Integer) value, ChronoUnit.DAYS).toLocalDate().toString(); - } - if (value instanceof String) { - String escapedString = ((String) value).replace("'", "\\'"); - return String.format("'%s'", escapedString); - } else { - return String.valueOf(value); - } - } - } -} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java deleted file mode 100644 index f77f19feaa..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.iceberg.hiveberg; - -import java.util.List; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; - -/** - * A Hive {@link TypeInfo} visitor with an accompanying partner schema - * - * This visitor traverses the Hive {@link TypeInfo} tree contiguously accessing the schema tree for the partner schema - * using {@link PartnerAccessor}. When visiting each type in the Hive tree, the implementation is also presented - * with the corresponding type from the partner schema, or else a {@code null} if no match was found. Matching - * behavior can be controlled by implementing the methods in {@link PartnerAccessor} - * - * @param
<P> type of partner schema - * @param <FP> type of the field representation in the partner schema - * @param <R> type of the resultant schema generated by the visitor - * @param <FR> type of the field representation in the resultant schema - */ -@SuppressWarnings("ClassTypeParameterName") -public abstract class HiveSchemaWithPartnerVisitor<P, FP, R, FR> { - - /** - * Methods to access types in the partner schema corresponding to types in the Hive schema being traversed - * - * @param
<P> type of partner schema - * @param <FP> type of the field representation in the partner schema - */ - public interface PartnerAccessor<P, FP> { - - FP fieldPartner(P partnerStruct, String fieldName); - - P fieldType(FP partnerField); - - P mapKeyPartner(P partnerMap); - - P mapValuePartner(P partnerMap); - - P listElementPartner(P partnerList); - - P unionObjectPartner(P partnerUnion, int ordinal); - } - - @SuppressWarnings("MethodTypeParameterName") - public static <P, FP, R, FR> R visit(TypeInfo typeInfo, P partner, HiveSchemaWithPartnerVisitor<P, FP, R, FR> visitor, - PartnerAccessor<P, FP> accessor) { - switch (typeInfo.getCategory()) { - case STRUCT: - StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - List<String> names = structTypeInfo.getAllStructFieldNames(); - List<FR> results = Lists.newArrayListWithExpectedSize(names.size()); - for (String name : names) { - TypeInfo fieldTypeInfo = structTypeInfo.getStructFieldTypeInfo(name); - FP fieldPartner = partner != null ? accessor.fieldPartner(partner, name) : null; - P fieldPartnerType = fieldPartner != null ? accessor.fieldType(fieldPartner) : null; - R result = visit(fieldTypeInfo, fieldPartnerType, visitor, accessor); - results.add(visitor.field(name, fieldTypeInfo, fieldPartner, result)); - } - return visitor.struct(structTypeInfo, partner, results); - - case LIST: - ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; - TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo(); - P elementPartner = partner != null ? accessor.listElementPartner(partner) : null; - R elementResult = visit(elementTypeInfo, elementPartner, visitor, accessor); - return visitor.list(listTypeInfo, partner, elementResult); - - case MAP: - MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; - P keyPartner = partner != null ? accessor.mapKeyPartner(partner) : null; - R keyResult = visit(mapTypeInfo.getMapKeyTypeInfo(), keyPartner, visitor, accessor); - P valuePartner = partner != null ? accessor.mapValuePartner(partner) : null; - R valueResult = visit(mapTypeInfo.getMapValueTypeInfo(), valuePartner, visitor, accessor); - return visitor.map(mapTypeInfo, partner, keyResult, valueResult); - - case PRIMITIVE: - return visitor.primitive((PrimitiveTypeInfo) typeInfo, partner); - - case UNION: - UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; - List<TypeInfo> allAlternatives = unionTypeInfo.getAllUnionObjectTypeInfos(); - List<R> unionResults = Lists.newArrayListWithExpectedSize(allAlternatives.size()); - for (int i = 0; i < allAlternatives.size(); i++) { - P unionObjectPartner = partner != null ? 
accessor.unionObjectPartner(partner, i) : null; - R result = visit(allAlternatives.get(i), unionObjectPartner, visitor, accessor); - unionResults.add(result); - } - return visitor.union(unionTypeInfo, partner, unionResults); - - default: - throw new UnsupportedOperationException(typeInfo + " not supported"); - } - } - - public R struct(StructTypeInfo struct, P partner, List fieldResults) { - return null; - } - - public FR field(String name, TypeInfo field, FP partner, R fieldResult) { - return null; - } - - public R list(ListTypeInfo list, P partner, R elementResult) { - return null; - } - - public R map(MapTypeInfo map, P partner, R keyResult, R valueResult) { - return null; - } - - public R union(UnionTypeInfo union, P partner, List results) { - return null; - } - - public R primitive(PrimitiveTypeInfo primitive, P partner) { - return null; - } -} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java deleted file mode 100644 index a5f043fe26..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.iceberg.hiveberg; - -import java.util.List; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; - - -public class HiveTypeToIcebergType extends HiveTypeUtil.HiveSchemaVisitor { - private static final String UNION_TO_STRUCT_CONVERSION_PREFIX = "field"; - private int nextId = 1; - - @Override - public Type struct(StructTypeInfo struct, List names, List fieldResults) { - List fields = Lists.newArrayListWithExpectedSize(fieldResults.size()); - for (int i = 0; i < names.size(); i++) { - fields.add(Types.NestedField.optional(allocateId(), names.get(i), fieldResults.get(i))); - } - return Types.StructType.of(fields); - } - - @Override - public Type map(MapTypeInfo map, Type keyResult, Type valueResult) { - return Types.MapType.ofOptional(allocateId(), allocateId(), keyResult, valueResult); - } - - @Override - public Type list(ListTypeInfo list, Type elementResult) { - return Types.ListType.ofOptional(allocateId(), elementResult); - } - - // Mimic the struct call behavior to construct a union converted struct type - @Override - public Type union(UnionTypeInfo union, List unionResults) { - List fields = Lists.newArrayListWithExpectedSize(unionResults.size() + 1); - fields.add(Types.NestedField.required(allocateId(), "tag", Types.IntegerType.get())); - for (int i = 0; i < unionResults.size(); i++) { - fields.add(Types.NestedField.optional(allocateId(), UNION_TO_STRUCT_CONVERSION_PREFIX + i, unionResults.get(i))); - } - return Types.StructType.of(fields); - } - - @Override - public Type primitive(PrimitiveTypeInfo primitive) { - switch (primitive.getPrimitiveCategory()) { - case FLOAT: - return Types.FloatType.get(); - case DOUBLE: - return Types.DoubleType.get(); - case BOOLEAN: - return Types.BooleanType.get(); - case BYTE: - case SHORT: - case INT: - return Types.IntegerType.get(); - case LONG: - return Types.LongType.get(); - case CHAR: - case VARCHAR: - case STRING: - return Types.StringType.get(); - case BINARY: - return Types.BinaryType.get(); - case DECIMAL: - DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitive; - return Types.DecimalType.of(decimalTypeInfo.precision(), decimalTypeInfo.scale()); - case TIMESTAMP: - return Types.TimestampType.withoutZone(); - case DATE: - return Types.DateType.get(); - default: - throw new UnsupportedOperationException("Unsupported primitive type " + primitive); - } - } - - private int allocateId() { - int current = nextId; - nextId += 1; - return current; - } -} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java deleted file mode 100644 index 5603d6d46e..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.iceberg.hiveberg; - -import java.util.List; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.types.Type; - - -public class HiveTypeUtil { - private HiveTypeUtil() { - } - - public static Type convert(TypeInfo typeInfo) { - return HiveTypeUtil.visit(typeInfo, new HiveTypeToIcebergType()); - } - - public static <T> T visit(TypeInfo typeInfo, HiveSchemaVisitor<T> visitor) { - switch (typeInfo.getCategory()) { - case STRUCT: - final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - List<String> names = structTypeInfo.getAllStructFieldNames(); - List<T> results = Lists.newArrayListWithExpectedSize(names.size()); - for (String name : names) { - results.add(visit(structTypeInfo.getStructFieldTypeInfo(name), visitor)); - } - return visitor.struct(structTypeInfo, names, results); - - case UNION: - final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; - List<T> unionResults = Lists.newArrayListWithExpectedSize(unionTypeInfo.getAllUnionObjectTypeInfos().size()); - for (TypeInfo unionObjectTypeInfo : unionTypeInfo.getAllUnionObjectTypeInfos()) { - unionResults.add(visit(unionObjectTypeInfo, visitor)); - } - return visitor.union(unionTypeInfo, unionResults); - - case LIST: - ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; - return visitor.list(listTypeInfo, visit(listTypeInfo.getListElementTypeInfo(), visitor)); - - case MAP: - final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; - return visitor.map(mapTypeInfo, - visit(mapTypeInfo.getMapKeyTypeInfo(), visitor), - visit(mapTypeInfo.getMapValueTypeInfo(), visitor)); - - default: - final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; - return visitor.primitive(primitiveTypeInfo); - } - } - - public static class HiveSchemaVisitor<T> { - public T struct(StructTypeInfo struct, List<String> names, List<T> fieldResults) { - return null; - } - - public T list(ListTypeInfo list, T elementResult) { - return null; - } - - public T map(MapTypeInfo map, T keyResult, T valueResult) { - return null; - } - - public T union(UnionTypeInfo union, List<T> unionResults) { - return null; - } - - public T primitive(PrimitiveTypeInfo primitive) { - return null; - } - } -} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java deleted file mode 100644 index e4af44917d..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.iceberg.hiveberg; - -import java.util.Map; -import org.apache.hadoop.conf.Configuration; -import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.Schema; -import org.apache.iceberg.Table; -import org.apache.iceberg.TableOperations; -import org.apache.iceberg.Transaction; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.exceptions.NoSuchTableException; -import org.apache.iceberg.hive.HiveCatalog; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * A {@link HiveCatalog} which uses Hive metadata to read tables. Features like time travel, snapshot isolation and - * incremental computation are not supported along with any WRITE operations to either the data or metadata. - */ -public class LegacyHiveCatalog extends HiveCatalog { - - private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveCatalog.class); - - public LegacyHiveCatalog(Configuration conf) { - super(conf); - } - - @Override - @SuppressWarnings("CatchBlockLogException") - public Table loadTable(TableIdentifier identifier) { - if (isValidIdentifier(identifier)) { - TableOperations ops = newTableOps(identifier); - if (ops.current() == null) { - throw new NoSuchTableException("Table does not exist: %s", identifier); - } - - return new LegacyHiveTable(ops, fullTableName(name(), identifier)); - } else if (isValidMetadataIdentifier(identifier)) { - throw new UnsupportedOperationException( - "Metadata views not supported for Hive tables without Iceberg metadata. Table: " + identifier); - } else { - throw new NoSuchTableException("Invalid table identifier: %s", identifier); - } - } - - @Override - public TableOperations newTableOps(TableIdentifier tableIdentifier) { - String dbName = tableIdentifier.namespace().level(0); - String tableName = tableIdentifier.name(); - return new LegacyHiveTableOperations(conf(), clientPool(), dbName, tableName); - } - - @Override - public boolean dropTable(TableIdentifier identifier, boolean purge) { - throw new UnsupportedOperationException( - "Dropping tables not supported through legacy Hive catalog. Table: " + identifier); - } - - @Override - public void renameTable(TableIdentifier from, TableIdentifier to) { - throw new UnsupportedOperationException( - "Renaming tables not supported through legacy Hive catalog. From: " + from + " To: " + to); - } - - @Override - public Table createTable(TableIdentifier identifier, Schema schema, PartitionSpec spec, String location, - Map properties) { - throw new UnsupportedOperationException( - "Creating tables not supported through legacy Hive catalog. Table: " + identifier); - } - - @Override - public Transaction newCreateTableTransaction(TableIdentifier identifier, Schema schema, PartitionSpec spec, - String location, Map properties) { - throw new UnsupportedOperationException( - "Creating tables not supported through legacy Hive catalog. 
Table: " + identifier); - } - - @Override - public Transaction newReplaceTableTransaction(TableIdentifier identifier, Schema schema, PartitionSpec spec, - String location, Map properties, boolean orCreate) { - throw new UnsupportedOperationException( - "Replacing tables not supported through legacy Hive catalog. Table: " + identifier); - } -} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java deleted file mode 100644 index 69e8794790..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.iceberg.hiveberg; - -import java.util.List; -import java.util.Map; -import org.apache.iceberg.AppendFiles; -import org.apache.iceberg.DeleteFiles; -import org.apache.iceberg.ExpireSnapshots; -import org.apache.iceberg.HasTableOperations; -import org.apache.iceberg.HistoryEntry; -import org.apache.iceberg.ManageSnapshots; -import org.apache.iceberg.OverwriteFiles; -import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.ReplacePartitions; -import org.apache.iceberg.ReplaceSortOrder; -import org.apache.iceberg.RewriteFiles; -import org.apache.iceberg.RewriteManifests; -import org.apache.iceberg.Rollback; -import org.apache.iceberg.RowDelta; -import org.apache.iceberg.Schema; -import org.apache.iceberg.Snapshot; -import org.apache.iceberg.SortOrder; -import org.apache.iceberg.Table; -import org.apache.iceberg.TableOperations; -import org.apache.iceberg.TableScan; -import org.apache.iceberg.Transaction; -import org.apache.iceberg.UpdateLocation; -import org.apache.iceberg.UpdatePartitionSpec; -import org.apache.iceberg.UpdateProperties; -import org.apache.iceberg.UpdateSchema; -import org.apache.iceberg.encryption.EncryptionManager; -import org.apache.iceberg.io.FileIO; -import org.apache.iceberg.io.LocationProvider; - - -/** - * A {@link Table} which uses Hive table/partition metadata to perform scans using {@link LegacyHiveTableScan}. - * This table does not provide any time travel, snapshot isolation, incremental computation benefits. - * It also does not allow any WRITE operations to either the data or metadata. 
- */ -public class LegacyHiveTable implements Table, HasTableOperations { - private final TableOperations ops; - private final String name; - - protected LegacyHiveTable(TableOperations ops, String name) { - this.ops = ops; - this.name = name; - } - - @Override - public TableOperations operations() { - return ops; - } - - @Override - public void refresh() { - ops.refresh(); - } - - @Override - public TableScan newScan() { - return new LegacyHiveTableScan(ops, this); - } - - @Override - public Schema schema() { - return ops.current().schema(); - } - - @Override - public PartitionSpec spec() { - return ops.current().spec(); - } - - @Override - public Map specs() { - throw new UnsupportedOperationException( - "Multiple partition specs not supported for Hive tables without Iceberg metadata"); - } - - @Override - public SortOrder sortOrder() { - throw new UnsupportedOperationException("Sort order not supported for Hive tables without Iceberg metadata"); - } - - @Override - public Map sortOrders() { - throw new UnsupportedOperationException("Sort orders not supported for Hive tables without Iceberg metadata"); - } - - @Override - public Map properties() { - return ops.current().properties(); - } - - @Override - public String location() { - return ops.current().location(); - } - - @Override - public Snapshot currentSnapshot() { - throw new UnsupportedOperationException("Snapshots not supported for Hive tables without Iceberg metadata"); - } - - @Override - public Snapshot snapshot(long snapshotId) { - throw new UnsupportedOperationException("Snapshots not supported for Hive tables without Iceberg metadata"); - } - - @Override - public Iterable snapshots() { - throw new UnsupportedOperationException("Snapshots not supported for Hive tables without Iceberg metadata"); - } - - @Override - public List history() { - throw new UnsupportedOperationException("History not available for Hive tables without Iceberg metadata"); - } - - @Override - public UpdateSchema updateSchema() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public UpdatePartitionSpec updateSpec() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public UpdateProperties updateProperties() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public ReplaceSortOrder replaceSortOrder() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public UpdateLocation updateLocation() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public AppendFiles newAppend() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public RewriteFiles newRewrite() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public RewriteManifests rewriteManifests() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public OverwriteFiles newOverwrite() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public RowDelta newRowDelta() { - throw new 
UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public ReplacePartitions newReplacePartitions() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public DeleteFiles newDelete() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public ExpireSnapshots expireSnapshots() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public Rollback rollback() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public ManageSnapshots manageSnapshots() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public Transaction newTransaction() { - throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata"); - } - - @Override - public FileIO io() { - return ops.io(); - } - - @Override - public EncryptionManager encryption() { - return ops.encryption(); - } - - @Override - public LocationProvider locationProvider() { - return ops.locationProvider(); - } - - @Override - public String toString() { - return name; - } -} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java deleted file mode 100644 index fdd540ef65..0000000000 --- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java
deleted file mode 100644
index fdd540ef65..0000000000
--- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.iceberg.hiveberg;
-
-import java.time.LocalDate;
-import java.time.LocalDateTime;
-import java.time.ZoneOffset;
-import java.time.format.DateTimeFormatter;
-import java.time.format.DateTimeFormatterBuilder;
-import java.time.temporal.ChronoField;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.iceberg.BaseMetastoreTableOperations;
-import org.apache.iceberg.DataFile;
-import org.apache.iceberg.DataFiles;
-import org.apache.iceberg.FileFormat;
-import org.apache.iceberg.Metrics;
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.Schema;
-import org.apache.iceberg.StructLike;
-import org.apache.iceberg.TableMetadata;
-import org.apache.iceberg.TableProperties;
-import org.apache.iceberg.data.GenericRecord;
-import org.apache.iceberg.expressions.Binder;
-import org.apache.iceberg.expressions.Bound;
-import org.apache.iceberg.expressions.Expression;
-import org.apache.iceberg.expressions.Expressions;
-import org.apache.iceberg.hadoop.HadoopFileIO;
-import org.apache.iceberg.hive.HiveClientPool;
-import org.apache.iceberg.io.FileIO;
-import org.apache.iceberg.io.LocationProvider;
-import org.apache.iceberg.mapping.MappingUtil;
-import org.apache.iceberg.mapping.NameMappingParser;
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
-import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
-import org.apache.iceberg.relocated.com.google.common.collect.Maps;
-import org.apache.iceberg.types.Types;
-import org.apache.thrift.TException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-public class LegacyHiveTableOperations extends BaseMetastoreTableOperations {
-
-  private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveTableOperations.class);
-
-  private final HiveClientPool metaClients;
-  private final String databaseName;
-  private final String tableName;
-  private final Configuration conf;
-
-  private FileIO fileIO;
-
-  protected LegacyHiveTableOperations(Configuration conf, HiveClientPool metaClients, String database, String table) {
-    this.conf = conf;
-    this.metaClients = metaClients;
-    this.databaseName = database;
-    this.tableName = table;
-  }
-
-  @Override
-  public FileIO io() {
-    if (fileIO == null) {
-      fileIO = new HadoopFileIO(conf);
-    }
-
-    return fileIO;
-  }
-
-  @Override
-  protected void doRefresh() {
-    try {
-      org.apache.hadoop.hive.metastore.api.Table hiveTable =
-          metaClients.run(client -> client.getTable(databaseName, tableName));
-
-      Schema schema = LegacyHiveTableUtils.getSchema(hiveTable);
-      PartitionSpec spec = LegacyHiveTableUtils.getPartitionSpec(hiveTable, schema);
-
-      Map<String, String> tableProperties = Maps.newHashMap(LegacyHiveTableUtils.getTableProperties(hiveTable));
-      // Provide a case-insensitive name mapping for Hive tables
-      tableProperties.put(TableProperties.DEFAULT_NAME_MAPPING,
-          NameMappingParser.toJson(MappingUtil.create(schema, false)));
-      TableMetadata metadata = TableMetadata.newTableMetadataWithoutFreshIds(schema, spec,
-          hiveTable.getSd().getLocation(), tableProperties);
-      setCurrentMetadata(metadata);
-    } catch (TException e) {
-      String errMsg = String.format("Failed to get table info from metastore %s.%s", databaseName, tableName);
-      throw new RuntimeException(errMsg, e);
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-      throw new RuntimeException("Interrupted during refresh", e);
-    }
-    setShouldRefresh(false);
-  }
-
-  /**
-   * Returns an {@link Iterable} of {@link Iterable}s of {@link DataFile}s which belong to the current table and
-   * match the partition predicates from the given expression.
-   *
-   * Each element in the outer {@link Iterable} maps to an {@link Iterable} of {@link DataFile}s originating from the
-   * same directory
-   */
-  Iterable<Iterable<DataFile>> getFilesByFilter(Expression expression) {
-    Iterable<DirectoryInfo> matchingDirectories;
-    if (current().spec().fields().isEmpty()) {
-      matchingDirectories = ImmutableList.of(getDirectoryInfo());
-    } else {
-      matchingDirectories = getDirectoryInfosByFilter(expression);
-    }
-
-    Iterable<Iterable<DataFile>> filesPerDirectory = Iterables.transform(
-        matchingDirectories,
-        directory -> {
-          List<FileStatus> files;
-          if (FileSystemUtils.exists(directory.location(), conf)) {
-            files = FileSystemUtils.listFiles(directory.location(), conf);
-          } else {
-            LOG.warn("Cannot find directory: {}. Skipping.", directory.location());
-            files = ImmutableList.of();
-          }
-          return Iterables.transform(
-              files,
-              file -> createDataFile(file, current().spec(), directory.partitionData(), directory.format())
-          );
-        });
-
-    // Note that we return an Iterable of Iterables here so that the TableScan can process iterables of individual
-    // directories in parallel, resulting in a parallel file listing
-    return filesPerDirectory;
-  }
-
-  private DirectoryInfo getDirectoryInfo() {
-    Preconditions.checkArgument(current().spec().fields().isEmpty(),
-        "getDirectoryInfo only allowed for unpartitioned tables");
-    try {
-      org.apache.hadoop.hive.metastore.api.Table hiveTable =
-          metaClients.run(client -> client.getTable(databaseName, tableName));
-
-      return LegacyHiveTableUtils.toDirectoryInfo(hiveTable);
-    } catch (TException e) {
-      String errMsg = String.format("Failed to get table info for %s.%s from metastore", databaseName, tableName);
-      throw new RuntimeException(errMsg, e);
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-      throw new RuntimeException("Interrupted in call to getDirectoryInfo", e);
-    }
-  }
-
-  private List<DirectoryInfo> getDirectoryInfosByFilter(Expression expression) {
-    Preconditions.checkArgument(!current().spec().fields().isEmpty(),
-        "getDirectoryInfosByFilter only allowed for partitioned tables");
-    try {
-      LOG.info("Fetching partitions for {}.{} with expression: {}", databaseName, tableName, expression);
-      Set<String> partitionColumnNames = current().spec()
-          .identitySourceIds()
-          .stream()
-          .map(id -> current().schema().findColumnName(id))
-          .collect(Collectors.toSet());
-      Expression simplified = HiveExpressions.simplifyPartitionFilter(expression, partitionColumnNames);
-      Types.StructType partitionSchema = current().spec().partitionType();
-      LOG.info("Simplified expression for {}.{} to {}", databaseName, tableName, simplified);
-
-      List<Partition> partitions;
-      Expression boundExpression;
-      if (simplified.equals(Expressions.alwaysFalse())) {
-        // If simplifyPartitionFilter returns FALSE, no partitions are going to match the filter expression
-        partitions = ImmutableList.of();
-      } else if (simplified.equals(Expressions.alwaysTrue())) {
-        // If simplifyPartitionFilter returns TRUE, all partitions are going to match the filter expression
-        partitions = metaClients.run(client -> client.listPartitionsByFilter(
-            databaseName, tableName, null, (short) -1));
-      } else {
-        boundExpression = Binder.bind(partitionSchema, simplified, false);
-        String partitionFilterString = HiveExpressions.toPartitionFilterString(boundExpression);
-        LOG.info("Listing partitions for {}.{} with filter string: {}", databaseName, tableName, partitionFilterString);
-        try {
-          // First try the HMS API call to get the filtered partitions.
-          partitions = metaClients.run(
-              client -> client.listPartitionsByFilter(databaseName, tableName, partitionFilterString, (short) -1));
-        } catch (MetaException e) {
-          // If the HMS call above fails, fall back to filtering the partitions ourselves: evaluate every
-          // partition returned by HMS against the boundExpression and keep only those that evaluate to true.
-          List<Partition> allPartitions = metaClients.run(
-              client -> client.listPartitionsByFilter(databaseName, tableName, null, (short) -1));
-          partitions = allPartitions.stream().filter(partition -> {
-            GenericRecord record = GenericRecord.create(partitionSchema);
-            for (int i = 0; i < record.size(); i++) {
-              String value = partition.getValues().get(i);
-              switch (partitionSchema.fields().get(i).type().typeId()) {
-                case DATE:
-                  record.set(i,
-                      (int) LocalDate.parse(value).toEpochDay());
-                  break;
-                case TIMESTAMP:
-                  // This format appears to match the string literal form of Hive timestamp partition values
-                  record.set(i,
-                      LocalDateTime.parse(value,
-                          new DateTimeFormatterBuilder()
-                              .parseLenient()
-                              .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
-                              .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
-                              .toFormatter())
-                          .toInstant(ZoneOffset.UTC).toEpochMilli() * 1000);
-                  break;
-                default:
-                  record.set(i, partition.getValues().get(i));
-                  break;
-              }
-            }
-            return ((Bound<Boolean>) boundExpression).eval(record);
-          }).collect(Collectors.toList());
-        }
-      }
-
-      return LegacyHiveTableUtils.toDirectoryInfos(partitions, current().spec());
-    } catch (TException e) {
-      String errMsg = String.format("Failed to get partition info for %s.%s and expression %s from metastore",
-          databaseName, tableName, expression);
-      throw new RuntimeException(errMsg, e);
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-      throw new RuntimeException("Interrupted in call to getPartitionsByFilter", e);
-    }
-  }
-
-  private static DataFile createDataFile(FileStatus fileStatus, PartitionSpec partitionSpec, StructLike partitionData,
-                                         FileFormat format) {
-    DataFiles.Builder builder = DataFiles.builder(partitionSpec)
-        .withPath(fileStatus.getPath().toString())
-        .withFormat(format)
-        .withFileSizeInBytes(fileStatus.getLen())
-        .withMetrics(new Metrics(10000L, null, null, null, null, null));
-
-    if (partitionSpec.fields().isEmpty()) {
-      return builder.build();
-    } else {
-      return builder.withPartition(partitionData).build();
-    }
-  }
-
-  @Override
-  public void commit(TableMetadata base, TableMetadata metadata) {
-    throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata");
-  }
-
-  @Override
-  public String metadataFileLocation(String filename) {
-    throw new UnsupportedOperationException(
-        "Metadata file location not available for Hive tables without Iceberg metadata");
-  }
-
-  @Override
-  public LocationProvider locationProvider() {
-    throw new UnsupportedOperationException("Writes not supported for Hive tables without Iceberg metadata");
-  }
-}
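To illustrate the partition value conversions in the fallback path above (an illustrative sketch; the literal values are made up): DATE partition strings become epoch days, and the lenient timestamp formatter accepts values with or without fractional seconds.

    import java.time.LocalDate;
    import java.time.LocalDateTime;
    import java.time.ZoneOffset;
    import java.time.format.DateTimeFormatter;
    import java.time.format.DateTimeFormatterBuilder;
    import java.time.temporal.ChronoField;

    // DATE partition value -> Iceberg's internal int representation (epoch days)
    int epochDay = (int) LocalDate.parse("2022-01-20").toEpochDay();

    // TIMESTAMP partition value -> microseconds since epoch, interpreted as UTC
    DateTimeFormatter formatter = new DateTimeFormatterBuilder()
        .parseLenient()
        .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
        .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
        .toFormatter();
    long micros = LocalDateTime.parse("2022-01-20T17:07:41", formatter)
        .toInstant(ZoneOffset.UTC).toEpochMilli() * 1000;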
diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java
deleted file mode 100644
index b206f9a5c4..0000000000
--- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.iceberg.hiveberg;
-
-import org.apache.iceberg.BaseFileScanTask;
-import org.apache.iceberg.DataTableScan;
-import org.apache.iceberg.DeleteFile;
-import org.apache.iceberg.FileScanTask;
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.PartitionSpecParser;
-import org.apache.iceberg.Schema;
-import org.apache.iceberg.SchemaParser;
-import org.apache.iceberg.Snapshot;
-import org.apache.iceberg.Table;
-import org.apache.iceberg.TableOperations;
-import org.apache.iceberg.TableScan;
-import org.apache.iceberg.TableScanContext;
-import org.apache.iceberg.events.Listeners;
-import org.apache.iceberg.events.ScanEvent;
-import org.apache.iceberg.expressions.Expression;
-import org.apache.iceberg.expressions.ResidualEvaluator;
-import org.apache.iceberg.io.CloseableIterable;
-import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
-import org.apache.iceberg.util.ParallelIterable;
-import org.apache.iceberg.util.ThreadPools;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-/**
- * A {@link DataTableScan} which uses Hive table and partition metadata to read tables.
- * This scan does not provide any time travel, snapshot isolation, or incremental computation benefits.
- */
-public class LegacyHiveTableScan extends DataTableScan {
-  private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveTableScan.class);
-
-  protected LegacyHiveTableScan(TableOperations ops, Table table) {
-    super(ops, table);
-  }
-
-  protected LegacyHiveTableScan(TableOperations ops, Table table, Schema schema, TableScanContext context) {
-    super(ops, table, schema, context);
-  }
-
-  @Override
-  @SuppressWarnings("checkstyle:HiddenField")
-  protected TableScan newRefinedScan(TableOperations ops, Table table, Schema schema, TableScanContext context) {
-    return new LegacyHiveTableScan(ops, table, schema, context);
-  }
-
-  @Override
-  public CloseableIterable<FileScanTask> planFiles() {
-    LOG.info("Scanning table {} with filter {}", table().toString(), filter());
-
-    Listeners.notifyAll(
-        new ScanEvent(table().toString(), -1, filter(), schema()));
-
-    LegacyHiveTableOperations hiveOps = (LegacyHiveTableOperations) tableOps();
-    PartitionSpec spec = hiveOps.current().spec();
-    String schemaString = SchemaParser.toJson(spec.schema());
-    String specString = PartitionSpecParser.toJson(spec);
-    ResidualEvaluator residuals = ResidualEvaluator.of(spec, filter(), isCaseSensitive());
-
-    Iterable<Iterable<FileScanTask>> tasks = Iterables.transform(
-        hiveOps.getFilesByFilter(filter()),
-        fileIterable ->
-            Iterables.transform(
-                fileIterable,
-                file -> new BaseFileScanTask(file, new DeleteFile[0], schemaString, specString, residuals)));
-
-    return new ParallelIterable<>(tasks, ThreadPools.getWorkerPool());
-  }
-
-  @Override
-  public CloseableIterable<FileScanTask> planFiles(TableOperations ops, Snapshot snapshot,
-                                                   Expression rowFilter, boolean ignoreResiduals,
-                                                   boolean caseSensitive, boolean colStats) {
-    throw new IllegalStateException("Control flow should never reach here");
-  }
-}
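A short sketch of driving the scan above (illustrative; the table variable and the partition column name are hypothetical). planFiles turns each matching partition directory into a batch of file tasks and lists them in parallel:

    import org.apache.iceberg.FileScanTask;
    import org.apache.iceberg.TableScan;
    import org.apache.iceberg.expressions.Expressions;
    import org.apache.iceberg.io.CloseableIterable;

    // table: an org.apache.iceberg.Table loaded from a LegacyHiveCatalog (hypothetical)
    TableScan scan = table.newScan()
        .filter(Expressions.equal("datepartition", "2022-01-20"));
    // close() may throw IOException; assume the enclosing method declares it
    try (CloseableIterable<FileScanTask> tasks = scan.planFiles()) {
      tasks.forEach(task -> System.out.println(task.file().path()));
    }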
diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java
deleted file mode 100644
index 67dbc51a72..0000000000
--- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.iceberg.hiveberg;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.stream.Collectors;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.iceberg.FileFormat;
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.Schema;
-import org.apache.iceberg.StructLike;
-import org.apache.iceberg.avro.AvroSchemaUtil;
-import org.apache.iceberg.avro.AvroSchemaVisitor;
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.iceberg.types.Conversions;
-import org.apache.iceberg.types.Type;
-import org.apache.iceberg.types.Types;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-class LegacyHiveTableUtils {
-
-  private LegacyHiveTableUtils() {
-  }
-
-  private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveTableUtils.class);
-
-  static Schema getSchema(org.apache.hadoop.hive.metastore.api.Table table) {
-    Map<String, String> props = getTableProperties(table);
-    String schemaStr = props.get("avro.schema.literal");
-    // Disable default value validation for backward compatibility with Avro 1.7
-    org.apache.avro.Schema avroSchema =
-        schemaStr != null ? new org.apache.avro.Schema.Parser().setValidateDefaults(false).parse(schemaStr) : null;
-    Schema schema;
-    if (avroSchema != null) {
-      String serde = table.getSd().getSerdeInfo().getSerializationLib();
-      org.apache.avro.Schema finalAvroSchema;
-      if (serde.equals("org.apache.hadoop.hive.serde2.avro.AvroSerDe") ||
-          HasDuplicateLowercaseColumnNames.visit(avroSchema)) {
-        // Case 1: If the serde is AvroSerDe, return the Avro schema directly; Hive column info is not reliable
-        // and can be empty for these tables. Hive itself uses avro.schema.literal as the source of truth for
-        // these tables, so this should be fine
-        // Case 2: If avro.schema.literal has duplicate column names when lowercased, we cannot do reliable
-        // matching with the Hive schema because multiple Avro fields can map to the same Hive field
-        finalAvroSchema = avroSchema;
-      } else {
-        finalAvroSchema = MergeHiveSchemaWithAvro.visit(structTypeInfoFromCols(table.getSd().getCols()), avroSchema);
-      }
-      schema = AvroSchemaUtil.toIceberg(finalAvroSchema);
-    } else {
-      // TODO: Do we need to support column and column.types properties for ORC tables?
-      LOG.info("Table {}.{} does not have an avro.schema.literal set; using Hive schema instead. " +
-          "The schema will not have case sensitivity and nullability information",
-          table.getDbName(), table.getTableName());
-      Type icebergType = HiveTypeUtil.convert(structTypeInfoFromCols(table.getSd().getCols()));
-      schema = new Schema(icebergType.asNestedType().asStructType().fields());
-    }
-    Types.StructType dataStructType = schema.asStruct();
-    List<Types.NestedField> fields = Lists.newArrayList(dataStructType.fields());
-
-    Schema partitionSchema = partitionSchema(table.getPartitionKeys(), schema);
-    Types.StructType partitionStructType = partitionSchema.asStruct();
-    fields.addAll(partitionStructType.fields());
-    return new Schema(fields);
-  }
-
-  static StructTypeInfo structTypeInfoFromCols(List<FieldSchema> cols) {
-    Preconditions.checkArgument(cols != null && cols.size() > 0, "No Hive schema present");
-    List<String> fieldNames = cols
-        .stream()
-        .map(FieldSchema::getName)
-        .collect(Collectors.toList());
-    List<TypeInfo> fieldTypeInfos = cols
-        .stream()
-        .map(f -> TypeInfoUtils.getTypeInfoFromTypeString(f.getType()))
-        .collect(Collectors.toList());
-    return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
-  }
-
-  private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema dataSchema) {
-    AtomicInteger fieldId = new AtomicInteger(10000);
-    List<Types.NestedField> partitionFields = Lists.newArrayList();
-    partitionKeys.forEach(f -> {
-      Types.NestedField field = dataSchema.findField(f.getName());
-      if (field != null) {
-        throw new IllegalStateException(String.format("Partition field %s also present in data", field.name()));
-      }
-      partitionFields.add(
-          Types.NestedField.optional(
-              fieldId.incrementAndGet(), f.getName(), primitiveIcebergType(f.getType()), f.getComment()));
-    });
-    return new Schema(partitionFields);
-  }
-
-  private static Type primitiveIcebergType(String hiveTypeString) {
-    PrimitiveTypeInfo primitiveTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(hiveTypeString);
-    return HiveTypeUtil.convert(primitiveTypeInfo);
-  }
-
-  static Map<String, String> getTableProperties(org.apache.hadoop.hive.metastore.api.Table table) {
-    Map<String, String> props = new HashMap<>();
-    props.putAll(table.getSd().getParameters());
-    props.putAll(table.getParameters());
-    props.putAll(table.getSd().getSerdeInfo().getParameters());
-    return props;
-  }
-
-  static PartitionSpec getPartitionSpec(org.apache.hadoop.hive.metastore.api.Table table, Schema schema) {
-    PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
-    table.getPartitionKeys().forEach(fieldSchema -> builder.identity(fieldSchema.getName()));
-    return builder.build();
-  }
-
-  static DirectoryInfo toDirectoryInfo(org.apache.hadoop.hive.metastore.api.Table table) {
-    return new DirectoryInfo(table.getSd().getLocation(),
-        serdeToFileFormat(table.getSd().getSerdeInfo().getSerializationLib()), null);
-  }
-
-  static List<DirectoryInfo> toDirectoryInfos(List<Partition> partitions, PartitionSpec spec) {
-    return partitions.stream().map(
-        p -> new DirectoryInfo(
-            p.getSd().getLocation(),
-            serdeToFileFormat(
-                p.getSd().getSerdeInfo().getSerializationLib()),
-            buildPartitionStructLike(p.getValues(), spec))
-    ).collect(Collectors.toList());
-  }
-
-  private static StructLike buildPartitionStructLike(List<String> partitionValues, PartitionSpec spec) {
-    List<Types.NestedField> fields = spec.partitionType().fields();
-    return new StructLike() {
-      @Override
-      public int size() {
-        return partitionValues.size();
-      }
-
-      @Override
-      public <T> T get(int pos, Class<T> javaClass) {
-        final Object partitionValue = Conversions.fromPartitionString(
-            fields.get(pos).type(),
-            partitionValues.get(pos));
-        return javaClass.cast(partitionValue);
-      }
-
-      @Override
-      public <T> void set(int pos, T value) {
-        throw new IllegalStateException("Read-only");
-      }
-    };
-  }
-
-  private static FileFormat serdeToFileFormat(String serde) {
-    switch (serde) {
-      case "org.apache.hadoop.hive.serde2.avro.AvroSerDe":
-        return FileFormat.AVRO;
-      case "org.apache.hadoop.hive.ql.io.orc.OrcSerde":
-        return FileFormat.ORC;
-      default:
-        throw new IllegalArgumentException("Unrecognized serde: " + serde);
-    }
-  }
-
-  private static class HasDuplicateLowercaseColumnNames extends AvroSchemaVisitor<Boolean> {
-
-    private static boolean visit(org.apache.avro.Schema schema) {
-      return AvroSchemaVisitor.visit(schema, new HasDuplicateLowercaseColumnNames());
-    }
-
-    @Override
-    public Boolean record(org.apache.avro.Schema record, List<String> names, List<Boolean> fieldResults) {
-      return fieldResults.stream().anyMatch(x -> x) ||
-          names.stream().collect(Collectors.groupingBy(String::toLowerCase))
-              .values().stream().anyMatch(x -> x.size() > 1);
-    }
-
-    @Override
-    public Boolean union(org.apache.avro.Schema union, List<Boolean> optionResults) {
-      return optionResults.stream().anyMatch(x -> x);
-    }
-
-    @Override
-    public Boolean array(org.apache.avro.Schema array, Boolean elementResult) {
-      return elementResult;
-    }
-
-    @Override
-    public Boolean map(org.apache.avro.Schema map, Boolean valueResult) {
-      return valueResult;
-    }
-
-    @Override
-    public Boolean primitive(org.apache.avro.Schema primitive) {
-      return false;
-    }
-  }
-}
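For context, a sketch of feeding Hive column definitions into the utilities above (illustrative; the columns are made up). Hive type strings are parsed with TypeInfoUtils, which is what structTypeInfoFromCols relies on:

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;

    // Hypothetical Hive columns; FieldSchema is (name, type string, comment)
    List<FieldSchema> cols = Arrays.asList(
        new FieldSchema("id", "bigint", null),
        new FieldSchema("payload", "struct<name:string,score:double>", null));
    StructTypeInfo structTypeInfo = LegacyHiveTableUtils.structTypeInfoFromCols(cols);
    // For Avro-backed tables, this is then merged with avro.schema.literal
    // via MergeHiveSchemaWithAvro (see the next file in this patch).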
diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java b/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java
deleted file mode 100644
index 4edd620502..0000000000
--- a/hive-metastore/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.iceberg.hiveberg;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.avro.JsonProperties;
-import org.apache.avro.LogicalTypes;
-import org.apache.avro.Schema;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
-import org.apache.iceberg.avro.AvroSchemaUtil;
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-
-
-/**
- * A {@link HiveSchemaWithPartnerVisitor} which augments a Hive schema with extra metadata from a partner Avro schema
- * and generates a resultant "merged" Avro schema:
- *
- * 1. Fields are matched between the Hive and Avro schemas using a case-insensitive search by field name
- * 2. Copies field names, nullability, default values, and field props from the Avro schema
- * 3. Copies field types from the Hive schema.
- *    TODO: We should also handle some cases of type promotion where the types in Avro are potentially more correct,
- *    e.g. BINARY in Hive -> FIXED in Avro, STRING in Hive -> ENUM in Avro, etc.
- * 4. Retains fields found only in the Hive schema; ignores fields found only in the Avro schema
- * 5. Fields found only in the Hive schema are represented as optional fields in the resultant Avro schema
- * 6. For fields found only in the Hive schema, field names are sanitized to make them compatible with the Avro
- *    identifier spec
- */
-class MergeHiveSchemaWithAvro extends HiveSchemaWithPartnerVisitor<Schema, Schema.Field, Schema, Schema.Field> {
-
-  static Schema visit(StructTypeInfo typeInfo, Schema schema) {
-    return visit(typeInfo, schema, new MergeHiveSchemaWithAvro(),
-        AvroPartnerAccessor.INSTANCE);
-  }
-
-  private final AtomicInteger recordCounter = new AtomicInteger(0);
-
-  @Override
-  public Schema struct(StructTypeInfo struct, Schema partner, List<Schema.Field> fieldResults) {
-    boolean shouldResultBeOptional = partner == null || AvroSchemaUtil.isOptionSchema(partner);
-    Schema result;
-    if (partner == null || extractIfOption(partner).getType() != Schema.Type.RECORD) {
-      // if there was no matching Avro struct, return a struct with new record/namespace
-      int recordNum = recordCounter.incrementAndGet();
-      result = Schema.createRecord("record" + recordNum, null, "namespace" + recordNum, false, fieldResults);
-    } else {
-      result = AvroSchemaUtil.copyRecord(extractIfOption(partner), fieldResults, null);
-    }
-    return shouldResultBeOptional ? AvroSchemaUtil.toOption(result) : result;
-  }
-
-  @Override
-  public Schema.Field field(String name, TypeInfo field, Schema.Field partner, Schema fieldResult) {
-    // No need to infer `shouldResultBeOptional`. We expect other visitor methods to return optional schemas
-    // in their field results if required
-    if (partner == null) {
-      // if there was no matching Avro field, use the name from the Hive schema and set a null default
-      return new Schema.Field(
-          AvroSchemaUtil.makeCompatibleName(name), fieldResult, null, Schema.Field.NULL_DEFAULT_VALUE);
-    } else {
-      // TODO: How to ensure that the field default value is compatible with the new field type generated from Hive?
-      // Copy field type from the visitor result, copy everything else from the partner
-      // Avro requires the default value to match the first type in the option, reorder option if required
-      Schema reordered = reorderOptionIfRequired(fieldResult, partner.defaultVal());
-      return AvroSchemaUtil.copyField(partner, reordered, partner.name());
-    }
-  }
-
-  /**
-   * Reorders an option schema so that the type of the provided default value is the first type in the option schema
-   *
-   * e.g. If the schema is (NULL, INT) and the default value is 1, the returned schema is (INT, NULL)
-   * If the schema is not an option schema or if there is no default value, the schema is returned as-is
-   */
-  private Schema reorderOptionIfRequired(Schema schema, Object defaultValue) {
-    if (AvroSchemaUtil.isOptionSchema(schema) && defaultValue != null) {
-      boolean isNullFirstOption = schema.getTypes().get(0).getType() == Schema.Type.NULL;
-      if (isNullFirstOption && defaultValue.equals(JsonProperties.NULL_VALUE)) {
-        return schema;
-      } else {
-        return Schema.createUnion(schema.getTypes().get(1), schema.getTypes().get(0));
-      }
-    } else {
-      return schema;
-    }
-  }
-
-  @Override
-  public Schema list(ListTypeInfo list, Schema partner, Schema elementResult) {
-    // if there was no matching Avro list, or if the matching Avro list was an option, return an optional list
-    boolean shouldResultBeOptional = partner == null || AvroSchemaUtil.isOptionSchema(partner);
-    Schema result = Schema.createArray(elementResult);
-    return shouldResultBeOptional ? AvroSchemaUtil.toOption(result) : result;
-  }
-
-  @Override
-  public Schema map(MapTypeInfo map, Schema partner, Schema keyResult, Schema valueResult) {
-    Preconditions.checkArgument(extractIfOption(keyResult).getType() == Schema.Type.STRING,
-        "Map keys should always be non-nullable strings. Found: %s", keyResult);
-    // if there was no matching Avro map, or if the matching Avro map was an option, return an optional map
-    boolean shouldResultBeOptional = partner == null || AvroSchemaUtil.isOptionSchema(partner);
-    Schema result = Schema.createMap(valueResult);
-    return shouldResultBeOptional ? AvroSchemaUtil.toOption(result) : result;
-  }
-
-  @Override
-  public Schema union(UnionTypeInfo union, Schema partner, List<Schema> results) {
-    if (AvroSchemaUtil.nullExistInUnion(partner)) {
-      List<Schema> toAddNull = new ArrayList<>();
-      toAddNull.add(Schema.create(Schema.Type.NULL));
-      toAddNull.addAll(results);
-      return Schema.createUnion(toAddNull);
-    }
-    return Schema.createUnion(results);
-  }
-
-  @Override
-  public Schema primitive(PrimitiveTypeInfo primitive, Schema partner) {
-    boolean shouldResultBeOptional = partner == null || AvroSchemaUtil.isOptionSchema(partner);
-    Schema hivePrimitive = hivePrimitiveToAvro(primitive);
-    // if there was no matching Avro primitive, use the Hive primitive
-    Schema result = partner == null ? hivePrimitive : checkCompatibilityAndPromote(hivePrimitive, partner);
-    return shouldResultBeOptional ? AvroSchemaUtil.toOption(result) : result;
-  }
-
-  private Schema checkCompatibilityAndPromote(Schema schema, Schema partner) {
-    // TODO: Check if schema is compatible with partner
-    // Also do type promotion if required, e.g. schema = string & partner = enum, schema = bytes & partner = fixed, etc.
-    return schema;
-  }
-
-  /**
-   * A {@link PartnerAccessor} which matches the requested field from a partner Avro struct by a case-insensitive
-   * field name match
-   */
-  private static class AvroPartnerAccessor implements PartnerAccessor<Schema, Schema.Field> {
-    private static final AvroPartnerAccessor INSTANCE = new AvroPartnerAccessor();
-
-    private static final Schema MAP_KEY = Schema.create(Schema.Type.STRING);
-
-    @Override
-    public Schema.Field fieldPartner(Schema partner, String fieldName) {
-      Schema schema = extractIfOption(partner);
-      return (schema.getType() == Schema.Type.RECORD) ? findCaseInsensitive(schema, fieldName) : null;
-    }
-
-    @Override
-    public Schema fieldType(Schema.Field partnerField) {
-      return partnerField.schema();
-    }
-
-    @Override
-    public Schema mapKeyPartner(Schema partner) {
-      Schema schema = extractIfOption(partner);
-      return (schema.getType() == Schema.Type.MAP) ? MAP_KEY : null;
-    }
-
-    @Override
-    public Schema mapValuePartner(Schema partner) {
-      Schema schema = extractIfOption(partner);
-      return (schema.getType() == Schema.Type.MAP) ? schema.getValueType() : null;
-    }
-
-    @Override
-    public Schema listElementPartner(Schema partner) {
-      Schema schema = extractIfOption(partner);
-      return (schema.getType() == Schema.Type.ARRAY) ? schema.getElementType() : null;
-    }
-
-    @Override
-    public Schema unionObjectPartner(Schema partner, int ordinal) {
-      if (partner.getType() != Schema.Type.UNION) {
-        return null;
-      }
-      Schema schema = AvroSchemaUtil.discardNullFromUnionIfExist(partner);
-      return schema.getTypes().get(ordinal);
-    }
-
-    private Schema.Field findCaseInsensitive(Schema struct, String fieldName) {
-      Preconditions.checkArgument(struct.getType() == Schema.Type.RECORD);
-      // TODO: Optimize? This is called once per struct field, and each call loops over all fields of the struct
-      for (Schema.Field field : struct.getFields()) {
-        if (field.name().equalsIgnoreCase(fieldName)) {
-          return field;
-        }
-      }
-      return null;
-    }
-  }
-
-  private static Schema extractIfOption(Schema schema) {
-    if (AvroSchemaUtil.isOptionSchema(schema)) {
-      return AvroSchemaUtil.fromOption(schema);
-    } else {
-      return schema;
-    }
-  }
-
-  // Additional numeric type names, similar to other logical type names in AvroSerde
-  private static final String SHORT_TYPE_NAME = "short";
-  private static final String BYTE_TYPE_NAME = "byte";
-
-  // TODO: This should be refactored into a visitor if we ever require conversion of complex types
-  public Schema hivePrimitiveToAvro(PrimitiveTypeInfo primitive) {
-    switch (primitive.getPrimitiveCategory()) {
-      case INT:
-      case BYTE:
-      case SHORT:
-        return Schema.create(Schema.Type.INT);
-
-      case LONG:
-        return Schema.create(Schema.Type.LONG);
-
-      case FLOAT:
-        return Schema.create(Schema.Type.FLOAT);
-
-      case DOUBLE:
-        return Schema.create(Schema.Type.DOUBLE);
-
-      case BOOLEAN:
-        return Schema.create(Schema.Type.BOOLEAN);
-
-      case CHAR:
-      case STRING:
-      case VARCHAR:
-        return Schema.create(Schema.Type.STRING);
-
-      case BINARY:
-        return Schema.create(Schema.Type.BYTES);
-
-      case VOID:
-        return Schema.create(Schema.Type.NULL);
-
-      case DATE:
-        return LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT));
-
-      case TIMESTAMP:
-        Schema schema = Schema.create(Schema.Type.LONG);
-        schema.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, false);
-        return LogicalTypes.timestampMillis().addToSchema(schema);
-
-      case DECIMAL:
-        DecimalTypeInfo dti = (DecimalTypeInfo) primitive;
-        return LogicalTypes.decimal(dti.getPrecision(), dti.getScale()).addToSchema(Schema.create(Schema.Type.BYTES));
-
-      default:
-        throw new UnsupportedOperationException(primitive + " is not supported.");
-    }
-  }
-}
diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java
index fd16c18906..75d7388ebb 100644
--- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java
+++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java
@@ -1,29 +1,20 @@
 /*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java index 2db783f81e..e67091d3d7 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java @@ -1,29 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java index e931ba015f..2d3eb4ead0 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java @@ -1,29 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java index f77f19feaa..2cc63a70dd 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java index a5f043fe26..fd9403eda8 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java index 5603d6d46e..c58c30360b 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java index e3be31e8ac..dd8bb9f1ce 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java @@ -1,35 +1,29 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
 */
 
 package org.apache.iceberg.hiveberg;
 
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
 import java.util.Map;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Table;
@@ -50,10 +44,20 @@ public class LegacyHiveCatalog extends HiveCatalog {
 
   private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveCatalog.class);
 
+  // Hiveberg refactoring: this is moved from HiveCatalogs
+  private static final Cache<String, HiveCatalog> LEGACY_CATALOG_CACHE = Caffeine.newBuilder().build();
+
   public LegacyHiveCatalog(Configuration conf) {
     super(conf);
   }
 
+  // Hiveberg refactoring: this is moved from HiveCatalogs
+  public static HiveCatalog loadLegacyCatalog(Configuration conf) {
+    // metastore URI can be null in local mode
+    String metastoreUri = conf.get(HiveConf.ConfVars.METASTOREURIS.varname, "");
+    return LEGACY_CATALOG_CACHE.get(metastoreUri, uri -> new LegacyHiveCatalog(conf));
+  }
+
   @Override
   @SuppressWarnings("CatchBlockLogException")
   public Table loadTable(TableIdentifier identifier) {
diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java
index 66b8035f25..c05722a1af 100644
--- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java
+++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java
@@ -1,29 +1,20 @@
 /*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java index 6dcbe0fee5..0ca64c6245 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java @@ -1,29 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java index 87a4d4e2e1..752d676dc9 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java @@ -1,29 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java index 4a33ced42b..c04667d07a 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java @@ -1,29 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
+ * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java index 4edd620502..67bb7f1fa7 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java index 05d5ac648f..251eee6646 100644 --- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java @@ -29,6 +29,7 @@ /** + * Hiveberg refactoring: * This class is copied from iceberg-hive-metastore module test code */ public abstract class HiveMetastoreTest { diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java index 701ea98ae7..09a4979ee5 100644 --- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java @@ -1,22 +1,20 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * - * Copyright 2004 Clinton Begin + * http://www.apache.org/licenses/LICENSE-2.0 * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Slightly modified version of the com.ibatis.common.jdbc.ScriptRunner class - * from the iBATIS Apache project. Only removed dependency on Resource class - * and a constructor. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; @@ -33,6 +31,7 @@ /** + * Hiveberg refactoring: * This class is copied from iceberg-hive-metastore module test code */ public class ScriptRunner { diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java index 0c75c251b3..df52a32bc3 100644 --- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java index 991fe61013..78c5c8872c 100644 --- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java @@ -48,10 +48,12 @@ import org.apache.thrift.transport.TServerSocket; import org.apache.thrift.transport.TTransportFactory; -import static java.nio.file.Files.*; -import static java.nio.file.attribute.PosixFilePermissions.*; +import static java.nio.file.Files.createTempDirectory; +import static java.nio.file.attribute.PosixFilePermissions.asFileAttribute; +import static java.nio.file.attribute.PosixFilePermissions.fromString; /** + * Hiveberg refactoring: * This class is copied from iceberg-hive-metastore module test code */ public class TestHiveMetastore { diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java index db4bfa094a..38132d4495 100644 --- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java index 49e8818fdc..5fde7b69fe 100644 --- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.hiveberg; diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java index 4298069e7b..67e865b59d 100644 --- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java +++ b/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.iceberg.hiveberg; From 2e8b7430ffd605b6b5e02f64304039def46f29c1 Mon Sep 17 00:00:00 2001 From: Wenye Zhang Date: Thu, 27 Jan 2022 13:35:05 -0800 Subject: [PATCH 3/5] remove hiveberg dependency in iceberg-spark2 module --- build.gradle | 5 +- .../hiveberg/spark2/IcebergSource.java | 54 +++++++++++ .../iceberg/hiveberg/spark2/Reader.java | 89 +++++++++++++++++++ .../iceberg/spark/source/IcebergSource.java | 9 +- .../apache/iceberg/spark/source/Reader.java | 33 ++----- .../apache/iceberg/spark/source/Stats.java | 4 +- 6 files changed, 162 insertions(+), 32 deletions(-) create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/IcebergSource.java create mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/Reader.java diff --git a/build.gradle b/build.gradle index 577627d7df..15e763cc9c 100644 --- a/build.gradle +++ b/build.gradle @@ -498,12 +498,16 @@ project(':iceberg-hive-metastore') { project(':iceberg-hiveberg') { dependencies { compile project(':iceberg-hive-metastore') + compile project(':iceberg-spark2') compileOnly "org.apache.avro:avro" compileOnly("org.apache.hadoop:hadoop-client") { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' } + compileOnly("org.apache.spark:spark-hive_2.11") { + exclude group: 'org.apache.avro', module: 'avro' + } compileOnly("org.apache.hive:hive-metastore") { exclude group: 'org.apache.avro', module: 'avro' @@ -872,7 +876,6 @@ if (jdkVersion == '8') { compile project(':iceberg-arrow') compile project(':iceberg-hive-metastore') compile project(':iceberg-spark') - compile project(':iceberg-hiveberg') compileOnly "org.apache.avro:avro" compileOnly("org.apache.spark:spark-hive_2.11") { diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/IcebergSource.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/IcebergSource.java new file mode 100644 index 0000000000..c34e0ee2df --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/IcebergSource.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.hiveberg.spark2; + +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.Table; +import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.spark.SparkSchemaUtil; +import org.apache.iceberg.spark.SparkUtil; +import org.apache.spark.broadcast.Broadcast; +import org.apache.spark.sql.sources.v2.DataSourceOptions; +import org.apache.spark.sql.sources.v2.reader.DataSourceReader; +import org.apache.spark.sql.types.StructType; + + +public class IcebergSource extends org.apache.iceberg.spark.source.IcebergSource { + + @Override + public DataSourceReader createReader(StructType readSchema, DataSourceOptions options) { + Configuration conf = new Configuration(lazyBaseConf()); + Table table = getTableAndResolveHadoopConfiguration(options, conf); + String caseSensitive = lazySparkSession().conf().get("spark.sql.caseSensitive"); + + Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table)); + Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption()); + + Reader reader = new Reader(table, io, encryptionManager, Boolean.parseBoolean(caseSensitive), options); + if (readSchema != null) { + // convert() will fail if readSchema contains fields not in table.schema() + SparkSchemaUtil.convert(table.schema(), readSchema); + reader.pruneColumns(readSchema); + } + + return reader; + } +} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/Reader.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/Reader.java new file mode 100644 index 0000000000..a07890b489 --- /dev/null +++ b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/Reader.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.iceberg.hiveberg.spark2; + +import java.util.OptionalLong; +import org.apache.iceberg.CombinedScanTask; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.SnapshotSummary; +import org.apache.iceberg.Table; +import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.hiveberg.LegacyHiveTable; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.spark.SparkSchemaUtil; +import org.apache.iceberg.spark.source.Stats; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.spark.broadcast.Broadcast; +import org.apache.spark.sql.sources.v2.DataSourceOptions; +import org.apache.spark.sql.sources.v2.reader.Statistics; + + +class Reader extends org.apache.iceberg.spark.source.Reader { + Reader(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager, + boolean caseSensitive, DataSourceOptions options) { + super(table, io, encryptionManager, caseSensitive, options); + } + + @Override + public Statistics estimateStatistics() { + Table table = super.getTable(); + if (table instanceof LegacyHiveTable) { + // We currently don't have reliable stats for Hive tables + return EMPTY_STATS; + } + + // its a fresh table, no data + if (table.currentSnapshot() == null) { + return new Stats(0L, 0L); + } + + // estimate stats using snapshot summary only for partitioned tables (metadata tables are unpartitioned) + if (!table.spec().isUnpartitioned() && filterExpression() == Expressions.alwaysTrue()) { + long totalRecords = PropertyUtil.propertyAsLong(table.currentSnapshot().summary(), + SnapshotSummary.TOTAL_RECORDS_PROP, Long.MAX_VALUE); + return new Stats(SparkSchemaUtil.estimateSize(lazyType(), totalRecords), totalRecords); + } + + long sizeInBytes = 0L; + long numRows = 0L; + + for (CombinedScanTask task : tasks()) { + for (FileScanTask file : task.files()) { + sizeInBytes += file.length(); + numRows += file.file().recordCount(); + } + } + + return new Stats(sizeInBytes, numRows); + } + + private static final Statistics EMPTY_STATS = new Statistics() { + @Override + public OptionalLong sizeInBytes() { + return OptionalLong.empty(); + } + + @Override + public OptionalLong numRows() { + return OptionalLong.empty(); + } + }; +} diff --git a/spark2/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java b/spark2/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java index 9bf9d15ce6..a0e2141d55 100644 --- a/spark2/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java +++ b/spark2/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java @@ -139,29 +139,28 @@ protected Table findTable(DataSourceOptions options, Configuration conf) { } } - private SparkSession lazySparkSession() { + protected SparkSession lazySparkSession() { if (lazySpark == null) { this.lazySpark = SparkSession.builder().getOrCreate(); } return lazySpark; } - private JavaSparkContext lazySparkContext() { + protected JavaSparkContext lazySparkContext() { if (lazySparkContext == null) { this.lazySparkContext = new JavaSparkContext(lazySparkSession().sparkContext()); } return lazySparkContext; } - private Configuration lazyBaseConf() { + protected Configuration lazyBaseConf() { if (lazyConf == null) { this.lazyConf = lazySparkSession().sessionState().newHadoopConf(); } return lazyConf; } - private Table getTableAndResolveHadoopConfiguration( - DataSourceOptions options, Configuration conf) { + protected Table getTableAndResolveHadoopConfiguration(DataSourceOptions options,
Configuration conf) { // Overwrite configurations from the Spark Context with configurations from the options. mergeIcebergHadoopConfs(conf, options.asMap()); Table table = findTable(options, conf); diff --git a/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java b/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java index b292aa6201..4a1940e23d 100644 --- a/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java +++ b/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java @@ -24,7 +24,6 @@ import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.OptionalLong; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -44,7 +43,6 @@ import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.hadoop.HadoopFileIO; import org.apache.iceberg.hadoop.Util; -import org.apache.iceberg.hiveberg.LegacyHiveTable; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.orc.OrcRowFilterUtils; @@ -80,7 +78,7 @@ import static org.apache.iceberg.TableProperties.DEFAULT_NAME_MAPPING; -class Reader implements DataSourceReader, SupportsScanColumnarBatch, SupportsPushDownFilters, +public class Reader implements DataSourceReader, SupportsScanColumnarBatch, SupportsPushDownFilters, SupportsPushDownRequiredColumns, SupportsReportStatistics { private static final Logger LOG = LoggerFactory.getLogger(Reader.class); @@ -112,7 +110,7 @@ class Reader implements DataSourceReader, SupportsScanColumnarBatch, SupportsPus private List<CombinedScanTask> tasks = null; // lazy cache of tasks private Boolean readUsingBatch = null; - Reader(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager, + protected Reader(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager, boolean caseSensitive, DataSourceOptions options) { this.table = table; this.snapshotId = options.get(SparkReadOptions.SNAPSHOT_ID).map(Long::parseLong).orElse(null); @@ -202,14 +200,14 @@ private Schema lazySchema() { return schema; } - private Expression filterExpression() { + protected Expression filterExpression() { if (filterExpressions != null) { return filterExpressions.stream().reduce(Expressions.alwaysTrue(), Expressions::and); } return Expressions.alwaysTrue(); } - private StructType lazyType() { + protected StructType lazyType() { if (type == null) { Preconditions.checkArgument(readTimestampWithoutZone || !hasTimestampWithoutZone(lazySchema()), "Spark does not support timestamp without time zone fields"); @@ -310,11 +308,6 @@ public void pruneColumns(StructType newRequestedSchema) { @Override public Statistics estimateStatistics() { - if (table instanceof LegacyHiveTable) { - // We currently don't have reliable stats for Hive tables - return EMPTY_STATS; - } - // its a fresh table, no data if (table.currentSnapshot() == null) { return new Stats(0L, 0L); @@ -340,18 +333,6 @@ public Statistics estimateStatistics() { return new Stats(sizeInBytes, numRows); } - private static final Statistics EMPTY_STATS = new Statistics() { - @Override - public OptionalLong sizeInBytes() { - return OptionalLong.empty(); - } - - @Override - public OptionalLong numRows() { - return OptionalLong.empty(); - } - }; - @Override public boolean enableBatchRead() { if (readUsingBatch == null) { @@ -402,7 +383,7 @@ private static void mergeIcebergHadoopConfs( .forEach(key -> baseConf.set(key.replaceFirst("hadoop.", ""), options.get(key))); } - private List<CombinedScanTask> tasks() { + protected List<CombinedScanTask> tasks() {
if (tasks == null) { TableScan scan = table .newScan() @@ -588,4 +569,8 @@ private static class BatchReader extends BatchDataReader implements InputPartiti super(task, expectedSchema, nameMapping, io, encryptionManager, caseSensitive, size); } } + + public Table getTable() { + return table; + } } diff --git a/spark2/src/main/java/org/apache/iceberg/spark/source/Stats.java b/spark2/src/main/java/org/apache/iceberg/spark/source/Stats.java index 76119c1869..afeb17feb3 100644 --- a/spark2/src/main/java/org/apache/iceberg/spark/source/Stats.java +++ b/spark2/src/main/java/org/apache/iceberg/spark/source/Stats.java @@ -22,11 +22,11 @@ import java.util.OptionalLong; import org.apache.spark.sql.sources.v2.reader.Statistics; -class Stats implements Statistics { +public class Stats implements Statistics { private final OptionalLong sizeInBytes; private final OptionalLong numRows; - Stats(long sizeInBytes, long numRows) { + public Stats(long sizeInBytes, long numRows) { this.sizeInBytes = OptionalLong.of(sizeInBytes); this.numRows = OptionalLong.of(numRows); } From 2681f10ef55f9930734cb337d3203832c0504588 Mon Sep 17 00:00:00 2001 From: Wenye Zhang Date: Wed, 2 Mar 2022 10:54:04 -0800 Subject: [PATCH 4/5] Revert "remove hiveberg dependency in iceberg-spark2 module" This reverts commit 2e8b7430ffd605b6b5e02f64304039def46f29c1. --- build.gradle | 5 +- .../hiveberg/spark2/IcebergSource.java | 54 ----------- .../iceberg/hiveberg/spark2/Reader.java | 89 ------------------- .../iceberg/spark/source/IcebergSource.java | 9 +- .../apache/iceberg/spark/source/Reader.java | 33 +++++-- .../apache/iceberg/spark/source/Stats.java | 4 +- 6 files changed, 32 insertions(+), 162 deletions(-) delete mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/IcebergSource.java delete mode 100644 hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/Reader.java diff --git a/build.gradle b/build.gradle index 15e763cc9c..577627d7df 100644 --- a/build.gradle +++ b/build.gradle @@ -498,16 +498,12 @@ project(':iceberg-hive-metastore') { project(':iceberg-hiveberg') { dependencies { compile project(':iceberg-hive-metastore') - compile project(':iceberg-spark2') compileOnly "org.apache.avro:avro" compileOnly("org.apache.hadoop:hadoop-client") { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' } - compileOnly("org.apache.spark:spark-hive_2.11") { - exclude group: 'org.apache.avro', module: 'avro' - } compileOnly("org.apache.hive:hive-metastore") { exclude group: 'org.apache.avro', module: 'avro' @@ -876,6 +872,7 @@ if (jdkVersion == '8') { compile project(':iceberg-arrow') compile project(':iceberg-hive-metastore') compile project(':iceberg-spark') + compile project(':iceberg-hiveberg') compileOnly "org.apache.avro:avro" compileOnly("org.apache.spark:spark-hive_2.11") { diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/IcebergSource.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/IcebergSource.java deleted file mode 100644 index c34e0ee2df..0000000000 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/IcebergSource.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.iceberg.hiveberg.spark2; - -import org.apache.hadoop.conf.Configuration; -import org.apache.iceberg.Table; -import org.apache.iceberg.encryption.EncryptionManager; -import org.apache.iceberg.io.FileIO; -import org.apache.iceberg.spark.SparkSchemaUtil; -import org.apache.iceberg.spark.SparkUtil; -import org.apache.spark.broadcast.Broadcast; -import org.apache.spark.sql.sources.v2.DataSourceOptions; -import org.apache.spark.sql.sources.v2.reader.DataSourceReader; -import org.apache.spark.sql.types.StructType; - - -public class IcebergSource extends org.apache.iceberg.spark.source.IcebergSource { - - @Override - public DataSourceReader createReader(StructType readSchema, DataSourceOptions options) { - Configuration conf = new Configuration(lazyBaseConf()); - Table table = getTableAndResolveHadoopConfiguration(options, conf); - String caseSensitive = lazySparkSession().conf().get("spark.sql.caseSensitive"); - - Broadcast<FileIO> io = lazySparkContext().broadcast(SparkUtil.serializableFileIO(table)); - Broadcast<EncryptionManager> encryptionManager = lazySparkContext().broadcast(table.encryption()); - - Reader reader = new Reader(table, io, encryptionManager, Boolean.parseBoolean(caseSensitive), options); - if (readSchema != null) { - // convert() will fail if readSchema contains fields not in table.schema() - SparkSchemaUtil.convert(table.schema(), readSchema); - reader.pruneColumns(readSchema); - } - - return reader; - } -} diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/Reader.java b/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/Reader.java deleted file mode 100644 index a07890b489..0000000000 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/spark2/Reader.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
- */ - -package org.apache.iceberg.hiveberg.spark2; - -import java.util.OptionalLong; -import org.apache.iceberg.CombinedScanTask; -import org.apache.iceberg.FileScanTask; -import org.apache.iceberg.SnapshotSummary; -import org.apache.iceberg.Table; -import org.apache.iceberg.encryption.EncryptionManager; -import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.hiveberg.LegacyHiveTable; -import org.apache.iceberg.io.FileIO; -import org.apache.iceberg.spark.SparkSchemaUtil; -import org.apache.iceberg.spark.source.Stats; -import org.apache.iceberg.util.PropertyUtil; -import org.apache.spark.broadcast.Broadcast; -import org.apache.spark.sql.sources.v2.DataSourceOptions; -import org.apache.spark.sql.sources.v2.reader.Statistics; - - -class Reader extends org.apache.iceberg.spark.source.Reader { - Reader(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager, - boolean caseSensitive, DataSourceOptions options) { - super(table, io, encryptionManager, caseSensitive, options); - } - - @Override - public Statistics estimateStatistics() { - Table table = super.getTable(); - if (table instanceof LegacyHiveTable) { - // We currently don't have reliable stats for Hive tables - return EMPTY_STATS; - } - - // its a fresh table, no data - if (table.currentSnapshot() == null) { - return new Stats(0L, 0L); - } - - // estimate stats using snapshot summary only for partitioned tables (metadata tables are unpartitioned) - if (!table.spec().isUnpartitioned() && filterExpression() == Expressions.alwaysTrue()) { - long totalRecords = PropertyUtil.propertyAsLong(table.currentSnapshot().summary(), - SnapshotSummary.TOTAL_RECORDS_PROP, Long.MAX_VALUE); - return new Stats(SparkSchemaUtil.estimateSize(lazyType(), totalRecords), totalRecords); - } - - long sizeInBytes = 0L; - long numRows = 0L; - - for (CombinedScanTask task : tasks()) { - for (FileScanTask file : task.files()) { - sizeInBytes += file.length(); - numRows += file.file().recordCount(); - } - } - - return new Stats(sizeInBytes, numRows); - } - - private static final Statistics EMPTY_STATS = new Statistics() { - @Override - public OptionalLong sizeInBytes() { - return OptionalLong.empty(); - } - - @Override - public OptionalLong numRows() { - return OptionalLong.empty(); - } - }; -} diff --git a/spark2/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java b/spark2/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java index a0e2141d55..9bf9d15ce6 100644 --- a/spark2/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java +++ b/spark2/src/main/java/org/apache/iceberg/spark/source/IcebergSource.java @@ -139,28 +139,29 @@ protected Table findTable(DataSourceOptions options, Configuration conf) { } } - protected SparkSession lazySparkSession() { + private SparkSession lazySparkSession() { if (lazySpark == null) { this.lazySpark = SparkSession.builder().getOrCreate(); } return lazySpark; } - protected JavaSparkContext lazySparkContext() { + private JavaSparkContext lazySparkContext() { if (lazySparkContext == null) { this.lazySparkContext = new JavaSparkContext(lazySparkSession().sparkContext()); } return lazySparkContext; } - protected Configuration lazyBaseConf() { + private Configuration lazyBaseConf() { if (lazyConf == null) { this.lazyConf = lazySparkSession().sessionState().newHadoopConf(); } return lazyConf; } - protected Table getTableAndResolveHadoopConfiguration(DataSourceOptions options, Configuration conf) { + private Table getTableAndResolveHadoopConfiguration( + DataSourceOptions options,
Configuration conf) { // Overwrite configurations from the Spark Context with configurations from the options. mergeIcebergHadoopConfs(conf, options.asMap()); Table table = findTable(options, conf); diff --git a/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java b/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java index 4a1940e23d..b292aa6201 100644 --- a/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java +++ b/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.OptionalLong; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -43,6 +44,7 @@ import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.hadoop.HadoopFileIO; import org.apache.iceberg.hadoop.Util; +import org.apache.iceberg.hiveberg.LegacyHiveTable; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.orc.OrcRowFilterUtils; @@ -78,7 +80,7 @@ import static org.apache.iceberg.TableProperties.DEFAULT_NAME_MAPPING; -public class Reader implements DataSourceReader, SupportsScanColumnarBatch, SupportsPushDownFilters, +class Reader implements DataSourceReader, SupportsScanColumnarBatch, SupportsPushDownFilters, SupportsPushDownRequiredColumns, SupportsReportStatistics { private static final Logger LOG = LoggerFactory.getLogger(Reader.class); @@ -110,7 +112,7 @@ public class Reader implements DataSourceReader, SupportsScanColumnarBatch, Supp private List<CombinedScanTask> tasks = null; // lazy cache of tasks private Boolean readUsingBatch = null; - protected Reader(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager, + Reader(Table table, Broadcast<FileIO> io, Broadcast<EncryptionManager> encryptionManager, boolean caseSensitive, DataSourceOptions options) { this.table = table; this.snapshotId = options.get(SparkReadOptions.SNAPSHOT_ID).map(Long::parseLong).orElse(null); @@ -200,14 +202,14 @@ private Schema lazySchema() { return schema; } - protected Expression filterExpression() { + private Expression filterExpression() { if (filterExpressions != null) { return filterExpressions.stream().reduce(Expressions.alwaysTrue(), Expressions::and); } return Expressions.alwaysTrue(); } - protected StructType lazyType() { + private StructType lazyType() { if (type == null) { Preconditions.checkArgument(readTimestampWithoutZone || !hasTimestampWithoutZone(lazySchema()), "Spark does not support timestamp without time zone fields"); @@ -308,6 +310,11 @@ public void pruneColumns(StructType newRequestedSchema) { @Override public Statistics estimateStatistics() { + if (table instanceof LegacyHiveTable) { + // We currently don't have reliable stats for Hive tables + return EMPTY_STATS; + } + // its a fresh table, no data if (table.currentSnapshot() == null) { return new Stats(0L, 0L); @@ -333,6 +340,18 @@ public Statistics estimateStatistics() { return new Stats(sizeInBytes, numRows); } + private static final Statistics EMPTY_STATS = new Statistics() { + @Override + public OptionalLong sizeInBytes() { + return OptionalLong.empty(); + } + + @Override + public OptionalLong numRows() { + return OptionalLong.empty(); + } + }; + @Override public boolean enableBatchRead() { if (readUsingBatch == null) { @@ -383,7 +402,7 @@ private static void mergeIcebergHadoopConfs( .forEach(key -> baseConf.set(key.replaceFirst("hadoop.", ""), options.get(key))); } - protected List<CombinedScanTask> tasks() { + private List<CombinedScanTask> tasks() {
if (tasks == null) { TableScan scan = table .newScan() @@ -569,8 +588,4 @@ private static class BatchReader extends BatchDataReader implements InputPartiti super(task, expectedSchema, nameMapping, io, encryptionManager, caseSensitive, size); } } - - public Table getTable() { - return table; - } } diff --git a/spark2/src/main/java/org/apache/iceberg/spark/source/Stats.java b/spark2/src/main/java/org/apache/iceberg/spark/source/Stats.java index afeb17feb3..76119c1869 100644 --- a/spark2/src/main/java/org/apache/iceberg/spark/source/Stats.java +++ b/spark2/src/main/java/org/apache/iceberg/spark/source/Stats.java @@ -22,11 +22,11 @@ import java.util.OptionalLong; import org.apache.spark.sql.sources.v2.reader.Statistics; -public class Stats implements Statistics { +class Stats implements Statistics { private final OptionalLong sizeInBytes; private final OptionalLong numRows; - public Stats(long sizeInBytes, long numRows) { + Stats(long sizeInBytes, long numRows) { this.sizeInBytes = OptionalLong.of(sizeInBytes); this.numRows = OptionalLong.of(numRows); } From 36dae56214356fc7a9591a4e62b28abb41f8e118 Mon Sep 17 00:00:00 2001 From: Wenye Zhang Date: Wed, 2 Mar 2022 16:03:55 -0800 Subject: [PATCH 5/5] rename hiveberg module to hivelink --- build.gradle | 6 +++--- .../java/org/apache/iceberg/hivelink}/DirectoryInfo.java | 2 +- .../java/org/apache/iceberg/hivelink}/FileSystemUtils.java | 2 +- .../java/org/apache/iceberg/hivelink}/HiveExpressions.java | 2 +- .../iceberg/hivelink}/HiveSchemaWithPartnerVisitor.java | 2 +- .../org/apache/iceberg/hivelink}/HiveTypeToIcebergType.java | 2 +- .../java/org/apache/iceberg/hivelink}/HiveTypeUtil.java | 2 +- .../org/apache/iceberg/hivelink}/LegacyHiveCatalog.java | 6 +++--- .../java/org/apache/iceberg/hivelink}/LegacyHiveTable.java | 2 +- .../apache/iceberg/hivelink}/LegacyHiveTableOperations.java | 2 +- .../org/apache/iceberg/hivelink}/LegacyHiveTableScan.java | 2 +- .../org/apache/iceberg/hivelink}/LegacyHiveTableUtils.java | 2 +- .../apache/iceberg/hivelink}/MergeHiveSchemaWithAvro.java | 2 +- .../org/apache/iceberg/hivelink}/HiveMetastoreTest.java | 4 ++-- .../java/org/apache/iceberg/hivelink}/ScriptRunner.java | 4 ++-- .../org/apache/iceberg/hivelink}/TestHiveExpressions.java | 4 ++-- .../org/apache/iceberg/hivelink}/TestHiveMetastore.java | 4 ++-- .../apache/iceberg/hivelink}/TestHiveSchemaConversions.java | 2 +- .../apache/iceberg/hivelink}/TestLegacyHiveTableScan.java | 2 +- .../iceberg/hivelink}/TestMergeHiveSchemaWithAvro.java | 2 +- .../src/test/resources/hive-schema-3.1.0.derby.sql | 0 settings.gradle | 4 ++-- .../main/java/org/apache/iceberg/spark/source/Reader.java | 2 +- .../org/apache/iceberg/spark/source/SparkBatchScan.java | 2 +- 24 files changed, 32 insertions(+), 32 deletions(-) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/DirectoryInfo.java (97%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/FileSystemUtils.java (98%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/HiveExpressions.java (99%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/HiveSchemaWithPartnerVisitor.java (99%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/HiveTypeToIcebergType.java (99%) rename 
{hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/HiveTypeUtil.java (98%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/LegacyHiveCatalog.java (96%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/LegacyHiveTable.java (99%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/LegacyHiveTableOperations.java (99%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/LegacyHiveTableScan.java (99%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/LegacyHiveTableUtils.java (99%) rename {hiveberg/src/main/java/org/apache/iceberg/hiveberg => hivelink/src/main/java/org/apache/iceberg/hivelink}/MergeHiveSchemaWithAvro.java (99%) rename {hiveberg/src/test/java/org/apache/iceberg/hiveberg => hivelink/src/test/java/org/apache/iceberg/hivelink}/HiveMetastoreTest.java (97%) rename {hiveberg/src/test/java/org/apache/iceberg/hiveberg => hivelink/src/test/java/org/apache/iceberg/hivelink}/ScriptRunner.java (99%) rename {hiveberg/src/test/java/org/apache/iceberg/hiveberg => hivelink/src/test/java/org/apache/iceberg/hivelink}/TestHiveExpressions.java (98%) rename {hiveberg/src/test/java/org/apache/iceberg/hiveberg => hivelink/src/test/java/org/apache/iceberg/hivelink}/TestHiveMetastore.java (99%) rename {hiveberg/src/test/java/org/apache/iceberg/hiveberg => hivelink/src/test/java/org/apache/iceberg/hivelink}/TestHiveSchemaConversions.java (99%) rename {hiveberg/src/test/java/org/apache/iceberg/hiveberg => hivelink/src/test/java/org/apache/iceberg/hivelink}/TestLegacyHiveTableScan.java (99%) rename {hiveberg/src/test/java/org/apache/iceberg/hiveberg => hivelink/src/test/java/org/apache/iceberg/hivelink}/TestMergeHiveSchemaWithAvro.java (99%) rename {hiveberg => hivelink}/src/test/resources/hive-schema-3.1.0.derby.sql (100%) diff --git a/build.gradle b/build.gradle index 577627d7df..f29199298a 100644 --- a/build.gradle +++ b/build.gradle @@ -495,7 +495,7 @@ project(':iceberg-hive-metastore') { } } -project(':iceberg-hiveberg') { +project(':iceberg-hivelink') { dependencies { compile project(':iceberg-hive-metastore') @@ -872,7 +872,7 @@ if (jdkVersion == '8') { compile project(':iceberg-arrow') compile project(':iceberg-hive-metastore') compile project(':iceberg-spark') - compile project(':iceberg-hiveberg') + compile project(':iceberg-hivelink') compileOnly "org.apache.avro:avro" compileOnly("org.apache.spark:spark-hive_2.11") { @@ -982,7 +982,7 @@ project(':iceberg-spark3') { compile project(':iceberg-arrow') compile project(':iceberg-hive-metastore') compile project(':iceberg-spark') - compile project(':iceberg-hiveberg') + compile project(':iceberg-hivelink') compileOnly "org.apache.avro:avro" compileOnly("org.apache.spark:spark-hive_2.12") { diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/DirectoryInfo.java similarity index 97% rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java rename to hivelink/src/main/java/org/apache/iceberg/hivelink/DirectoryInfo.java index 75d7388ebb..aea050f1ed 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/DirectoryInfo.java +++ 
b/hivelink/src/main/java/org/apache/iceberg/hivelink/DirectoryInfo.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iceberg.hiveberg; +package org.apache.iceberg.hivelink; import org.apache.iceberg.FileFormat; import org.apache.iceberg.StructLike; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/FileSystemUtils.java similarity index 98% rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java rename to hivelink/src/main/java/org/apache/iceberg/hivelink/FileSystemUtils.java index e67091d3d7..92f8018c3a 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/FileSystemUtils.java +++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/FileSystemUtils.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iceberg.hiveberg; +package org.apache.iceberg.hivelink; import java.io.IOException; import java.io.UncheckedIOException; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/HiveExpressions.java similarity index 99% rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java rename to hivelink/src/main/java/org/apache/iceberg/hivelink/HiveExpressions.java index 2d3eb4ead0..a90c55cd7b 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveExpressions.java +++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/HiveExpressions.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iceberg.hiveberg; +package org.apache.iceberg.hivelink; import java.time.Instant; import java.time.OffsetDateTime; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/HiveSchemaWithPartnerVisitor.java similarity index 99% rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java rename to hivelink/src/main/java/org/apache/iceberg/hivelink/HiveSchemaWithPartnerVisitor.java index 2cc63a70dd..8317ebc0c3 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveSchemaWithPartnerVisitor.java +++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/HiveSchemaWithPartnerVisitor.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iceberg.hiveberg; +package org.apache.iceberg.hivelink; import java.util.List; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/HiveTypeToIcebergType.java similarity index 99% rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java rename to hivelink/src/main/java/org/apache/iceberg/hivelink/HiveTypeToIcebergType.java index fd9403eda8..686221a496 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeToIcebergType.java +++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/HiveTypeToIcebergType.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.iceberg.hiveberg; +package org.apache.iceberg.hivelink; import java.util.List; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/HiveTypeUtil.java similarity index 98% rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java rename to hivelink/src/main/java/org/apache/iceberg/hivelink/HiveTypeUtil.java index c58c30360b..e2f3770282 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/HiveTypeUtil.java +++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/HiveTypeUtil.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iceberg.hiveberg; +package org.apache.iceberg.hivelink; import java.util.List; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveCatalog.java similarity index 96% rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java rename to hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveCatalog.java index dd8bb9f1ce..2206f9d543 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveCatalog.java +++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveCatalog.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.iceberg.hiveberg; +package org.apache.iceberg.hivelink; import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; @@ -44,14 +44,14 @@ public class LegacyHiveCatalog extends HiveCatalog { private static final Logger LOG = LoggerFactory.getLogger(LegacyHiveCatalog.class); - // Hiveberg refactoring: this is moved from HiveCatalogs + // hivelink refactoring: this is moved from HiveCatalogs private static final Cache<String, HiveCatalog> LEGACY_CATALOG_CACHE = Caffeine.newBuilder().build(); public LegacyHiveCatalog(Configuration conf) { super(conf); } - // Hiveberg refactoring: this is moved from HiveCatalogs + // hivelink refactoring: this is moved from HiveCatalogs public static HiveCatalog loadLegacyCatalog(Configuration conf) { // metastore URI can be null in local mode String metastoreUri = conf.get(HiveConf.ConfVars.METASTOREURIS.varname, ""); diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTable.java similarity index 99% rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java rename to hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTable.java index c05722a1af..a6550535ee 100644 --- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTable.java +++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTable.java @@ -17,7 +17,7 @@ * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import java.util.List;
 import java.util.Map;
diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTableOperations.java
similarity index 99%
rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java
rename to hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTableOperations.java
index 0ca64c6245..952108ea3d 100644
--- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableOperations.java
+++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTableOperations.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import java.time.LocalDate;
 import java.time.LocalDateTime;
diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTableScan.java
similarity index 99%
rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java
rename to hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTableScan.java
index 752d676dc9..826af746e3 100644
--- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableScan.java
+++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTableScan.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import org.apache.iceberg.BaseFileScanTask;
 import org.apache.iceberg.DataTableScan;
diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTableUtils.java
similarity index 99%
rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java
rename to hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTableUtils.java
index c04667d07a..474852d501 100644
--- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/LegacyHiveTableUtils.java
+++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/LegacyHiveTableUtils.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import java.util.HashMap;
 import java.util.List;
diff --git a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java b/hivelink/src/main/java/org/apache/iceberg/hivelink/MergeHiveSchemaWithAvro.java
similarity index 99%
rename from hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java
rename to hivelink/src/main/java/org/apache/iceberg/hivelink/MergeHiveSchemaWithAvro.java
index 67bb7f1fa7..ace4f6d8ed 100644
--- a/hiveberg/src/main/java/org/apache/iceberg/hiveberg/MergeHiveSchemaWithAvro.java
+++ b/hivelink/src/main/java/org/apache/iceberg/hivelink/MergeHiveSchemaWithAvro.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import java.util.ArrayList;
 import java.util.List;
diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java b/hivelink/src/test/java/org/apache/iceberg/hivelink/HiveMetastoreTest.java
similarity index 97%
rename from hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java
rename to hivelink/src/test/java/org/apache/iceberg/hivelink/HiveMetastoreTest.java
index 251eee6646..6d3db19166 100644
--- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/HiveMetastoreTest.java
+++ b/hivelink/src/test/java/org/apache/iceberg/hivelink/HiveMetastoreTest.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import java.util.HashMap;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -29,7 +29,7 @@
 
 
 /**
- * Hiveberg refactoring:
+ * hivelink refactoring:
  * This class is copied from iceberg-hive-metastore module test code
  */
 public abstract class HiveMetastoreTest {
diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java b/hivelink/src/test/java/org/apache/iceberg/hivelink/ScriptRunner.java
similarity index 99%
rename from hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java
rename to hivelink/src/test/java/org/apache/iceberg/hivelink/ScriptRunner.java
index 09a4979ee5..512fbb3b70 100644
--- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/ScriptRunner.java
+++ b/hivelink/src/test/java/org/apache/iceberg/hivelink/ScriptRunner.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import java.io.IOException;
 import java.io.LineNumberReader;
@@ -31,7 +31,7 @@
 
 
 /**
- * Hiveberg refactoring:
+ * hivelink refactoring:
  * This class is copied from iceberg-hive-metastore module test code
  */
 public class ScriptRunner {
diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestHiveExpressions.java
similarity index 98%
rename from hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java
rename to hivelink/src/test/java/org/apache/iceberg/hivelink/TestHiveExpressions.java
index df52a32bc3..4fcacc8472 100644
--- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveExpressions.java
+++ b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestHiveExpressions.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import org.apache.iceberg.expressions.Expression;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
@@ -35,7 +35,7 @@
 import static org.apache.iceberg.expressions.Expressions.notIn;
 import static org.apache.iceberg.expressions.Expressions.notNull;
 import static org.apache.iceberg.expressions.Expressions.or;
-import static org.apache.iceberg.hiveberg.HiveExpressions.simplifyPartitionFilter;
+import static org.apache.iceberg.hivelink.HiveExpressions.simplifyPartitionFilter;
 
 public class TestHiveExpressions {
 
diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestHiveMetastore.java
similarity index 99%
rename from hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java
rename to hivelink/src/test/java/org/apache/iceberg/hivelink/TestHiveMetastore.java
index 78c5c8872c..3452793e48 100644
--- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveMetastore.java
+++ b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestHiveMetastore.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import java.io.File;
 import java.io.IOException;
@@ -53,7 +53,7 @@
 import static java.nio.file.attribute.PosixFilePermissions.fromString;
 
 /**
- * Hiveberg refactoring:
+ * hivelink refactoring:
  * This class is copied from iceberg-hive-metastore module test code
  */
 public class TestHiveMetastore {
diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestHiveSchemaConversions.java
similarity index 99%
rename from hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java
rename to hivelink/src/test/java/org/apache/iceberg/hivelink/TestHiveSchemaConversions.java
index 38132d4495..60fe895324 100644
--- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestHiveSchemaConversions.java
+++ b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestHiveSchemaConversions.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import java.util.List;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestLegacyHiveTableScan.java
similarity index 99%
rename from hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java
rename to hivelink/src/test/java/org/apache/iceberg/hivelink/TestLegacyHiveTableScan.java
index 5fde7b69fe..2f748c8728 100644
--- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestLegacyHiveTableScan.java
+++ b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestLegacyHiveTableScan.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import java.io.File;
 import java.io.IOException;
diff --git a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestMergeHiveSchemaWithAvro.java
similarity index 99%
rename from hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java
rename to hivelink/src/test/java/org/apache/iceberg/hivelink/TestMergeHiveSchemaWithAvro.java
index 67e865b59d..ddfac388cf 100644
--- a/hiveberg/src/test/java/org/apache/iceberg/hiveberg/TestMergeHiveSchemaWithAvro.java
+++ b/hivelink/src/test/java/org/apache/iceberg/hivelink/TestMergeHiveSchemaWithAvro.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.iceberg.hiveberg;
+package org.apache.iceberg.hivelink;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
diff --git a/hiveberg/src/test/resources/hive-schema-3.1.0.derby.sql b/hivelink/src/test/resources/hive-schema-3.1.0.derby.sql
similarity index 100%
rename from hiveberg/src/test/resources/hive-schema-3.1.0.derby.sql
rename to hivelink/src/test/resources/hive-schema-3.1.0.derby.sql
diff --git a/settings.gradle b/settings.gradle
index bbae341730..d6f0fb9a69 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -37,7 +37,7 @@ include 'spark3-extensions'
 include 'spark3-runtime'
 include 'pig'
 include 'hive-metastore'
-include 'hiveberg'
+include 'hivelink'
 include 'nessie'
 include 'runtime'
 
@@ -60,7 +60,7 @@ project(':spark3-extensions').name = 'iceberg-spark3-extensions'
 project(':spark3-runtime').name = 'iceberg-spark3-runtime'
 project(':pig').name = 'iceberg-pig'
 project(':hive-metastore').name = 'iceberg-hive-metastore'
-project(':hiveberg').name = 'iceberg-hiveberg'
+project(':hivelink').name = 'iceberg-hivelink'
 project(':nessie').name = 'iceberg-nessie'
 project(':runtime').name = 'iceberg-runtime'
 
diff --git a/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java b/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java
index b292aa6201..2ae355c09f 100644
--- a/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java
+++ b/spark2/src/main/java/org/apache/iceberg/spark/source/Reader.java
@@ -44,7 +44,7 @@
 import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.hadoop.HadoopFileIO;
 import org.apache.iceberg.hadoop.Util;
-import org.apache.iceberg.hiveberg.LegacyHiveTable;
+import org.apache.iceberg.hivelink.LegacyHiveTable;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.orc.OrcRowFilterUtils;
diff --git a/spark3/src/main/java/org/apache/iceberg/spark/source/SparkBatchScan.java b/spark3/src/main/java/org/apache/iceberg/spark/source/SparkBatchScan.java
index 1cf16886eb..8df6004b9c 100644
--- a/spark3/src/main/java/org/apache/iceberg/spark/source/SparkBatchScan.java
+++ b/spark3/src/main/java/org/apache/iceberg/spark/source/SparkBatchScan.java
@@ -37,7 +37,7 @@
 import org.apache.iceberg.expressions.Expression;
 import org.apache.iceberg.hadoop.HadoopInputFile;
 import org.apache.iceberg.hadoop.Util;
-import org.apache.iceberg.hiveberg.LegacyHiveTable;
+import org.apache.iceberg.hivelink.LegacyHiveTable;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.orc.OrcRowFilterUtils;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;