From 8e0261d28ea35536a876140c903de1006e6d9ade Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Tue, 31 Jan 2023 16:33:45 +0100 Subject: [PATCH] Fix NPE when Glue table has no type `Glue.getTableType` is nullable. --- .mvn/modernizer/violations.xml | 6 ++++++ .../metastore/glue/GlueHiveMetastore.java | 5 +++-- .../glue/converter/GlueToTrinoConverter.java | 19 ++++++++++++++++-- .../glue/TestGlueToTrinoConverter.java | 5 +++-- .../glue/GlueIcebergTableOperations.java | 3 ++- .../catalog/glue/TrinoGlueCatalog.java | 20 ++++++++++--------- ...estDatabricksWithGlueMetastoreCleanUp.java | 3 ++- 7 files changed, 44 insertions(+), 17 deletions(-) diff --git a/.mvn/modernizer/violations.xml b/.mvn/modernizer/violations.xml index ebe585ac7b3e..58a5e0589c9c 100644 --- a/.mvn/modernizer/violations.xml +++ b/.mvn/modernizer/violations.xml @@ -140,6 +140,12 @@ Use AssertJ's assertThatThrownBy, see https://github.com/trinodb/trino/issues/5320 for rationale + + com/amazonaws/services/glue/model/Table.getTableType:()Ljava/lang/String; + 1.1 + Table type is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getTableType + + org/apache/hadoop/mapred/JobConf."<init>":()V 1.1 diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index ef2ddd51a8bf..3200d3dd50e5 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -151,6 +151,7 @@ import static io.trino.plugin.hive.metastore.MetastoreUtil.verifyCanDropColumn; import static io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults; import static io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition; +import static io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.getTableTypeNullable; import static io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.mappedCopy; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters; @@ -177,7 +178,7 @@ public class GlueHiveMetastore private static final int BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE = 100; private static final int AWS_GLUE_GET_PARTITIONS_MAX_RESULTS = 1000; private static final Comparator> PARTITION_VALUE_COMPARATOR = lexicographical(String.CASE_INSENSITIVE_ORDER); - private static final Predicate VIEWS_FILTER = table -> VIRTUAL_VIEW.name().equals(table.getTableType()); + private static final Predicate VIEWS_FILTER = table -> VIRTUAL_VIEW.name().equals(getTableTypeNullable(table)); private final HdfsEnvironment hdfsEnvironment; private final HdfsContext hdfsContext; @@ -703,7 +704,7 @@ private TableInput convertGlueTableToTableInput(com.amazonaws.services.glue.mode .withPartitionKeys(glueTable.getPartitionKeys()) .withViewOriginalText(glueTable.getViewOriginalText()) .withViewExpandedText(glueTable.getViewExpandedText()) - .withTableType(glueTable.getTableType()) + .withTableType(getTableTypeNullable(glueTable)) .withTargetTable(glueTable.getTargetTable()) .withParameters(glueTable.getParameters()); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java index 3fe2679e8385..4c8639d6dd4a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java @@ -32,6 +32,9 @@ import io.trino.plugin.hive.util.HiveBucketing.BucketingVersion; import io.trino.spi.TrinoException; import io.trino.spi.security.PrincipalType; +import org.gaul.modernizer_maven_annotations.SuppressModernizer; + +import javax.annotation.Nullable; import java.util.List; import java.util.Locale; @@ -59,6 +62,19 @@ public final class GlueToTrinoConverter private GlueToTrinoConverter() {} + public static String getTableType(com.amazonaws.services.glue.model.Table glueTable) + { + // Athena treats missing table type as EXTERNAL_TABLE. + return firstNonNull(getTableTypeNullable(glueTable), EXTERNAL_TABLE.name()); + } + + @Nullable + @SuppressModernizer // Usage of `Table.getTableType` is not allowed. Only this method can call that. + public static String getTableTypeNullable(com.amazonaws.services.glue.model.Table glueTable) + { + return glueTable.getTableType(); + } + public static Database convertDatabase(com.amazonaws.services.glue.model.Database glueDb) { return Database.builder() @@ -81,8 +97,7 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab .setDatabaseName(dbName) .setTableName(glueTable.getName()) .setOwner(Optional.ofNullable(glueTable.getOwner())) - // Athena treats missing table type as EXTERNAL_TABLE. - .setTableType(firstNonNull(glueTable.getTableType(), EXTERNAL_TABLE.name())) + .setTableType(getTableType(glueTable)) .setParameters(tableParameters) .setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText())) .setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText())); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueToTrinoConverter.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueToTrinoConverter.java index e2f9b074ec76..93f2ac929eaf 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueToTrinoConverter.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestGlueToTrinoConverter.java @@ -41,6 +41,7 @@ import static io.trino.plugin.hive.metastore.glue.TestingMetastoreObjects.getGlueTestPartition; import static io.trino.plugin.hive.metastore.glue.TestingMetastoreObjects.getGlueTestStorageDescriptor; import static io.trino.plugin.hive.metastore.glue.TestingMetastoreObjects.getGlueTestTable; +import static io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.getTableTypeNullable; import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER; import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME; import static io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE; @@ -93,7 +94,7 @@ public void testConvertTable() io.trino.plugin.hive.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); assertEquals(trinoTable.getTableName(), testTable.getName()); assertEquals(trinoTable.getDatabaseName(), testDatabase.getName()); - assertEquals(trinoTable.getTableType(), testTable.getTableType()); + assertEquals(trinoTable.getTableType(), getTableTypeNullable(testTable)); assertEquals(trinoTable.getOwner().orElse(null), testTable.getOwner()); assertEquals(trinoTable.getParameters(), testTable.getParameters()); assertColumnList(trinoTable.getDataColumns(), testTable.getStorageDescriptor().getColumns()); @@ -114,7 +115,7 @@ public void testConvertTableWithOpenCSVSerDe() assertEquals(trinoTable.getTableName(), glueTable.getName()); assertEquals(trinoTable.getDatabaseName(), testDatabase.getName()); - assertEquals(trinoTable.getTableType(), glueTable.getTableType()); + assertEquals(trinoTable.getTableType(), getTableTypeNullable(glueTable)); assertEquals(trinoTable.getOwner().orElse(null), glueTable.getOwner()); assertEquals(trinoTable.getParameters(), glueTable.getParameters()); assertEquals(trinoTable.getDataColumns().size(), 1); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java index 19848e7237fb..e9a70be488d2 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java @@ -44,6 +44,7 @@ import static com.google.common.base.Verify.verify; import static io.trino.plugin.hive.ViewReaderUtil.isHiveOrPrestoView; import static io.trino.plugin.hive.ViewReaderUtil.isPrestoView; +import static io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.getTableType; import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA; import static io.trino.plugin.iceberg.catalog.glue.GlueIcebergUtil.getTableInput; @@ -83,7 +84,7 @@ protected String getRefreshedLocation(boolean invalidateCaches) glueVersionId = table.getVersionId(); Map parameters = firstNonNull(table.getParameters(), ImmutableMap.of()); - if (isPrestoView(parameters) && isHiveOrPrestoView(table.getTableType())) { + if (isPrestoView(parameters) && isHiveOrPrestoView(getTableType(table))) { // this is a Presto Hive view, hence not a table throw new TableNotFoundException(getSchemaTableName()); } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java index 3d98a4ad2fcb..0ea382bf541a 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java @@ -91,6 +91,8 @@ import static io.trino.plugin.hive.ViewReaderUtil.isPrestoView; import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView; import static io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults; +import static io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.getTableType; +import static io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.getTableTypeNullable; import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; @@ -464,7 +466,7 @@ private Optional getTable(ConnectorSess LOG.warn(e, "Failed to cache table metadata from table at %s", metadataLocation); } } - else if (isTrinoMaterializedView(table.getTableType(), parameters)) { + else if (isTrinoMaterializedView(getTableType(table), parameters)) { if (viewCache.containsKey(schemaTableName) || tableMetadataCache.containsKey(schemaTableName)) { throw new TrinoException(GENERIC_INTERNAL_ERROR, "Glue table cache inconsistency. Materialized View cannot also be a table or view"); } @@ -485,7 +487,7 @@ else if (isPrestoView(parameters) && !viewCache.containsKey(schemaTableName)) { try { TrinoViewUtil.getView(schemaTableName, Optional.ofNullable(table.getViewOriginalText()), - table.getTableType(), + getTableType(table), parameters, Optional.ofNullable(table.getOwner())) .ifPresent(viewDefinition -> viewCache.put(schemaTableName, viewDefinition)); @@ -708,7 +710,7 @@ public Optional getView(ConnectorSession session, Schem return TrinoViewUtil.getView( viewName, Optional.ofNullable(viewDefinition.getViewOriginalText()), - viewDefinition.getTableType(), + getTableType(viewDefinition), firstNonNull(viewDefinition.getParameters(), ImmutableMap.of()), Optional.ofNullable(viewDefinition.getOwner())); } @@ -784,7 +786,7 @@ public List listMaterializedViews(ConnectorSession session, Opt stats.getGetTables()) .map(GetTablesResult::getTableList) .flatMap(List::stream) - .filter(table -> isTrinoMaterializedView(table.getTableType(), firstNonNull(table.getParameters(), ImmutableMap.of()))) + .filter(table -> isTrinoMaterializedView(getTableType(table), firstNonNull(table.getParameters(), ImmutableMap.of()))) .map(table -> new SchemaTableName(glueNamespace, table.getName())); } catch (EntityNotFoundException e) { @@ -810,7 +812,7 @@ public void createMaterializedView( Optional existing = getTable(session, viewName); if (existing.isPresent()) { - if (!isTrinoMaterializedView(existing.get().getTableType(), firstNonNull(existing.get().getParameters(), ImmutableMap.of()))) { + if (!isTrinoMaterializedView(getTableType(existing.get()), firstNonNull(existing.get().getParameters(), ImmutableMap.of()))) { throw new TrinoException(UNSUPPORTED_TABLE_TYPE, "Existing table is not a Materialized View: " + viewName); } if (!replace) { @@ -862,7 +864,7 @@ public void dropMaterializedView(ConnectorSession session, SchemaTableName viewN com.amazonaws.services.glue.model.Table view = getTable(session, viewName) .orElseThrow(() -> new MaterializedViewNotFoundException(viewName)); - if (!isTrinoMaterializedView(view.getTableType(), firstNonNull(view.getParameters(), ImmutableMap.of()))) { + if (!isTrinoMaterializedView(getTableType(view), firstNonNull(view.getParameters(), ImmutableMap.of()))) { throw new TrinoException(UNSUPPORTED_TABLE_TYPE, "Not a Materialized View: " + view.getDatabaseName() + "." + view.getName()); } materializedViewCache.remove(viewName); @@ -905,7 +907,7 @@ protected Optional doGetMaterializedView(Co } com.amazonaws.services.glue.model.Table table = maybeTable.get(); - if (!isTrinoMaterializedView(table.getTableType(), firstNonNull(table.getParameters(), ImmutableMap.of()))) { + if (!isTrinoMaterializedView(getTableType(table), firstNonNull(table.getParameters(), ImmutableMap.of()))) { return Optional.empty(); } @@ -956,7 +958,7 @@ public void renameMaterializedView(ConnectorSession session, SchemaTableName sou com.amazonaws.services.glue.model.Table glueTable = getTable(session, source) .orElseThrow(() -> new TableNotFoundException(source)); materializedViewCache.remove(source); - if (!isTrinoMaterializedView(glueTable.getTableType(), firstNonNull(glueTable.getParameters(), ImmutableMap.of()))) { + if (!isTrinoMaterializedView(getTableType(glueTable), firstNonNull(glueTable.getParameters(), ImmutableMap.of()))) { throw new TrinoException(UNSUPPORTED_TABLE_TYPE, "Not a Materialized View: " + source); } TableInput tableInput = getMaterializedViewTableInput(target.getTableName(), glueTable.getViewOriginalText(), glueTable.getOwner(), glueTable.getParameters()); @@ -1003,7 +1005,7 @@ public Optional redirectTable(ConnectorSession session, Optional table = getTable(session, new SchemaTableName(tableNameBase.getSchemaName(), tableNameBase.getTableName())); - if (table.isEmpty() || VIRTUAL_VIEW.name().equals(table.get().getTableType())) { + if (table.isEmpty() || VIRTUAL_VIEW.name().equals(getTableTypeNullable(table.get()))) { return Optional.empty(); } if (!isIcebergTable(firstNonNull(table.get().getParameters(), ImmutableMap.of()))) { diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDatabricksWithGlueMetastoreCleanUp.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDatabricksWithGlueMetastoreCleanUp.java index e5d5ac719f4d..f728dcdb8c45 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDatabricksWithGlueMetastoreCleanUp.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDatabricksWithGlueMetastoreCleanUp.java @@ -31,6 +31,7 @@ import java.util.stream.Collectors; import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.getTableType; import static io.trino.tests.product.TestGroups.DELTA_LAKE_DATABRICKS; import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS; import static io.trino.tests.product.deltalake.util.DeltaLakeTestUtils.DATABRICKS_COMMUNICATION_FAILURE_ISSUE; @@ -78,7 +79,7 @@ private void cleanSchema(String schema, long startTime, AWSGlueAsync glueClient) Table table = glueClient.getTable(new GetTableRequest().withDatabaseName(schema).withName(tableName)).getTable(); Instant createTime = table.getCreateTime().toInstant(); if (createTime.isBefore(SCHEMA_CLEANUP_THRESHOLD)) { - if (table.getTableType() != null && table.getTableType().contains("VIEW")) { + if (getTableType(table).contains("VIEW")) { onTrino().executeQuery(format("DROP VIEW IF EXISTS %s.%s", schema, tableName)); log.info("Dropped view %s.%s", schema, tableName); }