From d1d465e8a9b4459e35903e6f3fb4fac56973508a Mon Sep 17 00:00:00 2001 From: David Phillips Date: Wed, 22 Mar 2023 13:53:19 -0400 Subject: [PATCH 1/2] Fix formatting and simplify condition in HiveMetadata --- .../io/trino/plugin/hive/HiveMetadata.java | 47 ++++++++++--------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index b39428bcd7d2..395736172fd7 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -804,35 +804,38 @@ public Map> listTableColumns(ConnectorSess throw new UnsupportedOperationException("The deprecated listTableColumns is not supported because streamTableColumns is implemented instead"); } - @SuppressWarnings("TryWithIdenticalCatches") @Override public Iterator streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix) { requireNonNull(prefix, "prefix is null"); - return listTables(session, prefix).stream().flatMap(tableName -> { - try { - if (redirectTable(session, tableName).isPresent()) { - return Stream.of(TableColumnsMetadata.forRedirectedTable(tableName)); - } - return Stream.of(TableColumnsMetadata.forTable(tableName, getTableMetadata(session, tableName).getColumns())); - } - catch (HiveViewNotSupportedException e) { - // view is not supported - return Stream.empty(); - } - catch (TableNotFoundException e) { - // table disappeared during listing operation - return Stream.empty(); + return listTables(session, prefix).stream() + .flatMap(tableName -> streamTableColumns(session, tableName)) + .iterator(); + } + + private Stream streamTableColumns(ConnectorSession session, SchemaTableName tableName) + { + try { + if (redirectTable(session, tableName).isPresent()) { + return Stream.of(TableColumnsMetadata.forRedirectedTable(tableName)); } - catch (TrinoException e) { - // Skip this table if there's a failure due to Hive, a bad Serde, or bad metadata - if (!e.getErrorCode().getType().equals(ErrorType.EXTERNAL)) { - throw e; - } + return Stream.of(TableColumnsMetadata.forTable(tableName, getTableMetadata(session, tableName).getColumns())); + } + catch (HiveViewNotSupportedException e) { + // view is not supported + return Stream.empty(); + } + catch (TableNotFoundException e) { + // table disappeared during listing operation + return Stream.empty(); + } + catch (TrinoException e) { + // Skip this table if there's a failure due to Hive, a bad Serde, or bad metadata + if (e.getErrorCode().getType() == ErrorType.EXTERNAL) { return Stream.empty(); } - }) - .iterator(); + throw e; + } } @Override From 79ffeeca0a3b30fe7d130fe0ca7fdfba8b8d392f Mon Sep 17 00:00:00 2001 From: David Phillips Date: Wed, 22 Mar 2023 13:50:11 -0400 Subject: [PATCH 2/2] Skip listing Glue tables with invalid column types --- .../glue/converter/GlueToTrinoConverter.java | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java index 4c8639d6dd4a..e6c2307846a7 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/converter/GlueToTrinoConverter.java @@ -31,6 +31,7 @@ import io.trino.plugin.hive.util.HiveBucketing; import io.trino.plugin.hive.util.HiveBucketing.BucketingVersion; import io.trino.spi.TrinoException; +import io.trino.spi.connector.SchemaTableName; import io.trino.spi.security.PrincipalType; import org.gaul.modernizer_maven_annotations.SuppressModernizer; @@ -92,10 +93,12 @@ public static Database convertDatabase(com.amazonaws.services.glue.model.Databas public static Table convertTable(com.amazonaws.services.glue.model.Table glueTable, String dbName) { + SchemaTableName table = new SchemaTableName(dbName, glueTable.getName()); + Map tableParameters = convertParameters(glueTable.getParameters()); Table.Builder tableBuilder = Table.builder() - .setDatabaseName(dbName) - .setTableName(glueTable.getName()) + .setDatabaseName(table.getSchemaName()) + .setTableName(table.getTableName()) .setOwner(Optional.ofNullable(glueTable.getOwner())) .setTableType(getTableType(glueTable)) .setParameters(tableParameters) @@ -112,11 +115,11 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab } else { if (sd == null) { - throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Table StorageDescriptor is null for table %s.%s (%s)", dbName, glueTable.getName(), glueTable)); + throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Table StorageDescriptor is null for table '%s' %s".formatted(table, glueTable)); } - tableBuilder.setDataColumns(convertColumns(sd.getColumns(), sd.getSerdeInfo().getSerializationLibrary())); + tableBuilder.setDataColumns(convertColumns(table, sd.getColumns(), sd.getSerdeInfo().getSerializationLibrary())); if (glueTable.getPartitionKeys() != null) { - tableBuilder.setPartitionColumns(convertColumns(glueTable.getPartitionKeys(), sd.getSerdeInfo().getSerializationLibrary())); + tableBuilder.setPartitionColumns(convertColumns(table, glueTable.getPartitionKeys(), sd.getSerdeInfo().getSerializationLibrary())); } else { tableBuilder.setPartitionColumns(ImmutableList.of()); @@ -128,7 +131,7 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab return tableBuilder.build(); } - private static Column convertColumn(com.amazonaws.services.glue.model.Column glueColumn, String serde) + private static Column convertColumn(SchemaTableName table, com.amazonaws.services.glue.model.Column glueColumn, String serde) { // OpenCSVSerde deserializes columns from csv file into strings, so we set the column type from the metastore // to string to avoid cast exceptions. @@ -136,12 +139,22 @@ private static Column convertColumn(com.amazonaws.services.glue.model.Column glu //TODO(https://github.com/trinodb/trino/issues/7240) Add tests return new Column(glueColumn.getName(), HiveType.HIVE_STRING, Optional.ofNullable(glueColumn.getComment())); } - return new Column(glueColumn.getName(), HiveType.valueOf(glueColumn.getType().toLowerCase(Locale.ENGLISH)), Optional.ofNullable(glueColumn.getComment())); + return new Column(glueColumn.getName(), convertType(table, glueColumn), Optional.ofNullable(glueColumn.getComment())); + } + + private static HiveType convertType(SchemaTableName table, com.amazonaws.services.glue.model.Column column) + { + try { + return HiveType.valueOf(column.getType().toLowerCase(Locale.ENGLISH)); + } + catch (IllegalArgumentException e) { + throw new TrinoException(HIVE_INVALID_METADATA, "Glue table '%s' column '%s' has invalid data type: %s".formatted(table, column.getName(), column.getType())); + } } - private static List convertColumns(List glueColumns, String serde) + private static List convertColumns(SchemaTableName table, List glueColumns, String serde) { - return mappedCopy(glueColumns, glueColumn -> convertColumn(glueColumn, serde)); + return mappedCopy(glueColumns, glueColumn -> convertColumn(table, glueColumn, serde)); } private static Map convertParameters(Map parameters) @@ -178,7 +191,9 @@ public GluePartitionConverter(Table table) this.databaseName = requireNonNull(table.getDatabaseName(), "databaseName is null"); this.tableName = requireNonNull(table.getTableName(), "tableName is null"); this.tableParameters = convertParameters(table.getParameters()); - this.columnsConverter = memoizeLast(glueColumns -> convertColumns(glueColumns, + this.columnsConverter = memoizeLast(glueColumns -> convertColumns( + table.getSchemaTableName(), + glueColumns, table.getStorage().getStorageFormat().getSerde())); }