diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAnalyzeCorruptStatistics.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAnalyzeCorruptStatistics.java index 07a697a8f058..4e4514955ccf 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAnalyzeCorruptStatistics.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAnalyzeCorruptStatistics.java @@ -71,11 +71,11 @@ private void prepareBrokenColumnStatisticsTable(String tableName) assertUpdate("CREATE TABLE " + tableName + " AS SELECT 1 col", 1); // Insert duplicated row to simulate broken column statistics status https://github.com/trinodb/trino/issues/13787 - assertEquals(onMetastore("SELECT COUNT(1) FROM TAB_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"), "1\n"); + assertEquals(onMetastore("SELECT COUNT(1) FROM TAB_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"), "1"); onMetastore("INSERT INTO TAB_COL_STATS " + "SELECT cs_id + 1, db_name, table_name, column_name, column_type, tbl_id, long_low_value, long_high_value, double_high_value, double_low_value, big_decimal_low_value, big_decimal_high_value, num_nulls, num_distincts, avg_col_len, max_col_len, num_trues, num_falses, last_analyzed " + "FROM TAB_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"); - assertEquals(onMetastore("SELECT COUNT(1) FROM TAB_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"), "2\n"); + assertEquals(onMetastore("SELECT COUNT(1) FROM TAB_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"), "2"); } private String onMetastore(@Language("SQL") String sql) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveHadoop.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveHadoop.java index a177c796147e..6682ba610d68 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveHadoop.java 
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveHadoop.java @@ -90,7 +90,7 @@ public String runOnHive(String query) public String runOnMetastore(String query) { - return executeInContainerFailOnError("mysql", "-D", "metastore", "-uroot", "-proot", "--batch", "--column-names=false", "-e", query); + return executeInContainerFailOnError("mysql", "-D", "metastore", "-uroot", "-proot", "--batch", "--column-names=false", "-e", query).replaceAll("\n$", ""); } public HostAndPort getHiveMetastoreEndpoint() diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java index f951d0c91a13..b86f8c0f3049 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java @@ -205,6 +205,7 @@ import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning; import static io.trino.plugin.iceberg.IcebergUtil.canEnforceColumnConstraintInSpecs; import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; +import static io.trino.plugin.iceberg.IcebergUtil.fileName; import static io.trino.plugin.iceberg.IcebergUtil.getColumnHandle; import static io.trino.plugin.iceberg.IcebergUtil.getColumns; import static io.trino.plugin.iceberg.IcebergUtil.getFileFormat; @@ -1271,7 +1272,7 @@ private void removeOrphanMetadataFiles(Table table, ConnectorSession session, Sc Stream.of(versionHintLocation(table))) .collect(toImmutableList()); Set validMetadataFiles = concat(manifests.stream(), manifestLists.stream(), otherMetadataFiles.stream()) - .map(IcebergMetadata::fileName) + .map(IcebergUtil::fileName) .collect(toImmutableSet()); scanAndDeleteInvalidFiles(table, session, schemaTableName, expireTimestamp, validMetadataFiles, "metadata"); } @@ -1297,11 +1298,6 @@ private void scanAndDeleteInvalidFiles(Table table, 
ConnectorSession session, Sc } } - private static String fileName(String path) - { - return path.substring(path.lastIndexOf('/') + 1); - } - @Override public Optional getInfo(ConnectorTableHandle tableHandle) { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java index 7db1246a5855..ebe65349d25d 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java @@ -76,6 +76,7 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Stream; @@ -624,4 +625,25 @@ private static void validateOrcBloomFilterColumns(ConnectorTableMetadata tableMe throw new TrinoException(INVALID_TABLE_PROPERTY, format("Orc bloom filter columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(orcBloomFilterColumns), allColumns))); } } + + public static String fixBrokenMetadataLocation(String location) + { + // Version 393-394 stored metadata location with double slash https://github.com/trinodb/trino/commit/e95fdcc7d1ec110b10977d17458e06fc4e6f217d#diff-9bbb7c0b6168f0e6b4732136f9a97f820aa082b04efb5609b6138afc118831d7R46 + // e.g. s3://bucket/db/table//metadata/00001.metadata.json + // It caused failure when accessing S3 objects https://github.com/trinodb/trino/issues/14299 + // Version 395 fixed the above issue by removing trailing slash https://github.com/trinodb/trino/pull/13984, + // but the change was insufficient for existing table cases created by 393 and 394. This method covers existing table cases. 
+ String fileName = fileName(location); + String correctSuffix = "/metadata/" + fileName; + String brokenSuffix = "//metadata/" + fileName; + if (!location.endsWith(brokenSuffix)) { + return location; + } + return location.replaceFirst(Pattern.quote(brokenSuffix) + "$", Matcher.quoteReplacement(correctSuffix)); + } + + public static String fileName(String path) + { + return path.substring(path.lastIndexOf('/') + 1); + } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractIcebergTableOperations.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractIcebergTableOperations.java index 0093853b98ac..b18e0450ddef 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractIcebergTableOperations.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractIcebergTableOperations.java @@ -42,6 +42,7 @@ import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.plugin.hive.HiveType.toHiveType; +import static io.trino.plugin.iceberg.IcebergUtil.fixBrokenMetadataLocation; import static io.trino.plugin.iceberg.IcebergUtil.getLocationProvider; import static java.lang.Integer.parseInt; import static java.lang.String.format; @@ -125,7 +126,7 @@ public TableMetadata refresh(boolean invalidateCaches) refreshFromMetadataLocation(null); return currentMetadata; } - refreshFromMetadataLocation(getRefreshedLocation(invalidateCaches)); + refreshFromMetadataLocation(fixBrokenMetadataLocation(getRefreshedLocation(invalidateCaches))); return currentMetadata; } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/HiveMetastoreTableOperations.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/HiveMetastoreTableOperations.java index 81bcacfcb5fc..daecddd9fa88 100644 --- 
a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/HiveMetastoreTableOperations.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/HiveMetastoreTableOperations.java @@ -34,6 +34,7 @@ import static com.google.common.base.Preconditions.checkState; import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable; +import static io.trino.plugin.iceberg.IcebergUtil.fixBrokenMetadataLocation; import static java.util.Objects.requireNonNull; import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP; import static org.apache.iceberg.BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP; @@ -74,7 +75,7 @@ protected void commitToExistingTable(TableMetadata base, TableMetadata metadata) .orElseThrow(() -> new TableNotFoundException(getSchemaTableName()))); checkState(currentMetadataLocation != null, "No current metadata location for existing table"); - String metadataLocation = currentTable.getParameters().get(METADATA_LOCATION_PROP); + String metadataLocation = fixBrokenMetadataLocation(currentTable.getParameters().get(METADATA_LOCATION_PROP)); if (!currentMetadataLocation.equals(metadataLocation)) { throw new CommitFailedException("Metadata location [%s] is not same as table metadata location [%s] for %s", currentMetadataLocation, metadataLocation, getSchemaTableName()); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMinioConnectorSmokeTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMinioConnectorSmokeTest.java index adb65faa5095..c1962d622a58 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMinioConnectorSmokeTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMinioConnectorSmokeTest.java @@ -17,6 +17,7 @@ import 
io.trino.plugin.hive.containers.HiveMinioDataLake; import io.trino.testing.QueryRunner; import org.apache.iceberg.FileFormat; +import org.intellij.lang.annotations.Language; import org.testng.annotations.Test; import java.util.List; @@ -107,4 +108,34 @@ public void testS3LocationWithTrailingSlash() assertUpdate("DROP TABLE " + tableName); } + + @Test + public void testMetadataLocationWithDoubleSlash() + { + // Regression test for https://github.com/trinodb/trino/issues/14299 + String schemaName = getSession().getSchema().orElseThrow(); + String tableName = "test_metadata_location_with_double_slash_" + randomTableSuffix(); + + assertUpdate("CREATE TABLE " + tableName + " AS SELECT 1 col", 1); + + // Update metadata location to contain double slash + String tableId = onMetastore("SELECT tbl_id FROM TBLS t INNER JOIN DBS db ON t.db_id = db.db_id WHERE db.name = '" + schemaName + "' and t.tbl_name = '" + tableName + "'"); + String metadataLocation = onMetastore("SELECT param_value FROM TABLE_PARAMS WHERE param_key = 'metadata_location' AND tbl_id = " + tableId); + + // Simulate corrupted metadata location as Trino 393-394 was doing + String newMetadataLocation = metadataLocation.replace("/metadata/", "//metadata/"); + onMetastore("UPDATE TABLE_PARAMS SET param_value = '" + newMetadataLocation + "' WHERE tbl_id = " + tableId + " AND param_key = 'metadata_location'"); + + // Confirm read and write operations succeed + assertQuery("SELECT * FROM " + tableName, "VALUES 1"); + assertUpdate("INSERT INTO " + tableName + " VALUES 2", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES (1), (2)"); + + assertUpdate("DROP TABLE " + tableName); + } + + private String onMetastore(@Language("SQL") String sql) + { + return hiveMinioDataLake.getHiveHadoop().runOnMetastore(sql); + } }