From 192e5b473548dcc244eab03c3d57bfb0856cd573 Mon Sep 17 00:00:00 2001 From: Ashhar Hasan Date: Tue, 10 Oct 2023 14:22:57 +0530 Subject: [PATCH 1/6] Rename method to clarify scope DefaultJdbcMetadata#createSyntheticColumn is meant to only be used to build the projections for join pushdown and not in other places. This commit renames the method to createSyntheticJoinProjectionColumn to clarify this since in the next commit a similar method is being introduced for aggregation pushdown and the existing name causes confusion. --- .../java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java | 6 +++--- .../io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java index 13c2e7f75fd8..24a8613c1f5e 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java @@ -460,14 +460,14 @@ public Optional> applyJoin( ImmutableMap.Builder newLeftColumnsBuilder = ImmutableMap.builder(); for (JdbcColumnHandle column : jdbcClient.getColumns(session, leftHandle)) { - newLeftColumnsBuilder.put(column, createSyntheticColumn(column, nextSyntheticColumnId)); + newLeftColumnsBuilder.put(column, createSyntheticJoinProjectionColumn(column, nextSyntheticColumnId)); nextSyntheticColumnId++; } Map newLeftColumns = newLeftColumnsBuilder.buildOrThrow(); ImmutableMap.Builder newRightColumnsBuilder = ImmutableMap.builder(); for (JdbcColumnHandle column : jdbcClient.getColumns(session, rightHandle)) { - newRightColumnsBuilder.put(column, createSyntheticColumn(column, nextSyntheticColumnId)); + newRightColumnsBuilder.put(column, createSyntheticJoinProjectionColumn(column, nextSyntheticColumnId)); nextSyntheticColumnId++; } Map newRightColumns = newRightColumnsBuilder.buildOrThrow(); @@ -525,7 +525,7 @@ public Optional> applyJoin( } @VisibleForTesting - static JdbcColumnHandle createSyntheticColumn(JdbcColumnHandle column, int nextSyntheticColumnId) + static JdbcColumnHandle createSyntheticJoinProjectionColumn(JdbcColumnHandle column, int nextSyntheticColumnId) { verify(nextSyntheticColumnId >= 0, "nextSyntheticColumnId rolled over and is not monotonically increasing any more"); diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java index 81b09fba8f79..8aaaa77b0710 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java @@ -44,7 +44,7 @@ import java.util.function.Function; import static io.airlift.slice.Slices.utf8Slice; -import static io.trino.plugin.jdbc.DefaultJdbcMetadata.createSyntheticColumn; +import static io.trino.plugin.jdbc.DefaultJdbcMetadata.createSyntheticJoinProjectionColumn; import static io.trino.plugin.jdbc.TestingJdbcTypeHandle.JDBC_BIGINT; import static io.trino.plugin.jdbc.TestingJdbcTypeHandle.JDBC_VARCHAR; import static io.trino.spi.StandardErrorCode.NOT_FOUND; @@ -402,11 +402,11 @@ public void testMultiGroupKeyPredicatePushdown() @Test public void testColumnAliasTruncation() { - assertThat(createSyntheticColumn(column("column_0"), 999).getColumnName()) + assertThat(createSyntheticJoinProjectionColumn(column("column_0"), 999).getColumnName()) .isEqualTo("column_0_999"); - assertThat(createSyntheticColumn(column("column_with_over_twenty_characters"), 100).getColumnName()) + assertThat(createSyntheticJoinProjectionColumn(column("column_with_over_twenty_characters"), 100).getColumnName()) .isEqualTo("column_with_over_twenty_ch_100"); - assertThat(createSyntheticColumn(column("column_with_over_twenty_characters"), Integer.MAX_VALUE).getColumnName()) + assertThat(createSyntheticJoinProjectionColumn(column("column_with_over_twenty_characters"), Integer.MAX_VALUE).getColumnName()) .isEqualTo("column_with_over_tw_2147483647"); } @@ -415,7 +415,7 @@ public void testNegativeSyntheticId() { JdbcColumnHandle column = column("column_0"); - assertThatThrownBy(() -> createSyntheticColumn(column, -2147483648)).isInstanceOf(VerifyException.class); + assertThatThrownBy(() -> createSyntheticJoinProjectionColumn(column, -2147483648)).isInstanceOf(VerifyException.class); } private static JdbcColumnHandle column(String columnName) From 20624486b97e66614f64cc3f2a11f95213b549bc Mon Sep 17 00:00:00 2001 From: Ashhar Hasan Date: Sun, 10 Sep 2023 15:28:54 +0530 Subject: [PATCH 2/6] Verify aggregation pushdown isn't affected by column name length limits In 5e520fcea34149c223090b51f781e93f73165a76 we introduced changes which prevent a long synthetic column name to be pushed down into remote database during join pushdown leading to query failures because for example Oracle 12 and older have a small limit of 30 characters. The only other pushdown operation which adds synthetic columns in the pushed down part is aggregation pushdown. However it's not practically affected since the synthetic aggregation columns are always of the form `_pfgnrtd_ + nextSyntheticColumnId` which means it's limited to 9 + 10 characters which is small enough to be supported by all databases. --- .../plugin/jdbc/DefaultJdbcMetadata.java | 23 ++++++++++++------- .../plugin/jdbc/TestDefaultJdbcMetadata.java | 13 +++++++++++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java index 24a8613c1f5e..9567c967e5cb 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java @@ -391,19 +391,13 @@ public Optional> applyAggrega return Optional.empty(); } - String columnName = SYNTHETIC_COLUMN_NAME_PREFIX + nextSyntheticColumnId; + JdbcColumnHandle newColumn = createSyntheticAggregationColumn(aggregate, expression.get().getJdbcTypeHandle(), nextSyntheticColumnId); nextSyntheticColumnId++; - JdbcColumnHandle newColumn = JdbcColumnHandle.builder() - .setColumnName(columnName) - .setJdbcTypeHandle(expression.get().getJdbcTypeHandle()) - .setColumnType(aggregate.getOutputType()) - .setComment(Optional.of("synthetic")) - .build(); newColumns.add(newColumn); projections.add(new Variable(newColumn.getColumnName(), aggregate.getOutputType())); resultAssignments.add(new Assignment(newColumn.getColumnName(), newColumn, aggregate.getOutputType())); - expressions.put(columnName, new ParameterizedExpression(expression.get().getExpression(), expression.get().getParameters())); + expressions.put(newColumn.getColumnName(), new ParameterizedExpression(expression.get().getExpression(), expression.get().getParameters())); } List newColumnsList = newColumns.build(); @@ -431,6 +425,19 @@ public Optional> applyAggrega return Optional.of(new AggregationApplicationResult<>(handle, projections.build(), resultAssignments.build(), ImmutableMap.of(), precalculateStatisticsForPushdown)); } + @VisibleForTesting + static JdbcColumnHandle createSyntheticAggregationColumn(AggregateFunction aggregate, JdbcTypeHandle typeHandle, int nextSyntheticColumnId) + { + // the new column can be max len(SYNTHETIC_COLUMN_NAME_PREFIX) + len(Integer.MAX_VALUE) = 9 + 10 = 19 characters which is small enough to be supported by all databases + String columnName = SYNTHETIC_COLUMN_NAME_PREFIX + nextSyntheticColumnId; + return JdbcColumnHandle.builder() + .setColumnName(columnName) + .setJdbcTypeHandle(typeHandle) + .setColumnType(aggregate.getOutputType()) + .setComment(Optional.of("synthetic")) + .build(); + } + @Override public Optional> applyJoin( ConnectorSession session, diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java index 8aaaa77b0710..39877d6257ad 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java @@ -44,6 +44,7 @@ import java.util.function.Function; import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.plugin.jdbc.DefaultJdbcMetadata.createSyntheticAggregationColumn; import static io.trino.plugin.jdbc.DefaultJdbcMetadata.createSyntheticJoinProjectionColumn; import static io.trino.plugin.jdbc.TestingJdbcTypeHandle.JDBC_BIGINT; import static io.trino.plugin.jdbc.TestingJdbcTypeHandle.JDBC_VARCHAR; @@ -418,6 +419,18 @@ public void testNegativeSyntheticId() assertThatThrownBy(() -> createSyntheticJoinProjectionColumn(column, -2147483648)).isInstanceOf(VerifyException.class); } + @Test + public void testAggregationColumnAliasMaxLength() + { + // this test is to ensure that the generated names for aggregation pushdown are short enough to be supported by all databases. + // Oracle has the smallest limit at 30 so any value smaller than that is acceptable. + assertThat(createSyntheticAggregationColumn( + new AggregateFunction("count", BIGINT, List.of(), List.of(), false, Optional.empty()), + JDBC_BIGINT, + Integer.MAX_VALUE).getColumnName().length()) + .isEqualTo(19); + } + private static JdbcColumnHandle column(String columnName) { return JdbcColumnHandle.builder() From 598ead58d33a4056c8c33c3de3b9eeb98aadd104 Mon Sep 17 00:00:00 2001 From: Ashhar Hasan Date: Sun, 10 Sep 2023 21:14:36 +0530 Subject: [PATCH 3/6] Introduce JdbcClient#getMaxColumnNameLength This will be used to customise behaviour of DefaultJdbcMetadata#createSyntheticJoinProjectionColumn according to specific connector being used in the next commit. --- .../main/java/io/trino/plugin/jdbc/CachingJdbcClient.java | 6 ++++++ .../java/io/trino/plugin/jdbc/ForwardingJdbcClient.java | 6 ++++++ .../src/main/java/io/trino/plugin/jdbc/JdbcClient.java | 5 +++++ .../io/trino/plugin/jdbc/jmx/StatisticsAwareJdbcClient.java | 6 ++++++ 4 files changed, 23 insertions(+) diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/CachingJdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/CachingJdbcClient.java index 1a2ed089609f..032d9895ca1b 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/CachingJdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/CachingJdbcClient.java @@ -560,6 +560,12 @@ public OptionalInt getMaxWriteParallelism(ConnectorSession session) return delegate.getMaxWriteParallelism(session); } + @Override + public OptionalInt getMaxColumnNameLength(ConnectorSession session) + { + return delegate.getMaxColumnNameLength(session); + } + public void onDataChanged(SchemaTableName table) { invalidateAllIf(statisticsCache, key -> key.mayReference(table)); diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/ForwardingJdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/ForwardingJdbcClient.java index 0e70ec7beb94..2f784634a44b 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/ForwardingJdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/ForwardingJdbcClient.java @@ -450,4 +450,10 @@ public OptionalInt getMaxWriteParallelism(ConnectorSession session) { return delegate().getMaxWriteParallelism(session); } + + @Override + public OptionalInt getMaxColumnNameLength(ConnectorSession session) + { + return delegate().getMaxColumnNameLength(session); + } } diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcClient.java index 9cb1e5ed7b43..38e6f7e80114 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/JdbcClient.java @@ -239,4 +239,9 @@ default Optional getTableScanRedirection(Con OptionalLong update(ConnectorSession session, JdbcTableHandle handle); OptionalInt getMaxWriteParallelism(ConnectorSession session); + + default OptionalInt getMaxColumnNameLength(ConnectorSession session) + { + return OptionalInt.empty(); + } } diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/jmx/StatisticsAwareJdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/jmx/StatisticsAwareJdbcClient.java index ae1e01d029e9..d07bcfa5178c 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/jmx/StatisticsAwareJdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/jmx/StatisticsAwareJdbcClient.java @@ -470,4 +470,10 @@ public OptionalInt getMaxWriteParallelism(ConnectorSession session) { return delegate().getMaxWriteParallelism(session); } + + @Override + public OptionalInt getMaxColumnNameLength(ConnectorSession session) + { + return delegate().getMaxColumnNameLength(session); + } } From 8d093ec73cfe23306caa4fc9f65f117471f67ce5 Mon Sep 17 00:00:00 2001 From: Ashhar Hasan Date: Wed, 11 Oct 2023 12:27:02 +0530 Subject: [PATCH 4/6] Allow having different column length limits per connector Before this change we hard-coded the column name length limits to that of Oracle 11g. This change allows each connector to provide their own length limits so that instead of always eagerly truncating synthetic columns we only do so when required. --- .../plugin/jdbc/DefaultJdbcMetadata.java | 16 ++++++++----- .../plugin/jdbc/TestDefaultJdbcMetadata.java | 9 +++---- .../io/trino/plugin/oracle/OracleClient.java | 24 +++++++++++++++++++ .../oracle/TestOracleConnectorTest.java | 4 ++-- 4 files changed, 41 insertions(+), 12 deletions(-) diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java index 9567c967e5cb..c652ac374c82 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java @@ -107,8 +107,6 @@ public class DefaultJdbcMetadata implements JdbcMetadata { - public static final int DEFAULT_COLUMN_ALIAS_LENGTH = 30; - private static final String SYNTHETIC_COLUMN_NAME_PREFIX = "_pfgnrtd_"; private static final String DELETE_ROW_ID = "_trino_artificial_column_handle_for_delete_row_id_"; private static final String MERGE_ROW_ID = "$merge_row_id"; @@ -466,15 +464,16 @@ public Optional> applyJoin( int nextSyntheticColumnId = max(leftHandle.getNextSyntheticColumnId(), rightHandle.getNextSyntheticColumnId()); ImmutableMap.Builder newLeftColumnsBuilder = ImmutableMap.builder(); + OptionalInt maxColumnNameLength = jdbcClient.getMaxColumnNameLength(session); for (JdbcColumnHandle column : jdbcClient.getColumns(session, leftHandle)) { - newLeftColumnsBuilder.put(column, createSyntheticJoinProjectionColumn(column, nextSyntheticColumnId)); + newLeftColumnsBuilder.put(column, createSyntheticJoinProjectionColumn(column, nextSyntheticColumnId, maxColumnNameLength)); nextSyntheticColumnId++; } Map newLeftColumns = newLeftColumnsBuilder.buildOrThrow(); ImmutableMap.Builder newRightColumnsBuilder = ImmutableMap.builder(); for (JdbcColumnHandle column : jdbcClient.getColumns(session, rightHandle)) { - newRightColumnsBuilder.put(column, createSyntheticJoinProjectionColumn(column, nextSyntheticColumnId)); + newRightColumnsBuilder.put(column, createSyntheticJoinProjectionColumn(column, nextSyntheticColumnId, maxColumnNameLength)); nextSyntheticColumnId++; } Map newRightColumns = newRightColumnsBuilder.buildOrThrow(); @@ -532,12 +531,17 @@ public Optional> applyJoin( } @VisibleForTesting - static JdbcColumnHandle createSyntheticJoinProjectionColumn(JdbcColumnHandle column, int nextSyntheticColumnId) + static JdbcColumnHandle createSyntheticJoinProjectionColumn(JdbcColumnHandle column, int nextSyntheticColumnId, OptionalInt optionalMaxColumnNameLength) { verify(nextSyntheticColumnId >= 0, "nextSyntheticColumnId rolled over and is not monotonically increasing any more"); + if (optionalMaxColumnNameLength.isEmpty()) { + return JdbcColumnHandle.builderFrom(column) + .setColumnName(column.getColumnName() + "_" + nextSyntheticColumnId) + .build(); + } int sequentialNumberLength = String.valueOf(nextSyntheticColumnId).length(); - int originalColumnNameLength = DEFAULT_COLUMN_ALIAS_LENGTH - sequentialNumberLength - "_".length(); + int originalColumnNameLength = optionalMaxColumnNameLength.getAsInt() - sequentialNumberLength - "_".length(); String columnNameTruncated = fixedLength(originalColumnNameLength) .split(column.getColumnName()) diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java index 39877d6257ad..93a09e45d89f 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.Function; import static io.airlift.slice.Slices.utf8Slice; @@ -403,11 +404,11 @@ public void testMultiGroupKeyPredicatePushdown() @Test public void testColumnAliasTruncation() { - assertThat(createSyntheticJoinProjectionColumn(column("column_0"), 999).getColumnName()) + assertThat(createSyntheticJoinProjectionColumn(column("column_0"), 999, OptionalInt.of(30)).getColumnName()) .isEqualTo("column_0_999"); - assertThat(createSyntheticJoinProjectionColumn(column("column_with_over_twenty_characters"), 100).getColumnName()) + assertThat(createSyntheticJoinProjectionColumn(column("column_with_over_twenty_characters"), 100, OptionalInt.of(30)).getColumnName()) .isEqualTo("column_with_over_twenty_ch_100"); - assertThat(createSyntheticJoinProjectionColumn(column("column_with_over_twenty_characters"), Integer.MAX_VALUE).getColumnName()) + assertThat(createSyntheticJoinProjectionColumn(column("column_with_over_twenty_characters"), Integer.MAX_VALUE, OptionalInt.of(30)).getColumnName()) .isEqualTo("column_with_over_tw_2147483647"); } @@ -416,7 +417,7 @@ public void testNegativeSyntheticId() { JdbcColumnHandle column = column("column_0"); - assertThatThrownBy(() -> createSyntheticJoinProjectionColumn(column, -2147483648)).isInstanceOf(VerifyException.class); + assertThatThrownBy(() -> createSyntheticJoinProjectionColumn(column, -2147483648, OptionalInt.of(30))).isInstanceOf(VerifyException.class); } @Test diff --git a/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java b/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java index eeeaccb62ac9..b252cabe552d 100644 --- a/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java +++ b/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java @@ -89,6 +89,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalInt; import java.util.Set; import java.util.function.BiFunction; @@ -215,6 +216,8 @@ public class OracleClient private final ConnectorExpressionRewriter connectorExpressionRewriter; private final AggregateFunctionRewriter aggregateFunctionRewriter; + private Integer maxColumnNameLength; + @Inject public OracleClient( BaseJdbcConfig config, @@ -348,6 +351,27 @@ public void renameSchema(ConnectorSession session, String schemaName, String new throw new TrinoException(NOT_SUPPORTED, "This connector does not support renaming schemas"); } + @Override + public OptionalInt getMaxColumnNameLength(ConnectorSession session) + { + if (maxColumnNameLength != null) { + // According to JavaDoc of DatabaseMetaData#getMaxColumnNameLength a value of 0 signifies that the limit is unknown + if (maxColumnNameLength == 0) { + return OptionalInt.empty(); + } + + return OptionalInt.of(maxColumnNameLength); + } + + try (Connection connection = connectionFactory.openConnection(session)) { + maxColumnNameLength = connection.getMetaData().getMaxColumnNameLength(); + return OptionalInt.of(maxColumnNameLength); + } + catch (SQLException e) { + throw new TrinoException(JDBC_ERROR, e); + } + } + @Override public Optional getTableComment(ResultSet resultSet) throws SQLException diff --git a/plugin/trino-oracle/src/test/java/io/trino/plugin/oracle/TestOracleConnectorTest.java b/plugin/trino-oracle/src/test/java/io/trino/plugin/oracle/TestOracleConnectorTest.java index a372c5171e48..d213183f9396 100644 --- a/plugin/trino-oracle/src/test/java/io/trino/plugin/oracle/TestOracleConnectorTest.java +++ b/plugin/trino-oracle/src/test/java/io/trino/plugin/oracle/TestOracleConnectorTest.java @@ -22,7 +22,6 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; -import static io.trino.plugin.jdbc.DefaultJdbcMetadata.DEFAULT_COLUMN_ALIAS_LENGTH; import static io.trino.plugin.oracle.TestingOracleServer.TEST_PASS; import static io.trino.plugin.oracle.TestingOracleServer.TEST_SCHEMA; import static io.trino.plugin.oracle.TestingOracleServer.TEST_USER; @@ -36,7 +35,8 @@ public class TestOracleConnectorTest extends BaseOracleConnectorTest { - private static final String MAXIMUM_LENGTH_COLUMN_IDENTIFIER = "z".repeat(DEFAULT_COLUMN_ALIAS_LENGTH); + // older Oracle versions are limited to 30 character identifier names + private static final String MAXIMUM_LENGTH_COLUMN_IDENTIFIER = "z".repeat(30); private TestingOracleServer oracleServer; From 7a76821860ccde1c534b61d0286502dc781d1248 Mon Sep 17 00:00:00 2001 From: Ashhar Hasan Date: Wed, 11 Oct 2023 20:53:08 +0530 Subject: [PATCH 5/6] Handle edge cases in createSyntheticJoinProjectionColumn In the previous implementation some conditions didn't need to be handled because the max length limit was known statically which made some conditions impossible. --- .../plugin/jdbc/DefaultJdbcMetadata.java | 27 +++++++++++---- .../plugin/jdbc/TestDefaultJdbcMetadata.java | 33 ++++++++++++++----- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java index c652ac374c82..cee54f86096c 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/DefaultJdbcMetadata.java @@ -534,22 +534,37 @@ public Optional> applyJoin( static JdbcColumnHandle createSyntheticJoinProjectionColumn(JdbcColumnHandle column, int nextSyntheticColumnId, OptionalInt optionalMaxColumnNameLength) { verify(nextSyntheticColumnId >= 0, "nextSyntheticColumnId rolled over and is not monotonically increasing any more"); + + final String separator = "_"; if (optionalMaxColumnNameLength.isEmpty()) { return JdbcColumnHandle.builderFrom(column) - .setColumnName(column.getColumnName() + "_" + nextSyntheticColumnId) + .setColumnName(column.getColumnName() + separator + nextSyntheticColumnId) + .build(); + } + + int maxColumnNameLength = optionalMaxColumnNameLength.getAsInt(); + int nextSyntheticColumnIdLength = String.valueOf(nextSyntheticColumnId).length(); + verify(maxColumnNameLength >= nextSyntheticColumnIdLength, "Maximum allowed column name length is %s but next synthetic id has length %s", maxColumnNameLength, nextSyntheticColumnIdLength); + + if (nextSyntheticColumnIdLength == maxColumnNameLength) { + return JdbcColumnHandle.builderFrom(column) + .setColumnName(String.valueOf(nextSyntheticColumnId)) .build(); } - int sequentialNumberLength = String.valueOf(nextSyntheticColumnId).length(); - int originalColumnNameLength = optionalMaxColumnNameLength.getAsInt() - sequentialNumberLength - "_".length(); + if (nextSyntheticColumnIdLength + separator.length() == maxColumnNameLength) { + return JdbcColumnHandle.builderFrom(column) + .setColumnName(separator + nextSyntheticColumnId) + .build(); + } - String columnNameTruncated = fixedLength(originalColumnNameLength) + // fixedLength only accepts values > 0, so the cases where the value would be <= 0 are handled above explicitly + String truncatedColumnName = fixedLength(maxColumnNameLength - separator.length() - nextSyntheticColumnIdLength) .split(column.getColumnName()) .iterator() .next(); - String columnName = columnNameTruncated + "_" + nextSyntheticColumnId; return JdbcColumnHandle.builderFrom(column) - .setColumnName(columnName) + .setColumnName(truncatedColumnName + separator + nextSyntheticColumnId) .build(); } diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java index 93a09e45d89f..971049d827dc 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/TestDefaultJdbcMetadata.java @@ -404,20 +404,35 @@ public void testMultiGroupKeyPredicatePushdown() @Test public void testColumnAliasTruncation() { - assertThat(createSyntheticJoinProjectionColumn(column("column_0"), 999, OptionalInt.of(30)).getColumnName()) - .isEqualTo("column_0_999"); - assertThat(createSyntheticJoinProjectionColumn(column("column_with_over_twenty_characters"), 100, OptionalInt.of(30)).getColumnName()) - .isEqualTo("column_with_over_twenty_ch_100"); - assertThat(createSyntheticJoinProjectionColumn(column("column_with_over_twenty_characters"), Integer.MAX_VALUE, OptionalInt.of(30)).getColumnName()) - .isEqualTo("column_with_over_tw_2147483647"); + OptionalInt maxLength = OptionalInt.of(30); + assertThat(createSyntheticJoinProjectionColumn(column("no_truncation"), 123, maxLength).getColumnName()) + .isEqualTo("no_truncation_123"); + assertThat(createSyntheticJoinProjectionColumn(column("long_column_name_gets_truncated"), 123, maxLength).getColumnName()) + .isEqualTo("long_column_name_gets_trun_123"); + assertThat(createSyntheticJoinProjectionColumn(column("long_id_causes_truncation"), Integer.MAX_VALUE, maxLength).getColumnName()) + .isEqualTo("long_id_causes_trun_2147483647"); + + assertThat(createSyntheticJoinProjectionColumn(column("id_equals_max_length"), 1234, OptionalInt.of(4)).getColumnName()) + .isEqualTo("1234"); + assertThat(createSyntheticJoinProjectionColumn(column("id_and_separator_equals_max_length"), 1234, OptionalInt.of(5)).getColumnName()) + .isEqualTo("_1234"); } @Test - public void testNegativeSyntheticId() + public void testSyntheticIdExceedsLength() { - JdbcColumnHandle column = column("column_0"); + assertThatThrownBy(() -> createSyntheticJoinProjectionColumn(column("id_exceeds_max_length"), 1234, OptionalInt.of(3))) + .isInstanceOf(VerifyException.class) + .hasMessage("Maximum allowed column name length is 3 but next synthetic id has length 4"); + } - assertThatThrownBy(() -> createSyntheticJoinProjectionColumn(column, -2147483648, OptionalInt.of(30))).isInstanceOf(VerifyException.class); + @Test + public void testNegativeSyntheticId() + { + //noinspection NumericOverflow + assertThatThrownBy(() -> createSyntheticJoinProjectionColumn(column("negative_id"), Integer.MAX_VALUE + 1, OptionalInt.of(30))) + .isInstanceOf(VerifyException.class) + .hasMessage("nextSyntheticColumnId rolled over and is not monotonically increasing any more"); } @Test From 9d3fe7e04bc09070fb6cceabaeb4bbb789705275 Mon Sep 17 00:00:00 2001 From: Ashhar Hasan Date: Fri, 13 Oct 2023 21:03:02 +0530 Subject: [PATCH 6/6] Apply generated column name truncation to all affected connectors This change also applies the generated column name truncation for join pushdown to Postgres, Redshift and SQL Server. Postgres and Redshift silently truncate long identifiers unlike Oracle and SQL Server which fail on long identifiers. However note that there was no correctness issue because even after silently truncating an identifier Postgres and Redshift can identify that ambiguous identifiers exist and fail the query. --- .../io/trino/plugin/jdbc/BaseJdbcClient.java | 21 +++++++++++++++++++ .../plugin/jdbc/BaseJdbcConnectorTest.java | 20 ++++++++++++++++++ .../io/trino/plugin/oracle/OracleClient.java | 19 +---------------- .../oracle/TestOracleConnectorTest.java | 17 --------------- .../plugin/postgresql/PostgreSqlClient.java | 7 +++++++ .../trino/plugin/redshift/RedshiftClient.java | 7 +++++++ .../plugin/sqlserver/SqlServerClient.java | 7 +++++++ 7 files changed, 63 insertions(+), 35 deletions(-) diff --git a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/BaseJdbcClient.java b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/BaseJdbcClient.java index a53f6651b8a3..a8150d624563 100644 --- a/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/BaseJdbcClient.java +++ b/plugin/trino-base-jdbc/src/main/java/io/trino/plugin/jdbc/BaseJdbcClient.java @@ -114,6 +114,7 @@ public abstract class BaseJdbcClient private final IdentifierMapping identifierMapping; private final boolean supportsRetries; private final JdbcRemoteIdentifiersFactory jdbcRemoteIdentifiersFactory = new JdbcRemoteIdentifiersFactory(this); + private Integer maxColumnNameLength; public BaseJdbcClient( String identifierQuote, @@ -1435,6 +1436,26 @@ public OptionalInt getMaxWriteParallelism(ConnectorSession session) return OptionalInt.of(getWriteParallelism(session)); } + protected OptionalInt getMaxColumnNameLengthFromDatabaseMetaData(ConnectorSession session) + { + if (maxColumnNameLength != null) { + // According to JavaDoc of DatabaseMetaData#getMaxColumnNameLength a value of 0 signifies that the limit is unknown + if (maxColumnNameLength == 0) { + return OptionalInt.empty(); + } + + return OptionalInt.of(maxColumnNameLength); + } + + try (Connection connection = connectionFactory.openConnection(session)) { + maxColumnNameLength = connection.getMetaData().getMaxColumnNameLength(); + return OptionalInt.of(maxColumnNameLength); + } + catch (SQLException e) { + throw new TrinoException(JDBC_ERROR, e); + } + } + protected void verifySchemaName(DatabaseMetaData databaseMetadata, String schemaName) throws SQLException { diff --git a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java index 4c1f14eacdb0..e2ea22c7d46a 100644 --- a/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java +++ b/plugin/trino-base-jdbc/src/test/java/io/trino/plugin/jdbc/BaseJdbcConnectorTest.java @@ -2041,6 +2041,26 @@ protected TestTable simpleTable() return new TestTable(onRemoteDatabase(), format("%s.simple_table", getSession().getSchema().orElseThrow()), "(col BIGINT)", ImmutableList.of("1", "2")); } + @Test + public void testJoinPushdownWithLongIdentifiers() + { + skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE) && hasBehavior(SUPPORTS_JOIN_PUSHDOWN)); + + String baseColumnName = "col"; + int maxLength = maxColumnNameLength() + // Assume 2^16 is enough for most use cases. Add a bit more to ensure 2^16 isn't actual limit. + .orElse(65536 + 5); + + String validColumnName = baseColumnName + "z".repeat(maxLength - baseColumnName.length()); + try (TestTable left = new TestTable(getQueryRunner()::execute, "test_long_id_l", format("(%s BIGINT)", validColumnName)); + TestTable right = new TestTable(getQueryRunner()::execute, "test_long_id_r", format("(%s BIGINT)", validColumnName))) { + assertThat(query(joinPushdownEnabled(getSession()), """ + SELECT l.%1$s, r.%1$s + FROM %2$s l JOIN %3$s r ON l.%1$s = r.%1$s""".formatted(validColumnName, left.getName(), right.getName()))) + .isFullyPushedDown(); + } + } + @Test public void testDynamicFiltering() { diff --git a/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java b/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java index b252cabe552d..47837cff4e3e 100644 --- a/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java +++ b/plugin/trino-oracle/src/main/java/io/trino/plugin/oracle/OracleClient.java @@ -216,8 +216,6 @@ public class OracleClient private final ConnectorExpressionRewriter connectorExpressionRewriter; private final AggregateFunctionRewriter aggregateFunctionRewriter; - private Integer maxColumnNameLength; - @Inject public OracleClient( BaseJdbcConfig config, @@ -354,22 +352,7 @@ public void renameSchema(ConnectorSession session, String schemaName, String new @Override public OptionalInt getMaxColumnNameLength(ConnectorSession session) { - if (maxColumnNameLength != null) { - // According to JavaDoc of DatabaseMetaData#getMaxColumnNameLength a value of 0 signifies that the limit is unknown - if (maxColumnNameLength == 0) { - return OptionalInt.empty(); - } - - return OptionalInt.of(maxColumnNameLength); - } - - try (Connection connection = connectionFactory.openConnection(session)) { - maxColumnNameLength = connection.getMetaData().getMaxColumnNameLength(); - return OptionalInt.of(maxColumnNameLength); - } - catch (SQLException e) { - throw new TrinoException(JDBC_ERROR, e); - } + return getMaxColumnNameLengthFromDatabaseMetaData(session); } @Override diff --git a/plugin/trino-oracle/src/test/java/io/trino/plugin/oracle/TestOracleConnectorTest.java b/plugin/trino-oracle/src/test/java/io/trino/plugin/oracle/TestOracleConnectorTest.java index d213183f9396..440fdfcd25e7 100644 --- a/plugin/trino-oracle/src/test/java/io/trino/plugin/oracle/TestOracleConnectorTest.java +++ b/plugin/trino-oracle/src/test/java/io/trino/plugin/oracle/TestOracleConnectorTest.java @@ -17,7 +17,6 @@ import io.airlift.testing.Closeables; import io.trino.testing.QueryRunner; import io.trino.testing.sql.SqlExecutor; -import io.trino.testing.sql.TestTable; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; @@ -28,16 +27,12 @@ import static java.lang.String.format; import static java.util.stream.Collectors.joining; import static java.util.stream.IntStream.range; -import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @TestInstance(PER_CLASS) public class TestOracleConnectorTest extends BaseOracleConnectorTest { - // older Oracle versions are limited to 30 character identifier names - private static final String MAXIMUM_LENGTH_COLUMN_IDENTIFIER = "z".repeat(30); - private TestingOracleServer oracleServer; @Override @@ -103,16 +98,4 @@ public void execute(String sql) } }; } - - @Test - public void testPushdownJoinWithLongNameSucceeds() - { - try (TestTable table = new TestTable(getQueryRunner()::execute, "long_identifier", "(%s bigint)".formatted(MAXIMUM_LENGTH_COLUMN_IDENTIFIER))) { - assertThat(query(joinPushdownEnabled(getSession()), """ - SELECT r.name, t.%s, n.name - FROM %s t JOIN region r ON r.regionkey = t.%s - JOIN nation n ON r.regionkey = n.regionkey""".formatted(MAXIMUM_LENGTH_COLUMN_IDENTIFIER, table.getName(), MAXIMUM_LENGTH_COLUMN_IDENTIFIER))) - .isFullyPushedDown(); - } - } } diff --git a/plugin/trino-postgresql/src/main/java/io/trino/plugin/postgresql/PostgreSqlClient.java b/plugin/trino-postgresql/src/main/java/io/trino/plugin/postgresql/PostgreSqlClient.java index d062c849ad17..affd261a925b 100644 --- a/plugin/trino-postgresql/src/main/java/io/trino/plugin/postgresql/PostgreSqlClient.java +++ b/plugin/trino-postgresql/src/main/java/io/trino/plugin/postgresql/PostgreSqlClient.java @@ -131,6 +131,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalInt; import java.util.OptionalLong; import java.util.UUID; import java.util.function.BiFunction; @@ -918,6 +919,12 @@ public OptionalLong update(ConnectorSession session, JdbcTableHandle handle) } } + @Override + public OptionalInt getMaxColumnNameLength(ConnectorSession session) + { + return getMaxColumnNameLengthFromDatabaseMetaData(session); + } + @Override public TableStatistics getTableStatistics(ConnectorSession session, JdbcTableHandle handle) { diff --git a/plugin/trino-redshift/src/main/java/io/trino/plugin/redshift/RedshiftClient.java b/plugin/trino-redshift/src/main/java/io/trino/plugin/redshift/RedshiftClient.java index 67184ff6604c..ba04e2693f02 100644 --- a/plugin/trino-redshift/src/main/java/io/trino/plugin/redshift/RedshiftClient.java +++ b/plugin/trino-redshift/src/main/java/io/trino/plugin/redshift/RedshiftClient.java @@ -102,6 +102,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalInt; import java.util.OptionalLong; import java.util.function.BiFunction; @@ -507,6 +508,12 @@ public OptionalLong update(ConnectorSession session, JdbcTableHandle handle) } } + @Override + public OptionalInt getMaxColumnNameLength(ConnectorSession session) + { + return getMaxColumnNameLengthFromDatabaseMetaData(session); + } + @Override protected void addColumn(ConnectorSession session, Connection connection, RemoteTableName table, ColumnMetadata column) throws SQLException diff --git a/plugin/trino-sqlserver/src/main/java/io/trino/plugin/sqlserver/SqlServerClient.java b/plugin/trino-sqlserver/src/main/java/io/trino/plugin/sqlserver/SqlServerClient.java index 29d9f937511f..f182a7f4e278 100644 --- a/plugin/trino-sqlserver/src/main/java/io/trino/plugin/sqlserver/SqlServerClient.java +++ b/plugin/trino-sqlserver/src/main/java/io/trino/plugin/sqlserver/SqlServerClient.java @@ -114,6 +114,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalInt; import java.util.function.BiFunction; import java.util.stream.Stream; @@ -1098,6 +1099,12 @@ public Map getTableProperties(ConnectorSession session, JdbcTabl } } + @Override + public OptionalInt getMaxColumnNameLength(ConnectorSession session) + { + return getMaxColumnNameLengthFromDatabaseMetaData(session); + } + @Override public void abortReadConnection(Connection connection, ResultSet resultSet) throws SQLException