From a5778d2e24ea5e6f6eba5998179b927622bfff03 Mon Sep 17 00:00:00 2001 From: "praveenkrishna.d" Date: Wed, 27 Nov 2024 16:48:34 +0530 Subject: [PATCH] Reuse coercion on binary to varchar for non-orc partitioned table --- .../coercions/VarbinaryToVarcharCoercers.java | 29 --------------- .../TestVarbinaryToVarcharCoercer.java | 35 ------------------- .../product/hive/BaseTestHiveCoercion.java | 6 +--- 3 files changed, 1 insertion(+), 69 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/VarbinaryToVarcharCoercers.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/VarbinaryToVarcharCoercers.java index 307ebf31b535a..2ee42fa010436 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/VarbinaryToVarcharCoercers.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/VarbinaryToVarcharCoercers.java @@ -26,7 +26,6 @@ import java.util.HexFormat; import static io.trino.plugin.hive.HiveStorageFormat.ORC; -import static io.trino.plugin.hive.HiveStorageFormat.PARQUET; import static io.trino.spi.type.VarbinaryType.VARBINARY; import static io.trino.spi.type.Varchars.truncateToLength; import static java.nio.charset.CodingErrorAction.REPLACE; @@ -41,9 +40,6 @@ public static TypeCoercer createVarbinaryToVarcharCo if (storageFormat == ORC) { return new OrcVarbinaryToVarcharCoercer(toType); } - if (storageFormat == PARQUET) { - return new ParquetVarbinaryToVarcharCoercer(toType); - } return new VarbinaryToVarcharCoercer(toType); } @@ -55,31 +51,6 @@ public VarbinaryToVarcharCoercer(VarcharType toType) super(VARBINARY, toType); } - @Override - protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) - { - try { - Slice decodedValue = fromType.getSlice(block, position); - if (toType.isUnbounded()) { - toType.writeSlice(blockBuilder, decodedValue); - return; - } - toType.writeSlice(blockBuilder, truncateToLength(decodedValue, toType.getBoundedLength())); - } - catch (RuntimeException e) { - blockBuilder.appendNull(); - } - } - } - - private static class ParquetVarbinaryToVarcharCoercer - extends TypeCoercer - { - public ParquetVarbinaryToVarcharCoercer(VarcharType toType) - { - super(VARBINARY, toType); - } - @Override protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) { diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestVarbinaryToVarcharCoercer.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestVarbinaryToVarcharCoercer.java index d0317fd465703..af69c9a37dcd2 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestVarbinaryToVarcharCoercer.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestVarbinaryToVarcharCoercer.java @@ -23,7 +23,6 @@ import static io.trino.plugin.hive.HiveStorageFormat.ORC; import static io.trino.plugin.hive.HiveStorageFormat.PARQUET; -import static io.trino.plugin.hive.HiveStorageFormat.RCTEXT; import static io.trino.plugin.hive.HiveTimestampPrecision.DEFAULT_PRECISION; import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer; import static io.trino.plugin.hive.util.HiveTypeTranslator.toHiveType; @@ -45,35 +44,6 @@ public class TestVarbinaryToVarcharCoercer @Test public void testVarbinaryToVarcharCoercion() - { - assertVarbinaryToVarcharCoercion(Slices.utf8Slice("abc"), VARBINARY, Slices.utf8Slice("abc"), VARCHAR); - assertVarbinaryToVarcharCoercion(Slices.utf8Slice("abc"), VARBINARY, Slices.utf8Slice("ab"), createVarcharType(2)); - // Invalid UTF-8 encoding - assertVarbinaryToVarcharCoercion(Slices.wrappedBuffer(X_CHAR, CONTINUATION_BYTE), VARBINARY, Slices.wrappedBuffer(X_CHAR, CONTINUATION_BYTE), VARCHAR); - assertVarbinaryToVarcharCoercion( - Slices.wrappedBuffer(X_CHAR, START_4_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE), - VARBINARY, - Slices.wrappedBuffer(X_CHAR, START_4_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE), - VARCHAR); - assertVarbinaryToVarcharCoercion( - Slices.wrappedBuffer(X_CHAR, START_4_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, X_CHAR), - VARBINARY, - Slices.wrappedBuffer(X_CHAR, START_4_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, X_CHAR), - VARCHAR); - assertVarbinaryToVarcharCoercion( - Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xA0, (byte) 0x80), - VARBINARY, - Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xA0, (byte) 0x80), - VARCHAR); - assertVarbinaryToVarcharCoercion( - Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xBF, (byte) 0xBF), - VARBINARY, - Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xBF, (byte) 0xBF), - VARCHAR); - } - - @Test - public void testVarbinaryToVarcharCoercionForParquet() { assertVarbinaryToVarcharCoercionForParquet(Slices.utf8Slice("abc"), VARBINARY, "abc", VARCHAR); assertVarbinaryToVarcharCoercionForParquet(Slices.utf8Slice("abc"), VARBINARY, "ab", createVarcharType(2)); @@ -98,11 +68,6 @@ public void testVarbinaryToVarcharCoercionForOrc() assertVarbinaryToVarcharCoercionForOrc(Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xBF, (byte) 0xBF), VARBINARY, "58 ed bf bf", VARCHAR); } - private static void assertVarbinaryToVarcharCoercion(Slice actualValue, Type fromType, Slice expectedValue, Type toType) - { - assertVarbinaryToVarcharCoercion(actualValue, fromType, expectedValue, toType, RCTEXT); - } - private static void assertVarbinaryToVarcharCoercionForOrc(Slice actualValue, Type fromType, String expectedValue, Type toType) { assertVarbinaryToVarcharCoercion(actualValue, fromType, Slices.utf8Slice(expectedValue), toType, ORC); diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java index 1f85d1b862745..9745a3dc051da 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java @@ -214,16 +214,12 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition) // Additional assertions for VARBINARY coercion if (prestoReadColumns.contains("binary_to_string")) { - List hexRepresentedValue = ImmutableList.of("58F7BFBFBF", "58F7BFBFBF58"); + List hexRepresentedValue = ImmutableList.of("58EFBFBDEFBFBDEFBFBDEFBFBD", "58EFBFBDEFBFBDEFBFBDEFBFBD58"); if (tableName.toLowerCase(ENGLISH).contains("orc")) { hexRepresentedValue = ImmutableList.of("3538206637206266206266206266", "3538206637206266206266206266203538"); } - if (tableName.toLowerCase(ENGLISH).contains("parquet")) { - hexRepresentedValue = ImmutableList.of("58EFBFBDEFBFBDEFBFBDEFBFBD", "58EFBFBDEFBFBDEFBFBDEFBFBD58"); - } - assertQueryResults( Engine.TRINO, format("SELECT to_hex(cast(binary_to_string as varbinary)) as hex_representation FROM %s", tableName),