Skip to content

Commit

Permalink
Reuse coercion on binary to varchar for non-orc partitioned table
Browse files Browse the repository at this point in the history
  • Loading branch information
Praveen2112 authored and wendigo committed Nov 28, 2024
1 parent ba41a21 commit a5778d2
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import java.util.HexFormat;

import static io.trino.plugin.hive.HiveStorageFormat.ORC;
- import static io.trino.plugin.hive.HiveStorageFormat.PARQUET;
import static io.trino.spi.type.VarbinaryType.VARBINARY;
import static io.trino.spi.type.Varchars.truncateToLength;
import static java.nio.charset.CodingErrorAction.REPLACE;
Expand All @@ -41,9 +40,6 @@ public static TypeCoercer<VarbinaryType, VarcharType> createVarbinaryToVarcharCo
if (storageFormat == ORC) {
return new OrcVarbinaryToVarcharCoercer(toType);
}
- if (storageFormat == PARQUET) {
- return new ParquetVarbinaryToVarcharCoercer(toType);
- }
return new VarbinaryToVarcharCoercer(toType);
}

Expand All @@ -55,31 +51,6 @@ public VarbinaryToVarcharCoercer(VarcharType toType)
super(VARBINARY, toType);
}

- @Override
- protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position)
- {
- try {
- Slice decodedValue = fromType.getSlice(block, position);
- if (toType.isUnbounded()) {
- toType.writeSlice(blockBuilder, decodedValue);
- return;
- }
- toType.writeSlice(blockBuilder, truncateToLength(decodedValue, toType.getBoundedLength()));
- }
- catch (RuntimeException e) {
- blockBuilder.appendNull();
- }
- }
- }
-
- private static class ParquetVarbinaryToVarcharCoercer
- extends TypeCoercer<VarbinaryType, VarcharType>
- {
- public ParquetVarbinaryToVarcharCoercer(VarcharType toType)
- {
- super(VARBINARY, toType);
- }
-
@Override
protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

import static io.trino.plugin.hive.HiveStorageFormat.ORC;
import static io.trino.plugin.hive.HiveStorageFormat.PARQUET;
- import static io.trino.plugin.hive.HiveStorageFormat.RCTEXT;
import static io.trino.plugin.hive.HiveTimestampPrecision.DEFAULT_PRECISION;
import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer;
import static io.trino.plugin.hive.util.HiveTypeTranslator.toHiveType;
Expand All @@ -45,35 +44,6 @@ public class TestVarbinaryToVarcharCoercer

@Test
public void testVarbinaryToVarcharCoercion()
- {
- assertVarbinaryToVarcharCoercion(Slices.utf8Slice("abc"), VARBINARY, Slices.utf8Slice("abc"), VARCHAR);
- assertVarbinaryToVarcharCoercion(Slices.utf8Slice("abc"), VARBINARY, Slices.utf8Slice("ab"), createVarcharType(2));
- // Invalid UTF-8 encoding
- assertVarbinaryToVarcharCoercion(Slices.wrappedBuffer(X_CHAR, CONTINUATION_BYTE), VARBINARY, Slices.wrappedBuffer(X_CHAR, CONTINUATION_BYTE), VARCHAR);
- assertVarbinaryToVarcharCoercion(
- Slices.wrappedBuffer(X_CHAR, START_4_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE),
- VARBINARY,
- Slices.wrappedBuffer(X_CHAR, START_4_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE),
- VARCHAR);
- assertVarbinaryToVarcharCoercion(
- Slices.wrappedBuffer(X_CHAR, START_4_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, X_CHAR),
- VARBINARY,
- Slices.wrappedBuffer(X_CHAR, START_4_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, CONTINUATION_BYTE, X_CHAR),
- VARCHAR);
- assertVarbinaryToVarcharCoercion(
- Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xA0, (byte) 0x80),
- VARBINARY,
- Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xA0, (byte) 0x80),
- VARCHAR);
- assertVarbinaryToVarcharCoercion(
- Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xBF, (byte) 0xBF),
- VARBINARY,
- Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xBF, (byte) 0xBF),
- VARCHAR);
- }
-
- @Test
- public void testVarbinaryToVarcharCoercionForParquet()
{
assertVarbinaryToVarcharCoercionForParquet(Slices.utf8Slice("abc"), VARBINARY, "abc", VARCHAR);
assertVarbinaryToVarcharCoercionForParquet(Slices.utf8Slice("abc"), VARBINARY, "ab", createVarcharType(2));
Expand All @@ -98,11 +68,6 @@ public void testVarbinaryToVarcharCoercionForOrc()
assertVarbinaryToVarcharCoercionForOrc(Slices.wrappedBuffer(X_CHAR, (byte) 0b11101101, (byte) 0xBF, (byte) 0xBF), VARBINARY, "58 ed bf bf", VARCHAR);
}

- private static void assertVarbinaryToVarcharCoercion(Slice actualValue, Type fromType, Slice expectedValue, Type toType)
- {
- assertVarbinaryToVarcharCoercion(actualValue, fromType, expectedValue, toType, RCTEXT);
- }
-
private static void assertVarbinaryToVarcharCoercionForOrc(Slice actualValue, Type fromType, String expectedValue, Type toType)
{
assertVarbinaryToVarcharCoercion(actualValue, fromType, Slices.utf8Slice(expectedValue), toType, ORC);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,16 +214,12 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition)

// Additional assertions for VARBINARY coercion
if (prestoReadColumns.contains("binary_to_string")) {
- List<Object> hexRepresentedValue = ImmutableList.of("58F7BFBFBF", "58F7BFBFBF58");
+ List<Object> hexRepresentedValue = ImmutableList.of("58EFBFBDEFBFBDEFBFBDEFBFBD", "58EFBFBDEFBFBDEFBFBDEFBFBD58");

if (tableName.toLowerCase(ENGLISH).contains("orc")) {
hexRepresentedValue = ImmutableList.of("3538206637206266206266206266", "3538206637206266206266206266203538");
}

- if (tableName.toLowerCase(ENGLISH).contains("parquet")) {
- hexRepresentedValue = ImmutableList.of("58EFBFBDEFBFBDEFBFBDEFBFBD", "58EFBFBDEFBFBDEFBFBDEFBFBD58");
- }
-
assertQueryResults(
Engine.TRINO,
format("SELECT to_hex(cast(binary_to_string as varbinary)) as hex_representation FROM %s", tableName),
Expand Down

0 comments on commit a5778d2

Please sign in to comment.