diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetTypeUtils.java b/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetTypeUtils.java index d8818c559983..af480ebbff03 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetTypeUtils.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/ParquetTypeUtils.java @@ -21,8 +21,8 @@ import org.apache.parquet.io.MessageColumnIO; import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.io.PrimitiveColumnIO; -import org.apache.parquet.schema.DecimalMetadata; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; import javax.annotation.Nullable; @@ -34,7 +34,6 @@ import java.util.Optional; import static com.google.common.base.Preconditions.checkArgument; -import static org.apache.parquet.schema.OriginalType.DECIMAL; import static org.apache.parquet.schema.Type.Repetition.REPEATED; public final class ParquetTypeUtils @@ -80,7 +79,7 @@ public static ColumnIO getArrayElementColumn(ColumnIO columnIO) * } */ if (columnIO instanceof GroupColumnIO && - columnIO.getType().getOriginalType() == null && + columnIO.getType().getLogicalTypeAnnotation() == null && ((GroupColumnIO) columnIO).getChildrenCount() == 1 && !columnIO.getName().equals("array") && !columnIO.getName().equals(columnIO.getParent().getName() + "_tuple")) { @@ -221,11 +220,11 @@ public static ColumnIO lookupColumnById(GroupColumnIO groupColumnIO, int columnI public static Optional createDecimalType(RichColumnDescriptor descriptor) { - if (descriptor.getPrimitiveType().getOriginalType() != DECIMAL) { + if (!(descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof DecimalLogicalTypeAnnotation)) { return Optional.empty(); } - DecimalMetadata decimalMetadata = descriptor.getPrimitiveType().getDecimalMetadata(); - return 
Optional.of(DecimalType.createDecimalType(decimalMetadata.getPrecision(), decimalMetadata.getScale())); + DecimalLogicalTypeAnnotation decimalLogicalType = (DecimalLogicalTypeAnnotation) descriptor.getPrimitiveType().getLogicalTypeAnnotation(); + return Optional.of(DecimalType.createDecimalType(decimalLogicalType.getPrecision(), decimalLogicalType.getScale())); } /** diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java index 6eba0774a0e8..2386082331ff 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/reader/PrimitiveColumnReader.java @@ -35,8 +35,8 @@ import org.apache.parquet.internal.filter2.columnindex.RowRanges; import org.apache.parquet.io.ParquetDecodingException; import org.apache.parquet.schema.LogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.TimeLogicalTypeAnnotation; import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.joda.time.DateTimeZone; @@ -105,13 +105,16 @@ public static PrimitiveColumnReader createReader(RichColumnDescriptor descriptor case INT32: return createDecimalColumnReader(descriptor).orElse(new IntColumnReader(descriptor)); case INT64: - if (descriptor.getPrimitiveType().getOriginalType() == OriginalType.TIME_MICROS) { + if (descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof TimeLogicalTypeAnnotation && + ((TimeLogicalTypeAnnotation) descriptor.getPrimitiveType().getLogicalTypeAnnotation()).getUnit() == LogicalTypeAnnotation.TimeUnit.MICROS) { return new TimeMicrosColumnReader(descriptor); } - if (descriptor.getPrimitiveType().getOriginalType() == OriginalType.TIMESTAMP_MICROS) { + if 
(descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof TimestampLogicalTypeAnnotation && + ((TimestampLogicalTypeAnnotation) descriptor.getPrimitiveType().getLogicalTypeAnnotation()).getUnit() == LogicalTypeAnnotation.TimeUnit.MICROS) { return new TimestampMicrosColumnReader(descriptor); } - if (descriptor.getPrimitiveType().getOriginalType() == OriginalType.TIMESTAMP_MILLIS) { + if (descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof TimestampLogicalTypeAnnotation && + ((TimestampLogicalTypeAnnotation) descriptor.getPrimitiveType().getLogicalTypeAnnotation()).getUnit() == LogicalTypeAnnotation.TimeUnit.MILLIS) { return new Int64TimestampMillisColumnReader(descriptor); } if (descriptor.getPrimitiveType().getLogicalTypeAnnotation() instanceof TimestampLogicalTypeAnnotation && diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetSchemaConverter.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetSchemaConverter.java index 1c5d07fe3eb6..c3a51f8a71ed 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetSchemaConverter.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetSchemaConverter.java @@ -28,7 +28,6 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type.Repetition; import org.apache.parquet.schema.Types; @@ -143,7 +142,7 @@ private org.apache.parquet.schema.Type getPrimitiveType(Type type, String name, .named(name); } if (DATE.equals(type)) { - return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name); + return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).as(LogicalTypeAnnotation.dateType()).named(name); } if (BIGINT.equals(type)) { return 
Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition).named(name); diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java index 804c75e74ea5..72b6153f688d 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetTypeVisitor.java @@ -15,8 +15,8 @@ import com.google.common.collect.Lists; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; @@ -24,8 +24,6 @@ import java.util.List; import static com.google.common.base.Preconditions.checkArgument; -import static org.apache.parquet.schema.OriginalType.LIST; -import static org.apache.parquet.schema.OriginalType.MAP; import static org.apache.parquet.schema.Type.Repetition.REPEATED; // Code from iceberg @@ -44,8 +42,8 @@ else if (type.isPrimitive()) { else { // if not a primitive, the typeId must be a group GroupType group = type.asGroupType(); - OriginalType annotation = group.getOriginalType(); - if (annotation == LIST) { + LogicalTypeAnnotation annotation = group.getLogicalTypeAnnotation(); + if (LogicalTypeAnnotation.listType().equals(annotation)) { checkArgument(!group.isRepetition(REPEATED), "Invalid list: top-level group is repeated: " + group); checkArgument(group.getFieldCount() == 1, @@ -70,7 +68,7 @@ else if (type.isPrimitive()) { visitor.fieldNames.pop(); } } - else if (annotation == MAP) { + else if (LogicalTypeAnnotation.mapType().equals(annotation)) { checkArgument(!group.isRepetition(REPEATED), "Invalid map: top-level group is repeated: " + group); checkArgument(group.getFieldCount() == 1, diff --git 
a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriters.java b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriters.java index 8909ea0d3b57..dcda643e2301 100644 --- a/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriters.java +++ b/lib/trino-parquet/src/main/java/io/trino/parquet/writer/ParquetWriters.java @@ -47,9 +47,9 @@ import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.LogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.TimeLogicalTypeAnnotation; import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.joda.time.DateTimeZone; @@ -215,7 +215,7 @@ private static PrimitiveValueWriter getValueWriter(ValuesWriter valuesWriter, Ty return new DateValueWriter(valuesWriter, parquetType); } if (TIME_MICROS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIME_MICROS); + verifyParquetType(type, parquetType, TimeLogicalTypeAnnotation.class, isTime(LogicalTypeAnnotation.TimeUnit.MICROS)); return new TimeMicrosValueWriter(valuesWriter, parquetType); } if (type instanceof TimestampType) { @@ -224,28 +224,23 @@ private static PrimitiveValueWriter getValueWriter(ValuesWriter valuesWriter, Ty return new Int96TimestampValueWriter(valuesWriter, type, parquetType, parquetTimeZone.get()); } if (TIMESTAMP_MILLIS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIMESTAMP_MILLIS); verifyParquetType(type, parquetType, TimestampLogicalTypeAnnotation.class, isTimestamp(LogicalTypeAnnotation.TimeUnit.MILLIS)); return new TimestampMillisValueWriter(valuesWriter, type, parquetType); } if (TIMESTAMP_MICROS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIMESTAMP_MICROS); 
verifyParquetType(type, parquetType, TimestampLogicalTypeAnnotation.class, isTimestamp(LogicalTypeAnnotation.TimeUnit.MICROS)); return new BigintValueWriter(valuesWriter, type, parquetType); } if (TIMESTAMP_NANOS.equals(type)) { - verifyParquetType(type, parquetType, (OriginalType) null); // no OriginalType for timestamp NANOS verifyParquetType(type, parquetType, TimestampLogicalTypeAnnotation.class, isTimestamp(LogicalTypeAnnotation.TimeUnit.NANOS)); return new TimestampNanosValueWriter(valuesWriter, type, parquetType); } } if (TIMESTAMP_TZ_MILLIS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIMESTAMP_MILLIS); return new TimestampTzMillisValueWriter(valuesWriter, parquetType); } if (TIMESTAMP_TZ_MICROS.equals(type)) { - verifyParquetType(type, parquetType, OriginalType.TIMESTAMP_MICROS); return new TimestampTzMicrosValueWriter(valuesWriter, parquetType); } if (DOUBLE.equals(type)) { @@ -264,11 +259,6 @@ private static PrimitiveValueWriter getValueWriter(ValuesWriter valuesWriter, Ty throw new TrinoException(NOT_SUPPORTED, format("Unsupported type for Parquet writer: %s", type)); } - private static void verifyParquetType(Type type, PrimitiveType parquetType, OriginalType originalType) - { - checkArgument(parquetType.getOriginalType() == originalType, "Wrong Parquet type '%s' for Trino type '%s'", parquetType, type); - } - private static void verifyParquetType(Type type, PrimitiveType parquetType, Class annotationType, Predicate predicate) { checkArgument( @@ -277,6 +267,14 @@ private static void verifyParquetType(Type type, PrimitiveType parquetType, "Wrong Parquet type '%s' for Trino type '%s'", parquetType, type); } + private static Predicate isTime(LogicalTypeAnnotation.TimeUnit precision) + { + requireNonNull(precision, "precision is null"); + return annotation -> annotation.getUnit() == precision && + // isAdjustedToUTC=false indicates Local semantics (time values not normalized to UTC) + !annotation.isAdjustedToUTC(); + } + private 
static Predicate isTimestamp(LogicalTypeAnnotation.TimeUnit precision) { requireNonNull(precision, "precision is null"); diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnIndexBuilder.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnIndexBuilder.java index b0f8edb19d32..d6944356fc01 100644 --- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnIndexBuilder.java +++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestColumnIndexBuilder.java @@ -24,6 +24,7 @@ import org.apache.parquet.internal.column.columnindex.ColumnIndex; import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder; import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Types; import org.testng.annotations.Test; @@ -51,9 +52,6 @@ import static org.apache.parquet.filter2.predicate.FilterApi.notEq; import static org.apache.parquet.filter2.predicate.FilterApi.userDefined; import static org.apache.parquet.filter2.predicate.LogicalInverter.invert; -import static org.apache.parquet.schema.OriginalType.DECIMAL; -import static org.apache.parquet.schema.OriginalType.UINT_8; -import static org.apache.parquet.schema.OriginalType.UTF8; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE; @@ -265,7 +263,7 @@ public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics fileCreatedBy, St if (max != null) { statistics.setMax(max.getBytes(UTF_8)); } - assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8))) + assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), 
Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("Test column"))) .isInstanceOfSatisfying(BinaryStatistics.class, columnStatistics -> { assertFalse(columnStatistics.isEmpty()); @@ -437,7 +438,7 @@ private void testReadStatsBinaryUtf8OldWriter(Optional fileCreatedBy, St @Test(dataProvider = "allCreatedBy") public void testReadStatsBinaryUtf8(Optional fileCreatedBy) { - PrimitiveType varchar = new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8); + PrimitiveType varchar = Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("Test column"); Statistics statistics; // Stats written by Parquet after https://issues.apache.org/jira/browse/PARQUET-1025 @@ -477,7 +478,7 @@ public void testReadNullStats(Optional fileCreatedBy) columnStatistics -> assertTrue(columnStatistics.isEmpty())); // varchar - assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), new PrimitiveType(OPTIONAL, BINARY, "Test column", OriginalType.UTF8))) + assertThat(MetadataReader.readStats(fileCreatedBy, Optional.empty(), Types.optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("Test column"))) .isInstanceOfSatisfying( BinaryStatistics.class, columnStatistics -> assertTrue(columnStatistics.isEmpty())); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/MapKeyValuesSchemaConverter.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/MapKeyValuesSchemaConverter.java index bf3e38dac612..8f51fe21d476 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/MapKeyValuesSchemaConverter.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/MapKeyValuesSchemaConverter.java @@ -23,8 +23,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import 
org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Type.Repetition; @@ -34,7 +34,6 @@ import java.util.Locale; import static com.google.common.base.Preconditions.checkState; -import static org.apache.parquet.schema.OriginalType.MAP_KEY_VALUE; /** * This class is copied from org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter @@ -69,7 +68,7 @@ private static Type convertType(String name, TypeInfo typeInfo, Repetition repet { if (typeInfo.getCategory() == Category.PRIMITIVE) { if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) { - return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) + return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(LogicalTypeAnnotation.stringType()) .named(name); } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) || @@ -100,21 +99,21 @@ else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) { } else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith( serdeConstants.CHAR_TYPE_NAME)) { - return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith( serdeConstants.VARCHAR_TYPE_NAME)) { - return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } else if (typeInfo instanceof DecimalTypeInfo) { DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; int prec = decimalTypeInfo.precision(); int scale = decimalTypeInfo.scale(); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; - return 
Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name); + return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name); } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) { - return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name); + return Types.primitive(PrimitiveTypeName.INT32, repetition).as(LogicalTypeAnnotation.dateType()).named(name); } else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) { throw new UnsupportedOperationException("Unknown type not implemented"); @@ -145,7 +144,7 @@ else if (typeInfo.getCategory() == Category.UNION) { private static GroupType convertArrayType(String name, ListTypeInfo typeInfo) { TypeInfo subType = typeInfo.getListElementTypeInfo(); - return listWrapper(name, OriginalType.LIST, new GroupType(Repetition.REPEATED, + return listWrapper(name, LogicalTypeAnnotation.listType(), new GroupType(Repetition.REPEATED, ParquetHiveSerDe.ARRAY.toString(), convertType("array_element", subType))); } @@ -175,7 +174,7 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA return listWrapper( repetition, alias, - MAP_KEY_VALUE, + LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(), new GroupType( Repetition.REPEATED, mapAlias, @@ -188,7 +187,7 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA return listWrapper( repetition, alias, - MAP_KEY_VALUE, + LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(), new GroupType( Repetition.REPEATED, mapAlias, @@ -197,16 +196,16 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA } } - private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) + private static GroupType listWrapper(Repetition repetition, String alias, 
LogicalTypeAnnotation logicalType, Type nested) { if (!nested.isRepetition(Repetition.REPEATED)) { throw new IllegalArgumentException("Nested type should be repeated: " + nested); } - return new GroupType(repetition, alias, originalType, nested); + return Types.buildGroup(repetition).as(logicalType).addField(nested).named(alias); } - private static GroupType listWrapper(String name, OriginalType originalType, GroupType groupType) + private static GroupType listWrapper(String name, LogicalTypeAnnotation logicalType, GroupType groupType) { - return new GroupType(Repetition.OPTIONAL, name, originalType, groupType); + return Types.optionalGroup().as(logicalType).addField(groupType).named(name); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/SingleLevelArrayMapKeyValuesSchemaConverter.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/SingleLevelArrayMapKeyValuesSchemaConverter.java index 7b2144ef08b4..82cb87d714c3 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/SingleLevelArrayMapKeyValuesSchemaConverter.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/SingleLevelArrayMapKeyValuesSchemaConverter.java @@ -23,8 +23,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Type.Repetition; @@ -34,7 +34,6 @@ import java.util.Locale; import static com.google.common.base.Preconditions.checkState; -import static org.apache.parquet.schema.OriginalType.MAP_KEY_VALUE; /** * This class is copied from org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter @@ 
-70,7 +69,7 @@ private static Type convertType(String name, TypeInfo typeInfo, Repetition repet { if (typeInfo.getCategory() == Category.PRIMITIVE) { if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) { - return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) + return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(LogicalTypeAnnotation.stringType()) .named(name); } if (typeInfo.equals(TypeInfoFactory.intTypeInfo) || @@ -102,16 +101,16 @@ private static Type convertType(String name, TypeInfo typeInfo, Repetition repet if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith( serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { - return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } - return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.repeated(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith( serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { - return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } - return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.repeated(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } if (typeInfo instanceof DecimalTypeInfo) { DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; @@ -119,12 +118,12 @@ private static Type convertType(String name, TypeInfo typeInfo, Repetition repet int scale = decimalTypeInfo.scale(); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; if (repetition == Repetition.OPTIONAL) { - return 
Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name); + return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name); } - return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name); + return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name); } if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) { - return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name); + return Types.primitive(PrimitiveTypeName.INT32, repetition).as(LogicalTypeAnnotation.dateType()).named(name); } if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) { throw new UnsupportedOperationException("Unknown type not implemented"); @@ -150,7 +149,7 @@ private static Type convertType(String name, TypeInfo typeInfo, Repetition repet private static GroupType convertArrayType(String name, ListTypeInfo typeInfo, Repetition repetition) { TypeInfo subType = typeInfo.getListElementTypeInfo(); - return listWrapper(name, OriginalType.LIST, convertType("array_element", subType, Repetition.REPEATED), repetition); + return listWrapper(name, LogicalTypeAnnotation.listType(), convertType("array_element", subType, Repetition.REPEATED), repetition); } // An optional group containing multiple elements @@ -179,7 +178,7 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA return listWrapper( repetition, alias, - MAP_KEY_VALUE, + LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(), new GroupType( Repetition.REPEATED, mapAlias, @@ -191,7 +190,7 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA return listWrapper( repetition, alias, - MAP_KEY_VALUE, + 
LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(), new GroupType( Repetition.REPEATED, mapAlias, @@ -199,16 +198,16 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA valueType)); } - private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) + private static GroupType listWrapper(Repetition repetition, String alias, LogicalTypeAnnotation logicalType, Type nested) { if (!nested.isRepetition(Repetition.REPEATED)) { throw new IllegalArgumentException("Nested type should be repeated: " + nested); } - return new GroupType(repetition, alias, originalType, nested); + return Types.buildGroup(repetition).as(logicalType).addField(nested).named(alias); } - private static GroupType listWrapper(String name, OriginalType originalType, Type elementType, Repetition repetition) + private static GroupType listWrapper(String name, LogicalTypeAnnotation logicalType, Type elementType, Repetition repetition) { - return new GroupType(repetition, name, originalType, elementType); + return Types.buildGroup(repetition).as(logicalType).addField(elementType).named(name); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/SingleLevelArraySchemaConverter.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/SingleLevelArraySchemaConverter.java index 51d7b5f2d1cf..d051478869ac 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/SingleLevelArraySchemaConverter.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/SingleLevelArraySchemaConverter.java @@ -24,8 +24,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.parquet.schema.ConversionPatterns; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import 
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Type.Repetition; @@ -71,7 +71,7 @@ private static Type convertType(String name, TypeInfo typeInfo, { if (typeInfo.getCategory() == Category.PRIMITIVE) { if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) { - return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8) + return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(LogicalTypeAnnotation.stringType()) .named(name); } else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) || @@ -103,19 +103,19 @@ else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) { else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith( serdeConstants.CHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { - return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } else { - return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.repeated(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } } else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith( serdeConstants.VARCHAR_TYPE_NAME)) { if (repetition == Repetition.OPTIONAL) { - return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } else { - return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name); + return Types.repeated(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name); } } else if (typeInfo instanceof DecimalTypeInfo) { @@ -124,14 +124,14 @@ else if (typeInfo instanceof DecimalTypeInfo) { int scale = decimalTypeInfo.scale(); int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1]; if (repetition == 
Repetition.OPTIONAL) { - return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name); + return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name); } else { - return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name); + return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name); } } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) { - return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name); + return Types.primitive(PrimitiveTypeName.INT32, repetition).as(LogicalTypeAnnotation.dateType()).named(name); } else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) { throw new UnsupportedOperationException("Unknown type not implemented"); @@ -161,7 +161,7 @@ else if (typeInfo.getCategory() == Category.UNION) { private static GroupType convertArrayType(String name, ListTypeInfo typeInfo, Repetition repetition) { TypeInfo subType = typeInfo.getListElementTypeInfo(); - return listWrapper(name, OriginalType.LIST, convertType("array", subType, Repetition.REPEATED), repetition); + return listWrapper(name, LogicalTypeAnnotation.listType(), convertType("array", subType, Repetition.REPEATED), repetition); } // An optional group containing multiple elements @@ -183,9 +183,9 @@ private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repet return ConversionPatterns.mapType(repetition, name, keyType, valueType); } - private static GroupType listWrapper(String name, OriginalType originalType, + private static GroupType listWrapper(String name, LogicalTypeAnnotation logicalType, Type elementType, Repetition repetition) { - return new GroupType(repetition, name, originalType, elementType); + return 
Types.buildGroup(repetition).as(logicalType).addField(elementType).named(name); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/TestDataWritableWriter.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/TestDataWritableWriter.java index 58243dde32d4..4c73128673c9 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/TestDataWritableWriter.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/write/TestDataWritableWriter.java @@ -43,7 +43,7 @@ import org.apache.parquet.io.api.Binary; import org.apache.parquet.io.api.RecordConsumer; import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.Type; import org.joda.time.DateTimeZone; @@ -140,9 +140,9 @@ private void writeValue(Object value, ObjectInspector inspector, Type type) } else { GroupType groupType = type.asGroupType(); - OriginalType originalType = type.getOriginalType(); + LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation(); - if (OriginalType.LIST == originalType) { + if (LogicalTypeAnnotation.listType().equals(logicalType)) { checkInspectorCategory(inspector, ObjectInspector.Category.LIST); if (singleLevelArray) { writeSingleLevelArray(value, (ListObjectInspector) inspector, groupType); @@ -151,7 +151,7 @@ private void writeValue(Object value, ObjectInspector inspector, Type type) writeArray(value, (ListObjectInspector) inspector, groupType); } } - else if (originalType != null && (originalType == OriginalType.MAP || originalType == OriginalType.MAP_KEY_VALUE)) { + else if (LogicalTypeAnnotation.mapType().equals(logicalType) || LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance().equals(logicalType)) { checkInspectorCategory(inspector, ObjectInspector.Category.MAP); writeMap(value, (MapObjectInspector) inspector, groupType); }