11 changes: 11 additions & 0 deletions hudi-client/hudi-flink-client/pom.xml
@@ -29,6 +29,10 @@
<name>hudi-flink-client</name>
<packaging>jar</packaging>

+ <properties>
+ <parquet.version>${flink.format.parquet.version}</parquet.version>
+ </properties>
+
<dependencies>
<!-- Hudi -->
<dependency>
@@ -87,6 +91,13 @@
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
+ <version>${parquet.version}</version>
</dependency>

+ <dependency>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-column</artifactId>
+ <version>${parquet.version}</version>
+ </dependency>
+
<!-- Hoodie - Test -->
ParquetSchemaConverter.java
@@ -34,6 +34,7 @@

import org.apache.flink.table.types.logical.TimestampType;
import org.apache.parquet.schema.GroupType;
+ import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
@@ -46,6 +47,8 @@
import java.util.ArrayList;
import java.util.List;

+ import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit;
+
/**
* Schema converter converts Parquet schema to and from Flink internal types.
*
@@ -436,7 +439,7 @@ private static Type convertField(
String.format(
"Can not convert Flink MapTypeInfo %s to Parquet"
+ " Map type as key has to be String",
- typeInfo.toString()));
+ typeInfo));
}
} else if (typeInfo instanceof ObjectArrayTypeInfo) {
ObjectArrayTypeInfo objectArrayTypeInfo = (ObjectArrayTypeInfo) typeInfo;
@@ -567,18 +570,16 @@ private static Type convertToParquetType(
int numBytes = computeMinBytesForDecimalPrecision(precision);
return Types.primitive(
PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition)
- .precision(precision)
- .scale(scale)
+ .as(LogicalTypeAnnotation.decimalType(scale, precision))
.length(numBytes)
- .as(OriginalType.DECIMAL)
.named(name);
case TINYINT:
return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
- .as(OriginalType.INT_8)
+ .as(LogicalTypeAnnotation.intType(8, true))
.named(name);
case SMALLINT:
return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
- .as(OriginalType.INT_16)
+ .as(LogicalTypeAnnotation.intType(16, true))
.named(name);
case INTEGER:
return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
@@ -594,16 +595,17 @@ private static Type convertToParquetType(
.named(name);
case DATE:
return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
- .as(OriginalType.DATE)
+ .as(LogicalTypeAnnotation.dateType())
.named(name);
case TIME_WITHOUT_TIME_ZONE:
return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
- .as(OriginalType.TIME_MILLIS)
+ .as(LogicalTypeAnnotation.timeType(true, TimeUnit.MILLIS))
.named(name);
case TIMESTAMP_WITHOUT_TIME_ZONE:
TimestampType timestampType = (TimestampType) type;
if (timestampType.getPrecision() == 3) {
return Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition)
+ .as(LogicalTypeAnnotation.timestampType(true, TimeUnit.MILLIS))
.named(name);
} else {
return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition)
@@ -613,6 +615,7 @@
LocalZonedTimestampType localZonedTimestampType = (LocalZonedTimestampType) type;
if (localZonedTimestampType.getPrecision() == 3) {
return Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition)
+ .as(LogicalTypeAnnotation.timestampType(false, TimeUnit.MILLIS))
.named(name);
} else {
return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition)
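Note: parquet-mr 1.11 deprecated the OriginalType constants in favor of LogicalTypeAnnotation, which is what the converter changes above adopt. A minimal sketch of the new builder style (field names and repetitions are illustrative, not taken from the converter):

    import org.apache.parquet.schema.LogicalTypeAnnotation;
    import org.apache.parquet.schema.PrimitiveType;
    import org.apache.parquet.schema.Type;
    import org.apache.parquet.schema.Types;

    // Old, deprecated style: .as(OriginalType.TIMESTAMP_MILLIS)
    // New style: the annotation object carries the UTC-adjustment flag and time unit.
    Type ts = Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, Type.Repetition.OPTIONAL)
        .as(LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS))
        .named("ts");

    // Caution: decimalType takes (scale, precision) in that order, replacing
    // the removed .precision()/.scale() builder calls above.
    Type dec = Types.primitive(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, Type.Repetition.OPTIONAL)
        .length(5) // 5 bytes is the minimum for precision 10
        .as(LogicalTypeAnnotation.decimalType(2, 10))
        .named("dec");

The unit test in the next diff verifies the schemas the converter now produces.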
@@ -51,24 +51,41 @@ void testConvertComplexTypes() {
final String expected = "message converted {\n"
+ " optional group f_array (LIST) {\n"
+ " repeated group list {\n"
+ " optional binary element (UTF8);\n"
+ " optional binary element (STRING);\n"
+ " }\n"
+ " }\n"
+ " optional group f_map (MAP) {\n"
+ " repeated group key_value {\n"
+ " optional int32 key;\n"
+ " optional binary value (UTF8);\n"
+ " optional binary value (STRING);\n"
+ " }\n"
+ " }\n"
+ " optional group f_row {\n"
+ " optional int32 f_row_f0;\n"
+ " optional binary f_row_f1 (UTF8);\n"
+ " optional binary f_row_f1 (STRING);\n"
+ " optional group f_row_f2 {\n"
+ " optional int32 f_row_f2_f0;\n"
+ " optional binary f_row_f2_f1 (UTF8);\n"
+ " optional binary f_row_f2_f1 (STRING);\n"
+ " }\n"
+ " }\n"
+ "}\n";
assertThat(messageType.toString(), is(expected));
}
+
+ @Test
+ void testConvertTimestampTypes() {
+ DataType dataType = DataTypes.ROW(
+ DataTypes.FIELD("ts_3", DataTypes.TIMESTAMP(3)),
+ DataTypes.FIELD("ts_6", DataTypes.TIMESTAMP(6)),
+ DataTypes.FIELD("ts_9", DataTypes.TIMESTAMP(9)));
+ org.apache.parquet.schema.MessageType messageType =
+ ParquetSchemaConverter.convertToParquetMessageType("converted", (RowType) dataType.getLogicalType());
+ assertThat(messageType.getColumns().size(), is(3));
+ final String expected = "message converted {\n"
+ + " optional int64 ts_3 (TIMESTAMP(MILLIS,true));\n"
+ + " optional int96 ts_6;\n"
+ + " optional int96 ts_9;\n"
+ + "}\n";
+ assertThat(messageType.toString(), is(expected));
+ }
}
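The converted schema can also be inspected programmatically rather than through its string form. A short sketch reusing the messageType variable from the test above:

    import org.apache.parquet.schema.LogicalTypeAnnotation;

    // TIMESTAMP(3) maps to int64 annotated TIMESTAMP(MILLIS,true); TIMESTAMP(6)
    // and TIMESTAMP(9) fall back to int96, which has no logical-type annotation.
    LogicalTypeAnnotation ts3Annotation =
        messageType.getType("ts_3").asPrimitiveType().getLogicalTypeAnnotation();
    LogicalTypeAnnotation ts6Annotation =
        messageType.getType("ts_6").asPrimitiveType().getLogicalTypeAnnotation(); // null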
2 changes: 1 addition & 1 deletion hudi-flink-datasource/hudi-flink/pom.xml
@@ -32,7 +32,7 @@

<properties>
<main.basedir>${project.parent.parent.basedir}</main.basedir>
- <parquet.version>1.11.1</parquet.version>
+ <parquet.version>${flink.format.parquet.version}</parquet.version>
</properties>

<build>
FlinkOptions.java
@@ -671,7 +671,7 @@ private FlinkOptions() {
public static final ConfigOption<Boolean> HIVE_SYNC_SUPPORT_TIMESTAMP = ConfigOptions
.key("hive_sync.support_timestamp")
.booleanType()
- .defaultValue(false)
+ .defaultValue(true)
.withDescription("INT64 with original type TIMESTAMP_MICROS is converted to hive timestamp type.\n"
+ "Disabled by default for backward compatibility.");

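With the default flipped to true, pipelines that relied on the old behavior (the timestamp synced to Hive as a plain long) now have to opt out explicitly. A minimal sketch, assuming the option is passed through Flink's Configuration:

    import org.apache.flink.configuration.Configuration;
    import org.apache.hudi.configuration.FlinkOptions;

    // Restore the pre-change behavior for existing tables and queries.
    Configuration conf = new Configuration();
    conf.setBoolean(FlinkOptions.HIVE_SYNC_SUPPORT_TIMESTAMP, false);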
4 changes: 2 additions & 2 deletions packaging/hudi-flink-bundle/pom.xml
@@ -34,8 +34,8 @@
<flink.bundle.hive.scope>provided</flink.bundle.hive.scope>
<flink.bundle.shade.prefix>org.apache.hudi.</flink.bundle.shade.prefix>
<javax.servlet.version>3.1.0</javax.servlet.version>
- <!-- override to be same with flink 1.12.2 -->
- <parquet.version>1.11.1</parquet.version>
+ <!-- override to be same with flink 1.15.x -->
+ <parquet.version>${flink.format.parquet.version}</parquet.version>
<hive.version>2.3.1</hive.version>
<thrift.version>0.9.3</thrift.version>
</properties>
1 change: 1 addition & 0 deletions pom.xml
@@ -129,6 +129,7 @@
<flink.runtime.artifactId>flink-runtime</flink.runtime.artifactId>
<flink.table.runtime.artifactId>flink-table-runtime_${scala.binary.version}</flink.table.runtime.artifactId>
<flink.table.planner.artifactId>flink-table-planner_${scala.binary.version}</flink.table.planner.artifactId>
+ <flink.format.parquet.version>1.12.2</flink.format.parquet.version>
<spark31.version>3.1.3</spark31.version>
<spark32.version>3.2.1</spark32.version>
<hudi.spark.module>hudi-spark2</hudi.spark.module>