Skip to content

Commit 5eddfac

Browse files
committed
Add an option to convert Int96 to Arrow Timestamp
1 parent e9c2837 commit 5eddfac

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,18 @@
8686
*/
8787
public class SchemaConverter {
8888

89+
// Indicates if Int96 should be converted to timestamp arrow type.
90+
private final boolean convertInt96ToArrowTimestamp;
91+
8992
/**
9093
* Creates a converter with INT96-to-timestamp conversion disabled; equivalent to calling the boolean constructor with {@code false}.
9194
*/
9295
public SchemaConverter() {
96+
this(false);
97+
}
98+
99+
/**
 * Creates a converter whose INT96 handling is chosen by the caller.
 *
 * @param convertInt96ToArrowTimestamp when {@code true}, {@code convertINT96} maps INT96 to
 *     {@code ArrowType.Timestamp(TimeUnit.NANOSECOND, null)}; when {@code false}, it maps
 *     INT96 to {@code ArrowType.Binary}
 */
public SchemaConverter(final boolean convertInt96ToArrowTimestamp) {
100+
this.convertInt96ToArrowTimestamp = convertInt96ToArrowTimestamp;
93101
}
94102

95103
/**
@@ -492,8 +500,11 @@ private String getTimeZone(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation
492500

493501
/**
 * Maps the Parquet INT96 primitive to an Arrow field type.
 *
 * <p>Returns a field of {@code ArrowType.Timestamp(TimeUnit.NANOSECOND, null)} when
 * {@code convertInt96ToArrowTimestamp} is set, otherwise a field of {@code ArrowType.Binary}.
 *
 * @param primitiveTypeName the Parquet primitive type being converted; not read by this method
 * @return the type mapping produced by {@code field(...)} for the selected Arrow type
 */
@Override
494502
public TypeMapping convertINT96(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
495-
// Possibly timestamp
496-
return field(new ArrowType.Binary());
503+
if (convertInt96ToArrowTimestamp) {
504+
// NOTE(review): this only changes the declared Arrow type; presumably the value reader must
// decode INT96 into nanoseconds to match — confirm against the data conversion path.
return field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null));
505+
} else {
506+
// Default: expose the INT96 column as Arrow binary.
return field(new ArrowType.Binary());
507+
}
497508
}
498509

499510
@Override

parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
4848
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
4949
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
50+
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
5051

5152
import java.io.IOException;
5253
import java.util.List;
@@ -419,6 +420,27 @@ public void testParquetInt64TimeMicrosToArrow() {
419420
Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
420421
}
421422

423+
// Checks that an optional INT96 column named "a" is converted to an Arrow binary field.
// NOTE(review): relies on the shared `converter` fixture declared elsewhere in this test class —
// presumably built with the no-arg constructor; confirm.
@Test
424+
public void testParquetInt96ToArrowBinary() {
425+
MessageType parquet = Types.buildMessage()
426+
.addField(Types.optional(INT96).named("a")).named("root");
427+
Schema expected = new Schema(asList(
428+
field("a", new ArrowType.Binary())
429+
));
430+
Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
431+
}
432+
433+
// Checks that a converter constructed with `true` maps an optional INT96 column named "a" to
// ArrowType.Timestamp(TimeUnit.NANOSECOND, null).
@Test
434+
public void testParquetInt96ToArrowTimestamp() {
435+
// Dedicated converter instance with the INT96-to-timestamp option enabled.
final SchemaConverter converterInt96ToTimestamp = new SchemaConverter(true);
436+
MessageType parquet = Types.buildMessage()
437+
.addField(Types.optional(INT96).named("a")).named("root");
438+
Schema expected = new Schema(asList(
439+
field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, null))
440+
));
441+
Assert.assertEquals(expected, converterInt96ToTimestamp.fromParquet(parquet).getArrowSchema());
442+
}
443+
422444
@Test(expected = IllegalStateException.class)
423445
public void testParquetInt64TimeMillisToArrow() {
424446
converter.fromParquet(Types.buildMessage()

0 commit comments

Comments
 (0)