Skip to content

Commit 3d0709d

Browse files
committed
PARQUET-1504: Add an option to convert Int96 to Arrow Timestamp
1 parent 4b40d96 commit 3d0709d

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,18 @@
8686
*/
8787
public class SchemaConverter {
8888

89+
// Whether Parquet INT96 columns are mapped to an Arrow Timestamp type (true)
// or to Arrow Binary (false). Set once in the constructor and never changed.
90+
private final boolean convertInt96ToArrowTimestamp;
91+
8992
/**
9093
* For when we'll need this to be configurable
9194
*/
9295
public SchemaConverter() {
96+
// Default configuration: INT96-to-Timestamp conversion is disabled,
// so convertINT96 keeps returning an Arrow Binary field.
this(false);
97+
}
98+
99+
/**
 * Creates a converter with explicit control over how Parquet INT96 is mapped.
 *
 * @param convertInt96ToArrowTimestamp if {@code true}, INT96 is converted to an
 *        Arrow Timestamp type; if {@code false}, it is converted to Arrow Binary
 */
public SchemaConverter(final boolean convertInt96ToArrowTimestamp) {
100+
this.convertInt96ToArrowTimestamp = convertInt96ToArrowTimestamp;
93101
}
94102

95103
/**
@@ -492,8 +500,11 @@ private String getTimeZone(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation
492500

493501
@Override
494502
public TypeMapping convertINT96(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
495-
// Possibly timestamp
496-
return field(new ArrowType.Binary());
503+
// Opt-in path, selected by the constructor flag: map INT96 to an Arrow
// Timestamp with NANOSECOND unit.
// NOTE(review): the null second argument presumably means "no time zone" —
// confirm against ArrowType.Timestamp.
if (convertInt96ToArrowTimestamp) {
504+
return field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null));
505+
} else {
506+
// Default path: same mapping as before this change (Arrow Binary).
return field(new ArrowType.Binary());
507+
}
497508
}
498509

499510
@Override

parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
4848
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
4949
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
50+
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
5051

5152
import java.io.IOException;
5253
import java.util.List;
@@ -439,6 +440,27 @@ public void testParquetFixedBinaryToArrowDecimal() {
439440
Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
440441
}
441442

443+
@Test
444+
public void testParquetInt96ToArrowBinary() {
445+
// Checks that an optional INT96 column "a" maps to an Arrow Binary field
// when converted with the shared `converter` fixture.
// NOTE(review): `converter` is declared outside this hunk; presumably it is
// built with the default constructor — confirm.
MessageType parquet = Types.buildMessage()
446+
.addField(Types.optional(INT96).named("a")).named("root");
447+
Schema expected = new Schema(asList(
448+
field("a", new ArrowType.Binary())
449+
));
450+
Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
451+
}
452+
453+
@Test
454+
public void testParquetInt96ToArrowTimestamp() {
455+
// Uses a dedicated converter with the INT96-to-Timestamp option enabled and
// checks that an optional INT96 column "a" maps to
// ArrowType.Timestamp(TimeUnit.NANOSECOND, null).
final SchemaConverter converterInt96ToTimestamp = new SchemaConverter(true);
456+
MessageType parquet = Types.buildMessage()
457+
.addField(Types.optional(INT96).named("a")).named("root");
458+
Schema expected = new Schema(asList(
459+
field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, null))
460+
));
461+
Assert.assertEquals(expected, converterInt96ToTimestamp.fromParquet(parquet).getArrowSchema());
462+
}
463+
442464
@Test(expected = IllegalStateException.class)
443465
public void testParquetInt64TimeMillisToArrow() {
444466
converter.fromParquet(Types.buildMessage()

0 commit comments

Comments
 (0)