apache · kingeasternsun · Jan 26, 2022 · Feb 8, 2022 · Feb 8, 2022 · Feb 10, 2022
diff --git a/flink/v1.13/build.gradle b/flink/v1.13/build.gradle
@@ -68,6 +68,7 @@ project(':iceberg-flink:iceberg-flink-1.13') {
       exclude group: 'org.apache.hive', module: 'hive-storage-api'
     }
 
+    testImplementation "org.apache.spark:spark-sql_2.12:3.2.0"
     testImplementation "org.apache.flink:flink-core:${flinkVersion}"
     testImplementation "org.apache.flink:flink-runtime_2.12:${flinkVersion}"
     testImplementation "org.apache.flink:flink-table-planner-blink_2.12:${flinkVersion}"

diff --git a/flink/v1.13/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java b/flink/v1.13/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
@@ -22,10 +22,12 @@
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import java.time.Instant;
 import java.time.ZoneOffset;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 import org.apache.flink.table.data.ArrayData;
 import org.apache.flink.table.data.DecimalData;
 import org.apache.flink.table.data.GenericRowData;
@@ -264,6 +266,10 @@ public ParquetValueReader<?> primitive(org.apache.iceberg.types.Type.PrimitiveTy
         case INT64:
         case DOUBLE:
           return new ParquetValueReaders.UnboxedReader<>(desc);
+        case INT96:
+          // Impala & Spark used to write timestamps as INT96 without a logical type. For backwards
+          // compatibility we try to read INT96 as timestamps.
+          return new TimestampInt96Reader(desc);
         default:
           throw new UnsupportedOperationException("Unsupported type: " + primitive);
       }
@@ -321,6 +327,29 @@ public DecimalData read(DecimalData ignored) {
     }
   }
 
+  private static class TimestampInt96Reader extends ParquetValueReaders.UnboxedReader<Long> {
+    private static final long UNIX_EPOCH_JULIAN = 2_440_588L;
+
+    TimestampInt96Reader(ColumnDescriptor desc) {
+      super(desc);
+    }
+
+    @Override
+    public Long read(Long ignored) {
+      return readLong();
+    }
+
+    @Override
+    public long readLong() {
+      final ByteBuffer byteBuffer = column.nextBinary().toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
+      final long timeOfDayNanos = byteBuffer.getLong();
+      final int julianDay = byteBuffer.getInt();
+
+      return TimeUnit.DAYS.toMicros(julianDay - UNIX_EPOCH_JULIAN) +
+              TimeUnit.NANOSECONDS.toMicros(timeOfDayNanos);
+    }
+  }
+
   private static class MicrosToTimestampTzReader extends ParquetValueReaders.UnboxedReader<TimestampData> {
     MicrosToTimestampTzReader(ColumnDescriptor desc) {
       super(desc);

diff --git a/flink/v1.13/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java b/flink/v1.13/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java
@@ -22,9 +22,14 @@
 import java.io.File;
 import java.io.IOException;
 import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.iceberg.Files;
+import org.apache.iceberg.MetricsConfig;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.data.DataTest;
 import org.apache.iceberg.data.RandomGenericData;
@@ -34,8 +39,26 @@
 import org.apache.iceberg.flink.TestHelpers;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.FileAppender;
+import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.parquet.Parquet;
+import org.apache.iceberg.parquet.ParquetWriteAdapter;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.RandomUtil;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.api.WriteSupport;
+import org.apache.parquet.hadoop.util.HadoopOutputFile;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
 import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.iceberg.types.Types.NestedField.required;
 
 public class TestFlinkParquetReader extends DataTest {
   private static final int NUM_RECORDS = 100;
@@ -72,4 +95,86 @@ protected void writeAndValidate(Schema schema) throws IOException {
     writeAndValidate(RandomGenericData.generateDictionaryEncodableRecords(schema, NUM_RECORDS, 21124), schema);
     writeAndValidate(RandomGenericData.generateFallbackRecords(schema, NUM_RECORDS, 21124, NUM_RECORDS / 20), schema);
   }
+
+  protected List<RowData> rowDatasFromFile(InputFile inputFile, Schema schema) throws IOException {
+    try (CloseableIterable<RowData> reader =
+        Parquet.read(inputFile)
+            .project(schema)
+            .createReaderFunc(type -> FlinkParquetReaders.buildReader(schema, type))
+            .build()) {
+      return Lists.newArrayList(reader);
+    }
+  }
+
+  @Test
+  public void testInt96TimestampProducedBySparkIsReadCorrectly() throws IOException {
+    String outputFilePath = String.format("%s/%s", temp.getRoot().getAbsolutePath(), "parquet_int96.parquet");
+    HadoopOutputFile outputFile =
+        HadoopOutputFile.fromPath(
+            new org.apache.hadoop.fs.Path(outputFilePath), new Configuration());
+    Schema schema = new Schema(required(1, "ts", Types.TimestampType.withZone()));
+    StructType sparkSchema =
+        new StructType(
+            new StructField[] {
+                new StructField("ts", DataTypes.TimestampType, true, Metadata.empty())
+            });
+
+    final Random random = new Random(0L);
+    List<InternalRow> rows = Lists.newArrayList();
+    for (int i = 0; i < 10; i++) {
+      rows.add(new GenericInternalRow(new Object[] {
+          RandomUtil.generatePrimitive(schema.asStruct().fieldType("ts").asPrimitiveType(), random)}));
+    }
+
+    try (FileAppender<InternalRow> writer =
+        new ParquetWriteAdapter<>(
+            new NativeSparkWriterBuilder(outputFile)
+                .set("org.apache.spark.sql.parquet.row.attributes", sparkSchema.json())
+                .set("spark.sql.parquet.writeLegacyFormat", "false")
+                .set("spark.sql.parquet.outputTimestampType", "INT96")
+                .build(),
+            MetricsConfig.getDefault())) {
+      writer.addAll(rows);
+    }
+
+    InputFile parquetInputFile = Files.localInput(outputFilePath);
+    List<RowData> readDataRows = rowDatasFromFile(parquetInputFile, schema);
+    Assert.assertEquals(rows.size(), readDataRows.size());
+    for (int i = 0; i < rows.size(); i += 1) {
+      Assert.assertEquals(rows.get(i).getLong(0), readDataRows.get(i).getLong(0));
+    }
+  }
+
+  /**
+   * Native Spark ParquetWriter.Builder implementation so that we can write timestamps using Spark's native
+   * ParquetWriteSupport.
+   * thanks for the PR https://github.com/apache/iceberg/pull/1184 by @gustavoatt
+   */
+  private static class NativeSparkWriterBuilder
+      extends ParquetWriter.Builder<InternalRow, NativeSparkWriterBuilder> {
+    private final Map<String, String> config = Maps.newHashMap();
+
+    NativeSparkWriterBuilder(org.apache.parquet.io.OutputFile path) {
+      super(path);
+    }
+
+    public NativeSparkWriterBuilder set(String property, String value) {
+      this.config.put(property, value);
+      return self();
+    }
+
+    @Override
+    protected NativeSparkWriterBuilder self() {
+      return this;
+    }
+
+    @Override
+    protected WriteSupport<InternalRow> getWriteSupport(Configuration configuration) {
+      for (Map.Entry<String, String> entry : config.entrySet()) {
+        configuration.set(entry.getKey(), entry.getValue());
+      }
+
+      return new org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport();
+    }
+  }
 }
diff --git a/flink/v1.14/build.gradle b/flink/v1.14/build.gradle
@@ -67,6 +67,7 @@ project(':iceberg-flink:iceberg-flink-1.14') {
       exclude group: 'org.apache.hive', module: 'hive-storage-api'
     }
 
+    testImplementation "org.apache.spark:spark-sql_2.12:3.2.0"
     testImplementation "org.apache.flink:flink-core:${flinkVersion}"
     testImplementation "org.apache.flink:flink-runtime:${flinkVersion}"
     testImplementation ("org.apache.flink:flink-test-utils-junit:${flinkVersion}") {

diff --git a/flink/v1.14/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java b/flink/v1.14/flink/src/main/java/org/apache/iceberg/flink/data/FlinkParquetReaders.java
@@ -22,10 +22,12 @@
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import java.time.Instant;
 import java.time.ZoneOffset;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 import org.apache.flink.table.data.ArrayData;
 import org.apache.flink.table.data.DecimalData;
 import org.apache.flink.table.data.GenericRowData;
@@ -264,6 +266,10 @@ public ParquetValueReader<?> primitive(org.apache.iceberg.types.Type.PrimitiveTy
         case INT64:
         case DOUBLE:
           return new ParquetValueReaders.UnboxedReader<>(desc);
+        case INT96:
+          // Impala & Spark used to write timestamps as INT96 without a logical type. For backwards
+          // compatibility we try to read INT96 as timestamps.
+          return new TimestampInt96Reader(desc);
         default:
           throw new UnsupportedOperationException("Unsupported type: " + primitive);
       }
@@ -321,6 +327,29 @@ public DecimalData read(DecimalData ignored) {
     }
   }
 
+  private static class TimestampInt96Reader extends ParquetValueReaders.UnboxedReader<Long> {
+    private static final long UNIX_EPOCH_JULIAN = 2_440_588L;
+
+    TimestampInt96Reader(ColumnDescriptor desc) {
+      super(desc);
+    }
+
+    @Override
+    public Long read(Long ignored) {
+      return readLong();
+    }
+
+    @Override
+    public long readLong() {
+      final ByteBuffer byteBuffer = column.nextBinary().toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
+      final long timeOfDayNanos = byteBuffer.getLong();
+      final int julianDay = byteBuffer.getInt();
+
+      return TimeUnit.DAYS.toMicros(julianDay - UNIX_EPOCH_JULIAN) +
+          TimeUnit.NANOSECONDS.toMicros(timeOfDayNanos);
+    }
+  }
+
   private static class MicrosToTimestampTzReader extends ParquetValueReaders.UnboxedReader<TimestampData> {
     MicrosToTimestampTzReader(ColumnDescriptor desc) {
       super(desc);

diff --git a/flink/v1.14/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java b/flink/v1.14/flink/src/test/java/org/apache/iceberg/flink/data/TestFlinkParquetReader.java
@@ -25,13 +25,17 @@
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
+import java.util.Random;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.generic.GenericRecordBuilder;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.Files;
+import org.apache.iceberg.MetricsConfig;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.avro.AvroSchemaUtil;
 import org.apache.iceberg.data.DataTest;
@@ -42,14 +46,28 @@
 import org.apache.iceberg.flink.TestHelpers;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.FileAppender;
+import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.parquet.Parquet;
+import org.apache.iceberg.parquet.ParquetWriteAdapter;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.RandomUtil;
 import org.apache.parquet.avro.AvroParquetWriter;
 import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.api.WriteSupport;
+import org.apache.parquet.hadoop.util.HadoopOutputFile;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
 import org.junit.Assert;
 import org.junit.Test;
 
 import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
 
 public class TestFlinkParquetReader extends DataTest {
   private static final int NUM_RECORDS = 100;
@@ -129,4 +147,86 @@ protected void writeAndValidate(Schema schema) throws IOException {
     writeAndValidate(RandomGenericData.generateDictionaryEncodableRecords(schema, NUM_RECORDS, 21124), schema);
     writeAndValidate(RandomGenericData.generateFallbackRecords(schema, NUM_RECORDS, 21124, NUM_RECORDS / 20), schema);
   }
+
+  protected List<RowData> rowDatasFromFile(InputFile inputFile, Schema schema) throws IOException {
+    try (CloseableIterable<RowData> reader =
+        Parquet.read(inputFile)
+            .project(schema)
+            .createReaderFunc(type -> FlinkParquetReaders.buildReader(schema, type))
+            .build()) {
+      return Lists.newArrayList(reader);
+    }
+  }
+
+  @Test
+  public void testInt96TimestampProducedBySparkIsReadCorrectly() throws IOException {
+    String outputFilePath = String.format("%s/%s", temp.getRoot().getAbsolutePath(), "parquet_int96.parquet");
+    HadoopOutputFile outputFile =
+        HadoopOutputFile.fromPath(
+            new org.apache.hadoop.fs.Path(outputFilePath), new Configuration());
+    Schema schema = new Schema(required(1, "ts", Types.TimestampType.withZone()));
+    StructType sparkSchema =
+        new StructType(
+            new StructField[] {
+                new StructField("ts", DataTypes.TimestampType, true, Metadata.empty())
+            });
+
+    final Random random = new Random(0L);
+    List<InternalRow> rows = Lists.newArrayList();
+    for (int i = 0; i < 10; i++) {
+      rows.add(new GenericInternalRow(new Object[] {
+              RandomUtil.generatePrimitive(schema.asStruct().fieldType("ts").asPrimitiveType(), random)}));
+    }
+
+    try (FileAppender<InternalRow> writer =
+        new ParquetWriteAdapter<>(
+            new NativeSparkWriterBuilder(outputFile)
+                .set("org.apache.spark.sql.parquet.row.attributes", sparkSchema.json())
+                .set("spark.sql.parquet.writeLegacyFormat", "false")
+                .set("spark.sql.parquet.outputTimestampType", "INT96")
+                .build(),
+            MetricsConfig.getDefault())) {
+      writer.addAll(rows);
+    }
+
+    InputFile parquetInputFile = Files.localInput(outputFilePath);
+    List<RowData> readDataRows = rowDatasFromFile(parquetInputFile, schema);
+    Assert.assertEquals(rows.size(), readDataRows.size());
+    for (int i = 0; i < rows.size(); i += 1) {
+      Assert.assertEquals(rows.get(i).getLong(0), readDataRows.get(i).getLong(0));
+    }
+  }
+
+  /**
+   * Native Spark ParquetWriter.Builder implementation so that we can write timestamps using Spark's native
+   * ParquetWriteSupport.
+   * thanks for the PR https://github.com/apache/iceberg/pull/1184 by @gustavoatt
+   */
+  private static class NativeSparkWriterBuilder
+      extends ParquetWriter.Builder<InternalRow, NativeSparkWriterBuilder> {
+    private final Map<String, String> config = Maps.newHashMap();
+
+    NativeSparkWriterBuilder(org.apache.parquet.io.OutputFile path) {
+      super(path);
+    }
+
+    public NativeSparkWriterBuilder set(String property, String value) {
+      this.config.put(property, value);
+      return self();
+    }
+
+    @Override
+    protected NativeSparkWriterBuilder self() {
+      return this;
+    }
+
+    @Override
+    protected WriteSupport<InternalRow> getWriteSupport(Configuration configuration) {
+      for (Map.Entry<String, String> entry : config.entrySet()) {
+        configuration.set(entry.getKey(), entry.getValue());
+      }
+
+      return new org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport();
+    }
+  }
 }