apache · gengliangwang · Jul 31, 2018 · Aug 1, 2018 · Aug 1, 2018 · Aug 1, 2018
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
@@ -23,6 +23,7 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.avro.{Schema, SchemaBuilder}
+import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis}
 import org.apache.avro.Schema.Type._
 import org.apache.avro.generic._
 import org.apache.avro.util.Utf8
@@ -71,7 +72,15 @@ class AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) {
   private def newWriter(
       avroType: Schema,
       catalystType: DataType,
-      path: List[String]): (CatalystDataUpdater, Int, Any) => Unit =
+      path: List[String]): (CatalystDataUpdater, Int, Any) => Unit = {
+    (avroType.getLogicalType, catalystType) match {
+      case (_: TimestampMillis, TimestampType) => return (updater, ordinal, value) =>
+        updater.setLong(ordinal, value.asInstanceOf[Long] * 1000)
+      case (_: TimestampMicros, TimestampType) => return (updater, ordinal, value) =>
+        updater.setLong(ordinal, value.asInstanceOf[Long])
+      case _ =>
+    }
+
     (avroType.getType, catalystType) match {
       case (NULL, NullType) => (updater, ordinal, _) =>
         updater.setNullAt(ordinal)
@@ -246,6 +255,7 @@ class AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) {
             s"Source Avro schema: $rootAvroType.\n" +
             s"Target Catalyst type: $rootCatalystType")
     }
+  }
 
   private def getRecordWriter(
       avroType: Schema,

diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
@@ -19,7 +19,8 @@ package org.apache.spark.sql.avro
 
 import scala.collection.JavaConverters._
 
-import org.apache.avro.{Schema, SchemaBuilder}
+import org.apache.avro.{LogicalType, Schema, SchemaBuilder}
+import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis}
 import org.apache.avro.Schema.Type._
 
 import org.apache.spark.sql.types._
@@ -35,6 +36,12 @@ object SchemaConverters {
    * This function takes an avro schema and returns a sql schema.
    */
   def toSqlType(avroSchema: Schema): SchemaType = {
+    avroSchema.getLogicalType match {
+      case _: TimestampMillis | _: TimestampMicros =>
+        return SchemaType(TimestampType, nullable = false)
+      case _ =>
+    }
+
     avroSchema.getType match {
       case INT => SchemaType(IntegerType, nullable = false)
       case STRING => SchemaType(StringType, nullable = false)
@@ -114,7 +121,10 @@ object SchemaConverters {
       case ByteType | ShortType | IntegerType => builder.intType()
       case LongType => builder.longType()
       case DateType => builder.longType()
-      case TimestampType => builder.longType()
+      case TimestampType =>
+        // To be consistent with the previous behavior of writing Timestamp type with Avro 1.7,
+        // the default output Avro Timestamp type is with millisecond precision.
+        builder.longBuilder().prop(LogicalType.LOGICAL_TYPE_PROP, "timestamp-millis").endLong()
       case FloatType => builder.floatType()
       case DoubleType => builder.doubleType()
       case _: DecimalType | StringType => builder.stringType()

diff --git a/external/avro/src/test/resources/timestamp.avro b/external/avro/src/test/resources/timestamp.avro
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -41,6 +41,7 @@ import org.apache.spark.sql.types._
 class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
   val episodesAvro = testFile("episodes.avro")
   val testAvro = testFile("test.avro")
+  val timestampAvro = testFile("timestamp.avro")
 
   override protected def beforeAll(): Unit = {
     super.beforeAll()
@@ -331,6 +332,63 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
     }
   }
 
+  test("Logical type: timestamp_millis") {
+    // Verifies the column as read from the fixture and again after a write/read round trip.
+    val session = spark
+    import session.implicits._
+    val millisColumn = $"timestamp_millis"
+    val expectedRows =
+      Seq(1L, 666L).toDF("timestamp_millis").select(millisColumn.cast(TimestampType)).collect()
+    val loaded = spark.read.format("avro").load(timestampAvro).select(millisColumn)
+    checkAnswer(loaded, expectedRows)
+    withTempPath { outputDir =>
+      val target = outputDir.toString
+      loaded.write.format("avro").save(target)
+      checkAnswer(spark.read.format("avro").load(target), expectedRows)
+    }
+  }
+
+  test("Logical type: timestamp_micros") {
+    // Verifies the column as read from the fixture and again after a write/read round trip.
+    val session = spark
+    import session.implicits._
+    val microsColumn = $"timestamp_micros"
+    val expectedRows =
+      Seq(2L, 999L).toDF("timestamp_micros").select(microsColumn.cast(TimestampType)).collect()
+    val loaded = spark.read.format("avro").load(timestampAvro).select(microsColumn)
+    checkAnswer(loaded, expectedRows)
+    withTempPath { outputDir =>
+      val target = outputDir.toString
+      loaded.write.format("avro").save(target)
+      checkAnswer(spark.read.format("avro").load(target), expectedRows)
+    }
+  }
+
+  test("Logical type: user specified schema") {
+    // Reads both logical-type columns of the fixture through an explicitly supplied Avro schema.
+    val session = spark
+    import session.implicits._
+    val castColumns = Seq($"timestamp_millis", $"timestamp_micros").map(_.cast(TimestampType))
+    val expectedRows =
+      Seq((1L, 2L), (666L, 999L)).toDF("timestamp_millis", "timestamp_micros")
+        .select(castColumns: _*).collect()
+
+    val userSchema = """
+      {
+        "namespace": "logical",
+        "type": "record",
+        "name": "test",
+        "fields": [
+          {"name": "timestamp_millis", "type": {"type": "long","logicalType": "timestamp-millis"}},
+          {"name": "timestamp_micros", "type": {"type": "long","logicalType": "timestamp-micros"}}
+        ]
+      }
+    """
+    val reader = spark.read.format("avro").option("avroSchema", userSchema)
+
+    checkAnswer(reader.load(timestampAvro), expectedRows)
+  }
+
   test("Array data types") {
     withTempPath { dir =>
       val testSchema = StructType(Seq(
@@ -511,7 +569,8 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
 
-      // TimesStamps are converted to longs
+      // Timestamps are read back as Timestamp values
       val times = spark.read.format("avro").load(avroDir).select("Time").collect()
-      assert(times.map(_(0)).toSet == Set(666, 777, 42))
+      assert(times.map(_(0)).toSet ==
+        Set(new Timestamp(666), new Timestamp(777), new Timestamp(42)))
 
       // DecimalType should be converted to string
       val decimals = spark.read.format("avro").load(avroDir).select("Decimal").collect()