apache · dongjoon-hyun · Dec 6, 2017 · Dec 6, 2017 · dongjoon-hyun · Dec 6, 2017
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala
@@ -32,13 +32,21 @@ import org.apache.spark.unsafe.types.UTF8String
  */
 class OrcDeserializer(
     dataSchema: StructType,
-    requiredSchema: StructType,
-    requestedColIds: Array[Int]) {
-
-  private val resultRow = new SpecificInternalRow(requiredSchema.map(_.dataType))
+    resultSchema: StructType,
+    requestedColIds: Array[Int],
+    partitionValues: InternalRow) {
+
+  // Make resultRow and set the trailing partition columns once; `locally` keeps loop state local.
+  private val resultRow = new SpecificInternalRow(resultSchema.map(_.dataType))
+  locally {
+    val start = resultSchema.length - partitionValues.numFields
+    (0 until partitionValues.numFields).foreach { i =>
+      resultRow.update(start + i, partitionValues.get(i, resultSchema(start + i).dataType))
+    }
+  }
 
   private val fieldWriters: Array[WritableComparable[_] => Unit] = {
-    requiredSchema.zipWithIndex
+    resultSchema.zipWithIndex
       // The value of missing columns are always null, do not need writers.
       .filterNot { case (_, index) => requestedColIds(index) == -1 }
       .map { case (f, index) =>

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
@@ -124,7 +124,7 @@ class OrcFileFormat
     true
   }
 
-  override def buildReader(
+  override def buildReaderWithPartitionValues(
       sparkSession: SparkSession,
       dataSchema: StructType,
       partitionSchema: StructType,
@@ -138,6 +138,8 @@ class OrcFileFormat
       }
     }
 
+    val resultSchema = StructType(requiredSchema.fields ++ partitionSchema.fields)
+
     val broadcastedConf =
       sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf))
     val isCaseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
@@ -167,8 +169,10 @@ class OrcFileFormat
         val iter = new RecordReaderIterator[OrcStruct](orcRecordReader)
         Option(TaskContext.get()).foreach(_.addTaskCompletionListener(_ => iter.close()))
 
-        val unsafeProjection = UnsafeProjection.create(requiredSchema)
-        val deserializer = new OrcDeserializer(dataSchema, requiredSchema, requestedColIds)
+        val colIds = requestedColIds ++ Array.fill(partitionSchema.length)(-1) // -1: no writer
+        val unsafeProjection = UnsafeProjection.create(resultSchema)
+        val deserializer =
+          new OrcDeserializer(dataSchema, resultSchema, colIds, file.partitionValues)
         iter.map(value => unsafeProjection(deserializer.deserialize(value)))
       }
     }