@@ -85,17 +85,28 @@ case class Exchange(
8585 keySchema : Array [DataType ],
8686 valueSchema : Array [DataType ],
8787 numPartitions : Int ): Serializer = {
88+ // In ExternalSorter's spillToMergeableFile function, key-value pairs are written out
89+ // through write(key) and then write(value) instead of write((key, value)). Because
90+ // SparkSqlSerializer2 assumes that objects passed in are Product2, we cannot safely use
91+ // it when spillToMergeableFile in ExternalSorter will be used.
92+ // So, we will not use SparkSqlSerializer2 when
93+ // - Sort-based shuffle is enabled and the number of reducers (numPartitions) is greater
94+ // than the bypassMergeThreshold; or
95+ // - newOrdering is defined.
96+ val cannotUseSqlSerializer2 =
97+ (sortBasedShuffleOn && numPartitions > bypassMergeThreshold) || newOrdering.nonEmpty
98+
8899 val useSqlSerializer2 =
89- ! (sortBasedShuffleOn && numPartitions > bypassMergeThreshold) &&
90- child.sqlContext.conf.useSqlSerializer2 &&
91- SparkSqlSerializer2 .support(keySchema) &&
92- SparkSqlSerializer2 .support(valueSchema)
100+ child.sqlContext.conf.useSqlSerializer2 && // SparkSqlSerializer2 is enabled.
101+ ! cannotUseSqlSerializer2 && // Safe to use Serializer2.
102+ SparkSqlSerializer2 .support(keySchema) && // The schema of key is supported.
103+ SparkSqlSerializer2 .support(valueSchema) // The schema of value is supported.
93104
94105 val serializer = if (useSqlSerializer2) {
95- logInfo(" Use SparkSqlSerializer2." )
106+ logInfo(" Using SparkSqlSerializer2." )
96107 new SparkSqlSerializer2 (keySchema, valueSchema)
97108 } else {
98- logInfo(" Use SparkSqlSerializer." )
109+ logInfo(" Using SparkSqlSerializer." )
99110 new SparkSqlSerializer (sparkConf)
100111 }
101112
@@ -160,7 +171,7 @@ case class Exchange(
160171 } else {
161172 new ShuffledRDD [Row , Null , Null ](rdd, part)
162173 }
163- val keySchema = sortingExpressions .map(_.dataType).toArray
174+ val keySchema = child.output .map(_.dataType).toArray
164175 shuffled.setSerializer(serializer(keySchema, null , numPartitions))
165176
166177 shuffled.map(_._1)
0 commit comments