@@ -22,7 +22,7 @@ import java.util
 
 import com.esotericsoftware.kryo.io.ByteBufferOutputStream
 
-import org.apache.spark.{ShuffleDependency, SparkConf, SparkEnv, TaskContext}
+import org.apache.spark._
 import org.apache.spark.executor.ShuffleWriteMetrics
 import org.apache.spark.scheduler.MapStatus
 import org.apache.spark.serializer.Serializer
@@ -34,17 +34,31 @@ import org.apache.spark.unsafe.memory.{MemoryBlock, TaskMemoryManager}
 import org.apache.spark.unsafe.sort.UnsafeSorter
 import org.apache.spark.unsafe.sort.UnsafeSorter.{KeyPointerAndPrefix, PrefixComparator, PrefixComputer, RecordComparator}
 
-private[spark] class UnsafeShuffleHandle[K, V](
+private class UnsafeShuffleHandle[K, V](
     shuffleId: Int,
     override val numMaps: Int,
     override val dependency: ShuffleDependency[K, V, V])
   extends BaseShuffleHandle(shuffleId, numMaps, dependency) {
-  require(UnsafeShuffleManager.canUseUnsafeShuffle(dependency))
 }
 
-private[spark] object UnsafeShuffleManager {
+private[spark] object UnsafeShuffleManager extends Logging {
   def canUseUnsafeShuffle[K, V, C](dependency: ShuffleDependency[K, V, C]): Boolean = {
-    dependency.aggregator.isEmpty && dependency.keyOrdering.isEmpty
+    val shufId = dependency.shuffleId
+    val serializer = Serializer.getSerializer(dependency.serializer)
+    if (!serializer.supportsRelocationOfSerializedObjects) {
+      log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because the serializer, " +
+        s"${serializer.getClass.getName}, does not support object relocation")
+      false
+    } else if (dependency.aggregator.isDefined) {
+      log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because an aggregator is defined")
+      false
+    } else if (dependency.keyOrdering.isDefined) {
+      log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because a key ordering is defined")
+      false
+    } else {
+      log.debug(s"Can use UnsafeShuffle for shuffle $shufId")
+      true
+    }
   }
 }
 
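Note: the new canUseUnsafeShuffle check above gates the optimized path on three conditions. A standalone sketch of the same decision logic follows, using stub types rather than Spark's real ShuffleDependency/Serializer API (all names below are hypothetical; only the boolean logic mirrors the hunk):

  // Stub model of the three requirements: relocatable serialized output,
  // no map-side aggregation, and no key ordering. An aggregator or key
  // ordering would force records to be deserialized, defeating the
  // sort-on-raw-bytes approach.
  object CanUseUnsafeShuffleSketch {
    final case class StubDep(
        serializerSupportsRelocation: Boolean,
        hasAggregator: Boolean,
        hasKeyOrdering: Boolean)

    def canUse(dep: StubDep): Boolean =
      dep.serializerSupportsRelocation && !dep.hasAggregator && !dep.hasKeyOrdering

    def main(args: Array[String]): Unit = {
      println(canUse(StubDep(true, false, false))) // true: all conditions met
      println(canUse(StubDep(true, true, false)))  // false: aggregator defined
    }
  }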
@@ -73,15 +87,13 @@ private object PartitionerPrefixComparator extends PrefixComparator {
   }
 }
 
-private[spark] class UnsafeShuffleWriter[K, V](
+private class UnsafeShuffleWriter[K, V](
     shuffleBlockManager: IndexShuffleBlockManager,
     handle: UnsafeShuffleHandle[K, V],
     mapId: Int,
     context: TaskContext)
   extends ShuffleWriter[K, V] {
 
-  println("Construcing a new UnsafeShuffleWriter")
-
   private[this] val memoryManager: TaskMemoryManager = context.taskMemoryManager()
 
   private[this] val dep = handle.dependency
@@ -158,7 +170,6 @@ private[spark] class UnsafeShuffleWriter[K, V](
       memoryManager.encodePageNumberAndOffset(currentPage, currentPagePosition)
     PlatformDependent.UNSAFE.putLong(currentPage.getBaseObject, currentPagePosition, partitionId)
     currentPagePosition += 8
-    println("The stored record length is " + serializedRecordSize)
     PlatformDependent.UNSAFE.putLong(
       currentPage.getBaseObject, currentPagePosition, serializedRecordSize)
     currentPagePosition += 8
@@ -169,7 +180,6 @@ private[spark] class UnsafeShuffleWriter[K, V](
       currentPagePosition,
       serializedRecordSize)
     currentPagePosition += serializedRecordSize
-    println("After writing record, current page position is " + currentPagePosition)
     sorter.insertRecord(newRecordAddress)
 
     // Reset for writing the next record
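Note: the two hunks above write each record into a memory page with a fixed 16-byte header. A minimal, self-contained sketch of that layout, using java.nio.ByteBuffer in place of PlatformDependent.UNSAFE (names below are hypothetical):

  import java.nio.ByteBuffer

  // Layout per record: [partitionId: 8 bytes][recordLength: 8 bytes][record bytes].
  object RecordLayoutSketch {
    def writeRecord(page: ByteBuffer, partitionId: Long, record: Array[Byte]): Unit = {
      page.putLong(partitionId)          // 8-byte partition id header
      page.putLong(record.length.toLong) // 8-byte length header
      page.put(record)                   // serialized record payload
    }

    def main(args: Array[String]): Unit = {
      val page = ByteBuffer.allocate(64)
      writeRecord(page, 3L, "hello".getBytes("UTF-8"))
      page.flip()
      println(s"partition=${page.getLong()} length=${page.getLong()}") // partition=3 length=5
    }
  }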
@@ -195,8 +205,10 @@ private[spark] class UnsafeShuffleWriter[K, V](
     // TODO: don't close and re-open file handles so often; this could be inefficient
 
     def closePartition(): Unit = {
-      writer.commitAndClose()
-      partitionLengths(currentPartition) = writer.fileSegment().length
+      if (writer != null) {
+        writer.commitAndClose()
+        partitionLengths(currentPartition) = writer.fileSegment().length
+      }
     }
 
     def switchToPartition(newPartition: Int): Unit = {
@@ -219,8 +231,6 @@ private[spark] class UnsafeShuffleWriter[K, V](
       val baseObject = memoryManager.getPage(keyPointerAndPrefix.recordPointer)
       val baseOffset = memoryManager.getOffsetInPage(keyPointerAndPrefix.recordPointer)
       val recordLength: Int = PlatformDependent.UNSAFE.getLong(baseObject, baseOffset + 8).toInt
-      println("Base offset is " + baseOffset)
-      println("Record length is " + recordLength)
       // TODO: need to have a way to figure out whether a serializer supports relocation of
       // serialized objects or not. Sandy also ran into this in his patch (see
       // https://github.com/apache/spark/pull/4450). If we're using Java serialization, we might
@@ -244,12 +254,8 @@ private[spark] class UnsafeShuffleWriter[K, V](
 
   /** Write a sequence of records to this task's output */
   override def write(records: Iterator[_ <: Product2[K, V]]): Unit = {
-    println("Opened writer!")
-
     val sortedIterator = sortRecords(records)
     val partitionLengths = writeSortedRecordsToFile(sortedIterator)
-
-    println("Partition lengths are " + partitionLengths.toSeq)
     shuffleBlockManager.writeIndexFile(dep.shuffleId, mapId, partitionLengths)
     mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths)
   }
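Note: with the printlns gone, write() reduces to three steps: sort by partition id, stream the sorted records into one data file, then record per-partition lengths in an index file. A sketch of that flow with stub function parameters (not Spark's real signatures; sortRecords, writeSortedRecordsToFile, and writeIndexFile above are modeled as passed-in functions so the sketch is self-contained):

  def writeFlow[T](
      records: Iterator[T],
      sort: Iterator[T] => Iterator[T],
      writeDataFile: Iterator[T] => Array[Long],
      writeIndexFile: Array[Long] => Unit): Unit = {
    val sorted = sort(records)                   // order records by target partition
    val partitionLengths = writeDataFile(sorted) // one contiguous file; lengths per partition
    writeIndexFile(partitionLengths)             // reducers use the index to seek their slice
  }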
@@ -264,7 +270,6 @@ private[spark] class UnsafeShuffleWriter[K, V](
 
   /** Close this writer, passing along whether the map completed */
   override def stop(success: Boolean): Option[MapStatus] = {
-    println("Stopping unsafeshufflewriter")
     try {
       if (stopping) {
         None
@@ -300,7 +305,6 @@ private[spark] class UnsafeShuffleManager(conf: SparkConf) extends ShuffleManage
       numMaps: Int,
       dependency: ShuffleDependency[K, V, C]): ShuffleHandle = {
     if (UnsafeShuffleManager.canUseUnsafeShuffle(dependency)) {
-      println("Opening unsafeShuffleWriter")
       new UnsafeShuffleHandle[K, V](
         shuffleId, numMaps, dependency.asInstanceOf[ShuffleDependency[K, V, V]])
     } else {