@@ -29,6 +29,8 @@ import org.apache.spark.streaming.StreamingContext._
2929import org .apache .spark .streaming .dstream .{DStream , FileInputDStream }
3030import org .apache .spark .streaming .util .ManualClock
3131import org .apache .spark .util .Utils
32+ import org .apache .hadoop .io .{Text , IntWritable }
33+ import org .apache .hadoop .mapreduce .lib .output .TextOutputFormat
3234
3335/**
3436 * This test suite tests the checkpointing functionality of DStreams -
@@ -205,6 +207,30 @@ class CheckpointSuite extends TestSuiteBase {
205207 testCheckpointedOperation(input, operation, output, 7 )
206208 }
207209
// Runs a word-count DStream that also has a saveAsNewAPIHadoopFiles output
// operation registered on it through testCheckpointedOperation. The six input
// batches are the same three batches repeated twice, and the expected output
// is the per-batch (word, count) pairs for each of them.
test("recovery with saveAsNewAPIHadoopFiles") {
  val tempDir = Files.createTempDir()
  try {
    testCheckpointedOperation(
      Seq(Seq("a", "a", "b"), Seq("", ""), Seq(), Seq("a", "a", "b"), Seq("", ""), Seq()),
      (s: DStream[String]) => {
        val output = s.map(x => (x, 1)).reduceByKey(_ + _)
        // Registered only for its side effect; tempDir's URI is used as the
        // file-name prefix and "result" as the suffix.
        output.saveAsNewAPIHadoopFiles(
          tempDir.toURI.toString,
          "result",
          classOf[Text],
          classOf[IntWritable],
          classOf[TextOutputFormat[Text, IntWritable]])
        // The counted stream is the value of this operation function.
        output
      },
      Seq(
        Seq(("a", 2), ("b", 1)), Seq(("", 2)), Seq(),
        Seq(("a", 2), ("b", 1)), Seq(("", 2)), Seq()),
      3
    )
  } finally {
    // Always remove the temporary directory, whether or not the check above passed.
    Utils.deleteRecursively(tempDir)
  }
}
233+
208234
209235 // This tests whether the StateDStream's RDD checkpointing works correctly such
210236 // that the system can recover from a master failure. This assumes as reliable,
@@ -391,7 +417,9 @@ class CheckpointSuite extends TestSuiteBase {
391417 logInfo(" Manual clock after advancing = " + clock.time)
392418 Thread .sleep(batchDuration.milliseconds)
393419
394- val outputStream = ssc.graph.getOutputStreams.head.asInstanceOf [TestOutputStreamWithPartitions [V ]]
420+ val outputStream = ssc.graph.getOutputStreams.filter { dstream =>
421+ dstream.isInstanceOf [TestOutputStreamWithPartitions [V ]]
422+ }.head.asInstanceOf [TestOutputStreamWithPartitions [V ]]
395423 outputStream.output.map(_.flatten)
396424 }
397425}
0 commit comments