Skip to content

Commit 6a701ad

Browse files
committed
Input/Output format classes are now supported for map reduce jobs
1 parent e7d793c commit 6a701ad

File tree

4 files changed

+34
-4
lines changed

4 files changed

+34
-4
lines changed

README.md

+7-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ The classic word counts example looks like this in Saaloop:
1111

1212
```scala
1313
import com.jayway.saaloop.dsl.Saaloop._
14+
import org.apache.hadoop.io.{IntWritable, Text, LongWritable}
15+
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
16+
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
1417

1518
object WordCount {
1619

@@ -31,9 +34,12 @@ object WordCount {
3134
}
3235

3336
job("myjob")(
37+
hadoopConfiguration = c,
3438
mapWith = m,
3539
reduceWith = r,
36-
hadoopConfiguration = c
40+
inputFormatClass = classOf[TextInputFormat],
41+
outputFormatClass = classOf[TextOutputFormat[_,_]],
42+
waitForCompletion = true
3743
)
3844
}
3945

saaloop-core/src/main/scala/com/jayway/saaloop/dsl/SaaloopJob.scala

+17-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ package com.jayway.saaloop.dsl
22

33
import org.apache.hadoop.mapreduce.{Reducer, Job, Mapper}
44
import org.apache.hadoop.conf.Configuration
5+
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
6+
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
7+
import org.apache.hadoop.fs.Path
58

69
/**
710
* Copyright 2012 Amir Moulavi ([email protected])
@@ -27,8 +30,15 @@ trait SaaloopJob extends Types {
2730

2831
object job {
2932

30-
def apply[K1 <: key, V1 <: value, K2 <: key, V2 <: value, K3 <: key, V3 <: value, K4 <: key, V4 <: value]
31-
(name:String)(mapWith:Mapper[K1, V1, K2, V2], reduceWith:Reducer[K3, V3, K4, V4], hadoopConfiguration:Configuration = conf):Job = {
33+
def apply[K1 <: key, V1 <: value, K2 <: key, V2 <: value, K3 <: key, V3 <: value, K4 <: key, V4 <: value, FIN <: fin, FOUT <: fout]
34+
(name:String)(mapWith:Mapper[K1, V1, K2, V2],
35+
reduceWith:Reducer[K3, V3, K4, V4],
36+
hadoopConfiguration:Configuration = conf,
37+
inputPath:String = ".",
38+
outputPath:String = ".",
39+
inputFormatClass:Class[FIN] = null,
40+
outputFormatClass:Class[FOUT] = null,
41+
waitForCompletion:Boolean = false):Job = {
3242

3343
val job = new Job(hadoopConfiguration, name)
3444
job.setMapperClass(mapWith.getClass)
@@ -37,6 +47,11 @@ trait SaaloopJob extends Types {
3747
val outputValueType = asInstanceOf[V4]
3848
job.setOutputKeyClass(outputKeyType.getClass)
3949
job.setOutputValueClass(outputValueType.getClass)
50+
FileInputFormat.addInputPath(job, new Path(inputPath))
51+
FileOutputFormat.setOutputPath(job, new Path(outputPath))
52+
if (inputFormatClass != null) job.setInputFormatClass(inputFormatClass)
53+
if (outputFormatClass != null) job.setOutputFormatClass(outputFormatClass)
54+
job.waitForCompletion(waitForCompletion)
4055

4156
job
4257
}

saaloop-core/src/main/scala/com/jayway/saaloop/dsl/Types.scala

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package com.jayway.saaloop.dsl
22

33
import org.apache.hadoop.io.{WritableComparable, Writable}
4+
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
5+
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
46

57
/**
68
* Copyright 2012 Amir Moulavi ([email protected])
@@ -23,4 +25,6 @@ import org.apache.hadoop.io.{WritableComparable, Writable}
2325
trait Types {
2426
type key = Writable with WritableComparable[_]
2527
type value = Writable
28+
type fin = FileInputFormat[_, _]
29+
type fout = FileOutputFormat[_, _]
2630
}

saaloop-examples/src/main/scala/WordCount.scala

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import com.jayway.saaloop.dsl.Saaloop._
22
import org.apache.hadoop.io.{IntWritable, Text, LongWritable}
3+
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
4+
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
35

46
/**
57
* Copyright 2012 Amir Moulavi ([email protected])
@@ -38,9 +40,12 @@ object WordCount {
3840
}
3941

4042
job("myjob")(
43+
hadoopConfiguration = c,
4144
mapWith = m,
4245
reduceWith = r,
43-
hadoopConfiguration = c
46+
inputFormatClass = classOf[TextInputFormat],
47+
outputFormatClass = classOf[TextOutputFormat[_,_]],
48+
waitForCompletion = true
4449
)
4550
}
4651

0 commit comments

Comments
 (0)