forked from happyer/distributed-computing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
common_map.go
96 lines (84 loc) · 2.92 KB
/
common_map.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
package mapreduce
import (
"hash/fnv"
"io/ioutil"
"log"
"encoding/json"
"os"
)
// doMap does the job of a map worker: it reads one of the input files
// (inFile), calls the user-defined map function (mapF) for that file's
// contents, and partitions the output into nReduce intermediate files.
func doMap(
jobName string, // the name of the MapReduce job
mapTaskNumber int, // which map task this is
inFile string,
nReduce int, // the number of reduce task that will be run ("R" in the paper)
mapF func(file string, contents string) []KeyValue,
) {
// TODO:
// You will need to write this function.
// You can find the filename for this map task's input to reduce task number
// r using reduceName(jobName, mapTaskNumber, r). The ihash function (given
// below doMap) should be used to decide which file a given key belongs into.
//
// The intermediate output of a map task is stored in the file
// system as multiple files whose name indicates which map task produced
// them, as well as which reduce task they are for. Coming up with a
// scheme for how to store the key/value pairs on disk can be tricky,
// especially when taking into account that both keys and values could
// contain newlines, quotes, and any other character you can think of.
//
// One format often used for serializing data to a byte stream that the
// other end can correctly reconstruct is JSON. You are not required to
// use JSON, but as the output of the reduce tasks *must* be JSON,
// familiarizing yourself with it here may prove useful. You can write
// out a data structure as a JSON string to a file using the commented
// code below. The corresponding decoding functions can be found in
// common_reduce.go.
//
// enc := json.NewEncoder(file)
// for _, kv := ... {
// err := enc.Encode(&kv)
//
// Remember to close the file after you have written all the values!
//setp 1 read file
contents, err := ioutil.ReadFile(inFile)
if err != nil {
log.Fatal("do map error for inFile ",err)
}
//setp 2 call user user-map method ,to get kv
kvResult := mapF(inFile, string(contents))
/**
* setp 3 use key of kv generator nReduce file ,partition
* a. create tmpFiles
* b. create encoder for tmpFile to write contents
* c. partition by key, then write tmpFile
*/
var tmpFiles [] *os.File = make([] *os.File, nReduce)
var encoders [] *json.Encoder = make([] *json.Encoder, nReduce)
for i := 0; i < nReduce; i++ {
tmpFileName := reduceName(jobName,mapTaskNumber,i)
tmpFiles[i],err = os.Create(tmpFileName)
if err!=nil {
log.Fatal(err)
}
defer tmpFiles[i].Close()
encoders[i] = json.NewEncoder(tmpFiles[i])
if err!=nil {
log.Fatal(err)
}
}
for _ , kv := range kvResult {
hashKey := int(ihash(kv.Key)) % nReduce
err := encoders[hashKey].Encode(&kv)
if err!=nil {
log.Fatal("do map encoders ",err)
}
}
}
func ihash(s string) uint32 {
h := fnv.New32a()
h.Write([]byte(s))
return h.Sum32()
}