-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
110 lines (102 loc) · 2.98 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package main
import (
"fmt"
"io/ioutil"
"log"
"regexp"
"strings"
"time"
)
func main() {
runTime := time.Now()
model := train("big.txt")
correctionTime := time.Now()
badSpelling := "beaty"
correctedSpelling := correct(badSpelling, model)
if correctedSpelling != badSpelling {
fmt.Printf("%s - did you mean %s?\n", badSpelling, correctedSpelling)
} else {
fmt.Println(correctedSpelling)
}
log.Println("Time taken to correct:", time.Since(correctionTime))
log.Println("Total time taken to load and correct:", time.Since(runTime))
}
// train - Load words in big.txt into a map called WORDS
// WORDS keeps a track of the words and the number of times they appear in the file.
func train(trainingData string) map[string]int {
WORDS := make(map[string]int)
pattern := regexp.MustCompile("[a-z]+")
content, err := ioutil.ReadFile(trainingData)
if err != nil {
log.Fatalf("Could not find file %s. You can get a copy from http://norvig.com/big.txt", trainingData)
}
for _, word := range pattern.FindAllString(strings.ToLower(string(content)), -1) {
WORDS[word]++
}
return WORDS
}
// correct - Most probable spelling correction for word.
func correct(word string, model map[string]int) string {
if _, exists := model[word]; exists {
return word
}
if correction := best(word, edits1, model); correction != "" {
return correction
}
if correction := best(word, edits2, model); correction != "" {
return correction
}
return word
}
// best - Generate possible spelling corrections for word from subset of `words` that appear in the dictionary of WORDS.
func best(word string, edits func(string, chan string), model map[string]int) string {
channel := make(chan string, 1024*1024)
go func() { edits(word, channel); channel <- "" }()
maxFreq := 0
correction := ""
for word := range channel {
if word == "" {
break
}
if freq, exists := model[word]; exists && freq > maxFreq {
maxFreq, correction = freq, word
}
}
return correction
}
// edits1 - All edits that are one edit away from `word`.
func edits1(word string, channel chan string) {
const alphabet = "abcdefghijklmnopqrstuvwxyz"
type Pair struct{ leading, trailing string }
var splits []Pair
for i := 0; i < len(word)+1; i++ {
splits = append(splits, Pair{word[:i], word[i:]})
}
for _, s := range splits {
if len(s.trailing) > 0 {
channel <- s.leading + s.trailing[1:]
}
if len(s.trailing) > 1 {
channel <- s.leading + string(s.trailing[1]) + string(s.trailing[0]) + s.trailing[2:]
}
for _, character := range alphabet {
if len(s.trailing) > 0 {
channel <- s.leading + string(character) + s.trailing[1:]
}
}
for _, character := range alphabet {
channel <- s.leading + string(character) + s.trailing
}
}
}
// edits2 - All edits that are two edits away from `word`.
func edits2(word string, channel chan string) {
channel1 := make(chan string, 1024*1024)
go func() { edits1(word, channel1); channel1 <- "" }()
for e1 := range channel1 {
if e1 == "" {
break
}
edits1(e1, channel)
}
}