-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.go
130 lines (119 loc) · 2.58 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package main
import (
"bytes"
"fmt"
"os"
"regexp"
"unicode"
"github.com/urfave/cli"
"golang.org/x/text/unicode/norm"
)
// Package-level configuration shared by the command actions and helpers.
var (
	// version is reported as the application version.
	version = "1.0.0"

	// newLine matches one or more consecutive line breaks (LF or CRLF).
	newLine = regexp.MustCompile(`(\r?\n)+`)
	// duplicateWhiteSpace matches a run of two or more space characters.
	duplicateWhiteSpace = regexp.MustCompile(`( ){2,}`)
	// hyphenAtEnd matches a hyphen followed by whitespace or a line break.
	hyphenAtEnd = regexp.MustCompile(`-([^\S]+|(\r?\n))`)
	// endOfSentence matches a single '?', '!' or '.' character.
	endOfSentence = regexp.MustCompile(`[?!.]`)

	// lengthThreshold is the upper bound applied to the formatted output.
	lengthThreshold = 5000

	// emptyErrorMsg is printed when the cleaned input is shorter than two
	// characters. NOTE(review): looks like an opaque marker string for the
	// calling program to detect — confirm.
	emptyErrorMsg = `NSaYw3'D2o,W1eL_|ac\`
	// tooLongErrorMsg is printed when the output exceeds lengthThreshold.
	tooLongErrorMsg = `CPU8y2_Fo_DBqAoDCfps`
)
// main configures the command-line application with its two commands,
// "split" and "reshape" (both handled by common), and runs it against the
// process arguments.
//
// If the application returns an error, it is reported on stderr and the
// process exits with a non-zero status so callers can detect the failure.
func main() {
	app := cli.NewApp()
	app.Name = "google-translate-formatter"
	app.Usage = "Remove all `\\n` in the sentence and then separate by period."
	app.Version = version
	app.Commands = []cli.Command{
		{
			Name:   "split",
			Usage:  "Remove `\\n` and split sentence by period.",
			Action: common,
		},
		{
			Name:   "reshape",
			Usage:  "Just remove `\\n` ...etc",
			Action: common,
		},
	}

	if err := app.Run(os.Args); err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		// Previously the error was only printed and the process still
		// exited with status 0; signal the failure through the exit code.
		os.Exit(1)
	}
}
// common is the action shared by the "split" and "reshape" commands.
//
// It takes the first positional argument, applies Unicode NFC normalization,
// then removes hyphen-plus-whitespace sequences, line breaks and repeated
// spaces. When the invoked command is "split", the cleaned text is further
// broken up at sentence boundaries by split.
//
// The result is printed to stdout. Two conditions abort with exit status 1
// after printing a fixed message to stdout instead of the result:
//   - the cleaned text is shorter than two characters (emptyErrorMsg);
//   - the final text is longer than lengthThreshold characters
//     (tooLongErrorMsg).
func common(c *cli.Context) {
	res := norm.NFC.String(c.Args().First())
	res = removeEndHyphen(res)
	res = removeNewLine(res)
	res = removeConsecutiveWhiteSpace(res)

	if len([]rune(res)) < 2 {
		fmt.Print(emptyErrorMsg)
		os.Exit(1)
	}

	if c.Command.Name == "split" {
		res = split(res)
	}

	// Measure the upper bound in runes, like the lower bound above. Using
	// len(res) counts UTF-8 bytes, which rejects multi-byte text (accented
	// Latin, CJK, ...) long before it reaches lengthThreshold characters.
	if len([]rune(res)) > lengthThreshold {
		fmt.Print(tooLongErrorMsg)
		os.Exit(1)
	}

	fmt.Print(res)
}
// split cuts sentence after every match of endOfSentence and joins the
// pieces back together, choosing a separator for each piece from the first
// non-space character of that piece:
//
//   - digit, or no non-space character at all: no separator;
//   - upper-case letter: a blank line ("\n\n");
//   - anything else: a single space.
//
// The first piece, and any piece shorter than three runes, is appended
// unchanged without a separator.
func split(sentence string) string {
	var out bytes.Buffer
	for idx, part := range splitAfter(sentence, endOfSentence) {
		runes := []rune(part)
		// Only pieces after the first that are at least three runes long
		// are considered for a separator.
		if idx > 0 && len(runes) >= 3 {
			switch firstCharType(runes) {
			case 0:
				// No separator.
			case 1:
				out.WriteString("\n\n")
			default:
				out.WriteString(" ")
			}
		}
		out.WriteString(part)
	}
	return out.String()
}
// firstCharType classifies the first non-whitespace rune in rs.
//
// It returns 0 when that rune is a digit, 1 when it is an upper-case letter
// and 2 for any other rune. If rs is empty or contains only whitespace, the
// result is 0.
func firstCharType(rs []rune) (mode int) {
	for _, r := range rs {
		if unicode.IsSpace(r) {
			continue
		}
		// r is the first non-space rune; it alone decides the result.
		switch {
		case unicode.IsDigit(r):
			return 0
		case unicode.IsUpper(r):
			return 1
		default:
			return 2
		}
	}
	return 0
}
// splitAfter cuts s immediately after every match of re and returns the
// pieces in order. Each match stays attached to the end of the piece that
// precedes it.
//
// When re does not match, the result is a one-element slice holding s.
// Otherwise the text after the last match is always appended as the final
// element, so that element is the empty string when s ends with a match.
func splitAfter(s string, re *regexp.Regexp) []string {
	matches := re.FindAllStringIndex(s, -1)
	if matches == nil {
		return []string{s}
	}

	pieces := make([]string, 0, len(matches)+1)
	start := 0
	for _, loc := range matches {
		end := loc[1] // byte offset just past the current match
		pieces = append(pieces, s[start:end])
		start = end
	}
	return append(pieces, s[start:])
}
// removeNewLine deletes every run of line breaks matched by newLine from
// sentence. Nothing is inserted in their place, so the text on either side
// of a line break is joined directly.
func removeNewLine(sentence string) string {
	joined := newLine.ReplaceAllLiteralString(sentence, "")
	return joined
}
// removeEndHyphen deletes every match of hyphenAtEnd from sentence: a hyphen
// together with the whitespace or line break that follows it. The text
// before the hyphen and the text after the whitespace are joined directly.
func removeEndHyphen(sentence string) string {
	joined := hyphenAtEnd.ReplaceAllLiteralString(sentence, "")
	return joined
}
// removeConsecutiveWhiteSpace collapses every run matched by
// duplicateWhiteSpace (two or more space characters) in sentence into a
// single space.
func removeConsecutiveWhiteSpace(sentence string) string {
	collapsed := duplicateWhiteSpace.ReplaceAllLiteralString(sentence, " ")
	return collapsed
}