Skip to content

Commit 2096d28

Browse files
committed
Initial commit
1 parent f8168b0 commit 2096d28

8 files changed

+457
-0
lines changed

.travis.yml

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
language: go
2+
3+
go:
4+
- 1.4.1
5+
6+
before_install:
7+
- sudo pip install codecov
8+
9+
install:
10+
- go get golang.org/x/tools/cmd/vet
11+
- go get golang.org/x/tools/cmd/cover
12+
- go get github.com/golang/lint/golint
13+
14+
before_script:
15+
- go get ./...
16+
- go vet ./...
17+
- golint ./... | xargs -r false
18+
19+
script:
20+
- go test -v -coverprofile=coverage.txt -covermode=count
21+
22+
after_success:
23+
- codecov

LICENSE

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Copyright (c) 2015, Suguru Namura
2+
3+
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
4+
5+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

LICENSE.tinysegmenter

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
TinySegmenter 0.1 -- Super compact Japanese tokenizer in Javascript
2+
(c) 2008 Taku Kudo <[email protected]>
3+
TinySegmenter is freely distributable under the terms of a new BSD licence.
4+
For details, see http://chasen.org/~taku/software/TinySegmenter/LICENCE.txt
5+

README.md

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
NihonGo is a Japanese text utility for Golang
2+
====
3+
4+
[![Build Status](http://img.shields.io/travis/dogenzaka/nihongo.svg?style=flat)](https://travis-ci.org/dogenzaka/nihongo)
5+
[![Coverage](http://img.shields.io/codecov/c/github/dogenzaka/nihongo.svg?style=flat)](https://codecov.io/github/dogenzaka/nihongo)
6+
[![License](http://img.shields.io/badge/license-ISC-red.svg?style=flat)](https://github.com/dogenzaka/nihongo/blob/master/LICENSE)
7+
8+
NihonGo is a Japanese text utility library for Go.
9+
10+
```
11+
go get github.com/dogenzaka/nihongo
12+
```
13+
14+
Features
15+
--
16+
17+
- Katakana / Hiragana conversion
18+
- Unicode normalization
19+
- Detect katakana / hiragana string
20+
- Simple tokenizer ported from TinySegmenter
21+
22+
Examples
23+
--
24+
25+
```go
26+
import (
27+
"fmt"
28+
"github.com/dogenzaka/nihongo"
29+
)
30+
31+
func TestNormalize() {
32+
normalized := nihongo.Normalize("テストテスト+=")
33+
fmt.Println(normalized) // テストテスト+=
34+
}
35+
36+
func TestToHiragana() {
37+
hira := nihongo.ToHiragana("テストてすと")
38+
fmt.Println(hira) // てすとてすと
39+
}
40+
41+
func TestToKatakana() {
42+
kana := nihongo.ToKatakana("テストてすと")
43+
fmt.Println(kana) // テストテスト
44+
}
45+
46+
func TestTokenize() {
47+
words := nihongo.Tokenize("私は人間です")
48+
fmt.Println(words) // ["私" "は" "人間" "です"]
49+
}
50+
51+
```
52+
53+
License
54+
--
55+
ISC
56+

nihong_test.go

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package nihongo
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestNormalize(t *testing.T) {
8+
9+
norm := Normalize("テストてすと")
10+
if norm != "テストてすと" {
11+
t.Errorf("Converted strings which should not be converted %v", norm)
12+
}
13+
14+
norm = Normalize("テストテスト/+")
15+
if norm != "テストテスト/+" {
16+
t.Errorf("Converted strings which should not be converted %v", norm)
17+
}
18+
19+
}
20+
21+
func TestToHiragana(t *testing.T) {
22+
23+
hira := ToHiragana("テスト")
24+
if hira != "てすと" {
25+
t.Errorf("Not converted to Hiragana %v", hira)
26+
}
27+
28+
hira = ToHiragana("テスト混合てすと")
29+
if hira != "てすと混合てすと" {
30+
t.Errorf("Not converted to Hiragana %v", hira)
31+
}
32+
33+
hira = ToHiragana("Englishテスト混合")
34+
if hira != "Englishてすと混合" {
35+
t.Errorf("Not converted to Hiragana %v", hira)
36+
}
37+
38+
hira = ToHiragana("アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲンガギグゲゴザジズゼゾダヂヅデドバビブベボパピプペポ")
39+
if hira != "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをんがぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽ" {
40+
t.Errorf("Not converted to Hiragana %v", hira)
41+
}
42+
}
43+
44+
func TestToKatakana(t *testing.T) {
45+
46+
kana := ToKatakana("てすと")
47+
if kana != "テスト" {
48+
t.Errorf("Not converted to Katakana %v", kana)
49+
}
50+
51+
kana = ToKatakana("てすと混合テスト")
52+
if kana != "テスト混合テスト" {
53+
t.Errorf("Not converted to Katakana %v", kana)
54+
}
55+
56+
kana = ToKatakana("てすと混合English")
57+
if kana != "テスト混合English" {
58+
t.Errorf("Not converted to Katakana %v", kana)
59+
}
60+
61+
kana = ToKatakana("あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわをんがぎぐげござじずぜぞだぢづでどばびぶべぼぱぴぷぺぽ")
62+
if kana != "アイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワヲンガギグゲゴザジズゼゾダヂヅデドバビブベボパピプペポ" {
63+
t.Errorf("Not converted to Katakana %v", kana)
64+
}
65+
}

nihongo.go

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package nihongo
2+
3+
import (
4+
"bytes"
5+
"golang.org/x/text/unicode/norm"
6+
"unicode"
7+
)
8+
9+
// Normalize japanese text which will convert with NFKC normalization.
10+
// Hankaku-Kana -> Zenkaku-Kana
11+
// Zenkaku special chars -> Hankaku special chars
12+
func Normalize(text string) string {
13+
return norm.NFKC.String(text)
14+
}
15+
16+
// ContainsHiragana reports whether at least one rune of text belongs to the
// Unicode Hiragana script. It returns false for an empty string.
func ContainsHiragana(text string) bool {
	for _, ch := range text {
		if !unicode.Is(unicode.Hiragana, ch) {
			continue
		}
		return true
	}
	return false
}
25+
26+
// ContainsKatakana reports whether at least one rune of text belongs to the
// Unicode Katakana script. It returns false for an empty string.
func ContainsKatakana(text string) bool {
	found := false
	for _, ch := range text {
		if unicode.Is(unicode.Katakana, ch) {
			found = true
			break
		}
	}
	return found
}
35+
36+
// ToHiragana returns text with every full-width katakana rune replaced by
// its hiragana counterpart.
//
// Only runes that have a one-to-one hiragana equivalent are converted: the
// katakana letters U+30A1 (ァ) through U+30F6 (ヶ) and the iteration marks
// U+30FD (ヽ) and U+30FE (ヾ). Every other rune is copied unchanged. That
// includes half-width katakana, so normalize text first (see Normalize) if
// half-width kana should be converted as well.
func ToHiragana(text string) string {
	// The hiragana and katakana blocks are laid out in parallel, 0x60 apart.
	const kanaOffset = 0x60

	var buf bytes.Buffer
	buf.Grow(len(text))
	for _, r := range text {
		// Testing for the whole Katakana script would also match half-width
		// forms, ヷ-ヺ, ヿ and squared ligatures; shifting those by the offset
		// yields unrelated or unassigned code points, so only the ranges with
		// a parallel hiragana code point are shifted.
		if (r >= 0x30A1 && r <= 0x30F6) || r == 0x30FD || r == 0x30FE {
			r -= kanaOffset
		}
		buf.WriteRune(r)
	}
	return buf.String()
}
49+
50+
// ToKatakana returns text with every hiragana rune replaced by its
// full-width katakana counterpart.
//
// Only runes that have a one-to-one katakana equivalent are converted: the
// hiragana letters U+3041 (ぁ) through U+3096 (ゖ) and the iteration marks
// U+309D (ゝ) and U+309E (ゞ). Every other rune is copied unchanged.
func ToKatakana(text string) string {
	// The hiragana and katakana blocks are laid out in parallel, 0x60 apart.
	const kanaOffset = 0x60

	var buf bytes.Buffer
	buf.Grow(len(text))
	for _, r := range text {
		// Testing for the whole Hiragana script would also match ゟ (U+309F),
		// hentaigana and U+1F200; shifting those by the offset yields
		// unrelated code points, so only the ranges with a parallel katakana
		// code point are shifted.
		if (r >= 0x3041 && r <= 0x3096) || r == 0x309D || r == 0x309E {
			r += kanaOffset
		}
		buf.WriteRune(r)
	}
	return buf.String()
}

0 commit comments

Comments
 (0)