Skip to content

Commit

Permalink
First try of Japanese Spelling validator
Browse files Browse the repository at this point in the history
  • Loading branch information
takahi-i committed Sep 14, 2017
1 parent d89d191 commit 15426c9
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,39 @@
import cc.redpen.model.Sentence;
import cc.redpen.tokenizer.TokenElement;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

public final class SpellingValidator extends SpellingDictionaryValidator {

private Map<String, List<TokenElement>> words = new HashMap<>();

@Override
public void validate(Sentence sentence) {
if (getSymbolTable().getLang().equals("en")) {
String lang = getSymbolTable().getLang();
if (lang.equals("ja")) {
validate_ja(sentence);
} else if (lang.equals("en")) {
validate_en(sentence);
}
}

/**
 * Reports tokens whose reading was also recorded for a token with a different surface.
 *
 * <p>For each token, the tag at index 7 is used as the lookup key into {@code words}
 * (populated by {@code preValidate}). If any token stored under that key has a different
 * surface string, an error is added for the current token — at most once per token.
 *
 * @param sentence sentence whose tokens are checked
 */
private void validate_ja(Sentence sentence) {
    for (TokenElement token : sentence.getTokens()) {
        List<String> tags = token.getTags();
        // A token carrying fewer than eight tags has no entry at index 7; skip it
        // instead of failing with IndexOutOfBoundsException.
        if (tags == null || tags.size() <= 7) {
            continue;
        }
        // NOTE(review): index 7 is presumably the tokenizer's "reading" feature — confirm.
        List<TokenElement> sameReading = this.words.get(tags.get(7));
        if (sameReading == null) {
            continue;
        }
        for (TokenElement candidate : sameReading) {
            // A differing surface already implies candidate is not this token itself.
            if (!token.getSurface().equals(candidate.getSurface())) {
                addLocalizedErrorFromToken(sentence, token);
                // One report per token is enough; further variants would only duplicate it.
                break;
            }
        }
    }
}

private void validate_en(Sentence sentence) {
for (TokenElement token : sentence.getTokens()) {
String surface = token.getSurface().toLowerCase();
Expand All @@ -39,4 +64,20 @@ private void validate_en(Sentence sentence) {
}
}

/**
 * Collects the tokens of a sentence into {@code words}, grouped by the tag at index 7.
 *
 * <p>Does nothing unless the symbol table's language is "ja". Tokens that carry fewer
 * than eight tags are not recorded.
 *
 * @param sentence sentence whose tokens are recorded
 */
@Override
public void preValidate(Sentence sentence) {
    if (!getSymbolTable().getLang().equals("ja")) {
        return;
    }

    for (TokenElement token : sentence.getTokens()) {
        List<String> tags = token.getTags();
        // Without an entry at index 7 there is no grouping key; skip the token rather
        // than failing with IndexOutOfBoundsException.
        if (tags == null || tags.size() <= 7) {
            continue;
        }
        // NOTE(review): index 7 is presumably the tokenizer's "reading" feature — confirm.
        String reading = tags.get(7);

        // Single lookup; the bucket is created on first use of a key.
        List<TokenElement> bucket = this.words.get(reading);
        if (bucket == null) {
            bucket = new LinkedList<>();
            this.words.put(reading, bucket);
        }
        bucket.add(token);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,19 @@ public void nonLatin() throws RedPenException {
assertEquals(0, errors.get(document).size());
}

/**
 * Validates a two-sentence document under a "ja" configuration and expects
 * exactly two errors from the configured validator.
 */
@Test
public void japanese() throws RedPenException {
    config = Configuration.builder("ja")
            .addValidatorConfig(new ValidatorConfiguration(validatorName))
            .build();

    // NOTE(review): presumably "之" and "これ" are tokenized with the same reading,
    // so each one is reported once — confirm against the tokenizer output.
    final String text = "之は山です。これは川です。";
    Document document = prepareSimpleDocument(text);

    Map<Document, List<ValidationError>> errors =
            new RedPen(config).validate(singletonList(document));
    List<ValidationError> reported = errors.get(document);
    assertEquals(2, reported.size());
}

@Test
public void punctuationInsideOfWord() throws RedPenException {
config = Configuration.builder()
Expand Down

0 comments on commit 15426c9

Please sign in to comment.