Skip to content

Commit

Permalink
Add handling of alphabetical words
Browse files Browse the repository at this point in the history
  • Loading branch information
takahi-i committed Oct 9, 2017
1 parent 98ccea6 commit 369ed05
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,9 @@ public void validate(Document document) {
if (!sentenceMap.containsKey(document)) {
throw new IllegalStateException("Document " + document.getFileName() + " does not have any sentence");
}

for (Sentence sentence : sentenceMap.get(document)) {
for (TokenElement token : sentence.getTokens()) {
String reading = token.getTags().get(7);
String reading = getReading(token);
if (!this.words.containsKey(reading)) {
continue;
}
Expand All @@ -51,14 +50,16 @@ public void validate(Document document) {
}
}


@Override
public void preValidate(Document document) {
sentenceMap.put(document, extractSentences(document));
List<Sentence> sentences = sentenceMap.get(document);
for (Sentence sentence : sentences) {
for (TokenElement token : sentence.getTokens()) {
String reading = token.getTags().get(7);
if (token.getSurface().equals(" ")) {
continue;
}
String reading = getReading(token);
if (!this.words.containsKey(reading)) {
this.words.put(reading, new LinkedList<TokenElement>());
}
Expand All @@ -67,6 +68,11 @@ public void preValidate(Document document) {
}
}

/**
 * Returns the key under which a token is grouped with other tokens.
 *
 * <p>The key is the token's tag at index 7 (treated as its reading by the callers)
 * unless that tag is missing or is the placeholder {@code "*"}, in which case the
 * token's surface form is used instead. The result is lower-cased so that words
 * differing only in letter case (e.g. "TYPE" and "Type") share one key.
 *
 * <p>NOTE(review): "*" is presumably what the tokenizer emits when it has no reading
 * for a token (e.g. alphabetical words) — confirm against the tokenizer's tag layout.
 *
 * @param token the token to derive the key from
 * @return the lower-cased reading, or the lower-cased surface when no reading is available
 */
private String getReading(TokenElement token) {
    // Guard the lookup: a token carrying fewer than eight tags has no reading entry,
    // so fall back to the surface instead of throwing IndexOutOfBoundsException.
    String tag = token.getTags().size() > 7 ? token.getTags().get(7) : null;
    String reading = (tag == null || tag.equals("*")) ? token.getSurface() : tag;
    // Locale.ROOT keeps the key independent of the JVM default locale; with e.g. a
    // Turkish default locale "TYPE" would otherwise lower-case to "type" with a dotless i.
    return reading.toLowerCase(java.util.Locale.ROOT);
}

private List<Sentence> extractSentences(Document document) {
List<Sentence> sentences = new ArrayList<>();
for (Section section : document) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ protected JapaneseExpressionVariationValidatorTest() {
}

@Test
void detectSameReadings() throws RedPenException {
void detecSameReadingsInJapaneseCharacters() throws RedPenException {
config = Configuration.builder("ja")
.addValidatorConfig(new ValidatorConfiguration(validatorName))
.build();
Expand All @@ -49,4 +49,17 @@ void detectSameReadings() throws RedPenException {
Map<Document, List<ValidationError>> errors = redPen.validate(singletonList(document));
assertEquals(2, errors.get(document).size());
}

/**
 * Validates a Japanese sentence containing the alphabetical words "TYPE" and "Type",
 * which differ only in letter case, and expects exactly two validation errors.
 */
@Test
void detectSameAlphabecicalReadings() throws RedPenException {
    config = Configuration.builder("ja")
            .addValidatorConfig(new ValidatorConfiguration(validatorName))
            .build();

    Document input = prepareSimpleDocument("この TYPE はあの Type とは違います。");

    // Run the validator over the single document and look only at that document's errors.
    List<ValidationError> found = new RedPen(config).validate(singletonList(input)).get(input);
    assertEquals(2, found.size());
}
}

0 comments on commit 369ed05

Please sign in to comment.