Skip to content

Commit

Permalink
Build index of variants in XMLLexicon (fixes #2)
Browse files Browse the repository at this point in the history
  • Loading branch information
DaBr01 committed Mar 10, 2023
1 parent 439d155 commit d77058a
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 6 deletions.
1 change: 1 addition & 0 deletions src/main/java/simplenlgde/framework/NLGFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,7 @@ public NLGElement createWord(Object word, LexicalCategory category) {
} else if (word instanceof String && this.lexicon != null) {
wordElement = lexicon.lookupWord((String) word, category);
if (PRONOUNS.contains(((String) word).toLowerCase())) {
wordElement = new WordElement(((String) word).toLowerCase(), LexicalCategory.PRONOUN); //ignore pronouns in lexicon
setPronounFeatures(wordElement, (String) word);
}
if (MERGED_ARTICLES_DEF.contains(word.toString().toLowerCase())) {
Expand Down
103 changes: 97 additions & 6 deletions src/main/java/simplenlgde/lexicon/XMLLexicon.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,14 @@

package simplenlgde.lexicon;

import simplenlgde.features.*;
import simplenlgde.framework.*;

import java.io.File;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
Expand All @@ -42,6 +38,7 @@
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import simplenlgde.morphology.MorphologyProcessor;


/**
Expand Down Expand Up @@ -232,6 +229,100 @@ private void IndexWord(WordElement word) {
+ " occurs more than once");
indexByID.put(id, word);
}

// now index by variant
for(String variant : getVariants(word)) {
updateIndex(word, variant, indexByVariant);
}
}

/**
 * Generates the morphological variants of a word so that the lexicon entry
 * can be indexed under each of its surface forms.
 *
 * <p>The result always contains the word's base form. Depending on the
 * word's lexical category, further forms are produced by realising the word
 * with every combination of the relevant features:
 * <ul>
 * <li>{@code NOUN}: number and case;</li>
 * <li>{@code VERB}: tense, number and person;</li>
 * <li>{@code ADJECTIVE}: number, case, gender, superlative and comparative,
 * with the discourse function fixed to {@code MODIFIER}.</li>
 * </ul>
 * Words of any other category contribute only their base form. Feature
 * combinations whose realisation throws are skipped.
 *
 * @param word the lexicon entry whose variants are to be generated
 * @return the base form together with every inflected form that could be
 *         realised; never {@code null}
 */
protected Set<String> getVariants(WordElement word) {
    Set<String> variants = new HashSet<String>();
    variants.add(word.getBaseForm());

    ElementCategory category = word.getCategory();
    if (!(category instanceof LexicalCategory)) {
        return variants;
    }

    MorphologyProcessor morph = new MorphologyProcessor();
    InflectedWordElement inflected = new InflectedWordElement(word);

    switch ((LexicalCategory) category) {
        case NOUN:
            for (NumberAgreement number : NumberAgreement.values()) {
                // Case is expressed through DiscourseFunction values here.
                for (DiscourseFunction discourseFunction : DiscourseFunction.values()) {
                    inflected.setFeature(Feature.NUMBER, number);
                    inflected.setFeature(InternalFeature.CASE, discourseFunction);
                    collectRealisation(morph, inflected, variants);
                }
            }
            break;

        case VERB:
            for (Tense tense : Tense.values()) {
                for (NumberAgreement number : NumberAgreement.values()) {
                    for (Person person : Person.values()) {
                        inflected.setFeature(Feature.TENSE, tense);
                        inflected.setFeature(Feature.NUMBER, number);
                        inflected.setFeature(Feature.PERSON, person);
                        collectRealisation(morph, inflected, variants);
                    }
                }
            }
            break;

        case ADJECTIVE:
            Boolean[] flagValues = {true, false};
            inflected.setFeature(InternalFeature.DISCOURSE_FUNCTION, DiscourseFunction.MODIFIER);

            for (NumberAgreement number : NumberAgreement.values()) {
                for (DiscourseFunction discourseFunction : DiscourseFunction.values()) {
                    for (Gender gender : Gender.values()) {
                        for (Boolean superlative : flagValues) {
                            for (Boolean comparative : flagValues) {
                                inflected.setFeature(Feature.NUMBER, number);
                                inflected.setFeature(InternalFeature.CASE, discourseFunction);
                                inflected.setFeature(Feature.IS_SUPERLATIVE, superlative);
                                inflected.setFeature(Feature.IS_COMPARATIVE, comparative);
                                inflected.setFeature(LexicalFeature.GENDER, gender);
                                collectRealisation(morph, inflected, variants);
                            }
                        }
                    }
                }
            }
            break;

        default:
            // Other categories are indexed by their base form only.
            break;
    }

    return variants;
}

/**
 * Realises {@code inflected} with its current features and adds the
 * resulting string to {@code variants}.
 *
 * <p>Any exception raised during realisation is deliberately ignored: it
 * indicates that the lexicon entry is not complete enough to produce this
 * particular form, which must not abort indexing of the remaining forms.
 *
 * @param morph the morphology processor used to realise the word
 * @param inflected the word carrying the features of the form to realise
 * @param variants the set that receives the realised form
 */
private static void collectRealisation(MorphologyProcessor morph,
                                       InflectedWordElement inflected,
                                       Set<String> variants) {
    try {
        // Set.add is a no-op for duplicates, so no contains() check is needed.
        variants.add(morph.realise(inflected).getRealisation());
    } catch (Exception ignored) {
        // Lexicon entry is not complete
    }
}

/**
Expand Down

0 comments on commit d77058a

Please sign in to comment.