Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@

package org.elasticsearch.index.analysis;

import java.util.HashSet;
import java.util.List;

import org.apache.commons.codec.Encoder;
import org.apache.commons.codec.language.Caverphone1;
import org.apache.commons.codec.language.Caverphone2;
Expand All @@ -45,6 +42,9 @@
import org.elasticsearch.index.analysis.phonetic.KoelnerPhonetik;
import org.elasticsearch.index.analysis.phonetic.Nysiis;

import java.util.HashSet;
import java.util.List;

public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {

private final Encoder encoder;
Expand Down Expand Up @@ -116,11 +116,11 @@ public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment envir
public TokenStream create(TokenStream tokenStream) {
if (encoder == null) {
if (ruletype != null && nametype != null) {
if (languageset != null) {
final LanguageSet languages = LanguageSet.from(new HashSet<>(languageset));
return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true), languages);
LanguageSet langset = null;
if (languageset != null && languageset.size() > 0) {
langset = LanguageSet.from(new HashSet<>(languageset));
}
return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true));
return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true), langset);
}
if (maxcodelength > 0) {
return new DoubleMetaphoneFilter(tokenStream, maxcodelength, !replace);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,57 @@

package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.plugin.analysis.AnalysisPhoneticPlugin;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.MatcherAssert;
import org.junit.Before;

import java.io.IOException;
import java.io.StringReader;

import static org.hamcrest.Matchers.instanceOf;

public class SimplePhoneticAnalysisTests extends ESTestCase {
public void testPhoneticTokenFilterFactory() throws IOException {

private TestAnalysis analysis;

/**
 * Loads the phonetic analysis settings from the classpath YAML resource and builds the
 * {@code TestAnalysis} instance that the test methods read through the {@code analysis} field.
 *
 * @throws IOException if the analysis components cannot be created from the settings
 */
@Before
public void setup() throws IOException {
    String yaml = "/org/elasticsearch/index/analysis/phonetic-1.yml";
    Settings settings = Settings.builder().loadFromStream(yaml, getClass().getResourceAsStream(yaml), false)
        .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
        .build();
    // Build the analysis exactly once and store it on the field; a local named `analysis`
    // would shadow the field and force a second, redundant createTestAnalysis call.
    this.analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisPhoneticPlugin());
}

/**
 * Verifies that the token filter registered under the name "phonetic" is an instance of
 * {@link PhoneticTokenFilterFactory}.
 */
public void testPhoneticTokenFilterFactory() throws IOException {
    final TokenFilterFactory phoneticFactory = analysis.tokenFilter.get("phonetic");

    MatcherAssert.assertThat(
        phoneticFactory,
        instanceOf(PhoneticTokenFilterFactory.class));
}

/**
 * Runs the single token "ABADIAS" through the "beidermorsefilter" token filter and checks
 * that the resulting stream contains exactly the expected phonetic encodings, in order.
 */
public void testPhoneticTokenFilterBeiderMorseNoLanguage() throws IOException {
    final String[] expectedTokens = {
        "abYdias", "abYdios", "abadia", "abadiaS", "abadias", "abadio", "abadioS",
        "abadios", "abodia", "abodiaS", "abodias", "abodio", "abodioS", "abodios",
        "avadias", "avadios", "avodias", "avodios", "obadia", "obadiaS", "obadias",
        "obadio", "obadioS", "obadios", "obodia", "obodiaS", "obodias", "obodioS"
    };

    final TokenFilterFactory beiderMorse = analysis.tokenFilter.get("beidermorsefilter");

    // Whitespace tokenization leaves the single input word as one token for the filter.
    final Tokenizer source = new WhitespaceTokenizer();
    source.setReader(new StringReader("ABADIAS"));

    BaseTokenStreamTestCase.assertTokenStreamContents(beiderMorse.create(source), expectedTokens);
}

/**
 * Runs the single token "Rimbault" through the "beidermorsefilterfrench" token filter and
 * checks that the resulting stream contains exactly the expected phonetic encodings, in order.
 */
public void testPhoneticTokenFilterBeiderMorseWithLanguage() throws IOException {
    final String[] expectedTokens = {
        "rimbD", "rimbDlt", "rimba", "rimbalt", "rimbo", "rimbolt", "rimbu", "rimbult",
        "rmbD", "rmbDlt", "rmba", "rmbalt", "rmbo", "rmbolt", "rmbu", "rmbult"
    };

    final TokenFilterFactory frenchBeiderMorse = analysis.tokenFilter.get("beidermorsefilterfrench");

    // Whitespace tokenization leaves the single input word as one token for the filter.
    final Tokenizer source = new WhitespaceTokenizer();
    source.setReader(new StringReader("Rimbault"));

    BaseTokenStreamTestCase.assertTokenStreamContents(frenchBeiderMorse.create(source), expectedTokens);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ index:
beidermorsefilter:
type: phonetic
encoder: beidermorse
beidermorsefilterfrench:
type: phonetic
encoder: beidermorse
languageset : [ "french" ]
koelnerphonetikfilter:
type: phonetic
encoder: koelnerphonetik
Expand Down