Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
filters.put("stemmer", StemmerTokenFilterFactory::new);
filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
filters.put("trim", TrimTokenFilterFactory::new);
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
filters.put("unique", UniqueTokenFilterFactory::new);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
Expand All @@ -26,16 +26,18 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;

import java.io.IOException;
import java.util.List;
import java.util.function.Function;

public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {

public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
String name, Settings settings) throws IOException {
super(indexSettings, env, analysisRegistry, name, settings);
public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this constructor can now be package-private?

String name, Settings settings) {
super(indexSettings, env, name, settings);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
Expand All @@ -26,8 +26,13 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.List;
Expand All @@ -41,8 +46,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
protected final Settings settings;
protected final Environment environment;

public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
String name, Settings settings) throws IOException {
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this constructor can now be package-private too?

String name, Settings settings) {
super(indexSettings, name, settings);
this.settings = settings;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory;
import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;

import java.util.List;
Expand Down Expand Up @@ -106,6 +105,7 @@ protected Map<String, Class<?>> getTokenFilters() {
filters.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
filters.put("kstem", KStemTokenFilterFactory.class);
filters.put("synonym", SynonymTokenFilterFactory.class);
filters.put("synonymgraph", SynonymGraphTokenFilterFactory.class);
filters.put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class);
filters.put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class);
filters.put("reversestring", ReverseTokenFilterFactory.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,31 @@

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.test.ESIntegTestCase;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;

import static org.elasticsearch.client.Requests.searchRequest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.startsWith;

Expand Down Expand Up @@ -153,4 +163,165 @@ public void testMultiPhraseCutoff() throws IOException {
+ "<em>http://www.facebook.com</em> <em>http://elasticsearch.org</em> "
+ "<em>http://xing.com</em> <em>http://cnn.com</em> http://quora.com"));
}

/**
 * Verifies that each highlighter type ("plain", "fvh", "unified") highlights the indexed
 * terms when the search-time {@code synonym} analyzer is configured with the rule
 * {@code fast,quick}. The document is indexed with the {@code standard} analyzer and
 * contains "quick"; it is queried once with "quick" and once with the synonym "fast",
 * and both queries must produce the same highlighted fragment.
 *
 * @throws IOException declared by the test signature
 */
public void testSynonyms() throws IOException {
    Settings.Builder builder = Settings.builder()
        .put(indexSettings())
        .put("index.analysis.analyzer.synonym.tokenizer", "standard")
        .putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase")
        .put("index.analysis.filter.synonym.type", "synonym")
        .putList("index.analysis.filter.synonym.synonyms", "fast,quick");

    assertAcked(prepareCreate("test").setSettings(builder.build())
        .addMapping("type1", "field1",
            "type=text,term_vector=with_positions_offsets,search_analyzer=synonym," +
                "analyzer=standard,index_options=offsets"));
    ensureGreen();

    client().prepareIndex("test", "type1", "0").setSource(
        "field1", "The quick brown fox jumps over the lazy dog").get();
    refresh();

    for (String highlighterType : new String[] {"plain", "fvh", "unified"}) {
        logger.info("--> highlighting (type=" + highlighterType + ") and searching on field1");
        // Run the literal query and the synonym query through the SAME highlighter type, so the
        // synonym expansion is checked for every implementation and not only the default one.
        for (String queryText : new String[] {"quick brown fox", "fast brown fox"}) {
            SearchSourceBuilder source = searchSource()
                .query(matchQuery("field1", queryText).operator(Operator.AND))
                .highlighter(
                    highlight()
                        .field("field1")
                        .order("score")
                        .preTags("<x>")
                        .postTags("</x>")
                        .highlighterType(highlighterType));
            SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
            assertHighlight(searchResponse, 0, "field1", 0, 1,
                equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
        }
    }
}

/**
 * Exercises highlighting of {@code match_phrase_prefix} queries on two indices that both
 * define a {@code synonym} analyzer with the rule {@code quick => fast}.
 * <p>
 * On {@code first_test_index} the queries target {@code field0} and {@code field1}. On
 * {@code second_test_index} the queried fields {@code field3} and {@code field4} are mapped
 * with the {@code synonym} analyzer, and the queries use "fast" where the documents contain
 * "quick".
 *
 * @throws IOException declared by the test signature
 */
public void testPhrasePrefix() throws IOException {
    final String foxSentence = "The quick brown fox jumps over the lazy dog";
    final String browseSentence = "The quick browse button is a fancy thing, right bro?";

    Settings.Builder analysis = Settings.builder()
        .put(indexSettings())
        .put("index.analysis.analyzer.synonym.tokenizer", "standard")
        .putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase")
        .put("index.analysis.filter.synonym.type", "synonym")
        .putList("index.analysis.filter.synonym.synonyms", "quick => fast");

    // ---- first index ----
    assertAcked(prepareCreate("first_test_index")
        .setSettings(analysis.build())
        .addMapping("type1", type1TermVectorMapping()));
    ensureGreen();

    client().prepareIndex("first_test_index", "type1", "0")
        .setSource("field0", foxSentence, "field1", foxSentence)
        .get();
    client().prepareIndex("first_test_index", "type1", "1")
        .setSource("field1", browseSentence)
        .get();
    refresh();

    logger.info("--> highlighting and searching on field0");
    SearchResponse response = phrasePrefixSearch("first_test_index", searchSource()
        .query(matchPhrasePrefixQuery("field0", "bro"))
        .highlighter(xTaggedHighlight("field0")));
    assertHighlight(response, 0, "field0", 0, 1, equalTo("The quick <x>brown</x> fox jumps over the lazy dog"));

    response = phrasePrefixSearch("first_test_index", searchSource()
        .query(matchPhrasePrefixQuery("field0", "quick bro"))
        .highlighter(xTaggedHighlight("field0")));
    assertHighlight(response, 0, "field0", 0, 1,
        equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));

    logger.info("--> highlighting and searching on field1");
    response = phrasePrefixSearch("first_test_index", searchSource()
        .query(boolQuery()
            .should(matchPhrasePrefixQuery("field1", "test"))
            .should(matchPhrasePrefixQuery("field1", "bro")))
        .highlighter(xTaggedHighlight("field1")));
    assertThat(response.getHits().totalHits, equalTo(2L));
    // Hit order is not fixed, so either document's fragment is accepted for each hit.
    for (int hit = 0; hit < 2; hit++) {
        assertHighlight(response, hit, "field1", 0, 1, anyOf(
            equalTo("The quick <x>browse</x> button is a fancy thing, right <x>bro</x>?"),
            equalTo("The quick <x>brown</x> fox jumps over the lazy dog")));
    }

    response = phrasePrefixSearch("first_test_index", searchSource()
        .query(matchPhrasePrefixQuery("field1", "quick bro"))
        .highlighter(xTaggedHighlight("field1")));
    for (int hit = 0; hit < 2; hit++) {
        assertHighlight(response, hit, "field1", 0, 1, anyOf(
            equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
            equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
    }

    // ---- second index ----
    assertAcked(prepareCreate("second_test_index")
        .setSettings(analysis.build())
        .addMapping("doc",
            "field4", "type=text,term_vector=with_positions_offsets,analyzer=synonym",
            "field3", "type=text,analyzer=synonym"));
    // with synonyms
    client().prepareIndex("second_test_index", "doc", "0")
        .setSource(
            "type", "type2",
            "field4", foxSentence,
            "field3", foxSentence)
        .get();
    client().prepareIndex("second_test_index", "doc", "1")
        .setSource(
            "type", "type2",
            "field4", browseSentence)
        .get();
    client().prepareIndex("second_test_index", "doc", "2")
        .setSource(
            "type", "type2",
            "field4", "a quick fast blue car")
        .get();
    refresh();

    response = phrasePrefixSearch("second_test_index", searchSource()
        .postFilter(termQuery("type", "type2"))
        .query(matchPhrasePrefixQuery("field3", "fast bro"))
        .highlighter(xTaggedHighlight("field3")));
    assertHighlight(response, 0, "field3", 0, 1,
        equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));

    logger.info("--> highlighting and searching on field4");
    response = phrasePrefixSearch("second_test_index", searchSource()
        .postFilter(termQuery("type", "type2"))
        .query(matchPhrasePrefixQuery("field4", "the fast bro"))
        .highlighter(xTaggedHighlight("field4")));
    for (int hit = 0; hit < 2; hit++) {
        assertHighlight(response, hit, "field4", 0, 1, anyOf(
            equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
            equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
    }

    logger.info("--> highlighting and searching on field4");
    response = phrasePrefixSearch("second_test_index", searchSource()
        .postFilter(termQuery("type", "type2"))
        .query(matchPhrasePrefixQuery("field4", "a fast quick blue ca"))
        .highlighter(xTaggedHighlight("field4")));
    assertHighlight(response, 0, "field4", 0, 1,
        anyOf(equalTo("<x>a quick fast blue car</x>"),
            equalTo("<x>a</x> <x>quick</x> <x>fast</x> <x>blue</x> <x>car</x>")));
}

/** Builds the highlight definition shared by every search in {@code testPhrasePrefix}. */
private static HighlightBuilder xTaggedHighlight(String field) {
    return highlight().field(field).order("score").preTags("<x>").postTags("</x>");
}

/** Executes {@code source} against {@code index} and blocks until the response arrives. */
private SearchResponse phrasePrefixSearch(String index, SearchSourceBuilder source) {
    return client().search(searchRequest(index).source(source)).actionGet();
}

/**
 * Builds the JSON mapping for type {@code type1}: two {@code text} fields, {@code field1}
 * and {@code field2}, each with {@code term_vector} set to {@code with_positions_offsets}.
 *
 * @return the builder holding the completed mapping
 * @throws IOException if the content builder fails while writing
 */
public static XContentBuilder type1TermVectorMapping() throws IOException {
    XContentBuilder mapping = XContentFactory.jsonBuilder()
        .startObject()
        .startObject("type1")
        .startObject("properties");
    // Both fields share exactly the same definition.
    for (String fieldName : new String[] {"field1", "field2"}) {
        mapping = mapping.startObject(fieldName)
            .field("type", "text")
            .field("term_vector", "with_positions_offsets")
            .endObject();
    }
    return mapping
        .endObject()
        .endObject()
        .endObject();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,68 @@
- match: { tokens.0.token: Foo }
- match: { tokens.1.token: Bar! }

---
# Creates index `test` with a `synonym` filter using the rule "car,auto", then analyzes
# "what car magazine" with the whitespace tokenizer and that filter. Expects 4 tokens,
# with `car` and `auto` both at position 1 and `magazine` following at position 2.
"synonym":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_synonym:
type: synonym
synonyms: ["car,auto"]

- do:
indices.analyze:
index: test
body:
text: what car magazine
tokenizer: whitespace
filter: [ my_synonym ]
- length: { tokens: 4 }
- match: { tokens.0.token: what }
- match: { tokens.0.position: 0 }
- match: { tokens.1.token: car }
- match: { tokens.1.position: 1 }
- match: { tokens.2.token: auto }
- match: { tokens.2.position: 1 }
- match: { tokens.3.token: magazine }
- match: { tokens.3.position: 2 }

---
# Creates index `test` with a `synonym_graph` filter using the multi-word rule
# "guinea pig,cavy", then analyzes "my guinea pig snores" with the whitespace tokenizer.
# Expects 5 tokens: `cavy` at position 1 with positionLength 2, alongside `guinea`
# (position 1) and `pig` (position 2), followed by `snores` at position 3.
"synonym_graph":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_graph_synonym:
type: synonym_graph
synonyms: [ "guinea pig,cavy" ]

- do:
indices.analyze:
index: test
body:
text: my guinea pig snores
tokenizer: whitespace
filter: [ my_graph_synonym ]
- length: { tokens: 5 }
- match: { tokens.0.token: my }
- match: { tokens.1.token: cavy }
- match: { tokens.1.position: 1 }
- match: { tokens.1.positionLength: 2 }
- match: { tokens.2.token: guinea }
- match: { tokens.2.position: 1 }
- match: { tokens.3.token: pig }
- match: { tokens.3.position: 2 }
- match: { tokens.4.token: snores }
- match: { tokens.4.position: 3 }

---
"synonym_graph and flatten_graph":
- do:
Expand Down
Loading