Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
Expand All @@ -50,6 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean lenient;
protected final Settings settings;
protected final Environment environment;
private final boolean updateable;

SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
Expand All @@ -65,9 +67,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
this.expand = settings.getAsBoolean("expand", true);
this.lenient = settings.getAsBoolean("lenient", false);
this.format = settings.get("format", "");
this.updateable = settings.getAsBoolean("updateable", false);
this.environment = env;
}

@Override
public AnalysisMode getAnalysisMode() {
    // A filter configured as "updateable" may change when its synonyms file is
    // reloaded, so it must be restricted to search-time analysis; otherwise it
    // may be used at both index and search time.
    if (this.updateable) {
        return AnalysisMode.SEARCH_TIME;
    }
    return AnalysisMode.ALL;
}

@Override
public TokenStream create(TokenStream tokenStream) {
throw new IllegalStateException("Call createPerAnalyzerSynonymFactory to specialize this factory for an analysis chain first");
Expand Down Expand Up @@ -98,6 +106,11 @@ public TokenFilterFactory getSynonymFilter() {
// which doesn't support stacked input tokens
return IDENTITY_FILTER;
}

// Mirror the enclosing factory's analysis mode on the per-analyzer factory:
// an "updateable" synonym filter is limited to search-time analysis so that a
// reload of the synonyms file cannot invalidate already-indexed tokens.
@Override
public AnalysisMode getAnalysisMode() {
return updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
}
};
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction.AnalyzeToken;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction.Response;
import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzersResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalTestCluster;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

public class SynonymAnalyzerIT extends ESIntegTestCase {

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Arrays.asList(CommonAnalysisPlugin.class);
    }

    /**
     * This test needs to write to the config directory, this is difficult in an external cluster so we overwrite this to force running
     * with {@link InternalTestCluster}
     */
    @Override
    protected boolean ignoreExternalCluster() {
        return true;
    }

    /**
     * Verifies that a synonym filter marked "updateable" picks up changes to its synonyms file after an explicit
     * reload-analyzers request: both the _analyze API and search requests must see the newly added synonyms
     * without reindexing any documents.
     */
    public void testSynonymsUpdateable() throws FileNotFoundException, IOException, InterruptedException {
        Path config = internalCluster().getInstance(Environment.class).configFile();
        String synonymsFileName = "synonyms.txt";
        Path synonymsFile = config.resolve(synonymsFileName);
        Files.createFile(synonymsFile);
        assertTrue(Files.exists(synonymsFile));
        try (PrintWriter out = new PrintWriter(
                new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.CREATE), StandardCharsets.UTF_8))) {
            out.println("foo, baz");
        }
        // the updateable filter is only legal as a search-time analyzer, hence the separate index/search analyzers
        assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
                .put("index.number_of_shards", cluster().numDataNodes() * 2)
                .put("index.number_of_replicas", 1)
                .put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
                .put("analysis.analyzer.my_synonym_analyzer.filter", "my_synonym_filter")
                .put("analysis.filter.my_synonym_filter.type", "synonym")
                .put("analysis.filter.my_synonym_filter.updateable", "true")
                .put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
                .addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

        client().prepareIndex("test", "_doc", "1").setSource("field", "foo").get();
        assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

        // "baz" matches via the initial synonym rule; an unrelated term does not
        SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
        assertHitCount(response, 1L);
        response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
        assertHitCount(response, 0L);
        Response analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
        assertEquals(2, analyzeResponse.getTokens().size());
        assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
        assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

        // now update synonyms file several times and trigger reloading
        for (int i = 0; i < 10; i++) {
            String testTerm = randomAlphaOfLength(10);
            // TRUNCATE_EXISTING guarantees no stale bytes from the previous file content survive the rewrite;
            // plain WRITE only overwrites in place and would leave a stale tail if the new content were shorter
            try (PrintWriter out = new PrintWriter(
                    new OutputStreamWriter(
                            Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING),
                            StandardCharsets.UTF_8))) {
                out.println("foo, baz, " + testTerm);
            }
            ReloadAnalyzersResponse reloadResponse = client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet();
            assertNoFailures(reloadResponse);
            assertEquals(cluster().numDataNodes(), reloadResponse.getSuccessfulShards());

            // the freshly added random term must now appear as a synonym of "foo"
            analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
            assertEquals(3, analyzeResponse.getTokens().size());
            Set<String> tokens = new HashSet<>();
            analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
            assertTrue(tokens.contains("foo"));
            assertTrue(tokens.contains("baz"));
            assertTrue(tokens.contains(testTerm));

            response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
            assertHitCount(response, 1L);
            response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", testTerm)).get();
            assertHitCount(response, 1L);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction.AnalyzeToken;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction.Response;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

public class SynonymAnalyzerTests extends ESSingleNodeTestCase {

    @Override
    protected Collection<Class<? extends Plugin>> getPlugins() {
        return Arrays.asList(CommonAnalysisPlugin.class);
    }

    /**
     * Single-node check that an "updateable" synonym filter used as a search-time analyzer picks up
     * changes to its synonyms file after a reload-analyzers request, visible through both the
     * _analyze API and search matching, without reindexing.
     */
    public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
        String synonymsFileName = "synonyms.txt";
        Path configDir = node().getEnvironment().configFile();
        if (Files.exists(configDir) == false) {
            Files.createDirectory(configDir);
        }
        Path synonymsFile = configDir.resolve(synonymsFileName);
        if (Files.exists(synonymsFile) == false) {
            Files.createFile(synonymsFile);
        }
        // TRUNCATE_EXISTING ensures a leftover file from a previous run cannot leave stale
        // trailing bytes; plain WRITE only overwrites in place without shrinking the file
        try (PrintWriter out = new PrintWriter(
                new OutputStreamWriter(
                        Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING),
                        StandardCharsets.UTF_8))) {
            out.println("foo, baz");
        }

        // the updateable filter is only legal as a search-time analyzer, hence the separate index/search analyzers
        assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
                .put("index.number_of_shards", 5)
                .put("index.number_of_replicas", 0)
                .put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
                .putList("analysis.analyzer.my_synonym_analyzer.filter", "lowercase", "my_synonym_filter")
                .put("analysis.filter.my_synonym_filter.type", "synonym")
                .put("analysis.filter.my_synonym_filter.updateable", "true")
                .put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
                .addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

        client().prepareIndex("test", "_doc", "1").setSource("field", "Foo").get();
        assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

        // "baz" matches via the initial synonym rule; "buzz" is not a synonym yet
        SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
        assertHitCount(response, 1L);
        response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
        assertHitCount(response, 0L);
        Response analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
        assertEquals(2, analyzeResponse.getTokens().size());
        assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
        assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

        // now update synonyms file and trigger reloading
        try (PrintWriter out = new PrintWriter(
                new OutputStreamWriter(
                        Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING),
                        StandardCharsets.UTF_8))) {
            out.println("foo, baz, buzz");
        }
        assertNoFailures(client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet());

        // "buzz" must now be produced as a synonym of "foo" (lowercased by the analyzer chain)
        analyzeResponse = client().admin().indices().prepareAnalyze("test", "Foo").setAnalyzer("my_synonym_analyzer").get();
        assertEquals(3, analyzeResponse.getTokens().size());
        Set<String> tokens = new HashSet<>();
        analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
        assertTrue(tokens.contains("foo"));
        assertTrue(tokens.contains("baz"));
        assertTrue(tokens.contains("buzz"));

        response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
        assertHitCount(response, 1L);
        response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
        assertHitCount(response, 1L);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@
import org.elasticsearch.action.admin.indices.recovery.TransportRecoveryAction;
import org.elasticsearch.action.admin.indices.refresh.RefreshAction;
import org.elasticsearch.action.admin.indices.refresh.TransportRefreshAction;
import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzerAction;
import org.elasticsearch.action.admin.indices.reloadanalyzer.TransportReloadAnalyzersAction;
import org.elasticsearch.action.admin.indices.rollover.RolloverAction;
import org.elasticsearch.action.admin.indices.rollover.TransportRolloverAction;
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentsAction;
Expand Down Expand Up @@ -509,6 +511,7 @@ public <Request extends ActionRequest, Response extends ActionResponse> void reg
actions.register(ClearScrollAction.INSTANCE, TransportClearScrollAction.class);
actions.register(RecoveryAction.INSTANCE, TransportRecoveryAction.class);
actions.register(NodesReloadSecureSettingsAction.INSTANCE, TransportNodesReloadSecureSettingsAction.class);
actions.register(ReloadAnalyzerAction.INSTANCE, TransportReloadAnalyzersAction.class);

//Indexed scripts
actions.register(PutStoredScriptAction.INSTANCE, TransportPutStoredScriptAction.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,14 @@
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.AnalyzerComponents;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
import org.elasticsearch.index.analysis.ReloadableCustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
Expand Down Expand Up @@ -297,18 +299,36 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
}
}

CustomAnalyzer customAnalyzer = null;
if (analyzer instanceof CustomAnalyzer) {
customAnalyzer = (CustomAnalyzer) analyzer;
} else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
Analyzer customAnalyzer = null;
// maybe unwrap analyzer from NamedAnalyzer
Analyzer potentialCustomAnalyzer = analyzer;
if (analyzer instanceof NamedAnalyzer) {
potentialCustomAnalyzer = ((NamedAnalyzer) analyzer).analyzer();
}
if (potentialCustomAnalyzer instanceof CustomAnalyzer || potentialCustomAnalyzer instanceof ReloadableCustomAnalyzer) {
customAnalyzer = potentialCustomAnalyzer;
}

if (customAnalyzer != null) {
// customAnalyzer = divide charfilter, tokenizer tokenfilters
CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
// divide charfilter, tokenizer tokenfilters
CharFilterFactory[] charFilterFactories;
TokenizerFactory tokenizerFactory;
TokenFilterFactory[] tokenFilterFactories;
String tokenizerName;
if (customAnalyzer instanceof CustomAnalyzer) {
CustomAnalyzer casted = (CustomAnalyzer) analyzer;
charFilterFactories = casted.charFilters();
tokenizerFactory = casted.tokenizerFactory();
tokenFilterFactories = casted.tokenFilters();
tokenizerName = casted.getTokenizerName();
} else {
// for ReloadableCustomAnalyzer we want to make sure we get the factories from the same components object
AnalyzerComponents components = ((ReloadableCustomAnalyzer) customAnalyzer).getComponents();
charFilterFactories = components.getCharFilters();
tokenizerFactory = components.getTokenizerFactory();
tokenFilterFactories = components.getTokenFilters();
tokenizerName = components.getTokenizerName();
}

String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ?
Expand Down Expand Up @@ -367,8 +387,8 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
}
}
detailResponse = new AnalyzeAction.DetailAnalyzeResponse(charFilteredLists, new AnalyzeAction.AnalyzeTokenList(
customAnalyzer.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
detailResponse = new AnalyzeAction.DetailAnalyzeResponse(charFilteredLists,
new AnalyzeAction.AnalyzeTokenList(tokenizerName, tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
} else {
String name;
if (analyzer instanceof NamedAnalyzer) {
Expand Down
Loading