SynonymTokenFilterFactory.java
@@ -30,6 +30,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
+ import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
@@ -50,6 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean lenient;
protected final Settings settings;
protected final Environment environment;
+ private final boolean updateable;

SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
@@ -65,9 +67,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
this.expand = settings.getAsBoolean("expand", true);
this.lenient = settings.getAsBoolean("lenient", false);
this.format = settings.get("format", "");
+ this.updateable = settings.getAsBoolean("updateable", false);
this.environment = env;
}

+ @Override
+ public AnalysisMode getAnalysisMode() {
+ return this.updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
+ }

@Override
public TokenStream create(TokenStream tokenStream) {
throw new IllegalStateException("Call createPerAnalyzerSynonymFactory to specialize this factory for an analysis chain first");
@@ -98,6 +106,11 @@ public TokenFilterFactory getSynonymFilter() {
// which doesn't support stacked input tokens
return IDENTITY_FILTER;
}

+ @Override
+ public AnalysisMode getAnalysisMode() {
+ return updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
+ }
};
}

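For context on what getAnalysisMode buys here: an analyzer chain takes on the most restrictive mode of its parts, so a filter marked updateable (and therefore SEARCH_TIME) can only be used in a search-time analyzer. A self-contained sketch of that merging idea — this mini enum only mirrors the concept, it is not the actual org.elasticsearch.index.analysis.AnalysisMode:

/** Illustration only: a stand-in mirroring the idea behind AnalysisMode. */
enum Mode {
    INDEX_TIME, SEARCH_TIME, ALL;

    /** Returns the most restrictive mode two chain components support together. */
    Mode merge(Mode other) {
        if (this == other || other == ALL) {
            return this;
        }
        if (this == ALL) {
            return other;
        }
        // INDEX_TIME vs SEARCH_TIME: no analyzer can satisfy both restrictions
        throw new IllegalStateException("cannot combine modes " + this + " and " + other);
    }

    public static void main(String[] args) {
        Mode lowercase = Mode.ALL;                  // usable anywhere
        Mode updateableSynonyms = Mode.SEARCH_TIME; // updateable=true restricts the chain
        System.out.println(lowercase.merge(updateableSynonyms)); // prints SEARCH_TIME
    }
}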
SynonymAnalyzerIT.java
@@ -0,0 +1,123 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzersResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalTestCluster;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

public class SynonymAnalyzerIT extends ESIntegTestCase {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(CommonAnalysisPlugin.class);
}

/**
* This test needs to write to the config directory; this is difficult in an external cluster, so we override this method to
* force running with {@link InternalTestCluster}.
*/
@Override
protected boolean ignoreExternalCluster() {
return true;
}

public void testSynonymsUpdateable() throws FileNotFoundException, IOException, InterruptedException {
Path config = internalCluster().getInstance(Environment.class).configFile();
String synonymsFileName = "synonyms.txt";
Path synonymsFile = config.resolve(synonymsFileName);
Files.createFile(synonymsFile);
assertTrue(Files.exists(synonymsFile));
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.CREATE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}
assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
.put("index.number_of_shards", cluster().numDataNodes() * 2)
.put("index.number_of_replicas", 1)
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
.put("analysis.analyzer.my_synonym_analyzer.filter", "my_synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym")
.put("analysis.filter.my_synonym_filter.updateable", "true")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

client().prepareIndex("test", "_doc", "1").setSource("field", "foo").get();
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(2, analyzeResponse.getTokens().size());
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

// now update synonyms file several times and trigger reloading
for (int i = 0; i < 10; i++) {
String testTerm = randomAlphaOfLength(10);
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, " + testTerm);
}
ReloadAnalyzersResponse reloadResponse = client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet();
assertNoFailures(reloadResponse);
assertEquals(cluster().numDataNodes(), reloadResponse.getSuccessfulShards());

analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains(testTerm));

response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", testTerm)).get();
assertHitCount(response, 1L);
}
}
}
SynonymAnalyzerTests.java
@@ -0,0 +1,111 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

public class SynonymAnalyzerTests extends ESSingleNodeTestCase {

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return Arrays.asList(CommonAnalysisPlugin.class);
}

Contributor:

Can we add a thread-safety test here as well? I.e., create TokenStreams in multiple threads, trigger a reload in a separate thread, and check that no token stream ends up with 'mixed' synonyms.

Member Author:

I was thinking about that, but I would like to hear other people's opinions about what such a test would really need to cover to be useful. It's a bit of overhead, and these concurrency tests are typically a bit flaky, so I would only like to add one if the goals are clear.
What I currently don't understand about your ask is what "mixed" synonyms would mean. The SynonymMap in SynonymTokenFilterFactory should always be swapped as a whole, I think. Are you saying that we need tests that check that this happens at a specific point in time after another thread triggers the reload, or do you just want to check that each running thread eventually sees the change?

Contributor:

I think we need a unit test for the ReloadableCustomAnalyzer, something that checks that reloading works and that multiple threads see the update when they create a new token stream. It doesn't need to focus on synonyms; just checking that the new filters are taken into account when there is an update should be enough.
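A rough, self-contained sketch of the kind of test being discussed — the Components class, version strings, and thread counts below are invented for illustration; the real test would target ReloadableCustomAnalyzer. Readers grab the volatile components reference once per "token stream", a reloader publishes fully built replacements, and the assertion is that no reader ever observes pieces from two different snapshots:

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;

public class ReloadConsistencySketch {

    /** Immutable stand-in for AnalyzerComponents: all parts built from one reload. */
    static final class Components {
        final String version;
        final List<String> filters;
        Components(String version, List<String> filters) {
            this.version = version;
            this.filters = filters;
        }
    }

    // single volatile swap point, as in a reloadable analyzer
    static volatile Components current = new Components("v0", Arrays.asList("v0-filter"));

    public static void main(String[] args) throws InterruptedException {
        AtomicBoolean mixed = new AtomicBoolean();
        CountDownLatch start = new CountDownLatch(1);
        Thread[] readers = new Thread[4];
        for (int t = 0; t < readers.length; t++) {
            readers[t] = new Thread(() -> {
                try {
                    start.await();
                } catch (InterruptedException e) {
                    return;
                }
                for (int i = 0; i < 100_000; i++) {
                    Components snapshot = current; // read the reference exactly once
                    // every part must belong to the same snapshot, i.e. no 'mixed' state
                    if (snapshot.filters.get(0).startsWith(snapshot.version) == false) {
                        mixed.set(true);
                    }
                }
            });
            readers[t].start();
        }
        Thread reloader = new Thread(() -> {
            try {
                start.await();
            } catch (InterruptedException e) {
                return;
            }
            for (int v = 1; v <= 1_000; v++) {
                String version = "v" + v;
                // build the replacement completely, then publish it atomically
                current = new Components(version, Arrays.asList(version + "-filter"));
            }
        });
        reloader.start();
        start.countDown();
        for (Thread reader : readers) {
            reader.join();
        }
        reloader.join();
        System.out.println(mixed.get() ? "saw mixed components!" : "all snapshots consistent");
    }
}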

public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
}
Path synonymsFile = configDir.resolve(synonymsFileName);
if (Files.exists(synonymsFile) == false) {
Files.createFile(synonymsFile);
}
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}

assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
.putList("analysis.analyzer.my_synonym_analyzer.filter", "lowercase", "my_synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym")
.put("analysis.filter.my_synonym_filter.updateable", "true")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

client().prepareIndex("test", "_doc", "1").setSource("field", "Foo").get();
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(2, analyzeResponse.getTokens().size());
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

// now update synonyms file and trigger reloading
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, buzz");
}
assertNoFailures(client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet());

analyzeResponse = client().admin().indices().prepareAnalyze("test", "Foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains("buzz"));

response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 1L);
}
}
ActionModule.java
@@ -125,6 +125,8 @@
import org.elasticsearch.action.admin.indices.recovery.TransportRecoveryAction;
import org.elasticsearch.action.admin.indices.refresh.RefreshAction;
import org.elasticsearch.action.admin.indices.refresh.TransportRefreshAction;
+ import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzerAction;
+ import org.elasticsearch.action.admin.indices.reloadanalyzer.TransportReloadAnalyzersAction;
import org.elasticsearch.action.admin.indices.rollover.RolloverAction;
import org.elasticsearch.action.admin.indices.rollover.TransportRolloverAction;
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentsAction;
@@ -509,6 +511,7 @@ public <Request extends ActionRequest, Response extends ActionResponse> void reg
actions.register(ClearScrollAction.INSTANCE, TransportClearScrollAction.class);
actions.register(RecoveryAction.INSTANCE, TransportRecoveryAction.class);
actions.register(NodesReloadSecureSettingsAction.INSTANCE, TransportNodesReloadSecureSettingsAction.class);
+ actions.register(ReloadAnalyzerAction.INSTANCE, TransportReloadAnalyzersAction.class);

//Indexed scripts
actions.register(PutStoredScriptAction.INSTANCE, TransportPutStoredScriptAction.class);
TransportAnalyzeAction.java
@@ -49,10 +49,12 @@
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
+ import org.elasticsearch.index.analysis.CustomAnalyzerProvider.AnalyzerComponents;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
+ import org.elasticsearch.index.analysis.ReloadableCustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@@ -299,18 +301,36 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
}
}

- CustomAnalyzer customAnalyzer = null;
- if (analyzer instanceof CustomAnalyzer) {
- customAnalyzer = (CustomAnalyzer) analyzer;
- } else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
- customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
+ Analyzer customAnalyzer = null;
+ // maybe unwrap analyzer from NamedAnalyzer
+ Analyzer potentialCustomAnalyzer = analyzer;
+ if (analyzer instanceof NamedAnalyzer) {
+ potentialCustomAnalyzer = ((NamedAnalyzer) analyzer).analyzer();
+ }
+ if (potentialCustomAnalyzer instanceof CustomAnalyzer || potentialCustomAnalyzer instanceof ReloadableCustomAnalyzer) {
+ customAnalyzer = potentialCustomAnalyzer;
}

if (customAnalyzer != null) {
- // customAnalyzer = divide charfilter, tokenizer tokenfilters
- CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
- TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
- TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
+ // divide charfilter, tokenizer tokenfilters
+ CharFilterFactory[] charFilterFactories;
+ TokenizerFactory tokenizerFactory;
+ TokenFilterFactory[] tokenFilterFactories;
+ String tokenizerName;
+ if (customAnalyzer instanceof CustomAnalyzer) {
+ CustomAnalyzer casted = (CustomAnalyzer) customAnalyzer;
+ charFilterFactories = casted.charFilters();
+ tokenizerFactory = casted.tokenizerFactory();
+ tokenFilterFactories = casted.tokenFilters();
+ tokenizerName = casted.getTokenizerName();
+ } else {
+ // for ReloadableCustomAnalyzer we want to make sure we get the factories from the same components object
+ AnalyzerComponents components = ((ReloadableCustomAnalyzer) customAnalyzer).getComponents();
+ charFilterFactories = components.getCharFilters();
+ tokenizerFactory = components.getTokenizerFactory();
+ tokenFilterFactories = components.getTokenFilters();
+ tokenizerName = components.getTokenizerName();
+ }

String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ?
@@ -370,7 +390,7 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
}
}
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(
- customAnalyzer.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
+ tokenizerName, tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
} else {
String name;
if (analyzer instanceof NamedAnalyzer) {
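The "same components object" comment above is the important detail: with a reloadable analyzer, fetching char filters, tokenizer, and token filters through separate calls on the analyzer could straddle a concurrent reload. A minimal sketch of the snapshot-read pattern, with invented types — only the read-the-volatile-once idea is taken from the change above:

import java.util.Arrays;
import java.util.List;

class SnapshotReadSketch {

    /** Stand-in for AnalyzerComponents: one immutable bundle per reload. */
    static final class Components {
        final String tokenizerName;
        final List<String> tokenFilters;
        Components(String tokenizerName, List<String> tokenFilters) {
            this.tokenizerName = tokenizerName;
            this.tokenFilters = tokenFilters;
        }
    }

    // swapped atomically whenever a reload finishes
    static volatile Components components = new Components("standard", Arrays.asList("lowercase", "synonyms@v1"));

    static String describe() {
        // dereference the volatile field once; all later reads use the same snapshot,
        // so a reload can never mix a tokenizer from one version with filters from another
        Components local = components;
        return local.tokenizerName + " + " + local.tokenFilters;
    }

    public static void main(String[] args) {
        System.out.println(describe()); // standard + [lowercase, synonyms@v1]
    }
}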