SynonymTokenFilterFactory.java
@@ -30,6 +30,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
+ import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
@@ -50,6 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean lenient;
protected final Settings settings;
protected final Environment environment;
+ private final boolean updateable;

SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
@@ -65,9 +67,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
this.expand = settings.getAsBoolean("expand", true);
this.lenient = settings.getAsBoolean("lenient", false);
this.format = settings.get("format", "");
+ this.updateable = settings.getAsBoolean("updateable", false);
this.environment = env;
}

+ @Override
+ public AnalysisMode getAnalysisMode() {
+ return this.updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
+ }

@Override
public TokenStream create(TokenStream tokenStream) {
throw new IllegalStateException("Call createPerAnalyzerSynonymFactory to specialize this factory for an analysis chain first");
@@ -98,6 +106,11 @@ public TokenFilterFactory getSynonymFilter() {
// which doesn't support stacked input tokens
return IDENTITY_FILTER;
}

+ @Override
+ public AnalysisMode getAnalysisMode() {
+ return updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
+ }
};
}

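For context on what getAnalysisMode buys here: an analyzer chain takes on the most restrictive mode of its parts, so a filter marked updateable (and therefore SEARCH_TIME) can only be used in a search-time analyzer. A self-contained sketch of that merging idea — this mini enum only mirrors the concept, it is not the actual org.elasticsearch.index.analysis.AnalysisMode:

/** Illustration only: a stand-in mirroring the idea behind AnalysisMode. */
enum Mode {
    INDEX_TIME, SEARCH_TIME, ALL;

    /** Returns the most restrictive mode two chain components support together. */
    Mode merge(Mode other) {
        if (this == other || other == ALL) {
            return this;
        }
        if (this == ALL) {
            return other;
        }
        // INDEX_TIME vs SEARCH_TIME: no analyzer can satisfy both restrictions
        throw new IllegalStateException("cannot combine modes " + this + " and " + other);
    }

    public static void main(String[] args) {
        Mode lowercase = Mode.ALL;                  // usable anywhere
        Mode updateableSynonyms = Mode.SEARCH_TIME; // updateable=true restricts the chain
        System.out.println(lowercase.merge(updateableSynonyms)); // prints SEARCH_TIME
    }
}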
SynonymAnalyzerIT.java
@@ -0,0 +1,123 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzersResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalTestCluster;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

public class SynonymAnalyzerIT extends ESIntegTestCase {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(CommonAnalysisPlugin.class);
}

/**
* This test needs to write to the config directory; this is difficult in an external cluster, so we override this method to
* force running with {@link InternalTestCluster}.
*/
@Override
protected boolean ignoreExternalCluster() {
return true;
}

public void testSynonymsUpdateable() throws FileNotFoundException, IOException, InterruptedException {
Path config = internalCluster().getInstance(Environment.class).configFile();
String synonymsFileName = "synonyms.txt";
Path synonymsFile = config.resolve(synonymsFileName);
Files.createFile(synonymsFile);
assertTrue(Files.exists(synonymsFile));
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.CREATE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}
assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
.put("index.number_of_shards", cluster().numDataNodes() * 2)
.put("index.number_of_replicas", 1)
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
.put("analysis.analyzer.my_synonym_analyzer.filter", "my_synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym")
.put("analysis.filter.my_synonym_filter.updateable", "true")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

client().prepareIndex("test", "_doc", "1").setSource("field", "foo").get();
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(2, analyzeResponse.getTokens().size());
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

// now update synonyms file several times and trigger reloading
for (int i = 0; i < 10; i++) {
String testTerm = randomAlphaOfLength(10);
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, " + testTerm);
}
ReloadAnalyzersResponse reloadResponse = client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet();
assertNoFailures(reloadResponse);
assertEquals(cluster().numDataNodes(), reloadResponse.getSuccessfulShards());

analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains(testTerm));

response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", testTerm)).get();
assertHitCount(response, 1L);
}
}
}
SynonymAnalyzerTests.java
@@ -0,0 +1,111 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.analysis.common;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;

public class SynonymAnalyzerTests extends ESSingleNodeTestCase {

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return Arrays.asList(CommonAnalysisPlugin.class);
}

Contributor:

Can we add a thread-safety test here as well? I.e., create TokenStreams in multiple threads, trigger a reload in a separate thread, and check that no token stream ends up with 'mixed' synonyms.

Member Author:

I was thinking about that, but I would like to hear other people's opinions about what such a test would really need to cover to be useful. It's a bit of overhead, and these concurrency tests are typically a bit flaky, so I would only like to add one if the goals are clear.
What I currently don't understand about your ask is what "mixed" synonyms would mean. The SynonymMap in SynonymTokenFilterFactory should always be swapped as a whole, I think. Are you saying that we need tests that check that this happens at a specific point in time after another thread triggers the reload, or do you just want to check that each running thread eventually sees the change?

Contributor:

I think we need a unit test for the ReloadableCustomAnalyzer, something that checks that reloading works and that multiple threads see the update when they create a new token stream. It doesn't need to focus on synonyms; just checking that the new filters are taken into account when there is an update should be enough.
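A rough, self-contained sketch of the kind of test being discussed — the Components class, version strings, and thread counts below are invented for illustration; the real test would target ReloadableCustomAnalyzer. Readers grab the volatile components reference once per "token stream", a reloader publishes fully built replacements, and the assertion is that no reader ever observes pieces from two different snapshots:

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;

public class ReloadConsistencySketch {

    /** Immutable stand-in for AnalyzerComponents: all parts built from one reload. */
    static final class Components {
        final String version;
        final List<String> filters;
        Components(String version, List<String> filters) {
            this.version = version;
            this.filters = filters;
        }
    }

    // single volatile swap point, as in a reloadable analyzer
    static volatile Components current = new Components("v0", Arrays.asList("v0-filter"));

    public static void main(String[] args) throws InterruptedException {
        AtomicBoolean mixed = new AtomicBoolean();
        CountDownLatch start = new CountDownLatch(1);
        Thread[] readers = new Thread[4];
        for (int t = 0; t < readers.length; t++) {
            readers[t] = new Thread(() -> {
                try {
                    start.await();
                } catch (InterruptedException e) {
                    return;
                }
                for (int i = 0; i < 100_000; i++) {
                    Components snapshot = current; // read the reference exactly once
                    // every part must belong to the same snapshot, i.e. no 'mixed' state
                    if (snapshot.filters.get(0).startsWith(snapshot.version) == false) {
                        mixed.set(true);
                    }
                }
            });
            readers[t].start();
        }
        Thread reloader = new Thread(() -> {
            try {
                start.await();
            } catch (InterruptedException e) {
                return;
            }
            for (int v = 1; v <= 1_000; v++) {
                String version = "v" + v;
                // build the replacement completely, then publish it atomically
                current = new Components(version, Arrays.asList(version + "-filter"));
            }
        });
        reloader.start();
        start.countDown();
        for (Thread reader : readers) {
            reader.join();
        }
        reloader.join();
        System.out.println(mixed.get() ? "saw mixed components!" : "all snapshots consistent");
    }
}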

public void testSynonymsUpdateable() throws FileNotFoundException, IOException {
String synonymsFileName = "synonyms.txt";
Path configDir = node().getEnvironment().configFile();
if (Files.exists(configDir) == false) {
Files.createDirectory(configDir);
}
Path synonymsFile = configDir.resolve(synonymsFileName);
if (Files.exists(synonymsFile) == false) {
Files.createFile(synonymsFile);
}
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz");
}

assertAcked(client().admin().indices().prepareCreate("test").setSettings(Settings.builder()
.put("index.number_of_shards", 5)
.put("index.number_of_replicas", 0)
.put("analysis.analyzer.my_synonym_analyzer.tokenizer", "standard")
.putList("analysis.analyzer.my_synonym_analyzer.filter", "lowercase", "my_synonym_filter")
.put("analysis.filter.my_synonym_filter.type", "synonym")
.put("analysis.filter.my_synonym_filter.updateable", "true")
.put("analysis.filter.my_synonym_filter.synonyms_path", synonymsFileName))
.addMapping("_doc", "field", "type=text,analyzer=standard,search_analyzer=my_synonym_analyzer"));

client().prepareIndex("test", "_doc", "1").setSource("field", "Foo").get();
assertNoFailures(client().admin().indices().prepareRefresh("test").execute().actionGet());

SearchResponse response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 0L);
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("test", "foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(2, analyzeResponse.getTokens().size());
assertEquals("foo", analyzeResponse.getTokens().get(0).getTerm());
assertEquals("baz", analyzeResponse.getTokens().get(1).getTerm());

// now update synonyms file and trigger reloading
try (PrintWriter out = new PrintWriter(
new OutputStreamWriter(Files.newOutputStream(synonymsFile, StandardOpenOption.WRITE), StandardCharsets.UTF_8))) {
out.println("foo, baz, buzz");
}
assertNoFailures(client().admin().indices().prepareReloadAnalyzers("test").execute().actionGet());

analyzeResponse = client().admin().indices().prepareAnalyze("test", "Foo").setAnalyzer("my_synonym_analyzer").get();
assertEquals(3, analyzeResponse.getTokens().size());
Set<String> tokens = new HashSet<>();
analyzeResponse.getTokens().stream().map(AnalyzeToken::getTerm).forEach(t -> tokens.add(t));
assertTrue(tokens.contains("foo"));
assertTrue(tokens.contains("baz"));
assertTrue(tokens.contains("buzz"));

response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "baz")).get();
assertHitCount(response, 1L);
response = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("field", "buzz")).get();
assertHitCount(response, 1L);
}
}
ActionModule.java
@@ -125,6 +125,8 @@
import org.elasticsearch.action.admin.indices.recovery.TransportRecoveryAction;
import org.elasticsearch.action.admin.indices.refresh.RefreshAction;
import org.elasticsearch.action.admin.indices.refresh.TransportRefreshAction;
+ import org.elasticsearch.action.admin.indices.reloadanalyzer.ReloadAnalyzerAction;
+ import org.elasticsearch.action.admin.indices.reloadanalyzer.TransportReloadAnalyzersAction;
import org.elasticsearch.action.admin.indices.rollover.RolloverAction;
import org.elasticsearch.action.admin.indices.rollover.TransportRolloverAction;
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentsAction;
@@ -509,6 +511,7 @@ public <Request extends ActionRequest, Response extends ActionResponse> void reg
actions.register(ClearScrollAction.INSTANCE, TransportClearScrollAction.class);
actions.register(RecoveryAction.INSTANCE, TransportRecoveryAction.class);
actions.register(NodesReloadSecureSettingsAction.INSTANCE, TransportNodesReloadSecureSettingsAction.class);
+ actions.register(ReloadAnalyzerAction.INSTANCE, TransportReloadAnalyzersAction.class);

//Indexed scripts
actions.register(PutStoredScriptAction.INSTANCE, TransportPutStoredScriptAction.class);
TransportAnalyzeAction.java
@@ -49,10 +49,12 @@
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
+ import org.elasticsearch.index.analysis.CustomAnalyzerProvider.AnalyzerComponents;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
+ import org.elasticsearch.index.analysis.ReloadableCustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@@ -299,18 +301,36 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
}
}

- CustomAnalyzer customAnalyzer = null;
- if (analyzer instanceof CustomAnalyzer) {
- customAnalyzer = (CustomAnalyzer) analyzer;
- } else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
- customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
+ Analyzer customAnalyzer = null;
+ // maybe unwrap analyzer from NamedAnalyzer
+ Analyzer potentialCustomAnalyzer = analyzer;
+ if (analyzer instanceof NamedAnalyzer) {
+ potentialCustomAnalyzer = ((NamedAnalyzer) analyzer).analyzer();
+ }
+ if (potentialCustomAnalyzer instanceof CustomAnalyzer || potentialCustomAnalyzer instanceof ReloadableCustomAnalyzer) {
+ customAnalyzer = potentialCustomAnalyzer;
}

if (customAnalyzer != null) {
- // customAnalyzer = divide charfilter, tokenizer tokenfilters
- CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
- TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
- TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
+ // divide charfilter, tokenizer tokenfilters
+ CharFilterFactory[] charFilterFactories;
+ TokenizerFactory tokenizerFactory;
+ TokenFilterFactory[] tokenFilterFactories;
+ String tokenizerName;
+ if (customAnalyzer instanceof CustomAnalyzer) {
+ CustomAnalyzer casted = (CustomAnalyzer) customAnalyzer;
+ charFilterFactories = casted.charFilters();
+ tokenizerFactory = casted.tokenizerFactory();
+ tokenFilterFactories = casted.tokenFilters();
+ tokenizerName = casted.getTokenizerName();
+ } else {
+ // for ReloadableCustomAnalyzer we want to make sure we get the factories from the same components object
+ AnalyzerComponents components = ((ReloadableCustomAnalyzer) customAnalyzer).getComponents();
+ charFilterFactories = components.getCharFilters();
+ tokenizerFactory = components.getTokenizerFactory();
+ tokenFilterFactories = components.getTokenFilters();
+ tokenizerName = components.getTokenizerName();
+ }

String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ?
@@ -370,7 +390,7 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
}
}
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(
- customAnalyzer.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
+ tokenizerName, tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
} else {
String name;
if (analyzer instanceof NamedAnalyzer) {
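The "same components object" comment above is the important detail: with a reloadable analyzer, fetching char filters, tokenizer, and token filters through separate calls on the analyzer could straddle a concurrent reload. A minimal sketch of the snapshot-read pattern, with invented types — only the read-the-volatile-once idea is taken from the change above:

import java.util.Arrays;
import java.util.List;

class SnapshotReadSketch {

    /** Stand-in for AnalyzerComponents: one immutable bundle per reload. */
    static final class Components {
        final String tokenizerName;
        final List<String> tokenFilters;
        Components(String tokenizerName, List<String> tokenFilters) {
            this.tokenizerName = tokenizerName;
            this.tokenFilters = tokenFilters;
        }
    }

    // swapped atomically whenever a reload finishes
    static volatile Components components = new Components("standard", Arrays.asList("lowercase", "synonyms@v1"));

    static String describe() {
        // dereference the volatile field once; all later reads use the same snapshot,
        // so a reload can never mix a tokenizer from one version with filters from another
        Components local = components;
        return local.tokenizerName + " + " + local.tokenFilters;
    }

    public static void main(String[] args) {
        System.out.println(describe()); // standard + [lowercase, synonyms@v1]
    }
}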