Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,24 @@ public TokenStream create(TokenStream tokenStream) {
return synonyms.fst == null ? tokenStream : new SynonymGraphFilter(tokenStream, synonyms, false);
}

@Override
public TokenFilterFactory getSynonymFilter() {
    // While a synonym filter's rules are being parsed, any synonym filters that come
    // earlier in the chain must be neutralised. If they stayed active, the analyzer
    // used during the building phase would already expand synonyms, so each filter's
    // rules would be re-expanded by every preceding filter. With chained filters
    // (synonym_A -> synonym_B -> synonym_C) that compounds every SynonymMap, drives
    // quadratic work in SynonymGraphFilter.bufferOutputTokens(), and can exhaust the
    // heap (OutOfMemoryError) during analyzer reload.
    //
    // Returning the identity filter here mirrors SynonymTokenFilterFactory and keeps
    // rule parsing free of previously-registered synonym expansion — critical for
    // users who chain many synonym sets.
    return IDENTITY_FILTER;
}

@Override
public AnalysisMode getAnalysisMode() {
return analysisMode;
Expand All @@ -77,5 +95,4 @@ public String getResourceName() {
}
};
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.BiConsumer;

Expand Down Expand Up @@ -333,6 +335,109 @@ public void testChainedSynonymFilters() throws IOException {
);
}

public void testChainedSynonymGraphFilters() throws IOException {
    // Analyzer chaining three synonym_graph filters behind a lowercase filter.
    Settings chainSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
        .put("path.home", createTempDir().toString())
        .put("index.analysis.filter.synonyms1.type", "synonym_graph")
        .putList("index.analysis.filter.synonyms1.synonyms", "foo, bar")
        .put("index.analysis.filter.synonyms2.type", "synonym_graph")
        .putList("index.analysis.filter.synonyms2.synonyms", "baz, qux")
        .put("index.analysis.filter.synonyms3.type", "synonym_graph")
        .putList("index.analysis.filter.synonyms3.synonyms", "hello, world")
        .put("index.analysis.analyzer.syn.tokenizer", "standard")
        .putList("index.analysis.analyzer.syn.filter", "lowercase", "synonyms1", "synonyms2", "synonyms3")
        .build();
    IndexSettings chainIndexSettings = IndexSettingsModule.newIndexSettings("index", chainSettings);
    indexAnalyzers = createTestAnalysis(chainIndexSettings, chainSettings, new CommonAnalysisPlugin()).indexAnalyzers;

    // A single matching word: synonym_graph stacks the synonym and the original
    // token at the same position (synonym first, then original).
    BaseTokenStreamTestCase.assertAnalyzesTo(
        indexAnalyzers.get("syn"),
        "foo",
        new String[] { "bar", "foo" },
        new int[] { 0, 0 },   // start offsets
        new int[] { 3, 3 },   // end offsets
        new int[] { 1, 0 }    // position increments
    );

    // Multi-word input: each of the three chained filters must fire on its own word,
    // producing a stacked pair per input token.
    BaseTokenStreamTestCase.assertAnalyzesTo(
        indexAnalyzers.get("syn"),
        "foo baz hello",
        new String[] { "bar", "foo", "qux", "baz", "world", "hello" },
        new int[] { 0, 0, 4, 4, 8, 8 },      // start offsets
        new int[] { 3, 3, 7, 7, 13, 13 },    // end offsets
        new int[] { 1, 0, 1, 0, 1, 0 }       // position increments: each pair shares a position
    );
}

public void testManyChainedSynonymGraphFilters() throws IOException {
Settings.Builder settingsBuilder = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
.put("path.home", createTempDir().toString());

String[] vocab = randomArray(50_000, 100_000, String[]::new, () -> randomAlphanumericOfLength(20));
int synonymsPerFilter = 10_000;
int synonymSets = 100;
List<String> filterNames = new ArrayList<>();
filterNames.add("lowercase");

for (int i = 1; i <= synonymSets; i++) {
String filterName = "synonyms_" + i;
StringBuilder sb = new StringBuilder();

for (int j = 0; j < synonymsPerFilter; j++) {
if (j > 0) {
sb.append("\n");
}
for (int k = 0; k < between(1, 3); k++) {
if (k > 0) {
sb.append(", ");
}
for (int l = 0; l < between(1, 3); l++) {
if (l > 0) {
sb.append(" ");
}
sb.append(randomFrom(vocab));
}
}

sb.append(" => ");
sb.append("syn").append(i * (j + 1)); // Shared ID appears in ALL filters
}

filterNames.add(filterName);
settingsBuilder.put("index.analysis.filter." + filterName + ".type", "synonym_graph")
.put("index.analysis.filter." + filterName + ".lenient", true)
.putList("index.analysis.filter." + filterName + ".synonyms", sb.toString());
}

settingsBuilder.put("index.analysis.analyzer.many_syn.tokenizer", "standard")
.putList("index.analysis.analyzer.many_syn.filter", filterNames);

Settings settings = settingsBuilder.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);

long startTime = System.currentTimeMillis();

// This would OOM without the SynonymGraphTokenFilterFactory::getSynonymFilter() fix (filters built sequentially)
indexAnalyzers = createTestAnalysis(idxSettings, settings, new CommonAnalysisPlugin()).indexAnalyzers;

// Verify the analyzer was built successfully and can analyze text
// With cross-referencing synonyms, the exact output is complex, so just verify it works
Analyzer analyzer = indexAnalyzers.get("many_syn");
assertNotNull("Analyzer should be created", analyzer);

for (int i = 0; i < 1000; i++) {
// Test that it can analyze without throwing exceptions
TokenStream ts = analyzer.tokenStream("test", randomFrom(vocab));
ts.reset();
assertTrue("Should produce at least one token", ts.incrementToken());
ts.close();
}
Copy link
Contributor

@markjhoy markjhoy Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to help the test GC along, you may want to place:

indexAnalyzers = null;

at the end of the test. (context: playing with the test, I had it loop through the entire test 20 times, and it OOM'd for me... adding this inside the loop, it passed). Not saying an OOM will happen during a CI test run with just the one time through in the test, but it can't hurt just in case.

}

public void testShingleFilters() {

Settings settings = Settings.builder()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
---
"Test chained synonym_graph filters":
  # Create an index whose analyzer chains three synonym_graph filters after lowercase.
  - do:
      indices.create:
        index: test_chained_synonyms
        body:
          settings:
            analysis:
              filter:
                synonyms_a:
                  type: synonym_graph
                  synonyms: [ "foo, bar" ]
                synonyms_b:
                  type: synonym_graph
                  synonyms: [ "baz, qux" ]
                synonyms_c:
                  type: synonym_graph
                  synonyms: [ "hello, world" ]
              analyzer:
                chained_syn:
                  tokenizer: standard
                  filter: [ lowercase, synonyms_a, synonyms_b, synonyms_c ]
          mappings:
            properties:
              text:
                type: text
                analyzer: chained_syn

  - do:
      index:
        index: test_chained_synonyms
        id: "1"
        body:
          text: "foo baz hello"
        refresh: true

  # Test that all three chained synonym filters work correctly:
  # the document indexed as "foo baz hello" must match the synonym terms.
  - do:
      search:
        index: test_chained_synonyms
        body:
          query:
            match:
              text: "bar qux world"
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "1" }

  # Verify analyzer behavior - synonym_graph produces synonym first, then original,
  # both stacked at the same position.
  - do:
      indices.analyze:
        index: test_chained_synonyms
        body:
          text: "foo"
          analyzer: chained_syn
  - length: { tokens: 2 }
  - match: { tokens.0.position: 0 }
  - match: { tokens.1.position: 0 }

  # Test with multi-word query to verify all chained filters work together
  - do:
      indices.analyze:
        index: test_chained_synonyms
        body:
          text: "foo baz hello"
          analyzer: chained_syn
  # Each word that matches a synonym expands to 2 tokens: foo→(foo,bar), baz→(baz,qux), hello→(hello,world)
  - length: { tokens: 6 }
  # Verify positions: each synonym pair at same position
  - match: { tokens.0.position: 0 }
  - match: { tokens.1.position: 0 }
  - match: { tokens.2.position: 1 }
  - match: { tokens.3.position: 1 }
  - match: { tokens.4.position: 2 }
  - match: { tokens.5.position: 2 }
  # Verify token content - synonym_graph produces synonym first, then original
  - match: { tokens.0.token: "bar" }
  - match: { tokens.1.token: "foo" }
  - match: { tokens.2.token: "qux" }
  - match: { tokens.3.token: "baz" }
  - match: { tokens.4.token: "world" }
  - match: { tokens.5.token: "hello" }

  # Test that analyzer reload doesn't cause issues (critical for the fix):
  # closing and reopening the index rebuilds the chained synonym filters.
  - do:
      indices.close:
        index: test_chained_synonyms

  - do:
      indices.open:
        index: test_chained_synonyms

  # Verify it still works after reload
  - do:
      search:
        index: test_chained_synonyms
        body:
          query:
            match:
              text: "bar qux world"
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "1" }