Skip to content

Commit b65e293

Browse files
authored
Fix caching for PreConfiguredTokenFilter (#50912) (#51095)
The PreConfiguredTokenFilter#singletonWithVersion methods use the version internally when building token filter factories, but they register only one instance in the cache rather than one instance per version. This can lead to exceptions like the one described in #50734, since the singleton is created and cached using the `index.version.created` of the first index that happens to be processed. This change removes the singletonWithVersion() methods and uses the elasticsearchVersion() methods instead, which cache one instance per Elasticsearch version. Fixes: #50734 (cherry picked from commit 24e1858)
1 parent 67d8fdb commit b65e293

File tree

4 files changed

+146
-25
lines changed

4 files changed

+146
-25
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactorie
407407
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
408408
List<PreConfiguredCharFilter> filters = new ArrayList<>();
409409
filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new));
410-
filters.add(PreConfiguredCharFilter.singletonWithVersion("htmlStrip", false, (reader, version) -> {
410+
filters.add(PreConfiguredCharFilter.elasticsearchVersion("htmlStrip", false, (reader, version) -> {
411411
if (version.onOrAfter(org.elasticsearch.Version.V_6_3_0)) {
412412
deprecationLogger.deprecatedAndMaybeLog("htmlStrip_deprecation",
413413
"The [htmpStrip] char filter name is deprecated and will be removed in a future version. "
@@ -434,7 +434,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
434434
input -> new CommonGramsFilter(input, CharArraySet.EMPTY_SET)));
435435
filters.add(PreConfiguredTokenFilter.singleton("czech_stem", false, CzechStemFilter::new));
436436
filters.add(PreConfiguredTokenFilter.singleton("decimal_digit", true, DecimalDigitFilter::new));
437-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("delimited_payload_filter", false, (input, version) -> {
437+
filters.add(PreConfiguredTokenFilter.elasticsearchVersion("delimited_payload_filter", false, (input, version) -> {
438438
if (version.onOrAfter(Version.V_7_0_0)) {
439439
throw new IllegalArgumentException(
440440
"[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead");
@@ -453,7 +453,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
453453
filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer())));
454454
filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input ->
455455
new EdgeNGramTokenFilter(input, 1)));
456-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, false, (reader, version) -> {
456+
filters.add(PreConfiguredTokenFilter.elasticsearchVersion("edgeNGram", false, false, (reader, version) -> {
457457
if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
458458
throw new IllegalArgumentException(
459459
"The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
@@ -481,7 +481,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
481481
LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT,
482482
LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS)));
483483
filters.add(PreConfiguredTokenFilter.singleton("ngram", false, false, reader -> new NGramTokenFilter(reader, 1, 2, false)));
484-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, false, (reader, version) -> {
484+
filters.add(PreConfiguredTokenFilter.elasticsearchVersion("nGram", false, false, (reader, version) -> {
485485
if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) {
486486
throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
487487
+ "Please change the filter name to [ngram] instead.");
@@ -527,7 +527,7 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
527527
| WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
528528
| WordDelimiterFilter.SPLIT_ON_NUMERICS
529529
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null)));
530-
filters.add(PreConfiguredTokenFilter.singletonWithVersion("word_delimiter_graph", false, false, (input, version) -> {
530+
filters.add(PreConfiguredTokenFilter.elasticsearchVersion("word_delimiter_graph", false, false, (input, version) -> {
531531
boolean adjustOffsets = version.onOrAfter(Version.V_7_3_0);
532532
return new WordDelimiterGraphFilter(input, adjustOffsets, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
533533
WordDelimiterGraphFilter.GENERATE_WORD_PARTS

server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -57,25 +57,6 @@ public static PreConfiguredTokenFilter singleton(String name, boolean useFilterF
5757
(tokenStream, version) -> create.apply(tokenStream));
5858
}
5959

60-
/**
61-
* Create a pre-configured token filter that may vary based on the Elasticsearch version.
62-
*/
63-
public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries,
64-
BiFunction<TokenStream, Version, TokenStream> create) {
65-
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, true, CachingStrategy.ONE,
66-
(tokenStream, version) -> create.apply(tokenStream, version));
67-
}
68-
69-
/**
70-
* Create a pre-configured token filter that may vary based on the Elasticsearch version.
71-
*/
72-
public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries,
73-
boolean useFilterForParsingSynonyms,
74-
BiFunction<TokenStream, Version, TokenStream> create) {
75-
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, useFilterForParsingSynonyms, CachingStrategy.ONE,
76-
(tokenStream, version) -> create.apply(tokenStream, version));
77-
}
78-
7960
/**
8061
* Create a pre-configured token filter that may vary based on the Lucene version.
8162
*/
@@ -93,6 +74,16 @@ public static PreConfiguredTokenFilter elasticsearchVersion(String name, boolean
9374
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, true, CachingStrategy.ELASTICSEARCH, create);
9475
}
9576

77+
/**
78+
* Create a pre-configured token filter that may vary based on the Elasticsearch version.
79+
*/
80+
public static PreConfiguredTokenFilter elasticsearchVersion(String name, boolean useFilterForMultitermQueries,
81+
boolean useFilterForParsingSynonyms,
82+
BiFunction<TokenStream, Version, TokenStream> create) {
83+
return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, useFilterForParsingSynonyms,
84+
CachingStrategy.ELASTICSEARCH, create);
85+
}
86+
9687
private final boolean useFilterForMultitermQueries;
9788
private final boolean allowForSynonymParsing;
9889
private final BiFunction<TokenStream, Version, TokenStream> create;

server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List
181181
preConfiguredTokenFilters.register("lowercase", PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new));
182182
// Add "standard" for old indices (bwc)
183183
preConfiguredTokenFilters.register( "standard",
184-
PreConfiguredTokenFilter.singletonWithVersion("standard", true, (reader, version) -> {
184+
PreConfiguredTokenFilter.elasticsearchVersion("standard", true, (reader, version) -> {
185185
if (version.before(Version.V_7_0_0)) {
186186
deprecationLogger.deprecatedAndMaybeLog("standard_deprecation",
187187
"The [standard] token filter is deprecated and will be removed in a future version.");
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.index.analysis;
20+
21+
import org.apache.lucene.analysis.TokenFilter;
22+
import org.elasticsearch.Version;
23+
import org.elasticsearch.cluster.metadata.IndexMetaData;
24+
import org.elasticsearch.common.settings.Settings;
25+
import org.elasticsearch.env.Environment;
26+
import org.elasticsearch.env.TestEnvironment;
27+
import org.elasticsearch.index.IndexSettings;
28+
import org.elasticsearch.test.ESTestCase;
29+
import org.elasticsearch.test.IndexSettingsModule;
30+
import org.elasticsearch.test.VersionUtils;
31+
32+
import java.io.IOException;
33+
34+
public class PreConfiguredTokenFilterTests extends ESTestCase {
35+
36+
private final Settings emptyNodeSettings = Settings.builder()
37+
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
38+
.build();
39+
40+
public void testCachingWithSingleton() throws IOException {
41+
PreConfiguredTokenFilter pctf =
42+
PreConfiguredTokenFilter.singleton("singleton", randomBoolean(),
43+
(tokenStream) -> new TokenFilter(tokenStream) {
44+
@Override
45+
public boolean incrementToken() {
46+
return false;
47+
}
48+
});
49+
50+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);
51+
52+
Version version1 = VersionUtils.randomVersion(random());
53+
Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1)
54+
.build();
55+
TokenFilterFactory tff_v1_1 =
56+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings1);
57+
TokenFilterFactory tff_v1_2 =
58+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings1);
59+
assertSame(tff_v1_1, tff_v1_2);
60+
61+
Version version2 = randomValueOtherThan(version1, () -> randomFrom(VersionUtils.allVersions()));
62+
Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2)
63+
.build();
64+
65+
TokenFilterFactory tff_v2 =
66+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings2);
67+
assertSame(tff_v1_1, tff_v2);
68+
}
69+
70+
public void testCachingWithElasticsearchVersion() throws IOException {
71+
PreConfiguredTokenFilter pctf =
72+
PreConfiguredTokenFilter.elasticsearchVersion("elasticsearch_version", randomBoolean(),
73+
(tokenStream, esVersion) -> new TokenFilter(tokenStream) {
74+
@Override
75+
public boolean incrementToken() {
76+
return false;
77+
}
78+
});
79+
80+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);
81+
82+
Version version1 = VersionUtils.randomVersion(random());
83+
Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1)
84+
.build();
85+
TokenFilterFactory tff_v1_1 =
86+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings1);
87+
TokenFilterFactory tff_v1_2 =
88+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings1);
89+
assertSame(tff_v1_1, tff_v1_2);
90+
91+
Version version2 = randomValueOtherThan(version1, () -> randomFrom(VersionUtils.allVersions()));
92+
Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2)
93+
.build();
94+
95+
TokenFilterFactory tff_v2 =
96+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings2);
97+
assertNotSame(tff_v1_1, tff_v2);
98+
}
99+
100+
public void testCachingWithLuceneVersion() throws IOException {
101+
PreConfiguredTokenFilter pctf =
102+
PreConfiguredTokenFilter.luceneVersion("lucene_version", randomBoolean(),
103+
(tokenStream, luceneVersion) -> new TokenFilter(tokenStream) {
104+
@Override
105+
public boolean incrementToken() {
106+
return false;
107+
}
108+
});
109+
110+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);
111+
112+
Version version1 = Version.CURRENT;
113+
Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1)
114+
.build();
115+
TokenFilterFactory tff_v1_1 =
116+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings1);
117+
TokenFilterFactory tff_v1_2 =
118+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings1);
119+
assertSame(tff_v1_1, tff_v1_2);
120+
121+
byte major = VersionUtils.getFirstVersion().major;
122+
Version version2 = Version.fromString(major - 1 + ".0.0");
123+
Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2)
124+
.build();
125+
126+
TokenFilterFactory tff_v2 =
127+
pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings2);
128+
assertNotSame(tff_v1_1, tff_v2);
129+
}
130+
}

0 commit comments

Comments
 (0)