Skip to content

Commit 03fa799

Browse files
author
Christoph Büscher
committed
Make ReloadableCustomAnalyzer sibling of CustomAnalyzer rather than extend it
1 parent 0ec5ac9 commit 03fa799

File tree

10 files changed

+210
-94
lines changed

10 files changed

+210
-94
lines changed

server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import org.elasticsearch.index.analysis.NamedAnalyzer;
5555
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
5656
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
57+
import org.elasticsearch.index.analysis.ReloadableCustomAnalyzer;
5758
import org.elasticsearch.index.analysis.TokenFilterFactory;
5859
import org.elasticsearch.index.analysis.TokenizerFactory;
5960
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@@ -300,19 +301,36 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
300301
}
301302
}
302303

303-
CustomAnalyzer customAnalyzer = null;
304-
if (analyzer instanceof CustomAnalyzer) {
305-
customAnalyzer = (CustomAnalyzer) analyzer;
306-
} else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
307-
customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
304+
Analyzer customAnalyzer = null;
305+
// maybe unwrap analyzer from NamedAnalyzer
306+
Analyzer potentialCustomAnalyzer = analyzer;
307+
if (analyzer instanceof NamedAnalyzer) {
308+
potentialCustomAnalyzer = ((NamedAnalyzer) analyzer).analyzer();
309+
}
310+
if (potentialCustomAnalyzer instanceof CustomAnalyzer || potentialCustomAnalyzer instanceof ReloadableCustomAnalyzer) {
311+
customAnalyzer = potentialCustomAnalyzer;
308312
}
309313

310314
if (customAnalyzer != null) {
311-
// customAnalyzer = divide charfilter, tokenizer tokenfilters
312-
AnalyzerComponents components = customAnalyzer.getComponents();
313-
CharFilterFactory[] charFilterFactories = components.getCharFilters();
314-
TokenizerFactory tokenizerFactory = components.getTokenizerFactory();
315-
TokenFilterFactory[] tokenFilterFactories = components.getTokenFilters();
315+
// split the analyzer into its char filters, tokenizer and token filters
316+
CharFilterFactory[] charFilterFactories;
317+
TokenizerFactory tokenizerFactory;
318+
TokenFilterFactory[] tokenFilterFactories;
319+
String tokenizerName;
320+
if (customAnalyzer instanceof CustomAnalyzer) {
321+
CustomAnalyzer casted = (CustomAnalyzer) customAnalyzer;
322+
charFilterFactories = casted.charFilters();
323+
tokenizerFactory = casted.tokenizerFactory();
324+
tokenFilterFactories = casted.tokenFilters();
325+
tokenizerName = casted.getTokenizerName();
326+
} else {
327+
// for ReloadableCustomAnalyzer we want to make sure we get the factories from the same components object
328+
AnalyzerComponents components = ((ReloadableCustomAnalyzer) customAnalyzer).getComponents();
329+
charFilterFactories = components.getCharFilters();
330+
tokenizerFactory = components.getTokenizerFactory();
331+
tokenFilterFactories = components.getTokenFilters();
332+
tokenizerName = components.getTokenizerName();
333+
}
316334

317335
String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
318336
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ?
@@ -372,7 +390,7 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy
372390
}
373391
}
374392
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(
375-
components.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
393+
tokenizerName, tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
376394
} else {
377395
String name;
378396
if (analyzer instanceof NamedAnalyzer) {

server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java

Lines changed: 59 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -22,41 +22,73 @@
2222
import org.apache.lucene.analysis.Analyzer;
2323
import org.apache.lucene.analysis.TokenStream;
2424
import org.apache.lucene.analysis.Tokenizer;
25-
import org.elasticsearch.index.analysis.CustomAnalyzerProvider.AnalyzerComponents;
2625

2726
import java.io.Reader;
2827

29-
public class CustomAnalyzer extends Analyzer {
28+
public final class CustomAnalyzer extends Analyzer implements TokenFilterComposite {
3029

30+
private final String tokenizerName;
31+
private final TokenizerFactory tokenizerFactory;
32+
33+
private final CharFilterFactory[] charFilters;
34+
35+
private final TokenFilterFactory[] tokenFilters;
36+
37+
private final int positionIncrementGap;
38+
private final int offsetGap;
3139
private final AnalysisMode analysisMode;
32-
protected volatile AnalyzerComponents components;
3340

3441
public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
35-
TokenFilterFactory[] tokenFilters) {
36-
this(new AnalyzerComponents(tokenizerName, tokenizerFactory, charFilters, tokenFilters, 0, -1), GLOBAL_REUSE_STRATEGY);
42+
TokenFilterFactory[] tokenFilters) {
43+
this(tokenizerName, tokenizerFactory, charFilters, tokenFilters, 0, -1);
44+
}
45+
46+
public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
47+
TokenFilterFactory[] tokenFilters, int positionIncrementGap, int offsetGap) {
48+
this.tokenizerName = tokenizerName;
49+
this.tokenizerFactory = tokenizerFactory;
50+
this.charFilters = charFilters;
51+
this.tokenFilters = tokenFilters;
52+
this.positionIncrementGap = positionIncrementGap;
53+
this.offsetGap = offsetGap;
54+
// merge and transfer token filter analysis modes with analyzer
55+
AnalysisMode mode = AnalysisMode.ALL;
56+
for (TokenFilterFactory f : tokenFilters) {
57+
mode = mode.merge(f.getAnalysisMode());
58+
}
59+
this.analysisMode = mode;
60+
}
61+
62+
/**
63+
* The name of the tokenizer as configured by the user.
64+
*/
65+
public String getTokenizerName() {
66+
return tokenizerName;
67+
}
68+
69+
public TokenizerFactory tokenizerFactory() {
70+
return tokenizerFactory;
3771
}
3872

39-
CustomAnalyzer(AnalyzerComponents components, ReuseStrategy reuseStrategy) {
40-
super(reuseStrategy);
41-
this.components = components;
42-
this.analysisMode = calculateAnalysisMode(components);
73+
public TokenFilterFactory[] tokenFilters() {
74+
return tokenFilters;
4375
}
4476

45-
public AnalyzerComponents getComponents() {
46-
return this.components;
77+
public CharFilterFactory[] charFilters() {
78+
return charFilters;
4779
}
4880

4981
@Override
5082
public int getPositionIncrementGap(String fieldName) {
51-
return this.components.getPositionIncrementGap();
83+
return this.positionIncrementGap;
5284
}
5385

5486
@Override
5587
public int getOffsetGap(String field) {
56-
if (this.components.getOffsetGap() < 0) {
88+
if (offsetGap < 0) {
5789
return super.getOffsetGap(field);
5890
}
59-
return this.components.getOffsetGap();
91+
return this.offsetGap;
6092
}
6193

6294
public AnalysisMode getAnalysisMode() {
@@ -65,20 +97,18 @@ public AnalysisMode getAnalysisMode() {
6597

6698
@Override
6799
protected TokenStreamComponents createComponents(String fieldName) {
68-
final AnalyzerComponents components = getComponents();
69-
Tokenizer tokenizer = components.getTokenizerFactory().create();
100+
Tokenizer tokenizer = tokenizerFactory.create();
70101
TokenStream tokenStream = tokenizer;
71-
for (TokenFilterFactory tokenFilter : components.getTokenFilters()) {
102+
for (TokenFilterFactory tokenFilter : tokenFilters) {
72103
tokenStream = tokenFilter.create(tokenStream);
73104
}
74105
return new TokenStreamComponents(tokenizer, tokenStream);
75106
}
76107

77108
@Override
78109
protected Reader initReader(String fieldName, Reader reader) {
79-
final AnalyzerComponents components = getComponents();
80-
if (components.getCharFilters() != null && components.getCharFilters().length > 0) {
81-
for (CharFilterFactory charFilter : components.getCharFilters()) {
110+
if (charFilters != null && charFilters.length > 0) {
111+
for (CharFilterFactory charFilter : charFilters) {
82112
reader = charFilter.create(reader);
83113
}
84114
}
@@ -87,42 +117,18 @@ protected Reader initReader(String fieldName, Reader reader) {
87117

88118
@Override
89119
protected Reader initReaderForNormalization(String fieldName, Reader reader) {
90-
final AnalyzerComponents components = getComponents();
91-
for (CharFilterFactory charFilter : components.getCharFilters()) {
92-
reader = charFilter.normalize(reader);
93-
}
94-
return reader;
120+
for (CharFilterFactory charFilter : charFilters) {
121+
reader = charFilter.normalize(reader);
122+
}
123+
return reader;
95124
}
96125

97126
@Override
98127
protected TokenStream normalize(String fieldName, TokenStream in) {
99-
final AnalyzerComponents components = getComponents();
100-
TokenStream result = in;
101-
for (TokenFilterFactory filter : components.getTokenFilters()) {
102-
result = filter.normalize(result);
103-
}
104-
return result;
105-
}
106-
107-
private static AnalysisMode calculateAnalysisMode(AnalyzerComponents components) {
108-
// merge and transfer token filter analysis modes with analyzer
109-
AnalysisMode mode = AnalysisMode.ALL;
110-
for (TokenFilterFactory f : components.getTokenFilters()) {
111-
mode = mode.merge(f.getAnalysisMode());
112-
}
113-
return mode;
114-
}
115-
116-
/**
117-
* Factory method that either returns a plain {@link CustomAnalyzer} if the components used for creation are supporting index and search
118-
* time use, or a {@link ReloadableCustomAnalyzer} if the components are intended for search time use only.
119-
*/
120-
static CustomAnalyzer create(AnalyzerComponents components) {
121-
AnalysisMode mode = calculateAnalysisMode(components);
122-
if (mode.equals(AnalysisMode.SEARCH_TIME)) {
123-
return new ReloadableCustomAnalyzer(components);
124-
} else {
125-
return new CustomAnalyzer(components, GLOBAL_REUSE_STRATEGY);
126-
}
128+
TokenStream result = in;
129+
for (TokenFilterFactory filter : tokenFilters) {
130+
result = filter.normalize(result);
131+
}
132+
return result;
127133
}
128134
}

server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.elasticsearch.index.analysis;
2121

22+
import org.apache.lucene.analysis.Analyzer;
2223
import org.elasticsearch.common.settings.Settings;
2324
import org.elasticsearch.index.IndexSettings;
2425
import org.elasticsearch.index.mapper.TextFieldMapper;
@@ -31,11 +32,11 @@
3132
* A custom analyzer that is built out of a single {@link org.apache.lucene.analysis.Tokenizer} and a list
3233
* of {@link org.apache.lucene.analysis.TokenFilter}s.
3334
*/
34-
public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider<CustomAnalyzer> {
35+
public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider<Analyzer> {
3536

3637
private final Settings analyzerSettings;
3738

38-
private CustomAnalyzer customAnalyzer;
39+
private Analyzer customAnalyzer;
3940

4041
public CustomAnalyzerProvider(IndexSettings indexSettings,
4142
String name, Settings settings) {
@@ -46,8 +47,23 @@ public CustomAnalyzerProvider(IndexSettings indexSettings,
4647
void build(final Map<String, TokenizerFactory> tokenizers,
4748
final Map<String, CharFilterFactory> charFilters,
4849
final Map<String, TokenFilterFactory> tokenFilters) {
49-
AnalyzerComponents components = createComponents(name(), analyzerSettings, tokenizers, charFilters, tokenFilters);
50-
customAnalyzer = CustomAnalyzer.create(components);
50+
customAnalyzer = create(name(), analyzerSettings, tokenizers, charFilters, tokenFilters);
51+
}
52+
53+
/**
54+
* Factory method that either returns a plain {@link CustomAnalyzer} if the components used for creation are supporting index
55+
* and search time use, or a {@link ReloadableCustomAnalyzer} if the components are intended for search time use only.
56+
*/
57+
private static Analyzer create(String name, Settings analyzerSettings, Map<String, TokenizerFactory> tokenizers,
58+
Map<String, CharFilterFactory> charFilters,
59+
Map<String, TokenFilterFactory> tokenFilters) {
60+
AnalyzerComponents components = createComponents(name, analyzerSettings, tokenizers, charFilters, tokenFilters);
61+
if (components.analysisMode().equals(AnalysisMode.SEARCH_TIME)) {
62+
return new ReloadableCustomAnalyzer(components);
63+
} else {
64+
return new CustomAnalyzer(components.getTokenizerName(), components.getTokenizerFactory(), components.getCharFilters(),
65+
components.getTokenFilters(), components.getPositionIncrementGap(), components.getOffsetGap());
66+
}
5167
}
5268

5369
static AnalyzerComponents createComponents(String name, Settings analyzerSettings,
@@ -103,7 +119,7 @@ static AnalyzerComponents createComponents(String name, Settings analyzerSetting
103119
}
104120

105121
@Override
106-
public CustomAnalyzer get() {
122+
public Analyzer get() {
107123
return this.customAnalyzer;
108124
}
109125

@@ -114,6 +130,7 @@ public static class AnalyzerComponents {
114130
private final TokenFilterFactory[] tokenFilters;
115131
private final int positionIncrementGap;
116132
private final int offsetGap;
133+
private final AnalysisMode analysisMode;
117134

118135
AnalyzerComponents(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters,
119136
TokenFilterFactory[] tokenFilters,
@@ -124,6 +141,11 @@ public static class AnalyzerComponents {
124141
this.tokenFilters = tokenFilters;
125142
this.positionIncrementGap = positionIncrementGap;
126143
this.offsetGap = offsetGap;
144+
AnalysisMode mode = AnalysisMode.ALL;
145+
for (TokenFilterFactory f : tokenFilters) {
146+
mode = mode.merge(f.getAnalysisMode());
147+
}
148+
this.analysisMode = mode;
127149
}
128150

129151
public String getTokenizerName() {
@@ -149,5 +171,9 @@ public int getPositionIncrementGap() {
149171
public int getOffsetGap() {
150172
return offsetGap;
151173
}
174+
175+
public AnalysisMode analysisMode() {
176+
return this.analysisMode;
177+
}
152178
}
153179
}

server/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
import org.apache.lucene.analysis.Analyzer;
2323
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
24-
import org.elasticsearch.index.analysis.CustomAnalyzerProvider.AnalyzerComponents;
2524
import org.elasticsearch.index.mapper.MapperException;
2625

2726
import java.util.ArrayList;
@@ -113,9 +112,8 @@ public void checkAllowedInMode(AnalysisMode mode) {
113112
return; // everything allowed if this analyzer is in ALL mode
114113
}
115114
if (this.getAnalysisMode() != mode) {
116-
if (analyzer instanceof CustomAnalyzer) {
117-
AnalyzerComponents components = ((CustomAnalyzer) analyzer).getComponents();
118-
TokenFilterFactory[] tokenFilters = components.getTokenFilters();
115+
if (analyzer instanceof TokenFilterComposite) {
116+
TokenFilterFactory[] tokenFilters = ((TokenFilterComposite) analyzer).tokenFilters();
119117
List<String> offendingFilters = new ArrayList<>();
120118
for (TokenFilterFactory tokenFilter : tokenFilters) {
121119
if (tokenFilter.getAnalysisMode() != mode) {

0 commit comments

Comments
 (0)