101101import org .apache .lucene .analysis .tr .TurkishAnalyzer ;
102102import org .apache .lucene .analysis .util .ElisionFilter ;
103103import org .apache .lucene .util .SetOnce ;
104- import org .elasticsearch .common .logging .DeprecationCategory ;
105- import org .elasticsearch .common .logging .DeprecationLogger ;
106104import org .elasticsearch .common .regex .Regex ;
107- import org .elasticsearch .common .settings .Settings ;
108- import org .elasticsearch .env .Environment ;
109- import org .elasticsearch .index .IndexSettings ;
110105import org .elasticsearch .index .IndexVersions ;
111106import org .elasticsearch .index .analysis .AnalyzerProvider ;
112107import org .elasticsearch .index .analysis .CharFilterFactory ;
139134
140135public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin , ScriptPlugin {
141136
142- private static final DeprecationLogger deprecationLogger = DeprecationLogger .getLogger (CommonAnalysisPlugin .class );
143-
144137 private final SetOnce <ScriptService > scriptServiceHolder = new SetOnce <>();
145138 private final SetOnce <SynonymsManagementAPIService > synonymsManagementServiceHolder = new SetOnce <>();
146139
@@ -231,28 +224,6 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
231224 filters .put ("dictionary_decompounder" , requiresAnalysisSettings (DictionaryCompoundWordTokenFilterFactory ::new ));
232225 filters .put ("dutch_stem" , DutchStemTokenFilterFactory ::new );
233226 filters .put ("edge_ngram" , EdgeNGramTokenFilterFactory ::new );
234- filters .put ("edgeNGram" , (IndexSettings indexSettings , Environment environment , String name , Settings settings ) -> {
235- return new EdgeNGramTokenFilterFactory (indexSettings , environment , name , settings ) {
236- @ Override
237- public TokenStream create (TokenStream tokenStream ) {
238- if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_8_0_0 )) {
239- throw new IllegalArgumentException (
240- "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
241- + "Please change the filter name to [edge_ngram] instead."
242- );
243- } else {
244- deprecationLogger .warn (
245- DeprecationCategory .ANALYSIS ,
246- "edgeNGram_deprecation" ,
247- "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
248- + "Please change the filter name to [edge_ngram] instead."
249- );
250- }
251- return super .create (tokenStream );
252- }
253-
254- };
255- });
256227 filters .put ("elision" , requiresAnalysisSettings (ElisionTokenFilterFactory ::new ));
257228 filters .put ("fingerprint" , FingerprintTokenFilterFactory ::new );
258229 filters .put ("flatten_graph" , FlattenGraphTokenFilterFactory ::new );
@@ -272,28 +243,6 @@ public TokenStream create(TokenStream tokenStream) {
272243 filters .put ("min_hash" , MinHashTokenFilterFactory ::new );
273244 filters .put ("multiplexer" , MultiplexerTokenFilterFactory ::new );
274245 filters .put ("ngram" , NGramTokenFilterFactory ::new );
275- filters .put ("nGram" , (IndexSettings indexSettings , Environment environment , String name , Settings settings ) -> {
276- return new NGramTokenFilterFactory (indexSettings , environment , name , settings ) {
277- @ Override
278- public TokenStream create (TokenStream tokenStream ) {
279- if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_8_0_0 )) {
280- throw new IllegalArgumentException (
281- "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
282- + "Please change the filter name to [ngram] instead."
283- );
284- } else {
285- deprecationLogger .warn (
286- DeprecationCategory .ANALYSIS ,
287- "nGram_deprecation" ,
288- "The [nGram] token filter name is deprecated and will be removed in a future version. "
289- + "Please change the filter name to [ngram] instead."
290- );
291- }
292- return super .create (tokenStream );
293- }
294-
295- };
296- });
297246 filters .put ("pattern_capture" , requiresAnalysisSettings (PatternCaptureGroupTokenFilterFactory ::new ));
298247 filters .put ("pattern_replace" , requiresAnalysisSettings (PatternReplaceTokenFilterFactory ::new ));
299248 filters .put ("persian_normalization" , PersianNormalizationFilterFactory ::new );
@@ -345,39 +294,7 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
345294 tokenizers .put ("simple_pattern" , SimplePatternTokenizerFactory ::new );
346295 tokenizers .put ("simple_pattern_split" , SimplePatternSplitTokenizerFactory ::new );
347296 tokenizers .put ("thai" , ThaiTokenizerFactory ::new );
348- tokenizers .put ("nGram" , (IndexSettings indexSettings , Environment environment , String name , Settings settings ) -> {
349- if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_8_0_0 )) {
350- throw new IllegalArgumentException (
351- "The [nGram] tokenizer name was deprecated in 7.6. "
352- + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
353- );
354- } else if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_7_6_0 )) {
355- deprecationLogger .warn (
356- DeprecationCategory .ANALYSIS ,
357- "nGram_tokenizer_deprecation" ,
358- "The [nGram] tokenizer name is deprecated and will be removed in a future version. "
359- + "Please change the tokenizer name to [ngram] instead."
360- );
361- }
362- return new NGramTokenizerFactory (indexSettings , environment , name , settings );
363- });
364297 tokenizers .put ("ngram" , NGramTokenizerFactory ::new );
365- tokenizers .put ("edgeNGram" , (IndexSettings indexSettings , Environment environment , String name , Settings settings ) -> {
366- if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_8_0_0 )) {
367- throw new IllegalArgumentException (
368- "The [edgeNGram] tokenizer name was deprecated in 7.6. "
369- + "Please use the tokenizer name to [edge_nGram] for indices created in versions 8 or higher instead."
370- );
371- } else if (indexSettings .getIndexVersionCreated ().onOrAfter (IndexVersions .V_7_6_0 )) {
372- deprecationLogger .warn (
373- DeprecationCategory .ANALYSIS ,
374- "edgeNGram_tokenizer_deprecation" ,
375- "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
376- + "Please change the tokenizer name to [edge_ngram] instead."
377- );
378- }
379- return new EdgeNGramTokenizerFactory (indexSettings , environment , name , settings );
380- });
381298 tokenizers .put ("edge_ngram" , EdgeNGramTokenizerFactory ::new );
382299 tokenizers .put ("char_group" , CharGroupTokenizerFactory ::new );
383300 tokenizers .put ("classic" , ClassicTokenizerFactory ::new );
@@ -588,54 +505,17 @@ public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
588505 tokenizers .add (PreConfiguredTokenizer .singleton ("letter" , LetterTokenizer ::new ));
589506 tokenizers .add (PreConfiguredTokenizer .singleton ("whitespace" , WhitespaceTokenizer ::new ));
590507 tokenizers .add (PreConfiguredTokenizer .singleton ("ngram" , NGramTokenizer ::new ));
591- tokenizers .add (PreConfiguredTokenizer . indexVersion ( "edge_ngram" , ( version ) -> {
592- if ( version . onOrAfter ( IndexVersions . V_7_3_0 )) {
593- return new EdgeNGramTokenizer ( NGramTokenizer . DEFAULT_MIN_NGRAM_SIZE , NGramTokenizer . DEFAULT_MAX_NGRAM_SIZE );
594- }
595- return new EdgeNGramTokenizer ( EdgeNGramTokenizer . DEFAULT_MIN_GRAM_SIZE , EdgeNGramTokenizer . DEFAULT_MAX_GRAM_SIZE );
596- }) );
508+ tokenizers .add (
509+ PreConfiguredTokenizer . indexVersion (
510+ "edge_ngram" ,
511+ ( version ) -> new EdgeNGramTokenizer ( NGramTokenizer . DEFAULT_MIN_NGRAM_SIZE , NGramTokenizer . DEFAULT_MAX_NGRAM_SIZE )
512+ )
513+ );
597514 tokenizers .add (PreConfiguredTokenizer .singleton ("pattern" , () -> new PatternTokenizer (Regex .compile ("\\ W+" , null ), -1 )));
598515 tokenizers .add (PreConfiguredTokenizer .singleton ("thai" , ThaiTokenizer ::new ));
599516 // TODO deprecate and remove in API
600517 // This is already broken with normalization, so backwards compat isn't necessary?
601518 tokenizers .add (PreConfiguredTokenizer .singleton ("lowercase" , XLowerCaseTokenizer ::new ));
602-
603- // Temporary shim for aliases. TODO deprecate after they are moved
604- tokenizers .add (PreConfiguredTokenizer .indexVersion ("nGram" , (version ) -> {
605- if (version .onOrAfter (IndexVersions .V_8_0_0 )) {
606- throw new IllegalArgumentException (
607- "The [nGram] tokenizer name was deprecated in 7.6. "
608- + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
609- );
610- } else if (version .onOrAfter (IndexVersions .V_7_6_0 )) {
611- deprecationLogger .warn (
612- DeprecationCategory .ANALYSIS ,
613- "nGram_tokenizer_deprecation" ,
614- "The [nGram] tokenizer name is deprecated and will be removed in a future version. "
615- + "Please change the tokenizer name to [ngram] instead."
616- );
617- }
618- return new NGramTokenizer ();
619- }));
620- tokenizers .add (PreConfiguredTokenizer .indexVersion ("edgeNGram" , (version ) -> {
621- if (version .onOrAfter (IndexVersions .V_8_0_0 )) {
622- throw new IllegalArgumentException (
623- "The [edgeNGram] tokenizer name was deprecated in 7.6. "
624- + "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
625- );
626- } else if (version .onOrAfter (IndexVersions .V_7_6_0 )) {
627- deprecationLogger .warn (
628- DeprecationCategory .ANALYSIS ,
629- "edgeNGram_tokenizer_deprecation" ,
630- "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
631- + "Please change the tokenizer name to [edge_ngram] instead."
632- );
633- }
634- if (version .onOrAfter (IndexVersions .V_7_3_0 )) {
635- return new EdgeNGramTokenizer (NGramTokenizer .DEFAULT_MIN_NGRAM_SIZE , NGramTokenizer .DEFAULT_MAX_NGRAM_SIZE );
636- }
637- return new EdgeNGramTokenizer (EdgeNGramTokenizer .DEFAULT_MIN_GRAM_SIZE , EdgeNGramTokenizer .DEFAULT_MAX_GRAM_SIZE );
638- }));
639519 tokenizers .add (PreConfiguredTokenizer .singleton ("PathHierarchy" , PathHierarchyTokenizer ::new ));
640520
641521 return tokenizers ;
0 commit comments