diff --git a/docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc index f04cea237fa99..6a1ef08c0b12d 100644 --- a/docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/lowercase-tokenfilter.asciidoc @@ -1,25 +1,135 @@ [[analysis-lowercase-tokenfilter]] -=== Lowercase Token Filter +=== Lowercase token filter +++++ +Lowercase +++++ -A token filter of type `lowercase` that normalizes token text to lower -case. +Changes token text to lowercase. For example, you can use the `lowercase` filter +to change `THE Lazy DoG` to `the lazy dog`. -Lowercase token filter supports Greek, Irish, and Turkish lowercase token -filters through the `language` parameter. Below is a usage example in a -custom analyzer +In addition to a default filter, the `lowercase` token filter provides access to +Lucene's language-specific lowercase filters for Greek, Irish, and Turkish. 
+ +[[analysis-lowercase-tokenfilter-analyze-ex]] +==== Example + +The following <<indices-analyze,analyze API>> request uses the default +`lowercase` filter to change `THE Quick FoX JUMPs` to lowercase: + +[source,console] +-------------------------------------------------- +GET _analyze +{ + "tokenizer" : "standard", + "filter" : ["lowercase"], + "text" : "THE Quick FoX JUMPs" +} +-------------------------------------------------- + +The filter produces the following tokens: + +[source,text] +-------------------------------------------------- +[ the, quick, fox, jumps ] +-------------------------------------------------- + +///////////////////// +[source,console-result] +-------------------------------------------------- +{ + "tokens" : [ + { + "token" : "the", + "start_offset" : 0, + "end_offset" : 3, + "type" : "<ALPHANUM>", + "position" : 0 + }, + { + "token" : "quick", + "start_offset" : 4, + "end_offset" : 9, + "type" : "<ALPHANUM>", + "position" : 1 + }, + { + "token" : "fox", + "start_offset" : 10, + "end_offset" : 13, + "type" : "<ALPHANUM>", + "position" : 2 + }, + { + "token" : "jumps", + "start_offset" : 14, + "end_offset" : 19, + "type" : "<ALPHANUM>", + "position" : 3 + } + ] +} +-------------------------------------------------- +///////////////////// + +[[analysis-lowercase-tokenfilter-analyzer-ex]] +==== Add to an analyzer + +The following <<indices-create-index,create index API>> request uses the +`lowercase` filter to configure a new +<<analysis-custom-analyzer,custom analyzer>>. [source,console] -------------------------------------------------- -PUT /lowercase_example +PUT lowercase_example +{ + "settings" : { + "analysis" : { + "analyzer" : { + "whitespace_lowercase" : { + "tokenizer" : "whitespace", + "filter" : ["lowercase"] + } + } + } + } +} +-------------------------------------------------- + +[[analysis-lowercase-tokenfilter-configure-parms]] +==== Configurable parameters + +`language`:: ++ +-- +(Optional, string) +Language-specific lowercase token filter to use. 
Valid values include: + +`greek`::: Uses Lucene's https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/el/GreekLowerCaseFilter.html[GreekLowerCaseFilter] + +`irish`::: Uses Lucene's https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.html[IrishLowerCaseFilter] + +`turkish`::: Uses Lucene's https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.html[TurkishLowerCaseFilter] + +If not specified, defaults to Lucene's https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/core/LowerCaseFilter.html[LowerCaseFilter]. +-- + +[[analysis-lowercase-tokenfilter-customize]] +==== Customize + +To customize the `lowercase` filter, duplicate it to create the basis +for a new custom token filter. You can modify the filter using its configurable +parameters. + +For example, the following request creates a custom `lowercase` filter for the +Greek language: + +[source,console] +-------------------------------------------------- +PUT custom_lowercase_example { "settings": { "analysis": { "analyzer": { - "standard_lowercase_example": { - "type": "custom", - "tokenizer": "standard", - "filter": ["lowercase"] - }, "greek_lowercase_example": { "type": "custom", "tokenizer": "standard",