docs/reference/analysis/tokenfilters.asciidoc (6 changes: 4 additions & 2 deletions)

@@ -22,14 +22,14 @@ include::tokenfilters/classic-tokenfilter.asciidoc[]

include::tokenfilters/common-grams-tokenfilter.asciidoc[]

-include::tokenfilters/compound-word-tokenfilter.asciidoc[]

include::tokenfilters/condition-tokenfilter.asciidoc[]

include::tokenfilters/decimal-digit-tokenfilter.asciidoc[]

include::tokenfilters/delimited-payload-tokenfilter.asciidoc[]

+include::tokenfilters/dictionary-decompounder-tokenfilter.asciidoc[]

include::tokenfilters/edgengram-tokenfilter.asciidoc[]

include::tokenfilters/elision-tokenfilter.asciidoc[]
@@ -40,6 +40,8 @@ include::tokenfilters/flatten-graph-tokenfilter.asciidoc[]

include::tokenfilters/hunspell-tokenfilter.asciidoc[]

+include::tokenfilters/hyphenation-decompounder-tokenfilter.asciidoc[]

include::tokenfilters/keep-types-tokenfilter.asciidoc[]

include::tokenfilters/keep-words-tokenfilter.asciidoc[]

docs/reference/analysis/tokenfilters/compound-word-tokenfilter.asciidoc: This file was deleted.

docs/reference/analysis/tokenfilters/dictionary-decompounder-tokenfilter.asciidoc
@@ -0,0 +1,173 @@
[[analysis-dict-decomp-tokenfilter]]
=== Dictionary decompounder token filter
++++
<titleabbrev>Dictionary decompounder</titleabbrev>
++++

[NOTE]
====
In most cases, we recommend using the faster
<<analysis-hyp-decomp-tokenfilter,`hyphenation_decompounder`>> token filter
in place of this filter. However, you can use the
`dictionary_decompounder` filter to check the quality of a word list before
implementing it in the `hyphenation_decompounder` filter.
====

Uses a specified list of words and a brute force approach to find subwords in
compound words. If found, these subwords are included in the token output.

This filter uses Lucene's
https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.html[DictionaryCompoundWordTokenFilter],
which was built for Germanic languages.

[[analysis-dict-decomp-tokenfilter-analyze-ex]]
==== Example

The following <<indices-analyze,analyze API>> request uses the
`dictionary_decompounder` filter to find subwords in `Donaudampfschiff`. The
filter then checks these subwords against the specified list of words: `Donau`,
`dampf`, `meer`, and `schiff`.

[source,console]
--------------------------------------------------
GET _analyze
{
  "tokenizer": "standard",
  "filter": [
    {
      "type": "dictionary_decompounder",
      "word_list": ["Donau", "dampf", "meer", "schiff"]
    }
  ],
  "text": "Donaudampfschiff"
}
--------------------------------------------------

The filter produces the following tokens:

[source,text]
--------------------------------------------------
[ Donaudampfschiff, Donau, dampf, schiff ]
--------------------------------------------------

/////////////////////
[source,console-result]
--------------------------------------------------
{
  "tokens" : [
    {
      "token" : "Donaudampfschiff",
      "start_offset" : 0,
      "end_offset" : 16,
      "type" : "<ALPHANUM>",
      "position" : 0
    },
    {
      "token" : "Donau",
      "start_offset" : 0,
      "end_offset" : 16,
      "type" : "<ALPHANUM>",
      "position" : 0
    },
    {
      "token" : "dampf",
      "start_offset" : 0,
      "end_offset" : 16,
      "type" : "<ALPHANUM>",
      "position" : 0
    },
    {
      "token" : "schiff",
      "start_offset" : 0,
      "end_offset" : 16,
      "type" : "<ALPHANUM>",
      "position" : 0
    }
  ]
}
--------------------------------------------------
/////////////////////

[[analysis-dict-decomp-tokenfilter-configure-parms]]
==== Configurable parameters

`word_list`::
+
--
(Required+++*+++, array of strings)
A list of subwords to look for in the token stream. If found, the subword is
included in the token output.

Either this parameter or `word_list_path` must be specified.
--

`word_list_path`::
+
--
(Required+++*+++, string)
Path to a file that contains a list of subwords to find in the token stream. If
found, the subword is included in the token output.

This path must be absolute or relative to the `config` location, and the file
must be UTF-8 encoded. Each token in the file must be separated by a line
break, as in the sketch after this parameter list.

Either this parameter or `word_list` must be specified.
--

`max_subword_size`::
(Optional, integer)
Maximum subword character length. Longer subword tokens are excluded from the
output. Defaults to `15`.

`min_subword_size`::
(Optional, integer)
Minimum subword character length. Shorter subword tokens are excluded from the
output. Defaults to `2`.

`min_word_size`::
(Optional, integer)
Minimum word character length. Shorter word tokens are excluded from the
output. Defaults to `5`.

`only_longest_match`::
(Optional, boolean)
If `true`, only include the longest matching subword. Defaults to `false`.
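
As a sketch of the expected `word_list_path` file format, an
`analysis/example_word_list.txt` file covering the words from the earlier
example would contain one word per line. The file name matches the custom
analyzer example later on this page; the contents are hypothetical:

[source,text]
--------------------------------------------------
Donau
dampf
meer
schiff
--------------------------------------------------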
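
The size parameters work together with the word list. As a sketch, the
following variation of the earlier analyze request raises `min_subword_size`
to `6`. Because `Donau` and `dampf` are only five characters long, only
`schiff` should remain in the subword output:

[source,console]
--------------------------------------------------
GET _analyze
{
  "tokenizer": "standard",
  "filter": [
    {
      "type": "dictionary_decompounder",
      "word_list": ["Donau", "dampf", "meer", "schiff"],
      "min_subword_size": 6
    }
  ],
  "text": "Donaudampfschiff"
}
--------------------------------------------------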

[[analysis-dict-decomp-tokenfilter-customize]]
==== Customize and add to an analyzer

To customize the `dictionary_decompounder` filter, duplicate it to create the
basis for a new custom token filter. You can modify the filter using its
configurable parameters.

For example, the following <<indices-create-index,create index API>> request
uses a custom `dictionary_decompounder` filter to configure a new
<<analysis-custom-analyzer,custom analyzer>>.

The custom `dictionary_decompounder` filter finds subwords listed in the
`analysis/example_word_list.txt` file. Subwords longer than 22 characters are
excluded from the token output.

[source,console]
--------------------------------------------------
PUT dictionary_decompound_example
{
  "settings": {
    "analysis": {
      "analyzer": {
        "standard_dictionary_decompound": {
          "tokenizer": "standard",
          "filter": [ "22_char_dictionary_decompound" ]
        }
      },
      "filter": {
        "22_char_dictionary_decompound": {
          "type": "dictionary_decompounder",
          "word_list_path": "analysis/example_word_list.txt",
          "max_subword_size": 22
        }
      }
    }
  }
}
--------------------------------------------------
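
After creating the index, you can verify the analyzer with the
<<indices-analyze,analyze API>>. The following sketch assumes an
`analysis/example_word_list.txt` file exists in the config directory of each
node and contains the subwords you expect to match:

[source,console]
--------------------------------------------------
GET dictionary_decompound_example/_analyze
{
  "analyzer": "standard_dictionary_decompound",
  "text": "Donaudampfschiff"
}
--------------------------------------------------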