From aa449b4a9c7ee756ee62ff6d1b5a4942830d946e Mon Sep 17 00:00:00 2001 From: Tudor Golubenco Date: Wed, 5 Oct 2016 20:38:35 +0200 Subject: [PATCH] Set strings to be keywords by default (#2688) This adds a dynamic mapping to all our template files to set strings to be keywords by default. Previously we were using the default of `text` and only switching to `keyword` when configured in `fields.yml`. This reverses the logic and sets mappings to `text` only when requested in `fields.yml`. The goal is to make upgrading the mapping template less painful. In the Beats we have so far, unexpected fields are better off as keywords. --- CHANGELOG.asciidoc | 2 + filebeat/filebeat.template-es2x.json | 5 +-- filebeat/filebeat.template.json | 5 +-- libbeat/scripts/generate_template.py | 34 +++++++++++++--- metricbeat/metricbeat.template-es2x.json | 5 +-- metricbeat/metricbeat.template.json | 5 +-- packetbeat/packetbeat.template-es2x.json | 49 +----------------------- packetbeat/packetbeat.template.json | 45 +--------------------- winlogbeat/winlogbeat.template-es2x.json | 27 +------------ winlogbeat/winlogbeat.template.json | 25 +----------- 10 files changed, 46 insertions(+), 156 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index d2802d862fa..d6815727c3a 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -14,6 +14,8 @@ https://github.com/elastic/beats/compare/v5.0.0-beta1...master[Check the HEAD di *Affecting all Beats* +- A dynamic mapping rule is added to the default Elasticsearch template to treat strings as keywords by default. 
{pull}2688[2688] + *Metricbeat* *Packetbeat* diff --git a/filebeat/filebeat.template-es2x.json b/filebeat/filebeat.template-es2x.json index 6154762cc0f..b7845d4ccaa 100644 --- a/filebeat/filebeat.template-es2x.json +++ b/filebeat/filebeat.template-es2x.json @@ -11,14 +11,13 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "index": "not_analyzed", "type": "string" }, - "match_mapping_type": "string", - "path_match": "fields.*" + "match_mapping_type": "string" } } ], diff --git a/filebeat/filebeat.template.json b/filebeat/filebeat.template.json index fec7a9212d3..7ba1fa901b1 100644 --- a/filebeat/filebeat.template.json +++ b/filebeat/filebeat.template.json @@ -9,13 +9,12 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "type": "keyword" }, - "match_mapping_type": "string", - "path_match": "fields.*" + "match_mapping_type": "string" } } ], diff --git a/libbeat/scripts/generate_template.py b/libbeat/scripts/generate_template.py index 5dc4cd34af9..cc4b22245a9 100644 --- a/libbeat/scripts/generate_template.py +++ b/libbeat/scripts/generate_template.py @@ -68,6 +68,30 @@ def fields_to_es_template(args, input, output, index, version): properties = {} dynamic_templates = [] + + # Make strings keywords by default + if args.es2x: + dynamic_templates.append({ + "strings_as_keyword": { + "mapping": { + "type": "string", + "index": "not_analyzed", + "ignore_above": 1024 + }, + "match_mapping_type": "string", + } + }) + else: + dynamic_templates.append({ + "strings_as_keyword": { + "mapping": { + "type": "keyword", + "ignore_above": 1024 + }, + "match_mapping_type": "string", + } + }) + for section in docs["fields"]: prop, dynamic = fill_section_properties(args, section, defaults, "") @@ -200,9 +224,9 @@ def fill_field_properties(args, field, defaults, path): field.get("scaling_factor", 1000) elif field["type"] in ["dict", "list"]: - if field.get("dict-type") == 
"keyword": + if field.get("dict-type") == "text": # add a dynamic template to set all members of - # the dict as keywords + # the dict as text if len(path) > 0: name = path + "." + field["name"] else: @@ -213,8 +237,7 @@ def fill_field_properties(args, field, defaults, path): name: { "mapping": { "type": "string", - "index": "not_analyzed", - "ignore_above": 1024 + "index": "analyzed", }, "match_mapping_type": "string", "path_match": name + ".*" @@ -224,8 +247,7 @@ def fill_field_properties(args, field, defaults, path): dynamic_templates.append({ name: { "mapping": { - "type": "keyword", - "ignore_above": 1024 + "type": "text", }, "match_mapping_type": "string", "path_match": name + ".*" diff --git a/metricbeat/metricbeat.template-es2x.json b/metricbeat/metricbeat.template-es2x.json index 683f618c3cc..236374f732b 100644 --- a/metricbeat/metricbeat.template-es2x.json +++ b/metricbeat/metricbeat.template-es2x.json @@ -11,14 +11,13 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "index": "not_analyzed", "type": "string" }, - "match_mapping_type": "string", - "path_match": "fields.*" + "match_mapping_type": "string" } } ], diff --git a/metricbeat/metricbeat.template.json b/metricbeat/metricbeat.template.json index 9cfa87ac796..3d56007e247 100644 --- a/metricbeat/metricbeat.template.json +++ b/metricbeat/metricbeat.template.json @@ -9,13 +9,12 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "type": "keyword" }, - "match_mapping_type": "string", - "path_match": "fields.*" + "match_mapping_type": "string" } } ], diff --git a/packetbeat/packetbeat.template-es2x.json b/packetbeat/packetbeat.template-es2x.json index 2b1737a7dfd..94e0e518515 100644 --- a/packetbeat/packetbeat.template-es2x.json +++ b/packetbeat/packetbeat.template-es2x.json @@ -11,58 +11,13 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, 
"index": "not_analyzed", "type": "string" }, - "match_mapping_type": "string", - "path_match": "fields.*" - } - }, - { - "amqp.headers": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "amqp.headers.*" - } - }, - { - "cassandra.response.supported": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "cassandra.response.supported.*" - } - }, - { - "http.request.headers": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "http.request.headers.*" - } - }, - { - "http.response.headers": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "http.response.headers.*" + "match_mapping_type": "string" } } ], diff --git a/packetbeat/packetbeat.template.json b/packetbeat/packetbeat.template.json index 45150d79873..096d09fb5f3 100644 --- a/packetbeat/packetbeat.template.json +++ b/packetbeat/packetbeat.template.json @@ -9,53 +9,12 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "type": "keyword" }, - "match_mapping_type": "string", - "path_match": "fields.*" - } - }, - { - "amqp.headers": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "amqp.headers.*" - } - }, - { - "cassandra.response.supported": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "cassandra.response.supported.*" - } - }, - { - "http.request.headers": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "http.request.headers.*" - } - }, - { - "http.response.headers": { - "mapping": { - "ignore_above": 
1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "http.response.headers.*" + "match_mapping_type": "string" } } ], diff --git a/winlogbeat/winlogbeat.template-es2x.json b/winlogbeat/winlogbeat.template-es2x.json index 418ffa3d504..e8873db69c7 100644 --- a/winlogbeat/winlogbeat.template-es2x.json +++ b/winlogbeat/winlogbeat.template-es2x.json @@ -11,36 +11,13 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "index": "not_analyzed", "type": "string" }, - "match_mapping_type": "string", - "path_match": "fields.*" - } - }, - { - "event_data": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "event_data.*" - } - }, - { - "user_data": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "user_data.*" + "match_mapping_type": "string" } } ], diff --git a/winlogbeat/winlogbeat.template.json b/winlogbeat/winlogbeat.template.json index 79a020903f8..ea0fb6e8058 100644 --- a/winlogbeat/winlogbeat.template.json +++ b/winlogbeat/winlogbeat.template.json @@ -9,33 +9,12 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "type": "keyword" }, - "match_mapping_type": "string", - "path_match": "fields.*" - } - }, - { - "event_data": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "event_data.*" - } - }, - { - "user_data": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "user_data.*" + "match_mapping_type": "string" } } ],