From f4148699310012f22e894a66b6ceb73d205f8049 Mon Sep 17 00:00:00 2001 From: Tudor Golubenco Date: Wed, 5 Oct 2016 13:54:22 +0200 Subject: [PATCH] Set strings to be keywords by default This adds a dynamic mapping to all our template files to set strings to be keywords by default. Previously we were using the default of `text` and only switching to `keyword` when configured in `fields.yml`. This reverses the logic and sets mappings to `text` only when requested in `fields.yml`. The goal is to make upgrading the mapping template less painful. In the Beats we have so far, unexpected fields are better off as keywords. --- CHANGELOG.asciidoc | 2 + filebeat/filebeat.template-es2x.json | 5 +-- filebeat/filebeat.template.json | 5 +-- libbeat/scripts/generate_template.py | 34 +++++++++++++--- metricbeat/metricbeat.template-es2x.json | 5 +-- metricbeat/metricbeat.template.json | 5 +-- packetbeat/packetbeat.template-es2x.json | 49 +----------------------- packetbeat/packetbeat.template.json | 45 +--------------------- winlogbeat/winlogbeat.template-es2x.json | 27 +------------ winlogbeat/winlogbeat.template.json | 25 +----------- 10 files changed, 46 insertions(+), 156 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 0f8a30f735f..b18f82c01ca 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -14,6 +14,8 @@ https://github.com/elastic/beats/compare/v5.0.0-beta1...master[Check the HEAD di *Affecting all Beats* +- A dynamic mapping rule is added to the default Elasticsearch template to treat strings as keywords by default. 
{pull}2688[2688] + *Metricbeat* *Packetbeat* diff --git a/filebeat/filebeat.template-es2x.json b/filebeat/filebeat.template-es2x.json index 91d779555eb..f3f4932736e 100644 --- a/filebeat/filebeat.template-es2x.json +++ b/filebeat/filebeat.template-es2x.json @@ -11,14 +11,13 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "index": "not_analyzed", "type": "string" }, - "match_mapping_type": "string", - "path_match": "fields.*" + "match_mapping_type": "string" } } ], diff --git a/filebeat/filebeat.template.json b/filebeat/filebeat.template.json index 3bf6c46280a..f4204f549d8 100644 --- a/filebeat/filebeat.template.json +++ b/filebeat/filebeat.template.json @@ -9,13 +9,12 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "type": "keyword" }, - "match_mapping_type": "string", - "path_match": "fields.*" + "match_mapping_type": "string" } } ], diff --git a/libbeat/scripts/generate_template.py b/libbeat/scripts/generate_template.py index 5dc4cd34af9..cc4b22245a9 100644 --- a/libbeat/scripts/generate_template.py +++ b/libbeat/scripts/generate_template.py @@ -68,6 +68,30 @@ def fields_to_es_template(args, input, output, index, version): properties = {} dynamic_templates = [] + + # Make strings keywords by default + if args.es2x: + dynamic_templates.append({ + "strings_as_keyword": { + "mapping": { + "type": "string", + "index": "not_analyzed", + "ignore_above": 1024 + }, + "match_mapping_type": "string", + } + }) + else: + dynamic_templates.append({ + "strings_as_keyword": { + "mapping": { + "type": "keyword", + "ignore_above": 1024 + }, + "match_mapping_type": "string", + } + }) + for section in docs["fields"]: prop, dynamic = fill_section_properties(args, section, defaults, "") @@ -200,9 +224,9 @@ def fill_field_properties(args, field, defaults, path): field.get("scaling_factor", 1000) elif field["type"] in ["dict", "list"]: - if field.get("dict-type") == 
"keyword": + if field.get("dict-type") == "text": # add a dynamic template to set all members of - # the dict as keywords + # the dict as text if len(path) > 0: name = path + "." + field["name"] else: @@ -213,8 +237,7 @@ def fill_field_properties(args, field, defaults, path): name: { "mapping": { "type": "string", - "index": "not_analyzed", - "ignore_above": 1024 + "index": "analyzed", }, "match_mapping_type": "string", "path_match": name + ".*" @@ -224,8 +247,7 @@ def fill_field_properties(args, field, defaults, path): dynamic_templates.append({ name: { "mapping": { - "type": "keyword", - "ignore_above": 1024 + "type": "text", }, "match_mapping_type": "string", "path_match": name + ".*" diff --git a/metricbeat/metricbeat.template-es2x.json b/metricbeat/metricbeat.template-es2x.json index 7cc00cf10d4..901e9409e13 100644 --- a/metricbeat/metricbeat.template-es2x.json +++ b/metricbeat/metricbeat.template-es2x.json @@ -11,14 +11,13 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "index": "not_analyzed", "type": "string" }, - "match_mapping_type": "string", - "path_match": "fields.*" + "match_mapping_type": "string" } } ], diff --git a/metricbeat/metricbeat.template.json b/metricbeat/metricbeat.template.json index 95e31280e43..2c323bb53fb 100644 --- a/metricbeat/metricbeat.template.json +++ b/metricbeat/metricbeat.template.json @@ -9,13 +9,12 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "type": "keyword" }, - "match_mapping_type": "string", - "path_match": "fields.*" + "match_mapping_type": "string" } } ], diff --git a/packetbeat/packetbeat.template-es2x.json b/packetbeat/packetbeat.template-es2x.json index 5676c346995..89da89a66c8 100644 --- a/packetbeat/packetbeat.template-es2x.json +++ b/packetbeat/packetbeat.template-es2x.json @@ -11,58 +11,13 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, 
"index": "not_analyzed", "type": "string" }, - "match_mapping_type": "string", - "path_match": "fields.*" - } - }, - { - "amqp.headers": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "amqp.headers.*" - } - }, - { - "cassandra.response.supported": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "cassandra.response.supported.*" - } - }, - { - "http.request.headers": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "http.request.headers.*" - } - }, - { - "http.response.headers": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "http.response.headers.*" + "match_mapping_type": "string" } } ], diff --git a/packetbeat/packetbeat.template.json b/packetbeat/packetbeat.template.json index 24269575f35..af72411fc85 100644 --- a/packetbeat/packetbeat.template.json +++ b/packetbeat/packetbeat.template.json @@ -9,53 +9,12 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "type": "keyword" }, - "match_mapping_type": "string", - "path_match": "fields.*" - } - }, - { - "amqp.headers": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "amqp.headers.*" - } - }, - { - "cassandra.response.supported": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "cassandra.response.supported.*" - } - }, - { - "http.request.headers": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "http.request.headers.*" - } - }, - { - "http.response.headers": { - "mapping": { - "ignore_above": 
1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "http.response.headers.*" + "match_mapping_type": "string" } } ], diff --git a/winlogbeat/winlogbeat.template-es2x.json b/winlogbeat/winlogbeat.template-es2x.json index 206f5a9caba..43439b762da 100644 --- a/winlogbeat/winlogbeat.template-es2x.json +++ b/winlogbeat/winlogbeat.template-es2x.json @@ -11,36 +11,13 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "index": "not_analyzed", "type": "string" }, - "match_mapping_type": "string", - "path_match": "fields.*" - } - }, - { - "event_data": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "event_data.*" - } - }, - { - "user_data": { - "mapping": { - "ignore_above": 1024, - "index": "not_analyzed", - "type": "string" - }, - "match_mapping_type": "string", - "path_match": "user_data.*" + "match_mapping_type": "string" } } ], diff --git a/winlogbeat/winlogbeat.template.json b/winlogbeat/winlogbeat.template.json index d7643fe550e..9bdfd91b6b1 100644 --- a/winlogbeat/winlogbeat.template.json +++ b/winlogbeat/winlogbeat.template.json @@ -9,33 +9,12 @@ }, "dynamic_templates": [ { - "fields": { + "strings_as_keyword": { "mapping": { "ignore_above": 1024, "type": "keyword" }, - "match_mapping_type": "string", - "path_match": "fields.*" - } - }, - { - "event_data": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "event_data.*" - } - }, - { - "user_data": { - "mapping": { - "ignore_above": 1024, - "type": "keyword" - }, - "match_mapping_type": "string", - "path_match": "user_data.*" + "match_mapping_type": "string" } } ],