elastic · webmat · Jan 6, 2021 · Sep 28, 2020 · Sep 29, 2020 · Sep 29, 2020
diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md
@@ -41,6 +41,7 @@ Thanks, you're awesome :-) -->
 * Introduced `--strict` flag to perform stricter schema validation when running the generator script. #937
 * Added check under `--strict` that ensures composite types in example fields are quoted. #966
 * Added `ignore_above` and `normalizer` support for keyword multi-fields. #971
+* Added functionality for merging custom and core multi-fields. #982
 
 #### Improvements
 

diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py
@@ -171,6 +171,26 @@ def nest_fields(field_array):
     return schema_root
 
 
+def array_of_dicts_to_set(array_vals):
+    """Convert an iterable of dicts into a set of frozensets.
+
+    Each dict is represented by a ``frozenset`` of its ``(key, value)`` pairs,
+    so dicts with identical contents collapse into a single element. Every
+    dict value must be hashable, otherwise ``frozenset`` raises ``TypeError``.
+    """
+    return {frozenset(entry.items()) for entry in array_vals}
+
+
+def set_of_sets_to_array(set_vals):
+    """Convert an iterable of ``(key, value)`` pair collections back into dicts.
+
+    Returns a list of dicts sorted by each dict's ``'name'`` value. Raises
+    ``KeyError`` if any resulting dict has no ``'name'`` key.
+    """
+    as_dicts = [dict(pairs) for pairs in set_vals]
+    as_dicts.sort(key=lambda item: item['name'])
+    return as_dicts
+
+
+def dedup_and_merge_lists(list_a, list_b):
+    """Merge two collections of multi-field dicts, de-duplicating by ``name``.
+
+    Args:
+        list_a: Iterable of dicts, each with a ``'name'`` key (may be empty).
+        list_b: Iterable of dicts, each with a ``'name'`` key (may be empty).
+            When both inputs define the same name, the entry from ``list_b``
+            replaces the one from ``list_a``.
+
+    Returns:
+        A new list of shallow-copied dicts, one per distinct name, sorted by
+        name.
+
+    Raises:
+        KeyError: If any entry has no ``'name'`` key.
+    """
+    # Key on the name rather than on the whole dict: comparing full contents
+    # would keep two entries that share a name but differ in another attribute,
+    # and would require every attribute value to be hashable.
+    merged_by_name = {}
+    for source in (list_a, list_b):
+        for entry in source:
+            merged_by_name[entry['name']] = dict(entry)
+    return sorted(merged_by_name.values(), key=lambda entry: entry['name'])
+
+
 def merge_fields(a, b):
     """Merge ECS field sets with custom field sets."""
     a = copy.deepcopy(a)
@@ -184,6 +204,14 @@ def merge_fields(a, b):
             a[key].setdefault('field_details', {})
             a[key]['field_details'].setdefault('normalize', [])
             a[key]['field_details']['normalize'].extend(b[key]['field_details'].pop('normalize'))
+        if 'multi_fields' in b[key]['field_details']:
+            a[key].setdefault('field_details', {})
+            a[key]['field_details'].setdefault('multi_fields', set())
+            a[key]['field_details']['multi_fields'] = dedup_and_merge_lists(
+                a[key]['field_details']['multi_fields'], b[key]['field_details']['multi_fields'])
+            # Remove multi_fields from b's field_details; otherwise the update() call below would
+            # overwrite a's merged multi_fields with b's original list and undo the merge.
+            del b[key]['field_details']['multi_fields']
         a[key]['field_details'].update(b[key]['field_details'])
         # merge schema details
         if 'schema_details' in b[key]:

diff --git a/scripts/tests/unit/test_schema_loader.py b/scripts/tests/unit/test_schema_loader.py
@@ -594,6 +594,96 @@ def test_merge_non_array_attributes(self):
         }
         self.assertEqual(merged_fields, expected_fields)
 
+    def test_merge_multi_fields(self):
+        """Multi-fields from both schemas are unioned, de-duplicated and sorted by name."""
+        core_schema = {
+            'base': {
+                'field_details': {
+                    'multi_fields': [
+                        {'type': 'text', 'name': 'text'},
+                        {'type': 'keyword', 'name': 'caseless', 'normalizer': 'lowercase'},
+                    ]
+                },
+                'fields': {
+                    'message': {
+                        'field_details': {
+                            'multi_fields': [
+                                {'type': 'text', 'name': 'text'},
+                            ]
+                        }
+                    }
+                }
+            }
+        }
+
+        custom_schema = {
+            'base': {
+                'field_details': {
+                    'multi_fields': [
+                        {'type': 'text', 'name': 'text'},
+                        {'type': 'text', 'name': 'almost_text'},
+                    ]
+                },
+                'fields': {
+                    'message': {
+                        'field_details': {
+                            'multi_fields': [
+                                {'type': 'keyword', 'name': 'a_field'},
+                            ]
+                        }
+                    }
+                }
+            }
+        }
+
+        merged = loader.merge_fields(core_schema, custom_schema)
+
+        # The shared 'text' entry appears once; entries are ordered by name.
+        base_expected = [
+            {'type': 'text', 'name': 'almost_text'},
+            {'type': 'keyword', 'name': 'caseless', 'normalizer': 'lowercase'},
+            {'type': 'text', 'name': 'text'},
+        ]
+        message_expected = [
+            {'type': 'keyword', 'name': 'a_field'},
+            {'type': 'text', 'name': 'text'},
+        ]
+
+        base_actual = merged['base']['field_details']['multi_fields']
+        self.assertEqual(base_actual, base_expected)
+
+        message_actual = merged['base']['fields']['message']['field_details']['multi_fields']
+        self.assertEqual(message_actual, message_expected)
+
 
 if __name__ == '__main__':
     unittest.main()