elastic · webmat · Jan 6, 2021 · Sep 28, 2020 · Sep 29, 2020 · Sep 29, 2020
diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md
@@ -41,6 +41,7 @@ Thanks, you're awesome :-) -->
 * Introduced `--strict` flag to perform stricter schema validation when running the generator script. #937
 * Added check under `--strict` that ensures composite types in example fields are quoted. #966
 * Added `ignore_above` and `normalizer` support for keyword multi-fields. #971
+* Added functionality for merging custom and core multi-fields. #982
 
 #### Improvements
 

diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py
@@ -171,6 +171,26 @@ def nest_fields(field_array):
  return schema_root
 
 
def array_of_dicts_to_set(array_vals):
    """Turn an iterable of dicts into a set of hashable snapshots.

    Each dict is represented by a ``frozenset`` of its ``(key, value)``
    pairs, so dicts with identical contents collapse into one element.
    Every value must be hashable, otherwise ``TypeError`` is raised.
    """
    return {frozenset(entry.items()) for entry in array_vals}
+
+
def set_of_sets_to_array(set_vals):
    """Rebuild dicts from collections of ``(key, value)`` pairs.

    Each element of ``set_vals`` is passed to ``dict()``; the resulting
    dicts are returned as a new list ordered by their ``'name'`` value.
    A dict without a ``'name'`` key raises ``KeyError``.
    """
    rebuilt = [dict(pairs) for pairs in set_vals]
    rebuilt.sort(key=lambda entry: entry['name'])
    return rebuilt
+
+
def dedup_and_merge_lists(list_a, list_b):
    """Merge two lists of dicts into one list with a single entry per name.

    Entries are identified by their ``'name'`` key. When the same name
    occurs more than once, the last occurrence wins, with ``list_b``
    processed after ``list_a`` -- so a definition in ``list_b`` replaces
    the one in ``list_a`` instead of both being kept side by side.

    De-duplicating on the name (rather than on the full dict contents)
    keeps the result deterministic, guarantees names are unique in the
    output, and does not require attribute values to be hashable.

    Args:
        list_a: iterable of dicts, each with a ``'name'`` key.
        list_b: iterable of dicts, each with a ``'name'`` key; takes
            precedence over ``list_a`` on name clashes.

    Returns:
        A new list of shallow copies of the surviving dicts, sorted by
        ``'name'``. The inputs are not modified.

    Raises:
        KeyError: if any entry lacks a ``'name'`` key.
    """
    by_name = {}
    for source in (list_a, list_b):
        for entry in source:
            # Copy so callers mutating the result cannot affect the inputs.
            by_name[entry['name']] = dict(entry)
    return sorted(by_name.values(), key=lambda entry: entry['name'])
+
+
 def merge_fields(a, b):
  """Merge ECS field sets with custom field sets."""
  a = copy.deepcopy(a)
@@ -184,6 +204,14 @@ def merge_fields(a, b):
  a[key].setdefault('field_details', {})
  a[key]['field_details'].setdefault('normalize', [])
  a[key]['field_details']['normalize'].extend(b[key]['field_details'].pop('normalize'))
+ if 'multi_fields' in b[key]['field_details']:
+ a[key].setdefault('field_details', {})
+ a[key]['field_details'].setdefault('multi_fields', [])
+ a[key]['field_details']['multi_fields'] = dedup_and_merge_lists(
+ a[key]['field_details']['multi_fields'], b[key]['field_details']['multi_fields'])
+ # Drop multi_fields from b before the update() call below; otherwise update() would replace
+ # a's merged multi_fields with b's original list and undo the merge.
+ del b[key]['field_details']['multi_fields']
  a[key]['field_details'].update(b[key]['field_details'])
  # merge schema details
  if 'schema_details' in b[key]:

diff --git a/scripts/tests/unit/test_schema_loader.py b/scripts/tests/unit/test_schema_loader.py
@@ -594,6 +594,96 @@ def test_merge_non_array_attributes(self):
  }
  self.assertEqual(merged_fields, expected_fields)
 
+ def test_merge_multi_fields(self):
+ schema1 = {
+ 'base': {
+ 'field_details': {
+ 'multi_fields': [
+ {
+ 'type': 'text',
+ 'name': 'text'
+ },
+ {
+ 'type': 'keyword',
+ 'name': 'caseless',
+ 'normalizer': 'lowercase'
+ }
+ ]
+ },
+ 'fields': {
+ 'message': {
+ 'field_details': {
+ 'multi_fields': [
+ {
+ 'type': 'text',
+ 'name': 'text'
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+
+ schema2 = {
+ 'base': {
+ 'field_details': {
+ 'multi_fields': [
+ {
+ 'type': 'text',
+ 'name': 'text'
+ },
+ {
+ 'type': 'text',
+ 'name': 'almost_text',
+ }
+ ]
+ },
+ 'fields': {
+ 'message': {
+ 'field_details': {
+ 'multi_fields': [
+ {
+ 'type': 'keyword',
+ 'name': 'a_field'
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+ merged_fields = loader.merge_fields(schema1, schema2)
+ expected_multi_fields = [
+ {
+ 'type': 'text',
+ 'name': 'almost_text'
+ },
+ {
+ 'type': 'keyword',
+ 'name': 'caseless',
+ 'normalizer': 'lowercase'
+ },
+ {
+ 'type': 'text',
+ 'name': 'text'
+ }
+ ]
+
+ expected_message_multi_fields = [
+ {
+ 'type': 'keyword',
+ 'name': 'a_field'
+ },
+ {
+ 'type': 'text',
+ 'name': 'text'
+ }
+ ]
+ self.assertEqual(merged_fields['base']['field_details']['multi_fields'], expected_multi_fields)
+ self.assertEqual(merged_fields['base']['fields']['message']['field_details']
+ ['multi_fields'], expected_message_multi_fields)
+
 
 if __name__ == '__main__':
  unittest.main()