From a00eb8969c310c90fce8feaa40a795fb1d990c86 Mon Sep 17 00:00:00 2001
From: Dominic Page <11043991+djptek@users.noreply.github.com>
Date: Wed, 26 May 2021 16:23:30 +0200
Subject: [PATCH] [Tooling] Add --exclude flag to Generator to support field removal testing (#1411) (#1431)

* add --exclude flag to Generator
---
 CHANGELOG.next.md                                |   1 +
 USAGE.md                                         |  36 ++++++
 scripts/generator.py                             |   6 +-
 scripts/generators/beats.py                      |   4 +-
 scripts/generators/ecs_helpers.py                |   4 +-
 scripts/generators/es_template.py                |   4 +-
 scripts/generators/intermediate_files.py         |  12 +-
 scripts/schema/cleaner.py                        |  10 +-
 scripts/schema/exclude_filter.py                 |  78 +++++++++++++
 scripts/schema/finalizer.py                      |  20 ++--
 scripts/schema/loader.py                         |  35 +++++-
 scripts/schema/subset_filter.py                  |  37 +-----
 scripts/schema/visitor.py                        |  12 +-
 scripts/tests/unit/test_schema_cleaner.py        |   2 +-
 .../tests/unit/test_schema_exclude_filter.py     | 108 ++++++++++++++++++
 scripts/tests/unit/test_schema_loader.py         |   8 ++
 .../tests/unit/test_schema_subset_filter.py      |  10 +-
 17 files changed, 308 insertions(+), 79 deletions(-)
 create mode 100644 scripts/schema/exclude_filter.py
 create mode 100644 scripts/tests/unit/test_schema_exclude_filter.py

diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md
index fc737b9456..8ea506fe9d 100644
--- a/CHANGELOG.next.md
+++ b/CHANGELOG.next.md
@@ -22,6 +22,7 @@ Thanks, you're awesome :-) -->
 #### Improvements
 
 * Fix ecs GitHub repo link source branch #1393
+* Add --exclude flag to Generator to support field removal testing #1411
 
 #### Deprecated
diff --git a/USAGE.md b/USAGE.md
index aadf24b526..4e10ecdc86 100644
--- a/USAGE.md
+++ b/USAGE.md
@@ -26,6 +26,7 @@ relevant artifacts for their unique set of data sources.
 * [Generator Options](#generator-options)
   + [Out](#out)
   + [Include](#include)
+  + [Exclude](#exclude)
   + [Subset](#subset)
   + [Ref](#ref)
   + [Mapping & Template Settings](#mapping--template-settings)
@@ -192,6 +193,41 @@ Include can be used together with the `--ref` flag to merge custom fields into a
 
 > NOTE: The `--include` mechanism will not validate custom YAML files prior to merging. This allows for modifying existing ECS fields in a custom schema without having to redefine all the mandatory field attributes.
 
+#### Exclude
+
+Use the `--exclude` flag to generate ephemeral ECS artifacts based on the current ECS schema field definitions minus the fields considered for removal, e.g. to assess the impact of removing them. Warning! This is not the recommended route to remove a field permanently, as it is not intended to be invoked during the build process. Definitive field removal should be implemented using a custom [Subset](#subset) or via the [RFC process](https://github.com/elastic/ecs/tree/master/rfcs/README.md). Example:
+
+```
+$ python scripts/generator.py --exclude=../my-project/my-exclude-file.yml
+$ python scripts/generator.py --exclude="../my-project/schemas/a*.yml"
+```
+
+The `--exclude` flag expects a path to one or more YAML files using the same [file format](https://github.com/elastic/ecs/tree/master/schemas#fields-supported-in-schemasyml) as the ECS schema files. You can also use a subset of a schema file, provided that the relevant `name` and `fields` attributes are preserved.
+
+```
+---
+- name: log
+  fields:
+    - name: original
+```
+
+The root Field Set `name` must always be present and specified with no dots `.`. Subfields may be specified using dot notation, for example:
+
+```
+---
+- name: log
+  fields:
+    - name: syslog.severity.name
+```
+
+Generate artifacts using `--exclude` to load the custom definitions, together with `--out` to place them in the desired output directory:
+
+```
+$ python scripts/generator.py --exclude ../myproject/exclude-set.yml --out ../myproject/out/
+Loading schemas from local files
+Running generator. ECS version 1.11.0
+```
+
 #### Subset
 
 If your indices will never populate particular ECS fields, there's no need to include those field definitions in your index mappings. The `--subset` argument allows for passing a subset definition YAML file which indicates which field sets or specific fields to include in the generated artifacts.
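Before running the generator, an exclude definition can be sanity-checked by hand. A minimal sketch, assuming PyYAML and reusing the hypothetical file name from the examples above; this helper is not part of the patch:

```python
# check_exclude.py -- illustrative only; not part of this patch.
import yaml

def dotted_paths(exclude_file):
    """Yield the fieldset.sub.field paths an exclude definition names."""
    with open(exclude_file) as f:
        for field_set in yaml.safe_load(f):
            for field in field_set['fields']:
                yield '{}.{}'.format(field_set['name'], field['name'])

if __name__ == '__main__':
    for path in dotted_paths('../my-project/my-exclude-file.yml'):
        print(path)
```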
diff --git a/scripts/generator.py b/scripts/generator.py
index 4b6d738503..d6f35b02b5 100644
--- a/scripts/generator.py
+++ b/scripts/generator.py
@@ -16,6 +16,7 @@
 from schema import cleaner
 from schema import finalizer
 from schema import subset_filter
+from schema import exclude_filter
 
 
 def main():
@@ -52,6 +53,7 @@ def main():
     cleaner.clean(fields, strict=args.strict)
     finalizer.finalize(fields)
     fields = subset_filter.filter(fields, args.subset, out_dir)
+    fields = exclude_filter.exclude(fields, args.exclude)
     nested, flat = intermediate_files.generate(fields, os.path.join(out_dir, 'ecs'), default_dirs)
 
     if args.intermediate_only:
@@ -61,7 +63,7 @@ def main():
     es_template.generate(nested, ecs_generated_version, out_dir, args.mapping_settings)
     es_template.generate_legacy(flat, ecs_generated_version, out_dir, args.template_settings, args.mapping_settings)
     beats.generate(nested, ecs_generated_version, out_dir)
-    if args.include or args.subset:
+    if args.include or args.subset or args.exclude:
         exit()
 
     ecs_helpers.make_dirs(docs_dir)
@@ -74,6 +76,8 @@ def argument_parser():
                         Note that "--include experimental/schemas" will also respect this git ref.')
     parser.add_argument('--include', nargs='+',
                         help='include user specified directory of custom field definitions')
+    parser.add_argument('--exclude', nargs='+',
+                        help='exclude user specified subset of the schema')
    parser.add_argument('--subset', nargs='+',
                         help='render a subset of the schema')
     parser.add_argument('--out', action='store', help='directory to output the generated files')
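For orientation, the call order wired in above reads as a pipeline: exclude runs downstream of subset, as the new module's header comment notes. A condensed sketch, assuming `fields` has already been loaded and using hypothetical definition file names:

```python
from schema import cleaner, finalizer, subset_filter, exclude_filter

def trim_schema(fields, out_dir='build'):
    cleaner.clean(fields, strict=False)  # validate and fill in defaults
    finalizer.finalize(fields)           # perform reuse, compute flat names
    # Keep only a declared subset, then drop the fields under
    # consideration for removal.
    fields = subset_filter.filter(fields, ['subset.yml'], out_dir)
    fields = exclude_filter.exclude(fields, ['exclude.yml'])
    return fields
```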
diff --git a/scripts/generators/beats.py b/scripts/generators/beats.py
index 6f70921f67..5708a03555 100644
--- a/scripts/generators/beats.py
+++ b/scripts/generators/beats.py
@@ -83,9 +83,9 @@ def write_beats_yaml(beats_file, ecs_version, out_dir):
 
 
 def file_header():
-    return '''
+    return """
 # WARNING! Do not edit this file directly, it was generated by the ECS project,
 # based on ECS version {version}.
 # Please visit https://github.com/elastic/ecs to suggest changes to ECS fields.
-'''.lstrip()
+""".lstrip()
diff --git a/scripts/generators/ecs_helpers.py b/scripts/generators/ecs_helpers.py
index 086f4d592d..fbf7f4a2a1 100644
--- a/scripts/generators/ecs_helpers.py
+++ b/scripts/generators/ecs_helpers.py
@@ -159,7 +159,7 @@ def yaml_load(filename):
 
 
 def list_subtract(original, subtracted):
-    '''Subtract two lists. original = subtracted'''
+    """Subtract two lists. original = subtracted"""
     return [item for item in original if item not in subtracted]
 
 
@@ -175,7 +175,7 @@ def list_extract_keys(lst, key_name):
 
 
 def is_intermediate(field):
-    '''Encapsulates the check to see if a field is an intermediate field or a "real" field.'''
+    """Encapsulates the check to see if a field is an intermediate field or a "real" field."""
     return ('intermediate' in field['field_details'] and field['field_details']['intermediate'])
diff --git a/scripts/generators/es_template.py b/scripts/generators/es_template.py
index 83ae6fb61a..fb543fa35e 100644
--- a/scripts/generators/es_template.py
+++ b/scripts/generators/es_template.py
@@ -263,13 +263,13 @@ def default_mapping_settings():
 
 
 def es6_type_fallback(mappings):
-    '''
+    """
     Visits each leaf in the mappings object and falls back to an
     Elasticsearch 6.x supported type.
 
     Since a field like `wildcard` won't have the same defaults as
     a `keyword` field, we must add any missing defaults.
-    '''
+    """
     for (name, details) in mappings.items():
         if 'type' in details:
diff --git a/scripts/generators/intermediate_files.py b/scripts/generators/intermediate_files.py
index d21800936f..c085039b62 100644
--- a/scripts/generators/intermediate_files.py
+++ b/scripts/generators/intermediate_files.py
@@ -20,7 +20,7 @@ def generate(fields, out_dir, default_dirs):
 
 
 def generate_flat_fields(fields):
-    '''Generate ecs_flat.yml'''
+    """Generate ecs_flat.yml"""
     filtered = remove_non_root_reusables(fields)
     flattened = {}
     visitor.visit_fields_with_memo(filtered, accumulate_field, flattened)
@@ -28,7 +28,7 @@ def generate_flat_fields(fields):
 
 
 def accumulate_field(details, memo):
-    '''Visitor function that accumulates all field details in the memo dict'''
+    """Visitor function that accumulates all field details in the memo dict"""
     if 'schema_details' in details or ecs_helpers.is_intermediate(details):
         return
     field_details = copy.deepcopy(details['field_details'])
@@ -39,7 +39,7 @@ def accumulate_field(details, memo):
 
 
 def generate_nested_fields(fields):
-    '''Generate ecs_nested.yml'''
+    """Generate ecs_nested.yml"""
     nested = {}
     # Flatten each field set, but keep all resulting fields nested under their
     # parent/host field set.
@@ -71,13 +71,13 @@ def generate_nested_fields(fields):
 
 
 def remove_internal_attributes(field_details):
-    '''Remove attributes only relevant to the deeply nested structure, but not to ecs_flat/nested.yml.'''
+    """Remove attributes only relevant to the deeply nested structure, but not to ecs_flat/nested.yml."""
     field_details.pop('node_name', None)
     field_details.pop('intermediate', None)
 
 
 def remove_non_root_reusables(fields_nested):
-    '''
+    """
     Remove field sets that have top_level=false from the root of the field definitions.
 
     This attribute means they're only meant to be in the "reusable/expected" locations
     and not at the root of user's events.
 
     The ecs_nested.yml file for example still needs to keep all field sets at the root
     of the YAML file, as it is the official information about each field set. It's the
     responsibility of users consuming ecs_nested.yml to skip the field sets with
     top_level=false.
-    '''
+    """
     fields = {}
     for (name, field) in fields_nested.items():
         if 'reusable' not in field['schema_details'] or field['schema_details']['reusable']['top_level']:
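To make the `top_level` behavior concrete, a toy sketch of what `remove_non_root_reusables` keeps and drops (the field set names and the `reusable` entry are invented for illustration):

```python
fields = {
    'host': {'schema_details': {}, 'field_details': {'name': 'host'}},
    'os': {'schema_details': {'reusable': {'top_level': False, 'expected': ['host']}},
           'field_details': {'name': 'os'}},
}
# 'host' has no 'reusable' entry, so it stays at the root;
# 'os' has top_level=False, so remove_non_root_reusables() drops it:
# remove_non_root_reusables(fields) == {'host': {...}}
```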
diff --git a/scripts/schema/cleaner.py b/scripts/schema/cleaner.py
index b316ef7298..efcc1d08a2 100644
--- a/scripts/schema/cleaner.py
+++ b/scripts/schema/cleaner.py
@@ -56,7 +56,7 @@ def schema_cleanup(schema):
 
 
 def schema_mandatory_attributes(schema):
-    '''Ensures for the presence of the mandatory schema attributes and raises if any are missing'''
+    """Ensures the presence of the mandatory schema attributes and raises if any are missing"""
     current_schema_attributes = sorted(list(schema['field_details'].keys()) +
                                        list(schema['schema_details'].keys()))
     missing_attributes = ecs_helpers.list_subtract(SCHEMA_MANDATORY_ATTRIBUTES, current_schema_attributes)
@@ -74,7 +74,7 @@ def schema_mandatory_attributes(schema):
 
 
 def schema_assertions_and_warnings(schema):
-    '''Additional checks on a fleshed out schema'''
+    """Additional checks on a fleshed out schema"""
     single_line_short_description(schema, strict=strict_mode)
     if 'beta' in schema['field_details']:
         single_line_beta_description(schema, strict=strict_mode)
@@ -143,7 +143,7 @@ def field_defaults(field):
 
 
 def field_or_multi_field_datatype_defaults(field_details):
-    '''Sets datatype-related defaults on a canonical field or multi-field entries.'''
+    """Sets datatype-related defaults on a canonical field or multi-field entries."""
     if field_details['type'] == 'keyword':
         field_details.setdefault('ignore_above', 1024)
     if field_details['type'] == 'text':
@@ -160,7 +160,7 @@ def field_or_multi_field_datatype_defaults(field_details):
 
 
 def field_mandatory_attributes(field):
-    '''Ensures for the presence of the mandatory field attributes and raises if any are missing'''
+    """Ensures the presence of the mandatory field attributes and raises if any are missing"""
     if ecs_helpers.is_intermediate(field):
         return
     current_field_attributes = sorted(field['field_details'].keys())
@@ -180,7 +180,7 @@ def field_mandatory_attributes(field):
 
 
 def field_assertions_and_warnings(field):
-    '''Additional checks on a fleshed out field'''
+    """Additional checks on a fleshed out field"""
     if not ecs_helpers.is_intermediate(field):
         # check short description length if in strict mode
         single_line_short_description(field, strict=strict_mode)
diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py
new file mode 100644
index 0000000000..5717ecfb6f
--- /dev/null
+++ b/scripts/schema/exclude_filter.py
@@ -0,0 +1,78 @@
+from schema import loader
+
+# This script should be run downstream of the subset filters - it takes
+# all ECS and custom fields already loaded by the latter and explicitly
+# removes a subset, for example, to simulate impact of future removals
+
+
+def exclude(fields, exclude_file_globs):
+    excludes = load_exclude_definitions(exclude_file_globs)
+
+    if excludes:
+        fields = exclude_fields(fields, excludes)
+
+    return fields
+
+
+def long_path(path_as_list):
+    return '.'.join([e for e in path_as_list])
+
+
+def pop_field(fields, node_path, path, removed):
+    """Pops a field from a YAML-derived dict, using a path derived from an ordered list of nodes."""
+    if node_path[0] in fields:
+        if len(node_path) == 1:
+            flat_name = long_path(path)
+            fields.pop(node_path[0])
+            return flat_name
+        else:
+            inner_field = node_path.pop(0)
+            if 'fields' in fields[inner_field]:
+                popped = pop_field(fields[inner_field]['fields'], node_path, path, removed)
+                # if an object field has no remaining fields and is not 'base', pop it
+                if fields[inner_field]['fields'] == {} and inner_field != 'base':
+                    fields.pop(inner_field)
+                return popped
+            else:
+                raise ValueError(
+                    '--exclude specified, but no path to field {} found'.format(long_path(path)))
+    else:
+        this_long_path = long_path(path)
+        # Check in case the parent field was already removed
+        if not any([this_long_path.startswith(removed_path) for removed_path in removed if removed_path is not None]):
+            raise ValueError('--exclude specified, but no field {} found'.format(this_long_path))
+
+
+def exclude_trace_path(fields, item, path, removed):
+    """Traverses paths to one or more nodes in a YAML-derived dict."""
+    for list_item in item:
+        node_path = path.copy()
+        # cater for name.with.dots
+        for name in list_item['name'].split('.'):
+            node_path.append(name)
+        if 'fields' not in list_item:
+            parent = node_path[0]
+            removed.append(pop_field(fields, node_path, node_path.copy(), removed))
+            # if the parent field set has no remaining fields and is not 'base', pop it
+            if parent != 'base' and parent in fields and len(fields[parent]['fields']) == 0:
+                fields.pop(parent)
+        else:
+            raise ValueError('--exclude specified, can\'t parse fields in file {}'.format(item))
+
+
+def exclude_fields(fields, excludes):
+    """Traverses fields and eliminates any field which matches the excludes."""
+    if excludes:
+        for ex_list in excludes:
+            for item in ex_list:
+                exclude_trace_path(fields, item['fields'], [item['name']], [])
+    return fields
+
+
+def load_exclude_definitions(file_globs):
+    if not file_globs:
+        return []
+    excludes = loader.load_definitions(file_globs)
+    if not excludes:
+        raise ValueError('--exclude specified, but no exclusions found in {}'.format(file_globs))
+    return excludes
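To see the new module end to end, a small self-contained sketch of `exclude_fields` on a toy tree (field names invented; it mirrors the unit tests added below):

```python
from schema import exclude_filter

# A toy nested fields dict in the loader's shape.
fields = {'log': {'fields': {
    'original': {'field_details': {'flat_name': 'log.original'}},
    'level': {'field_details': {'flat_name': 'log.level'}}}}}

# One exclude file containing one field set entry, in the shape
# load_exclude_definitions() returns.
excludes = [[{'name': 'log', 'fields': [{'name': 'original'}]}]]

fields = exclude_filter.exclude_fields(fields, excludes)
# Only log.level remains:
# {'log': {'fields': {'level': {'field_details': {'flat_name': 'log.level'}}}}}
```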
diff --git a/scripts/schema/finalizer.py b/scripts/schema/finalizer.py
index 84b4c21062..648349b8ef 100644
--- a/scripts/schema/finalizer.py
+++ b/scripts/schema/finalizer.py
@@ -19,7 +19,7 @@
 
 
 def finalize(fields):
-    '''Intended entrypoint of the finalizer.'''
+    """Intended entrypoint of the finalizer."""
     perform_reuse(fields)
     calculate_final_values(fields)
 
@@ -46,7 +46,7 @@ def order_reuses(fields):
 
 
 def perform_reuse(fields):
-    '''Performs field reuse in two phases'''
+    """Performs field reuse in two phases"""
     foreign_reuses, self_nestings = order_reuses(fields)
 
     # Phase 1: foreign reuse
@@ -99,11 +99,11 @@ def perform_reuse(fields):
 
 
 def ensure_valid_reuse(reused_schema, destination_schema=None):
-    '''
+    """
     Raise if either the reused schema or destination schema have root=true.
 
     Second param is optional, if testing for a self-nesting (where source=destination).
-    '''
+    """
     if reused_schema['schema_details']['root']:
         msg = "Schema {} has attribute root=true and therefore cannot be reused.".format(
             reused_schema['field_details']['name'])
@@ -115,7 +115,7 @@ def ensure_valid_reuse(reused_schema, destination_schema=None):
 
 
 def append_reused_here(reused_schema, reuse_entry, destination_schema):
-    '''Captures two ways of denoting what field sets are reused under a given field set'''
+    """Captures two ways of denoting what field sets are reused under a given field set"""
     # Legacy, too limited
     destination_schema['schema_details'].setdefault('nestings', [])
     destination_schema['schema_details']['nestings'] = sorted(
@@ -136,7 +136,7 @@ def append_reused_here(reused_schema, reuse_entry, destination_schema):
 
 
 def set_original_fieldset(fields, original_fieldset):
-    '''Recursively set the 'original_fieldset' attribute for all fields in a group of fields'''
+    """Recursively set the 'original_fieldset' attribute for all fields in a group of fields"""
     def func(details):
         # Don't override if already set (e.g. 'group' for user.group.* fields)
         details['field_details'].setdefault('original_fieldset', original_fieldset)
@@ -144,7 +144,7 @@ def func(details):
 
 
 def field_group_at_path(dotted_path, fields):
-    '''Returns the ['fields'] hash at the dotted_path.'''
+    """Returns the ['fields'] hash at the dotted_path."""
     path = dotted_path.split('.')
     nesting = fields
     for next_field in path:
@@ -163,17 +163,17 @@ def field_group_at_path(dotted_path, fields):
 
 
 def calculate_final_values(fields):
-    '''
+    """
     This function navigates all fields recursively.
 
     It populates a few more values for the fields, especially
     path-based values like flat_name.
-    '''
+    """
     visitor.visit_fields_with_path(fields, field_finalizer)
 
 
 def field_finalizer(details, path):
-    '''This is the function called by the visitor to perform the work of calculate_final_values'''
+    """This is the function called by the visitor to perform the work of calculate_final_values"""
     name_array = path + [details['field_details']['node_name']]
     flat_name = '.'.join(name_array)
     details['field_details']['flat_name'] = flat_name
diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py
index 04f3218ae4..a662622274 100644
--- a/scripts/schema/loader.py
+++ b/scripts/schema/loader.py
@@ -1,6 +1,5 @@
 import copy
 import glob
-import os
 import yaml
 
 from generators import ecs_helpers
@@ -109,12 +108,12 @@ def read_schema_blob(blob, ref):
 
 
 def nest_schema(raw, file_name):
-    '''
+    """
     Raw schema files are an array of schema details: [{'name': 'base', ...}]
 
     This function loops over the array (usually 1 schema per file) and turns it into
     a dict with the schema name as the key: { 'base': { 'name': 'base', ...}}
-    '''
+    """
     fields = {}
     for schema in raw:
         if 'name' not in schema:
@@ -251,3 +250,33 @@ def merge_fields(a, b):
             a[key].setdefault('fields', {})
             a[key]['fields'] = merge_fields(a[key]['fields'], b[key]['fields'])
     return a
+
+
+def load_yaml_file(file_name):
+    with open(file_name) as f:
+        return yaml.safe_load(f.read())
+
+
+# You know, for silent tests
+def warn(message):
+    print(message)
+
+
+def eval_globs(globs):
+    """Accepts an array of glob patterns or file names, returns the array of actual files"""
+    all_files = []
+    for g in globs:
+        new_files = glob.glob(g)
+        if len(new_files) == 0:
+            warn("{} did not match any files".format(g))
+        else:
+            all_files.extend(new_files)
+    return all_files
+
+
+def load_definitions(file_globs):
+    sets = []
+    for f in eval_globs(file_globs):
+        raw = load_yaml_file(f)
+        sets.append(raw)
+    return sets
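The helpers relocated into the loader above are now shared by both filters. A short hedged sketch of their behavior (the glob pattern is hypothetical):

```python
from schema import loader

# Each matched YAML file becomes one list entry; a pattern matching no
# files prints a warning via loader.warn() rather than raising.
for definition in loader.load_definitions(['../my-project/excludes/*.yml']):
    # `definition` is the parsed YAML: a list of {'name': ..., 'fields': [...]} dicts.
    print([field_set['name'] for field_set in definition])
```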
diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py
index 791d6d3cc7..8c91929f0d 100644
--- a/scripts/schema/subset_filter.py
+++ b/scripts/schema/subset_filter.py
@@ -1,8 +1,6 @@
-import glob
-import yaml
 import os
 from generators import intermediate_files
-from schema import cleaner
+from schema import cleaner, loader
 
 # This script takes all ECS and custom fields already loaded, and lets users
 # filter out the ones they don't need.
@@ -22,7 +20,7 @@ def filter(fields, subset_file_globs, out_dir):
 
 
 def combine_all_subsets(subsets):
-    '''Merges N subsets into one. Strips top level 'name' and 'fields' keys as well as non-ECS field options since we can't know how to merge those.'''
+    """Merges N subsets into one. Strips top level 'name' and 'fields' keys as well as non-ECS field options since we can't know how to merge those."""
     merged_subset = {}
     for subset in subsets:
         strip_non_ecs_options(subset['fields'])
@@ -33,37 +31,12 @@ def combine_all_subsets(subsets):
 def load_subset_definitions(file_globs):
     if not file_globs:
         return []
-    subsets = []
-    for f in eval_globs(file_globs):
-        raw = load_yaml_file(f)
-        subsets.append(raw)
+    subsets = loader.load_definitions(file_globs)
     if not subsets:
         raise ValueError('--subset specified, but no subsets found in {}'.format(file_globs))
     return subsets
 
 
-def load_yaml_file(file_name):
-    with open(file_name) as f:
-        return yaml.safe_load(f.read())
-
-
-def eval_globs(globs):
-    '''Accepts an array of glob patterns or file names, returns the array of actual files'''
-    all_files = []
-    for g in globs:
-        new_files = glob.glob(g)
-        if len(new_files) == 0:
-            warn("{} did not match any files".format(g))
-        else:
-            all_files.extend(new_files)
-    return all_files
-
-
-# You know, for silent tests
-def warn(message):
-    print(message)
-
-
 ecs_options = ['fields', 'enabled', 'index']
 
@@ -75,7 +48,7 @@ def strip_non_ecs_options(subset):
 
 
 def merge_subsets(a, b):
-    '''Merges field subset definitions together. The b subset is merged into the a subset. Assumes that subsets have been stripped of non-ecs options.'''
+    """Merges field subset definitions together. The b subset is merged into the a subset. Assumes that subsets have been stripped of non-ecs options."""
     for key in b:
         if key not in a:
             a[key] = b[key]
@@ -96,7 +69,7 @@ def merge_subsets(a, b):
 
 
 def extract_matching_fields(fields, subset_definitions):
-    '''Removes fields that are not in the subset definition. Returns a copy without modifying the input fields dict.'''
+    """Removes fields that are not in the subset definition. Returns a copy without modifying the input fields dict."""
     retained_fields = {x: fields[x].copy() for x in subset_definitions}
     for key, val in subset_definitions.items():
         retained_fields[key]['field_details'] = fields[key]['field_details'].copy()
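For intuition about how several subset files combine, a minimal sketch relying only on the `merge_subsets` branch visible in the hunk above (keys invented; overlapping keys are merged more deeply by the rest of the function):

```python
from schema import subset_filter

a = {'process': {'fields': {'pid': {}}}}
b = {'host': {'fields': {'name': {}}}}
subset_filter.merge_subsets(a, b)
# 'host' was absent from `a`, so it is copied over wholesale:
# a == {'process': {'fields': {'pid': {}}}, 'host': {'fields': {'name': {}}}}
```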
diff --git a/scripts/schema/visitor.py b/scripts/schema/visitor.py
index 5c2e030da5..3c3d762bad 100644
--- a/scripts/schema/visitor.py
+++ b/scripts/schema/visitor.py
@@ -1,5 +1,5 @@
 def visit_fields(fields, fieldset_func=None, field_func=None):
-    '''
+    """
     This function navigates the deeply nested tree structure and runs provided
     functions on each fieldset or field encountered (both optional).
 
     The 'field_func(details)' provided will be called for each field, with the dictionary
     containing the field's details ({'field_details': {}, 'fields': {}}).
-    '''
+    """
     for (name, details) in fields.items():
         if fieldset_func and 'schema_details' in details:
             fieldset_func(details)
@@ -24,14 +24,14 @@ def visit_fields(fields, fieldset_func=None, field_func=None):
 
 
 def visit_fields_with_path(fields, func, path=[]):
-    '''
+    """
     This function navigates the deeply nested tree structure and runs the provided
     function on all fields and field sets.
 
     The 'func' provided will be called for each field, with the dictionary containing
     their details ({'field_details': {}, 'fields': {}}) as well as the path array
     leading to the location of the field in question.
-    '''
+    """
     for (name, details) in fields.items():
         if 'field_details' in details:
             func(details, path)
@@ -44,14 +44,14 @@ def visit_fields_with_path(fields, func, path=[]):
 
 
 def visit_fields_with_memo(fields, func, memo=None):
-    '''
+    """
     This function navigates the deeply nested tree structure and runs the provided
     function on all fields and field sets.
 
     The 'func' provided will be called for each field, with the dictionary containing
     their details ({'field_details': {}, 'fields': {}}) as well as the 'memo' you pass in.
-    '''
+    """
     for (name, details) in fields.items():
         if 'field_details' in details:
             func(details, memo)
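A quick illustration of the visitor API touched above, with toy data (names invented):

```python
from schema import visitor

fields = {'log': {'schema_details': {'root': False},
                  'field_details': {'name': 'log', 'node_name': 'log'},
                  'fields': {
                      'level': {'field_details': {'name': 'level', 'node_name': 'level'}}}}}

names = []
visitor.visit_fields_with_memo(
    fields, lambda details, memo: memo.append(details['field_details']['name']), names)
print(names)  # ['log', 'level'] -- field sets and leaf fields are both visited
```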
diff --git a/scripts/tests/unit/test_schema_cleaner.py b/scripts/tests/unit/test_schema_cleaner.py
index 3a6a7830c7..a6c8fd7284 100644
--- a/scripts/tests/unit/test_schema_cleaner.py
+++ b/scripts/tests/unit/test_schema_cleaner.py
@@ -425,7 +425,7 @@ def test_multiline_short_override_description_warns_strict_disabled(self):
             self.fail("cleaner.single_line_short_override_description() raised Exception unexpectedly.")
 
     def test_clean(self):
-        '''A high level sanity test'''
+        """A high level sanity test"""
         fields = self.schema_process()
         cleaner.clean(fields)
         # schemas are processed
diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py
new file mode 100644
index 0000000000..5b6cb5d6ad
--- /dev/null
+++ b/scripts/tests/unit/test_schema_exclude_filter.py
@@ -0,0 +1,108 @@
+from schema import exclude_filter
+import mock
+import os
+import sys
+import unittest
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
+
+
+class TestSchemaExcludeFilter(unittest.TestCase):
+
+    def setUp(self):
+        self.maxDiff = None
+
+    @mock.patch('schema.loader.warn')
+    def test_load_exclude_definitions_raises_when_no_exclude_found(self, mock_warn):
+        with self.assertRaisesRegex(ValueError,
+                                    "--exclude specified, but no exclusions found in \['foo\*.yml'\]"):
+            exclude_filter.load_exclude_definitions(['foo*.yml'])
+
+    def test_exclude_field(self):
+        fields = {'my_field_set': {'fields': {
+            'my_field_exclude': {'field_details': {'flat_name': 'my_field_set.my_field_exclude'}},
+            'my_field_persist': {'field_details': {'flat_name': 'my_field_set.my_field_persist'}}}}}
+        excludes = [
+            [{'name': 'my_field_set', 'fields': [{'name': 'my_field_exclude'}]}]]
+        fields = exclude_filter.exclude_fields(fields, excludes)
+        expect_persisted = {'my_field_set': {'fields': {
+            'my_field_persist': {'field_details': {'flat_name': 'my_field_set.my_field_persist'}}}}}
+        self.assertEqual(fields, expect_persisted)
+
+    def test_exclude_field_deep_path(self):
+        fields = {'d0': {'fields': {
+            'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': {
+                'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': {
+                    'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}, 'fields': {
+                        'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': {
+                            'd5': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4.d5'}}}}}}}}}}}}}
+        excludes = [[{'name': 'd0', 'fields': [{
+            'name': 'd1.d2.d3.d4.d5'}]}]]
+        fields = exclude_filter.exclude_fields(fields, excludes)
+        expect_persisted = {}
+        self.assertEqual(fields, expect_persisted)
+
+    def test_exclude_field_dot_path(self):
+        fields = {'d0': {'fields': {
+            'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': {
+                'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': {
+                    'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}, 'fields': {
+                        'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': {
+                            'd5': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4.d5'}}}}}}}}}}}}}
+        excludes = [[{'name': 'd0', 'fields': [{
+            'name': 'd1.d2.d3.d4.d5'}]}]]
+        fields = exclude_filter.exclude_fields(fields, excludes)
+        expect_persisted = {}
+        self.assertEqual(fields, expect_persisted)
+
+    def test_exclude_field_base_always_persists(self):
+        fields = {'base': {'fields': {
+            'd1': {'field_details': {'flat_name': 'base.d1'}, 'fields': {
+                'd2': {'field_details': {'flat_name': 'base.d1.d2'}, 'fields': {
+                    'd3': {'field_details': {'flat_name': 'base.d1.d2.d3'}, 'fields': {
+                        'd4': {'field_details': {'flat_name': 'base.d1.d2.d3.d4'}, 'fields': {
+                            'd5': {'field_details': {'flat_name': 'base.d1.d2.d3.d4.d5'}}}}}}}}}}}}}
+        excludes = [[{'name': 'base', 'fields': [{
+            'name': 'd1.d2.d3.d4.d5'}]}]]
+        fields = exclude_filter.exclude_fields(fields, excludes)
+        expect_persisted = {'base': {'fields': {}}}
+        self.assertEqual(fields, expect_persisted)
+
+    def test_exclude_fields(self):
+        fields = {'my_field_set': {'fields': {
+            'my_field_exclude_1': {'field_details': {'flat_name': 'my_field_set.my_field_exclude_1'}},
+            'my_field_exclude_2': {'field_details': {'flat_name': 'my_field_set.my_field_exclude_2'}}}}}
+        excludes = [[{'name': 'my_field_set', 'fields': [
+            {'name': 'my_field_exclude_1'}, {'name': 'my_field_exclude_2'}]}]]
+        fields = exclude_filter.exclude_fields(fields, excludes)
+        expect_persisted = {}
+        self.assertEqual(fields, expect_persisted)
+
+    def test_exclude_non_existing_field_set(self):
+        fields = {'my_field_set': {'fields': {
+            'my_field': {'field_details': {'flat_name': 'my_field_set.my_field'}}}}}
+        excludes = [[{'name': 'my_non_existing_field_set', 'fields': [
+            {'name': 'my_field_exclude'}]}]]
+        with self.assertRaisesRegex(ValueError,
+                                    "--exclude specified, but no field my_non_existing_field_set.my_field_exclude found"):
+            exclude_filter.exclude_fields(fields, excludes)
+
+    def test_exclude_non_existing_field(self):
+        fields = {'my_field_set': {'fields': {
+            'my_field': {'field_details': {'flat_name': 'my_field_set.my_field'}}}}}
+        excludes = [[{'name': 'my_field_set', 'fields': [
+            {'name': 'my_non_existing_field'}]}]]
+        with self.assertRaisesRegex(ValueError,
+                                    "--exclude specified, but no field my_field_set.my_non_existing_field found"):
+            exclude_filter.exclude_fields(fields, excludes)
+
+    def test_exclude_non_existing_field_deep_path(self):
+        fields = {'d0': {'fields': {
+            'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': {
+                'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': {
+                    'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}}}}}}}}
+        excludes = [[{'name': 'd0', 'fields': [{
+            'name': 'd1.d2.d3.d4.d5'}]}]]
+        with self.assertRaisesRegex(ValueError,
+                                    "--exclude specified, but no path to field d0.d1.d2.d3.d4.d5 found"):
+            exclude_filter.exclude_fields(fields, excludes)
diff --git a/scripts/tests/unit/test_schema_loader.py b/scripts/tests/unit/test_schema_loader.py
index fde33e0a1c..b9b263f5df 100644
--- a/scripts/tests/unit/test_schema_loader.py
+++ b/scripts/tests/unit/test_schema_loader.py
@@ -14,6 +14,14 @@ class TestSchemaLoader(unittest.TestCase):
     def setUp(self):
         self.maxDiff = None
 
+    @mock.patch('schema.loader.warn')
+    def test_eval_globs(self, mock_warn):
+        files = loader.eval_globs(['schemas/*.yml', 'missing*'])
+        self.assertTrue(mock_warn.called, "a warning should have been printed for missing*")
+        self.assertIn('schemas/base.yml', files)
+        self.assertEqual(list(filter(lambda f: f.startswith('missing'), files)), [],
+                         "The 'missing*' pattern should not show up in the resulting files")
+
     # Pseudo-fixtures
 
     def schema_base(self):
diff --git a/scripts/tests/unit/test_schema_subset_filter.py b/scripts/tests/unit/test_schema_subset_filter.py
index f108dba4bc..e7ae5fd211 100644
--- a/scripts/tests/unit/test_schema_subset_filter.py
+++ b/scripts/tests/unit/test_schema_subset_filter.py
@@ -14,15 +14,7 @@ class TestSchemaSubsetFilter(unittest.TestCase):
     def setUp(self):
         self.maxDiff = None
 
-    @mock.patch('schema.subset_filter.warn')
-    def test_eval_globs(self, mock_warn):
-        files = subset_filter.eval_globs(['schemas/*.yml', 'missing*'])
-        self.assertTrue(mock_warn.called, "a warning should have been printed for missing*")
-        self.assertIn('schemas/base.yml', files)
-        self.assertEqual(list(filter(lambda f: f.startswith('missing'), files)), [],
-                         "The 'missing*' pattern should not show up in the resulting files")
-
-    @mock.patch('schema.subset_filter.warn')
+    @mock.patch('schema.loader.warn')
     def test_load_subset_definitions_raises_when_no_subset_found(self, mock_warn):
         with self.assertRaisesRegex(ValueError,
                                     "--subset specified, but no subsets found in \['foo\*.yml'\]"):