diff --git a/CHANGELOG.md b/CHANGELOG.md index a1927875df..23f6728c36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ All notable changes to this project will be documented in this file based on the * Field details Jinja2 template components have been consolidated into one template #897 * Add `[discrete]` marker before each section header in field details. #989 +* `--ref` now loads `experimental/schemas` based on git ref in addition to `schemas`. #1063 ## [1.6.0](https://github.com/elastic/ecs/compare/v1.5.0...v1.6.0) diff --git a/USAGE.md b/USAGE.md index cb0c49bf27..aadf24b526 100644 --- a/USAGE.md +++ b/USAGE.md @@ -188,6 +188,8 @@ And looking at a specific artifact, `../myprojects/out/generated/elasticsearch/7 ... ``` +`--include` can be used together with the `--ref` flag to merge custom fields into a targeted ECS version. See [`Ref`](#ref). + > NOTE: The `--include` mechanism will not validate custom YAML files prior to merging. This allows for modifying existing ECS fields in a custom schema without having to redefine all the mandatory field attributes. #### Subset @@ -235,12 +237,26 @@ It's also possible to combine `--include` and `--subset` together! Do note that #### Ref -The `--ref` argument allows for passing a specific `git` tag (e.g. `v.1.5.0`) or commit hash (`1454f8b`) that will be used to build ECS artifacts. +The `--ref` argument allows for passing a specific `git` tag (e.g. `v1.5.0`) or commit hash (`1454f8b`) that will be used to build ECS artifacts. ``` $ python scripts/generator.py --ref v1.5.0 ``` +The `--ref` argument loads field definitions as they exist at the specified git reference (branch, tag, etc.): always from [`./schemas`](./schemas), and also from [`./experimental/schemas`](./experimental/schemas) when that directory is specified via `--include`. + +Here's another example loading both ECS fields and [experimental](experimental/README.md) changes *from branch "1.7"*, then adding custom fields on top.
+ +``` +$ python scripts/generator.py --ref 1.7 --include experimental/schemas ../myproject/fields/custom --out ../myproject/out +``` + +The command above will produce artifacts based on: + +* main ECS field definitions as of branch 1.7 +* experimental ECS changes as of branch 1.7 +* custom fields in `../myproject/fields/custom` as they are on the filesystem + > Note: `--ref` does have a dependency on `git` being installed and all expected commits/tags fetched from the ECS upstream repo. This will unlikely be an issue unless you downloaded the ECS as a zip archive from GitHub vs. cloning it. #### Mapping & Template Settings diff --git a/scripts/generator.py b/scripts/generator.py index b6dcf05db9..0db252648d 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -63,7 +63,8 @@ def main(): def argument_parser(): parser = argparse.ArgumentParser() - parser.add_argument('--ref', action='store', help='git reference to use when building schemas') + parser.add_argument('--ref', action='store', help='Loads fields definitions from `./schemas` subdirectory from specified git reference. 
\ + Note that "--include experimental/schemas" will also respect this git ref.') parser.add_argument('--include', nargs='+', help='include user specified directory of custom field definitions') parser.add_argument('--subset', nargs='+', diff --git a/scripts/generators/ecs_helpers.py b/scripts/generators/ecs_helpers.py index 2da446f3e3..801319854c 100644 --- a/scripts/generators/ecs_helpers.py +++ b/scripts/generators/ecs_helpers.py @@ -114,6 +114,14 @@ def get_tree_by_ref(ref): return commit.tree +def path_exists_in_git_tree(tree, file_path): + try: + _ = tree[file_path] + except KeyError: + return False + return True + + def usage_doc_files(): usage_docs_dir = os.path.join(os.path.dirname(__file__), '../../docs/usage') usage_docs_path = pathlib.Path(usage_docs_dir) diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py index 16895babbe..e953834d97 100644 --- a/scripts/schema/loader.py +++ b/scripts/schema/loader.py @@ -51,9 +51,18 @@ def load_schemas(ref=None, included_files=[]): schema_files_raw = load_schema_files(ecs_helpers.ecs_files()) fields = deep_nesting_representation(schema_files_raw) - # Custom additional files (never from git ref) + EXPERIMENTAL_SCHEMA_DIR = 'experimental/schemas' + + # Custom additional files if included_files and len(included_files) > 0: print('Loading user defined schemas: {0}'.format(included_files)) + # If --ref provided and --include loading experimental schemas + if ref and EXPERIMENTAL_SCHEMA_DIR in included_files: + exp_schema_files_raw = load_schemas_from_git(ref, target_dir=EXPERIMENTAL_SCHEMA_DIR) + exp_fields = deep_nesting_representation(exp_schema_files_raw) + fields = merge_fields(fields, exp_fields) + included_files.remove(EXPERIMENTAL_SCHEMA_DIR) + # Remaining additional custom files (never from git ref) custom_files = ecs_helpers.get_glob_files(included_files, ecs_helpers.YAML_EXT) custom_fields = deep_nesting_representation(load_schema_files(custom_files)) fields = merge_fields(fields, custom_fields) @@ 
-68,13 +77,18 @@ def load_schema_files(files): return fields_nested -def load_schemas_from_git(ref): +def load_schemas_from_git(ref, target_dir='schemas'): tree = ecs_helpers.get_tree_by_ref(ref) fields_nested = {} - for blob in tree['schemas'].blobs: - if blob.name.endswith('.yml'): - new_fields = read_schema_blob(blob, ref) - fields_nested = ecs_helpers.safe_merge_dicts(fields_nested, new_fields) + + # Handle the case where the target dir doesn't exist in the git ref + if ecs_helpers.path_exists_in_git_tree(tree, target_dir): + for blob in tree[target_dir].blobs: + if blob.name.endswith('.yml'): + new_fields = read_schema_blob(blob, ref) + fields_nested = ecs_helpers.safe_merge_dicts(fields_nested, new_fields) + else: + raise KeyError(f"Target directory './{target_dir}' not present in git ref '{ref}'!") return fields_nested diff --git a/scripts/tests/test_ecs_helpers.py b/scripts/tests/test_ecs_helpers.py index 2eb5ff0254..79b554ad95 100644 --- a/scripts/tests/test_ecs_helpers.py +++ b/scripts/tests/test_ecs_helpers.py @@ -99,11 +99,19 @@ def test_list_subtract(self): self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a']), ['b']) self.assertEqual(ecs_helpers.list_subtract(['a', 'b'], ['a', 'c']), ['b']) + # git helper tests + def test_get_tree_by_ref(self): ref = 'v1.5.0' tree = ecs_helpers.get_tree_by_ref(ref) self.assertEqual(tree.hexsha, '4449df245f6930d59bcd537a5958891261a9476b') + def test_path_exists_in_git_tree(self): + ref = 'v1.6.0' + tree = ecs_helpers.get_tree_by_ref(ref) + self.assertFalse(ecs_helpers.path_exists_in_git_tree(tree, 'nonexistant')) + self.assertTrue(ecs_helpers.path_exists_in_git_tree(tree, 'schemas')) + if __name__ == '__main__': unittest.main() diff --git a/scripts/tests/unit/test_schema_loader.py b/scripts/tests/unit/test_schema_loader.py index edd585c011..de3a718bd5 100644 --- a/scripts/tests/unit/test_schema_loader.py +++ b/scripts/tests/unit/test_schema_loader.py @@ -79,6 +79,21 @@ def test_load_schemas_no_custom(self): 
fields['process']['fields']['thread'].keys(), "Fields containing nested fields should at least have the 'fields' subkey") + def test_load_schemas_git_ref(self): + fields = loader.load_schemas(ref='v1.6.0') + self.assertEqual( + ['field_details', 'fields', 'schema_details'], + sorted(fields['process'].keys()), + "Schemas should have 'field_details', 'fields' and 'schema_details' subkeys") + self.assertEqual( + ['field_details'], + list(fields['process']['fields']['pid'].keys()), + "Leaf fields should have only the 'field_details' subkey") + self.assertIn( + 'fields', + fields['process']['fields']['thread'].keys(), + "Fields containing nested fields should at least have the 'fields' subkey") + @mock.patch('schema.loader.read_schema_file') def test_load_schemas_fail_on_accidental_fieldset_redefinition(self, mock_read_schema): mock_read_schema.side_effect = [ @@ -124,6 +139,43 @@ def test_nest_schema_raises_on_missing_schema_name(self): with self.assertRaisesRegex(ValueError, 'incomplete.yml'): loader.nest_schema([{'description': 'just a description'}], 'incomplete.yml') + def test_load_schemas_from_git(self): + fields = loader.load_schemas_from_git('v1.0.0', target_dir='schemas') + self.assertEqual( + ['agent', + 'base', + 'client', + 'cloud', + 'container', + 'destination', + 'ecs', + 'error', + 'event', + 'file', + 'geo', + 'group', + 'host', + 'http', + 'log', + 'network', + 'observer', + 'organization', + 'os', + 'process', + 'related', + 'server', + 'service', + 'source', + 'url', + 'user', + 'user_agent'], + sorted(fields.keys()), + "Raw schema fields should have expected fieldsets for v1.0.0") + + def test_load_schemas_from_git_missing_target_directory(self): + with self.assertRaisesRegex(KeyError, "not present in git ref 'v1.5.0'"): + loader.load_schemas_from_git('v1.5.0', target_dir='experimental') + # nesting stuff def test_nest_fields(self):