From 08fb868b63090863e2947687b670313d1a3bd0c7 Mon Sep 17 00:00:00 2001 From: Ian Knox Date: Mon, 10 May 2021 15:45:07 -0500 Subject: [PATCH 1/4] Adds hash to generic test unique_ids --- CHANGELOG.md | 1 + core/dbt/exceptions.py | 4 ++++ core/dbt/parser/base.py | 26 ++++++++++++++++++++------ core/dbt/parser/docs.py | 4 ++-- core/dbt/parser/schemas.py | 29 +++++++++++++++++++++++++++-- 5 files changed, 54 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76076a9ac92..78c535a29be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Contributors: - Ensure that schema test macros are properly processed ([#3229](https://github.com/fishtown-analytics/dbt/issues/3229), [#3272](https://github.com/fishtown-analytics/dbt/pull/3272)) - Use absolute path for profiles directory instead of a path relative to the project directory. Note: If a user supplies a relative path to the profiles directory, the value of `args.profiles_dir` will still be absolute. ([#3133](https://github.com/fishtown-analytics/dbt/issues/3133)) - Fix FQN selector unable to find models whose name contains dots ([#3246](https://github.com/fishtown-analytics/dbt/issues/3246)) +- Fix unique_id generation for generic tests so tests with the same FQN but different configuration will run. ([#3254](https://github.com/fishtown-analytics/dbt/issues/3254)) ### Features - Support commit hashes in dbt deps package revision ([#3268](https://github.com/fishtown-analytics/dbt/issues/3268), [#3270](https://github.com/fishtown-analytics/dbt/pull/3270)) diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 82cd9267bc4..80559e2abbd 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -772,6 +772,10 @@ def raise_duplicate_resource_name(node_1, node_2): elif node_1.resource_type == NodeType.Documentation: get_func = 'doc("{}")'.format(duped_name) elif node_1.resource_type == NodeType.Test and 'schema' in node_1.tags: + # TODO: This causes duplicate schema test errors to be ignored!! + # This PR should ensure this never gets called when the tests are actually different, + # but removing this breaks 50+ tests that currently have the exact same schema + # test defined more than once. :/ return else: get_func = '"{}"'.format(duped_name) diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index e725dc7771a..fa8e6bc2d87 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -2,7 +2,7 @@ import itertools import os from typing import ( - List, Dict, Any, Generic, TypeVar + List, Dict, Any, Generic, Optional, TypeVar ) from dbt.dataclass_schema import ValidationError @@ -55,11 +55,25 @@ def parse_file(self, block: FileBlock) -> None: def resource_type(self) -> NodeType: pass - def generate_unique_id(self, resource_name: str) -> str: - """Returns a unique identifier for a resource""" - return "{}.{}.{}".format(self.resource_type, - self.project.project_name, - resource_name) + def generate_unique_id( + self, + resource_name: str, + hash: Optional[str] = None + ) -> str: + """Returns a unique identifier for a resource + An optional hash may be passed in to ensure uniqueness for edge cases""" + + return '.'.join( + filter( + None, + [ + self.resource_type, + self.project.project_name, + resource_name, + hash + ] + ) + ) class Parser(BaseParser[FinalValue], Generic[FinalValue]): diff --git a/core/dbt/parser/docs.py b/core/dbt/parser/docs.py index 1a30fb40600..0482d7bff26 100644 --- a/core/dbt/parser/docs.py +++ b/core/dbt/parser/docs.py @@ -1,4 +1,4 @@ -from typing import Iterable +from typing import Iterable, Optional import re @@ -23,7 +23,7 @@ def resource_type(self) -> NodeType: def get_compiled_path(cls, block: FileBlock): return block.path.relative_path - def generate_unique_id(self, resource_name: str) -> str: + def generate_unique_id(self, resource_name: str, _: Optional[str] = None) -> str: # because docs are in their own graph namespace, node type doesn't # need to be part of the unique ID. return '{}.{}'.format(self.project.project_name, resource_name) diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 316ef5c9d70..a4aed6fd033 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -2,8 +2,9 @@ import os from abc import ABCMeta, abstractmethod +from hashlib import md5 from typing import ( - Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type + Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type, Iterator ) from dbt.dataclass_schema import ValidationError, dbtClassMixin @@ -351,6 +352,30 @@ def create_test_node( column_name: Optional[str], ) -> ParsedSchemaTestNode: + # TODO: (PR input needed) Should this be applied to all tests? + # In theory we already check non-schema tests for "unique" name uniqueness in + # core/dbt/contracts/graph/manifest.py @ _check_duplicates() + + HASH_LENGTH = 10 + + if 'schema' in tags: + # TODO: (PR input needed) is this a complete set of uniquie-ifying data? + hash_data: Iterator = filter( + None, + [ + name, + test_metadata.get('kwargs').get('model'), # type: ignore + column_name + ]) + hash_string = ''.join(hash_data).encode('utf-8') + test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:] + unique_id = self.generate_unique_id( + name, + test_hash + ) + else: + unique_id = self.generate_unique_id(name) + dct = { 'alias': name, 'schema': self.default_schema, @@ -364,7 +389,7 @@ def create_test_node( 'original_file_path': target.original_file_path, 'package_name': self.project.project_name, 'raw_sql': raw_sql, - 'unique_id': self.generate_unique_id(name), + 'unique_id': unique_id, 'config': self.config_dict(config), 'test_metadata': test_metadata, 'column_name': column_name, From 2065db23832fd00c5648c6226c02ae010f40b5b0 Mon Sep 17 00:00:00 2001 From: Ian Knox Date: Mon, 10 May 2021 15:49:50 -0500 Subject: [PATCH 2/4] Testing changes to support hashes --- .../name_collision/base.sql | 1 + .../name_collision/base_extension.sql | 1 + .../name_collision/schema.yml | 12 ++++++ .../test_schema_v2_tests.py | 33 +++++++++++++++- .../test_docs_generate.py | 38 +++++++++---------- test/unit/test_parser.py | 6 +-- 6 files changed, 68 insertions(+), 23 deletions(-) create mode 100644 test/integration/008_schema_tests_test/name_collision/base.sql create mode 100644 test/integration/008_schema_tests_test/name_collision/base_extension.sql create mode 100644 test/integration/008_schema_tests_test/name_collision/schema.yml diff --git a/test/integration/008_schema_tests_test/name_collision/base.sql b/test/integration/008_schema_tests_test/name_collision/base.sql new file mode 100644 index 00000000000..f59dcad4cf4 --- /dev/null +++ b/test/integration/008_schema_tests_test/name_collision/base.sql @@ -0,0 +1 @@ +SELECT 'hello_world' AS extension_id \ No newline at end of file diff --git a/test/integration/008_schema_tests_test/name_collision/base_extension.sql b/test/integration/008_schema_tests_test/name_collision/base_extension.sql new file mode 100644 index 00000000000..f09828a9a69 --- /dev/null +++ b/test/integration/008_schema_tests_test/name_collision/base_extension.sql @@ -0,0 +1 @@ +SELECT 'NOT_NULL' AS id \ No newline at end of file diff --git a/test/integration/008_schema_tests_test/name_collision/schema.yml b/test/integration/008_schema_tests_test/name_collision/schema.yml new file mode 100644 index 00000000000..2c950582099 --- /dev/null +++ b/test/integration/008_schema_tests_test/name_collision/schema.yml @@ -0,0 +1,12 @@ +version: 2 +models: +- name: base + columns: + - name: extension_id + tests: + - not_null +- name: base_extension + columns: + - name: id + tests: + - not_null diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py index 75c3e77b6f2..c3843127359 100644 --- a/test/integration/008_schema_tests_test/test_schema_v2_tests.py +++ b/test/integration/008_schema_tests_test/test_schema_v2_tests.py @@ -22,7 +22,6 @@ def models(self): def run_schema_validations(self): args = FakeArgs() - test_task = TestTask(args, self.config) return test_task.run() @@ -377,3 +376,35 @@ def test_postgres_test_context_tests(self): # was rendered correctly self.assertRegex(result.node.compiled_sql, r'union all') + +class TestSchemaTestNameCollision(DBTIntegrationTest): + @property + def schema(self): + return "schema_tests_008" + + @property + def models(self): + return "name_collision" + + def run_schema_tests(self): + args = FakeArgs() + test_task = TestTask(args, self.config) + return test_task.run() + + @use_profile('postgres') + def test_postgres_collision_test_names_get_hash(self): + """The models should produce unique IDs with a has appended""" + results = self.run_dbt() + test_results = self.run_schema_tests() + + # both models and both tests run + self.assertEqual(len(results), 2) + self.assertEqual(len(test_results), 2) + + # both tests have the same unique id except for the hash + expected_unique_ids = [ + 'test.test.not_null_base_extension_id.140fbfb43e', + 'test.test.not_null_base_extension_id.d95dd6d353' + ] + self.assertIn(test_results[0].node.unique_id, expected_unique_ids) + self.assertIn(test_results[1].node.unique_id, expected_unique_ids) \ No newline at end of file diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index ac34932a6e1..b336de3877e 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -1333,7 +1333,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'checksum': self._checksum_file(seed_path), 'unrendered_config': unrendered_seed_config, }, - 'test.test.not_null_model_id': { + 'test.test.not_null_model_id.580992bcfd': { 'alias': 'not_null_model_id', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/not_null_model_id.sql'), 'column_name': 'id', @@ -1361,7 +1361,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'database': self.default_database, 'tags': ['schema'], 'meta': {}, - 'unique_id': 'test.test.not_null_model_id', + 'unique_id': 'test.test.not_null_model_id.580992bcfd', 'docs': {'show': True}, 'compiled': True, 'compiled_sql': AnyStringWith('where id is null'), @@ -1420,7 +1420,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'unique_id': 'snapshot.test.snapshot_seed', 'unrendered_config': unrendered_snapshot_config, }, - 'test.test.test_nothing_model_': { + 'test.test.test_nothing_model_.9c8389734f': { 'alias': 'test_nothing_model_', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/test_nothing_model_.sql'), 'column_name': None, @@ -1448,7 +1448,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'database': self.default_database, 'tags': ['schema'], 'meta': {}, - 'unique_id': 'test.test.test_nothing_model_', + 'unique_id': 'test.test.test_nothing_model_.9c8389734f', 'docs': {'show': True}, 'compiled': True, 'compiled_sql': AnyStringWith('select 0'), @@ -1464,7 +1464,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'checksum': {'name': 'none', 'checksum': ''}, 'unrendered_config': unrendered_test_config, }, - 'test.test.unique_model_id': { + 'test.test.unique_model_id.0ee1634138': { 'alias': 'unique_model_id', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/unique_model_id.sql'), 'column_name': 'id', @@ -1492,7 +1492,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'database': self.default_database, 'tags': ['schema'], 'meta': {}, - 'unique_id': 'test.test.unique_model_id', + 'unique_id': 'test.test.unique_model_id.0ee1634138', 'docs': {'show': True}, 'compiled': True, 'compiled_sql': AnyStringWith('count(*)'), @@ -1621,17 +1621,17 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'seed.test.seed': [], 'snapshot.test.snapshot_seed': ['seed.test.seed'], 'source.test.my_source.my_table': [], - 'test.test.not_null_model_id': ['model.test.model'], - 'test.test.test_nothing_model_': ['model.test.model'], - 'test.test.unique_model_id': ['model.test.model'], + 'test.test.not_null_model_id.580992bcfd': ['model.test.model'], + 'test.test.test_nothing_model_.9c8389734f': ['model.test.model'], + 'test.test.unique_model_id.0ee1634138': ['model.test.model'], }, 'child_map': { 'model.test.model': [ 'exposure.test.notebook_exposure', 'exposure.test.simple_exposure', - 'test.test.not_null_model_id', - 'test.test.test_nothing_model_', - 'test.test.unique_model_id', + 'test.test.not_null_model_id.580992bcfd', + 'test.test.test_nothing_model_.9c8389734f', + 'test.test.unique_model_id.0ee1634138', ], 'model.test.second_model': ['exposure.test.notebook_exposure'], 'exposure.test.notebook_exposure': [], @@ -1641,9 +1641,9 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'snapshot.test.snapshot_seed'], 'snapshot.test.snapshot_seed': [], 'source.test.my_source.my_table': ['exposure.test.simple_exposure'], - 'test.test.not_null_model_id': [], - 'test.test.test_nothing_model_': [], - 'test.test.unique_model_id': [], + 'test.test.not_null_model_id.580992bcfd': [], + 'test.test.test_nothing_model_.9c8389734f': [], + 'test.test.unique_model_id.0ee1634138': [], }, 'docs': { 'dbt.__overview__': ANY, @@ -2921,7 +2921,7 @@ def verify_manifest(self, expected_manifest): else: self.assertIn(key, expected_manifest) # sanity check self.assertEqual(manifest[key], expected_manifest[key]) - + def _quote(self, value): quote_char = '`' if self.adapter_type == 'bigquery' else '"' return '{0}{1}{0}'.format(quote_char, value) @@ -2972,7 +2972,7 @@ def expected_run_results(self): 'status': 'success', 'message': None, 'execution_time': AnyFloat(), - 'unique_id': 'test.test.not_null_model_id', + 'unique_id': 'test.test.not_null_model_id.580992bcfd', 'adapter_response': ANY, 'thread_id': ANY, 'timing': [ANY, ANY], @@ -2981,7 +2981,7 @@ def expected_run_results(self): 'status': 'success', 'message': None, 'execution_time': AnyFloat(), - 'unique_id': 'test.test.test_nothing_model_', + 'unique_id': 'test.test.test_nothing_model_.9c8389734f', 'adapter_response': ANY, 'thread_id': ANY, 'timing': [ANY, ANY], @@ -2990,7 +2990,7 @@ def expected_run_results(self): 'status': 'success', 'message': None, 'execution_time': AnyFloat(), - 'unique_id': 'test.test.unique_model_id', + 'unique_id': 'test.test.unique_model_id.0ee1634138', 'adapter_response': ANY, 'thread_id': ANY, 'timing': [ANY, ANY], diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index d59b2852329..c5680419248 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -393,7 +393,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[0].package_name, 'snowplow') self.assertTrue(tests[0].name.startswith('accepted_values_')) self.assertEqual(tests[0].fqn, ['snowplow', 'schema_test', tests[0].name]) - self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name]) + self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name, 'fe3f9c5376']) self.assertEqual(tests[0].test_metadata.name, 'accepted_values') self.assertIsNone(tests[0].test_metadata.namespace) self.assertEqual( @@ -415,7 +415,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[1].fqn, ['snowplow', 'schema_test', tests[1].name]) self.assertTrue(tests[1].name.startswith('foreign_package_test_case_')) self.assertEqual(tests[1].package_name, 'snowplow') - self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name]) + self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name, '4f5bdeb105']) self.assertEqual(tests[1].test_metadata.name, 'test_case') self.assertEqual(tests[1].test_metadata.namespace, 'foreign_package') self.assertEqual( @@ -434,7 +434,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[2].package_name, 'snowplow') self.assertTrue(tests[2].name.startswith('not_null_')) self.assertEqual(tests[2].fqn, ['snowplow', 'schema_test', tests[2].name]) - self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name]) + self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name, '651854c872']) self.assertEqual(tests[2].test_metadata.name, 'not_null') self.assertIsNone(tests[2].test_metadata.namespace) self.assertEqual( From 10cd06f515ccaf44bbc7ec50b52669834731e67a Mon Sep 17 00:00:00 2001 From: Ian Knox Date: Wed, 12 May 2021 08:38:50 -0500 Subject: [PATCH 3/4] PR feedback --- core/dbt/exceptions.py | 4 -- core/dbt/parser/schemas.py | 39 ++++++++----------- .../test_schema_v2_tests.py | 4 +- .../test_docs_generate.py | 36 ++++++++--------- test/unit/test_parser.py | 6 +-- 5 files changed, 39 insertions(+), 50 deletions(-) diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 80559e2abbd..82cd9267bc4 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -772,10 +772,6 @@ def raise_duplicate_resource_name(node_1, node_2): elif node_1.resource_type == NodeType.Documentation: get_func = 'doc("{}")'.format(duped_name) elif node_1.resource_type == NodeType.Test and 'schema' in node_1.tags: - # TODO: This causes duplicate schema test errors to be ignored!! - # This PR should ensure this never gets called when the tests are actually different, - # but removing this breaks 50+ tests that currently have the exact same schema - # test defined more than once. :/ return else: get_func = '"{}"'.format(duped_name) diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index a4aed6fd033..99909f5f8be 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -4,7 +4,7 @@ from abc import ABCMeta, abstractmethod from hashlib import md5 from typing import ( - Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type, Iterator + Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type ) from dbt.dataclass_schema import ValidationError, dbtClassMixin @@ -352,29 +352,22 @@ def create_test_node( column_name: Optional[str], ) -> ParsedSchemaTestNode: - # TODO: (PR input needed) Should this be applied to all tests? - # In theory we already check non-schema tests for "unique" name uniqueness in - # core/dbt/contracts/graph/manifest.py @ _check_duplicates() - HASH_LENGTH = 10 - if 'schema' in tags: - # TODO: (PR input needed) is this a complete set of uniquie-ifying data? - hash_data: Iterator = filter( - None, - [ - name, - test_metadata.get('kwargs').get('model'), # type: ignore - column_name - ]) - hash_string = ''.join(hash_data).encode('utf-8') - test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:] - unique_id = self.generate_unique_id( - name, - test_hash - ) - else: - unique_id = self.generate_unique_id(name) + def get_hashable_md( + data: Union[str, int, float, List, Dict] + ) -> Union[str, List, Dict]: + if type(data) == dict: + return {k: get_hashable_md(data[k]) for k in sorted(data.keys())} # type: ignore + elif type(data) == list: + return [get_hashable_md(val) for val in data] # type: ignore + else: + return str(data) + + hashable_metadata = repr(get_hashable_md(test_metadata)) + hash_list = filter(None, [name, hashable_metadata]) + hash_string = ''.join(hash_list).encode('utf-8') + test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:] dct = { 'alias': name, @@ -389,7 +382,7 @@ def create_test_node( 'original_file_path': target.original_file_path, 'package_name': self.project.project_name, 'raw_sql': raw_sql, - 'unique_id': unique_id, + 'unique_id': self.generate_unique_id(name, test_hash), 'config': self.config_dict(config), 'test_metadata': test_metadata, 'column_name': column_name, diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py index c3843127359..380cb426fa6 100644 --- a/test/integration/008_schema_tests_test/test_schema_v2_tests.py +++ b/test/integration/008_schema_tests_test/test_schema_v2_tests.py @@ -403,8 +403,8 @@ def test_postgres_collision_test_names_get_hash(self): # both tests have the same unique id except for the hash expected_unique_ids = [ - 'test.test.not_null_base_extension_id.140fbfb43e', - 'test.test.not_null_base_extension_id.d95dd6d353' + 'test.test.not_null_base_extension_id.2dbb9627b6', + 'test.test.not_null_base_extension_id.d70fc39f40' ] self.assertIn(test_results[0].node.unique_id, expected_unique_ids) self.assertIn(test_results[1].node.unique_id, expected_unique_ids) \ No newline at end of file diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index b336de3877e..7cbce99bc6b 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -1333,7 +1333,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'checksum': self._checksum_file(seed_path), 'unrendered_config': unrendered_seed_config, }, - 'test.test.not_null_model_id.580992bcfd': { + 'test.test.not_null_model_id.8ab00aacfe': { 'alias': 'not_null_model_id', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/not_null_model_id.sql'), 'column_name': 'id', @@ -1361,7 +1361,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'database': self.default_database, 'tags': ['schema'], 'meta': {}, - 'unique_id': 'test.test.not_null_model_id.580992bcfd', + 'unique_id': 'test.test.not_null_model_id.8ab00aacfe', 'docs': {'show': True}, 'compiled': True, 'compiled_sql': AnyStringWith('where id is null'), @@ -1420,7 +1420,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'unique_id': 'snapshot.test.snapshot_seed', 'unrendered_config': unrendered_snapshot_config, }, - 'test.test.test_nothing_model_.9c8389734f': { + 'test.test.test_nothing_model_.596f819934': { 'alias': 'test_nothing_model_', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/test_nothing_model_.sql'), 'column_name': None, @@ -1448,7 +1448,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'database': self.default_database, 'tags': ['schema'], 'meta': {}, - 'unique_id': 'test.test.test_nothing_model_.9c8389734f', + 'unique_id': 'test.test.test_nothing_model_.596f819934', 'docs': {'show': True}, 'compiled': True, 'compiled_sql': AnyStringWith('select 0'), @@ -1464,7 +1464,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'checksum': {'name': 'none', 'checksum': ''}, 'unrendered_config': unrendered_test_config, }, - 'test.test.unique_model_id.0ee1634138': { + 'test.test.unique_model_id.3de6fa785f': { 'alias': 'unique_model_id', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/unique_model_id.sql'), 'column_name': 'id', @@ -1492,7 +1492,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'database': self.default_database, 'tags': ['schema'], 'meta': {}, - 'unique_id': 'test.test.unique_model_id.0ee1634138', + 'unique_id': 'test.test.unique_model_id.3de6fa785f', 'docs': {'show': True}, 'compiled': True, 'compiled_sql': AnyStringWith('count(*)'), @@ -1621,17 +1621,17 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'seed.test.seed': [], 'snapshot.test.snapshot_seed': ['seed.test.seed'], 'source.test.my_source.my_table': [], - 'test.test.not_null_model_id.580992bcfd': ['model.test.model'], - 'test.test.test_nothing_model_.9c8389734f': ['model.test.model'], - 'test.test.unique_model_id.0ee1634138': ['model.test.model'], + 'test.test.not_null_model_id.8ab00aacfe': ['model.test.model'], + 'test.test.test_nothing_model_.596f819934': ['model.test.model'], + 'test.test.unique_model_id.3de6fa785f': ['model.test.model'], }, 'child_map': { 'model.test.model': [ 'exposure.test.notebook_exposure', 'exposure.test.simple_exposure', - 'test.test.not_null_model_id.580992bcfd', - 'test.test.test_nothing_model_.9c8389734f', - 'test.test.unique_model_id.0ee1634138', + 'test.test.not_null_model_id.8ab00aacfe', + 'test.test.test_nothing_model_.596f819934', + 'test.test.unique_model_id.3de6fa785f', ], 'model.test.second_model': ['exposure.test.notebook_exposure'], 'exposure.test.notebook_exposure': [], @@ -1641,9 +1641,9 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'snapshot.test.snapshot_seed'], 'snapshot.test.snapshot_seed': [], 'source.test.my_source.my_table': ['exposure.test.simple_exposure'], - 'test.test.not_null_model_id.580992bcfd': [], - 'test.test.test_nothing_model_.9c8389734f': [], - 'test.test.unique_model_id.0ee1634138': [], + 'test.test.not_null_model_id.8ab00aacfe': [], + 'test.test.test_nothing_model_.596f819934': [], + 'test.test.unique_model_id.3de6fa785f': [], }, 'docs': { 'dbt.__overview__': ANY, @@ -2972,7 +2972,7 @@ def expected_run_results(self): 'status': 'success', 'message': None, 'execution_time': AnyFloat(), - 'unique_id': 'test.test.not_null_model_id.580992bcfd', + 'unique_id': 'test.test.not_null_model_id.8ab00aacfe', 'adapter_response': ANY, 'thread_id': ANY, 'timing': [ANY, ANY], @@ -2981,7 +2981,7 @@ def expected_run_results(self): 'status': 'success', 'message': None, 'execution_time': AnyFloat(), - 'unique_id': 'test.test.test_nothing_model_.9c8389734f', + 'unique_id': 'test.test.test_nothing_model_.596f819934', 'adapter_response': ANY, 'thread_id': ANY, 'timing': [ANY, ANY], @@ -2990,7 +2990,7 @@ def expected_run_results(self): 'status': 'success', 'message': None, 'execution_time': AnyFloat(), - 'unique_id': 'test.test.unique_model_id.0ee1634138', + 'unique_id': 'test.test.unique_model_id.3de6fa785f', 'adapter_response': ANY, 'thread_id': ANY, 'timing': [ANY, ANY], diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index c5680419248..582888ae469 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -393,7 +393,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[0].package_name, 'snowplow') self.assertTrue(tests[0].name.startswith('accepted_values_')) self.assertEqual(tests[0].fqn, ['snowplow', 'schema_test', tests[0].name]) - self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name, 'fe3f9c5376']) + self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name, '468aa12dc7']) self.assertEqual(tests[0].test_metadata.name, 'accepted_values') self.assertIsNone(tests[0].test_metadata.namespace) self.assertEqual( @@ -415,7 +415,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[1].fqn, ['snowplow', 'schema_test', tests[1].name]) self.assertTrue(tests[1].name.startswith('foreign_package_test_case_')) self.assertEqual(tests[1].package_name, 'snowplow') - self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name, '4f5bdeb105']) + self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name, 'b1daa30e01']) self.assertEqual(tests[1].test_metadata.name, 'test_case') self.assertEqual(tests[1].test_metadata.namespace, 'foreign_package') self.assertEqual( @@ -434,7 +434,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[2].package_name, 'snowplow') self.assertTrue(tests[2].name.startswith('not_null_')) self.assertEqual(tests[2].fqn, ['snowplow', 'schema_test', tests[2].name]) - self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name, '651854c872']) + self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name, 'bb12aa4687']) self.assertEqual(tests[2].test_metadata.name, 'not_null') self.assertIsNone(tests[2].test_metadata.namespace) self.assertEqual( From 95fc6d43e7f4f44119b52000e46d6f719653b94f Mon Sep 17 00:00:00 2001 From: Ian Knox Date: Wed, 12 May 2021 13:29:24 -0500 Subject: [PATCH 4/4] Additional PR feedback --- core/dbt/parser/schemas.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 99909f5f8be..a158c888282 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -354,6 +354,9 @@ def create_test_node( HASH_LENGTH = 10 + # N.B: This function builds a hashable string from any given test_metadata dict. + # it's a bit fragile for general use (only supports str, int, float, List, Dict) + # but it gets the job done here without the overhead of complete ser(de). def get_hashable_md( data: Union[str, int, float, List, Dict] ) -> Union[str, List, Dict]: @@ -365,8 +368,7 @@ def get_hashable_md( return str(data) hashable_metadata = repr(get_hashable_md(test_metadata)) - hash_list = filter(None, [name, hashable_metadata]) - hash_string = ''.join(hash_list).encode('utf-8') + hash_string = ''.join([name, hashable_metadata]).encode('utf-8') test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:] dct = {