diff --git a/CHANGELOG.md b/CHANGELOG.md index 76076a9ac92..78c535a29be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Contributors: - Ensure that schema test macros are properly processed ([#3229](https://github.com/fishtown-analytics/dbt/issues/3229), [#3272](https://github.com/fishtown-analytics/dbt/pull/3272)) - Use absolute path for profiles directory instead of a path relative to the project directory. Note: If a user supplies a relative path to the profiles directory, the value of `args.profiles_dir` will still be absolute. ([#3133](https://github.com/fishtown-analytics/dbt/issues/3133)) - Fix FQN selector unable to find models whose name contains dots ([#3246](https://github.com/fishtown-analytics/dbt/issues/3246)) +- Fix unique_id generation for generic tests so tests with the same FQN but different configuration will run. ([#3254](https://github.com/fishtown-analytics/dbt/issues/3254)) ### Features - Support commit hashes in dbt deps package revision ([#3268](https://github.com/fishtown-analytics/dbt/issues/3268), [#3270](https://github.com/fishtown-analytics/dbt/pull/3270)) diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index e725dc7771a..fa8e6bc2d87 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -2,7 +2,7 @@ import itertools import os from typing import ( - List, Dict, Any, Generic, TypeVar + List, Dict, Any, Generic, Optional, TypeVar ) from dbt.dataclass_schema import ValidationError @@ -55,11 +55,25 @@ def parse_file(self, block: FileBlock) -> None: def resource_type(self) -> NodeType: pass - def generate_unique_id(self, resource_name: str) -> str: - """Returns a unique identifier for a resource""" - return "{}.{}.{}".format(self.resource_type, - self.project.project_name, - resource_name) + def generate_unique_id( + self, + resource_name: str, + hash: Optional[str] = None + ) -> str: + """Returns a unique identifier for a resource + An optional hash may be passed in to ensure uniqueness for edge cases""" + + return '.'.join( + filter( + None, + [ + self.resource_type, + self.project.project_name, + resource_name, + hash + ] + ) + ) class Parser(BaseParser[FinalValue], Generic[FinalValue]): diff --git a/core/dbt/parser/docs.py b/core/dbt/parser/docs.py index 1a30fb40600..0482d7bff26 100644 --- a/core/dbt/parser/docs.py +++ b/core/dbt/parser/docs.py @@ -1,4 +1,4 @@ -from typing import Iterable +from typing import Iterable, Optional import re @@ -23,7 +23,7 @@ def resource_type(self) -> NodeType: def get_compiled_path(cls, block: FileBlock): return block.path.relative_path - def generate_unique_id(self, resource_name: str) -> str: + def generate_unique_id(self, resource_name: str, _: Optional[str] = None) -> str: # because docs are in their own graph namespace, node type doesn't # need to be part of the unique ID. return '{}.{}'.format(self.project.project_name, resource_name) diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 316ef5c9d70..a158c888282 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -2,6 +2,7 @@ import os from abc import ABCMeta, abstractmethod +from hashlib import md5 from typing import ( Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type ) @@ -351,6 +352,25 @@ def create_test_node( column_name: Optional[str], ) -> ParsedSchemaTestNode: + HASH_LENGTH = 10 + + # N.B: This function builds a hashable string from any given test_metadata dict. + # it's a bit fragile for general use (only supports str, int, float, List, Dict) + # but it gets the job done here without the overhead of complete ser(de). + def get_hashable_md( + data: Union[str, int, float, List, Dict] + ) -> Union[str, List, Dict]: + if type(data) == dict: + return {k: get_hashable_md(data[k]) for k in sorted(data.keys())} # type: ignore + elif type(data) == list: + return [get_hashable_md(val) for val in data] # type: ignore + else: + return str(data) + + hashable_metadata = repr(get_hashable_md(test_metadata)) + hash_string = ''.join([name, hashable_metadata]).encode('utf-8') + test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:] + dct = { 'alias': name, 'schema': self.default_schema, @@ -364,7 +384,7 @@ def create_test_node( 'original_file_path': target.original_file_path, 'package_name': self.project.project_name, 'raw_sql': raw_sql, - 'unique_id': self.generate_unique_id(name), + 'unique_id': self.generate_unique_id(name, test_hash), 'config': self.config_dict(config), 'test_metadata': test_metadata, 'column_name': column_name, diff --git a/test/integration/008_schema_tests_test/name_collision/base.sql b/test/integration/008_schema_tests_test/name_collision/base.sql new file mode 100644 index 00000000000..f59dcad4cf4 --- /dev/null +++ b/test/integration/008_schema_tests_test/name_collision/base.sql @@ -0,0 +1 @@ +SELECT 'hello_world' AS extension_id \ No newline at end of file diff --git a/test/integration/008_schema_tests_test/name_collision/base_extension.sql b/test/integration/008_schema_tests_test/name_collision/base_extension.sql new file mode 100644 index 00000000000..f09828a9a69 --- /dev/null +++ b/test/integration/008_schema_tests_test/name_collision/base_extension.sql @@ -0,0 +1 @@ +SELECT 'NOT_NULL' AS id \ No newline at end of file diff --git a/test/integration/008_schema_tests_test/name_collision/schema.yml b/test/integration/008_schema_tests_test/name_collision/schema.yml new file mode 100644 index 00000000000..2c950582099 --- /dev/null +++ b/test/integration/008_schema_tests_test/name_collision/schema.yml @@ -0,0 +1,12 @@ +version: 2 +models: +- name: base + columns: + - name: extension_id + tests: + - not_null +- name: base_extension + columns: + - name: id + tests: + - not_null diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py index 75c3e77b6f2..380cb426fa6 100644 --- a/test/integration/008_schema_tests_test/test_schema_v2_tests.py +++ b/test/integration/008_schema_tests_test/test_schema_v2_tests.py @@ -22,7 +22,6 @@ def models(self): def run_schema_validations(self): args = FakeArgs() - test_task = TestTask(args, self.config) return test_task.run() @@ -377,3 +376,35 @@ def test_postgres_test_context_tests(self): # was rendered correctly self.assertRegex(result.node.compiled_sql, r'union all') + +class TestSchemaTestNameCollision(DBTIntegrationTest): + @property + def schema(self): + return "schema_tests_008" + + @property + def models(self): + return "name_collision" + + def run_schema_tests(self): + args = FakeArgs() + test_task = TestTask(args, self.config) + return test_task.run() + + @use_profile('postgres') + def test_postgres_collision_test_names_get_hash(self): + """The models should produce unique IDs with a has appended""" + results = self.run_dbt() + test_results = self.run_schema_tests() + + # both models and both tests run + self.assertEqual(len(results), 2) + self.assertEqual(len(test_results), 2) + + # both tests have the same unique id except for the hash + expected_unique_ids = [ + 'test.test.not_null_base_extension_id.2dbb9627b6', + 'test.test.not_null_base_extension_id.d70fc39f40' + ] + self.assertIn(test_results[0].node.unique_id, expected_unique_ids) + self.assertIn(test_results[1].node.unique_id, expected_unique_ids) \ No newline at end of file diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index ac34932a6e1..7cbce99bc6b 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -1333,7 +1333,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'checksum': self._checksum_file(seed_path), 'unrendered_config': unrendered_seed_config, }, - 'test.test.not_null_model_id': { + 'test.test.not_null_model_id.8ab00aacfe': { 'alias': 'not_null_model_id', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/not_null_model_id.sql'), 'column_name': 'id', @@ -1361,7 +1361,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'database': self.default_database, 'tags': ['schema'], 'meta': {}, - 'unique_id': 'test.test.not_null_model_id', + 'unique_id': 'test.test.not_null_model_id.8ab00aacfe', 'docs': {'show': True}, 'compiled': True, 'compiled_sql': AnyStringWith('where id is null'), @@ -1420,7 +1420,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'unique_id': 'snapshot.test.snapshot_seed', 'unrendered_config': unrendered_snapshot_config, }, - 'test.test.test_nothing_model_': { + 'test.test.test_nothing_model_.596f819934': { 'alias': 'test_nothing_model_', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/test_nothing_model_.sql'), 'column_name': None, @@ -1448,7 +1448,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'database': self.default_database, 'tags': ['schema'], 'meta': {}, - 'unique_id': 'test.test.test_nothing_model_', + 'unique_id': 'test.test.test_nothing_model_.596f819934', 'docs': {'show': True}, 'compiled': True, 'compiled_sql': AnyStringWith('select 0'), @@ -1464,7 +1464,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'checksum': {'name': 'none', 'checksum': ''}, 'unrendered_config': unrendered_test_config, }, - 'test.test.unique_model_id': { + 'test.test.unique_model_id.3de6fa785f': { 'alias': 'unique_model_id', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/unique_model_id.sql'), 'column_name': 'id', @@ -1492,7 +1492,7 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'database': self.default_database, 'tags': ['schema'], 'meta': {}, - 'unique_id': 'test.test.unique_model_id', + 'unique_id': 'test.test.unique_model_id.3de6fa785f', 'docs': {'show': True}, 'compiled': True, 'compiled_sql': AnyStringWith('count(*)'), @@ -1621,17 +1621,17 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'seed.test.seed': [], 'snapshot.test.snapshot_seed': ['seed.test.seed'], 'source.test.my_source.my_table': [], - 'test.test.not_null_model_id': ['model.test.model'], - 'test.test.test_nothing_model_': ['model.test.model'], - 'test.test.unique_model_id': ['model.test.model'], + 'test.test.not_null_model_id.8ab00aacfe': ['model.test.model'], + 'test.test.test_nothing_model_.596f819934': ['model.test.model'], + 'test.test.unique_model_id.3de6fa785f': ['model.test.model'], }, 'child_map': { 'model.test.model': [ 'exposure.test.notebook_exposure', 'exposure.test.simple_exposure', - 'test.test.not_null_model_id', - 'test.test.test_nothing_model_', - 'test.test.unique_model_id', + 'test.test.not_null_model_id.8ab00aacfe', + 'test.test.test_nothing_model_.596f819934', + 'test.test.unique_model_id.3de6fa785f', ], 'model.test.second_model': ['exposure.test.notebook_exposure'], 'exposure.test.notebook_exposure': [], @@ -1641,9 +1641,9 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False): 'snapshot.test.snapshot_seed'], 'snapshot.test.snapshot_seed': [], 'source.test.my_source.my_table': ['exposure.test.simple_exposure'], - 'test.test.not_null_model_id': [], - 'test.test.test_nothing_model_': [], - 'test.test.unique_model_id': [], + 'test.test.not_null_model_id.8ab00aacfe': [], + 'test.test.test_nothing_model_.596f819934': [], + 'test.test.unique_model_id.3de6fa785f': [], }, 'docs': { 'dbt.__overview__': ANY, @@ -2921,7 +2921,7 @@ def verify_manifest(self, expected_manifest): else: self.assertIn(key, expected_manifest) # sanity check self.assertEqual(manifest[key], expected_manifest[key]) - + def _quote(self, value): quote_char = '`' if self.adapter_type == 'bigquery' else '"' return '{0}{1}{0}'.format(quote_char, value) @@ -2972,7 +2972,7 @@ def expected_run_results(self): 'status': 'success', 'message': None, 'execution_time': AnyFloat(), - 'unique_id': 'test.test.not_null_model_id', + 'unique_id': 'test.test.not_null_model_id.8ab00aacfe', 'adapter_response': ANY, 'thread_id': ANY, 'timing': [ANY, ANY], @@ -2981,7 +2981,7 @@ def expected_run_results(self): 'status': 'success', 'message': None, 'execution_time': AnyFloat(), - 'unique_id': 'test.test.test_nothing_model_', + 'unique_id': 'test.test.test_nothing_model_.596f819934', 'adapter_response': ANY, 'thread_id': ANY, 'timing': [ANY, ANY], @@ -2990,7 +2990,7 @@ def expected_run_results(self): 'status': 'success', 'message': None, 'execution_time': AnyFloat(), - 'unique_id': 'test.test.unique_model_id', + 'unique_id': 'test.test.unique_model_id.3de6fa785f', 'adapter_response': ANY, 'thread_id': ANY, 'timing': [ANY, ANY], diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index d59b2852329..582888ae469 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -393,7 +393,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[0].package_name, 'snowplow') self.assertTrue(tests[0].name.startswith('accepted_values_')) self.assertEqual(tests[0].fqn, ['snowplow', 'schema_test', tests[0].name]) - self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name]) + self.assertEqual(tests[0].unique_id.split('.'), ['test', 'snowplow', tests[0].name, '468aa12dc7']) self.assertEqual(tests[0].test_metadata.name, 'accepted_values') self.assertIsNone(tests[0].test_metadata.namespace) self.assertEqual( @@ -415,7 +415,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[1].fqn, ['snowplow', 'schema_test', tests[1].name]) self.assertTrue(tests[1].name.startswith('foreign_package_test_case_')) self.assertEqual(tests[1].package_name, 'snowplow') - self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name]) + self.assertEqual(tests[1].unique_id.split('.'), ['test', 'snowplow', tests[1].name, 'b1daa30e01']) self.assertEqual(tests[1].test_metadata.name, 'test_case') self.assertEqual(tests[1].test_metadata.namespace, 'foreign_package') self.assertEqual( @@ -434,7 +434,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[2].package_name, 'snowplow') self.assertTrue(tests[2].name.startswith('not_null_')) self.assertEqual(tests[2].fqn, ['snowplow', 'schema_test', tests[2].name]) - self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name]) + self.assertEqual(tests[2].unique_id.split('.'), ['test', 'snowplow', tests[2].name, 'bb12aa4687']) self.assertEqual(tests[2].test_metadata.name, 'not_null') self.assertIsNone(tests[2].test_metadata.namespace) self.assertEqual(