can we just default materialization to test?
Kyle Wigley committed Nov 19, 2020
1 parent ec0f3d2 commit 0e6ac5b
Showing 6 changed files with 206 additions and 114 deletions.
1 change: 1 addition & 0 deletions core/dbt/contracts/graph/model_config.py
@@ -450,6 +450,7 @@ class SeedConfig(NodeConfig):

@dataclass
class TestConfig(NodeConfig):
materialized: str = 'test'

@kwigley (Contributor) commented on Nov 19, 2020:

@jtcohen6 So, I haven't given this much thought, but I think this change achieves the desired behavior for #2806. I noticed that seeds and snapshots have their own materialization type; it feels like tests should as well. This is either a bad idea or a really easy win! It avoids ripping apart node and config object types in the codebase. Let me know if you have any thoughts and if you get a chance to take it for a spin.

@jtcohen6 (Contributor) commented on Nov 19, 2020:

I think this might... just... work! It sure seems to in my quick local testing.

I suppose we could just as well make this materialized: str = 'not_applicable', but I quite like making it 'test', and I think this ends up being better than removing materialized as a property entirely.

This way, we can use config.materialized:test as a node selection criterion. (I've been wanting a resource_type selection method, and I'm glad to realize that config.materialized:seed and config.materialized:snapshot also work.)
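
To make the selection idea concrete, here is a standalone sketch, illustrative only and not dbt's actual selector code; the select_by_config helper and the node IDs are made up for illustration.

```python
# Illustrative sketch only -- not dbt's selector implementation.
# Once each resource type carries its own `materialized` default,
# a config-based filter can isolate tests the same way it already
# isolates seeds ('seed') and snapshots ('snapshot').
nodes = [
    {'unique_id': 'model.test.model', 'config': {'materialized': 'view'}},
    {'unique_id': 'seed.test.seed', 'config': {'materialized': 'seed'}},
    {'unique_id': 'snapshot.test.snap', 'config': {'materialized': 'snapshot'}},
    {'unique_id': 'test.test.not_null_model_id', 'config': {'materialized': 'test'}},
]

def select_by_config(nodes, key, value):
    """Return unique_ids of nodes whose config[key] equals value."""
    return [n['unique_id'] for n in nodes if n['config'].get(key) == value]

print(select_by_config(nodes, 'materialized', 'test'))
# ['test.test.not_null_model_id']
```

On the command line this would presumably look like `dbt ls --select config.materialized:test`, assuming the config selection method applies to test nodes.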

@kwigley (Contributor) commented on Nov 19, 2020:

It feels like we are overloading what materialized means, but that was definitely the case before this change. I'll set up a PR, but I'd like to come back to this after my current work; I didn't want to lose this thought.

@jtcohen6 (Contributor) commented on Nov 19, 2020:

Fair. It's reasonable enough that seeds and snapshots are both materialized values, since they are genuinely materializations and dbt does materialize them as relational objects in the database. So maybe this would be the step that goes too far.

@jtcohen6 (Contributor) commented on Nov 19, 2020:

> dbt does materialize them as relational objects in the database

Of course, dbt would optionally materialize test failures as objects in the database, too, per #2593 :)

severity: Severity = Severity('ERROR')


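The behavior change in this file comes down to that single dataclass default. Below is a minimal, self-contained sketch, using simplified stand-ins rather than the real dbt config classes, showing how overriding the default in a subclass changes the materialization a test config reports.

```python
from dataclasses import dataclass


# Simplified stand-ins for dbt's config classes -- not the real definitions.
@dataclass
class NodeConfig:
    enabled: bool = True
    materialized: str = 'view'  # models default to views


@dataclass
class TestConfig(NodeConfig):
    # The one-line change in this commit: tests report their own
    # materialization name instead of inheriting 'view'.
    materialized: str = 'test'
    severity: str = 'ERROR'  # stand-in; the real field is a Severity value


print(NodeConfig().materialized)  # view
print(TestConfig().materialized)  # test
```

The expected-value updates in the test files below, where 'materialized': 'view' becomes 'materialized': 'test' for test nodes, follow directly from this default.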
81 changes: 51 additions & 30 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
@@ -91,7 +91,8 @@ def test_postgres_include_schema(self):
self.assertEqual(len(manifest['nodes']), 1)
self.assertIn('model.test.model', manifest['nodes'])
self.assertIn('schema', manifest['nodes']['model.test.model'])
self.assertEqual('pg', manifest['nodes']['model.test.model']['schema'][:2])
self.assertEqual('pg', manifest['nodes']
['model.test.model']['schema'][:2])


class TestDocsGenerate(DBTIntegrationTest):
@@ -117,7 +118,6 @@ def tearDown(self):
super().tearDown()
del os.environ['DBT_ENV_CUSTOM_ENV_env_key']


@property
def schema(self):
return 'docs_generate_029'
@@ -209,11 +209,11 @@ def _redshift_stats(self):
"include": True
},
"diststyle": {
"id": "diststyle",
"label": "Dist Style",
"value": AnyStringWith('AUTO'),
"description": "Distribution style or distribution key column, if key distribution is defined.",
"include": True
"id": "diststyle",
"label": "Dist Style",
"value": AnyStringWith('AUTO'),
"description": "Distribution style or distribution key column, if key distribution is defined.",
"include": True
},
"max_varchar": {
"id": "max_varchar",
@@ -349,7 +349,7 @@ def _expected_catalog(self, id_type, text_type, time_type, view_type,
table_type, model_stats, seed_stats=None, case=None,
case_columns=False, model_database=None):
if case is None:
case = lambda x: x
def case(x): return x
col_case = case if case_columns else lambda x: x

if seed_stats is None:
@@ -888,7 +888,8 @@ def verify_catalog(self, expected):

assert set(catalog) == {'errors', 'metadata', 'nodes', 'sources'}

self.verify_metadata(catalog['metadata'], 'https://schemas.getdbt.com/dbt/catalog/v1.json')
self.verify_metadata(
catalog['metadata'], 'https://schemas.getdbt.com/dbt/catalog/v1.json')
assert not catalog['errors']

for key in 'nodes', 'sources':
@@ -988,7 +989,7 @@ def rendered_tst_config(self, **updates):
result = {
'column_types': {},
'enabled': True,
'materialized': 'view',
'materialized': 'test',
'persist_docs': {},
'post-hook': [],
'pre-hook': [],
@@ -1071,10 +1072,13 @@ def expected_seeded_manifest(self, model_database=None, quote_model=False):
model_database = self.alternative_database

model_config = self.rendered_model_config(database=model_database)
second_config = self.rendered_model_config(schema=self.alternate_schema[-4:])
second_config = self.rendered_model_config(
schema=self.alternate_schema[-4:])

unrendered_model_config = self.unrendered_model_config(database=model_database, materialized='view')
unrendered_second_config = self.unrendered_model_config(schema=self.alternate_schema[-4:], materialized='view')
unrendered_model_config = self.unrendered_model_config(
database=model_database, materialized='view')
unrendered_second_config = self.unrendered_model_config(
schema=self.alternate_schema[-4:], materialized='view')

seed_config = self.rendered_seed_config()
unrendered_seed_config = self.unrendered_seed_config()
@@ -2143,7 +2147,8 @@ def expected_bigquery_complex_manifest(self):
'config': self.rendered_model_config(
cluster_by=['first_name'],
materialized='table',
partition_by={'field': 'updated_at', 'data_type': 'date'},
partition_by={'field': 'updated_at',
'data_type': 'date'},
),
'sources': [],
'depends_on': {'macros': [], 'nodes': ['seed.test.seed']},
@@ -2219,7 +2224,8 @@ def expected_bigquery_complex_manifest(self):
'unrendered_config': self.unrendered_model_config(
cluster_by=['first_name'],
materialized='table',
partition_by={'field': 'updated_at', 'data_type': 'date'},
partition_by={'field': 'updated_at',
'data_type': 'date'},
),
},
'model.test.multi_clustered': {
@@ -2228,7 +2234,8 @@ def expected_bigquery_complex_manifest(self):
'config': self.rendered_model_config(
cluster_by=['first_name', 'email'],
materialized='table',
partition_by={'field': 'updated_at', 'data_type': 'date'}
partition_by={'field': 'updated_at',
'data_type': 'date'}
),
'sources': [],
'depends_on': {'macros': [], 'nodes': ['seed.test.seed']},
@@ -2303,7 +2310,8 @@ def expected_bigquery_complex_manifest(self):
'unrendered_config': self.unrendered_model_config(
cluster_by=['first_name', 'email'],
materialized='table',
partition_by={'field': 'updated_at', 'data_type': 'date'}
partition_by={'field': 'updated_at',
'data_type': 'date'}
),
},
'model.test.nested_view': {
@@ -2864,12 +2872,16 @@ def verify_manifest(self, expected_manifest):

for key in manifest_keys:
if key == 'macros':
self.verify_manifest_macros(manifest, expected_manifest.get('macros'))
self.verify_manifest_macros(
manifest, expected_manifest.get('macros'))
elif key == 'metadata':
metadata = manifest['metadata']
self.verify_metadata(metadata, 'https://schemas.getdbt.com/dbt/manifest/v1.json')
assert 'project_id' in metadata and metadata['project_id'] == '098f6bcd4621d373cade4e832627b4f6'
assert 'send_anonymous_usage_stats' in metadata and metadata['send_anonymous_usage_stats'] is False
self.verify_metadata(
metadata, 'https://schemas.getdbt.com/dbt/manifest/v1.json')
assert 'project_id' in metadata and metadata[
'project_id'] == '098f6bcd4621d373cade4e832627b4f6'
assert 'send_anonymous_usage_stats' in metadata and metadata[
'send_anonymous_usage_stats'] is False
assert 'user_id' in metadata and metadata['user_id'] is None
assert 'adapter_type' in metadata and metadata['adapter_type'] == self.adapter_type
else:
@@ -2895,9 +2907,12 @@ def expected_run_results(self, quote_schema=True, quote_model=False,
model_database = self.alternative_database

model_config = self.rendered_model_config(database=model_database)
second_model_config = self.rendered_model_config(schema=self.alternate_schema[-4:])
unrendered_model_config = self.unrendered_model_config(database=model_database, materialized='view')
unrendered_second_model_config = self.unrendered_model_config(schema=self.alternate_schema[-4:], materialized='view')
second_model_config = self.rendered_model_config(
schema=self.alternate_schema[-4:])
unrendered_model_config = self.unrendered_model_config(
database=model_database, materialized='view')
unrendered_second_model_config = self.unrendered_model_config(
schema=self.alternate_schema[-4:], materialized='view')
schema = self.unique_schema()

# we are selecting from the seed, which is always in the default db
@@ -3735,7 +3750,8 @@ def verify_run_results(self, expected_run_results):
run_results = _read_json('./target/run_results.json')

assert 'metadata' in run_results
self.verify_metadata(run_results['metadata'], 'https://schemas.getdbt.com/dbt/run-results/v1.json')
self.verify_metadata(
run_results['metadata'], 'https://schemas.getdbt.com/dbt/run-results/v1.json')
self.assertIn('elapsed_time', run_results)
self.assertGreater(run_results['elapsed_time'], 0)
self.assertTrue(
@@ -3752,7 +3768,8 @@ def verify_run_results(self, expected_run_results):

@use_profile('postgres')
def test__postgres__run_and_generate_no_compile(self):
self.run_and_generate(alternate_db=self.default_database, args=['--no-compile'])
self.run_and_generate(
alternate_db=self.default_database, args=['--no-compile'])
self.verify_catalog(self.expected_postgres_catalog())
self.assertFalse(os.path.exists('./target/manifest.json'))

@@ -3777,12 +3794,14 @@ def test__postgres_references(self):

self.verify_catalog(self.expected_postgres_references_catalog())
self.verify_manifest(self.expected_postgres_references_manifest())
self.verify_run_results(self.expected_postgres_references_run_results())
self.verify_run_results(
self.expected_postgres_references_run_results())

@use_profile('postgres')
def test_postgres_asset_paths_copied(self):
self.run_and_generate(
{'asset-paths': [self.dir('assets'), self.dir('non-existent-assets')]},
{'asset-paths': [self.dir('assets'),
self.dir('non-existent-assets')]},
)

assert os.path.exists('./target/assets')
@@ -3823,7 +3842,8 @@ def connect(*args, **kwargs):

self.verify_catalog(self.expected_snowflake_catalog(case_columns=True))
self.verify_manifest(self.expected_seeded_manifest(quote_model=True))
self.verify_run_results(self.expected_run_results(quote_schema=False, quote_model=True))
self.verify_run_results(self.expected_run_results(
quote_schema=False, quote_model=True))

@use_profile('bigquery')
def test__bigquery__run_and_generate(self):
@@ -3862,7 +3882,8 @@ def test__redshift__incremental_view(self):
model_count=1,
)
self.verify_catalog(self.expected_redshift_incremental_catalog())
self.verify_manifest(self.expected_redshift_incremental_view_manifest())
self.verify_manifest(
self.expected_redshift_incremental_view_manifest())

@use_profile('presto')
def test__presto__run_and_generate(self):
6 changes: 3 additions & 3 deletions test/integration/047_dbt_ls_test/test_ls.py
@@ -332,7 +332,7 @@ def expect_test_output(self):
'tags': ['schema'],
'config': {
'enabled': True,
'materialized': 'view',
'materialized': 'test',
'post-hook': [],
'severity': 'ERROR',
'tags': [],
@@ -356,7 +356,7 @@ def expect_test_output(self):
'tags': ['data'],
'config': {
'enabled': True,
'materialized': 'view',
'materialized': 'test',
'post-hook': [],
'severity': 'ERROR',
'tags': [],
@@ -380,7 +380,7 @@ def expect_test_output(self):
'tags': ['schema'],
'config': {
'enabled': True,
'materialized': 'view',
'materialized': 'test',
'post-hook': [],
'severity': 'ERROR',
'tags': [],
46 changes: 30 additions & 16 deletions test/unit/test_contracts_graph_compiled.py
@@ -235,44 +235,55 @@ def test_invalid_bad_type_model(minimal_uncompiled_dict):
lambda u: (u, u.replace(alias='nope')),

# None -> False is a config change even though it's pretty much the same
lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': False})), u.replace(config=u.config.replace(persist_docs={'relation': False}))),
lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': False})), u.replace(config=u.config.replace(persist_docs={'columns': False}))),
lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': False})), u.replace(
config=u.config.replace(persist_docs={'relation': False}))),
lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': False})), u.replace(
config=u.config.replace(persist_docs={'columns': False}))),
# True -> True
lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}))),
lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}))),
lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(
config=u.config.replace(persist_docs={'relation': True}))),
lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(
config=u.config.replace(persist_docs={'columns': True}))),

# only columns docs enabled, but description changed
lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}), description='a model description')),
lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(
config=u.config.replace(persist_docs={'columns': True}), description='a model description')),
# only relation docs enabled, but columns changed
lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}), columns={'a': ColumnInfo(name='a', description='a column description')}))
lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(
persist_docs={'relation': True}), columns={'a': ColumnInfo(name='a', description='a column description')}))
]


changed_compiled_models = [
lambda u: (u, None),
lambda u: (u, u.replace(raw_sql='select * from wherever')),
lambda u: (u, u.replace(fqn=['test', 'models', 'subdir', 'foo'], original_file_path='models/subdir/foo.sql', path='/root/models/subdir/foo.sql')),
lambda u: (u, u.replace(fqn=['test', 'models', 'subdir', 'foo'],
original_file_path='models/subdir/foo.sql', path='/root/models/subdir/foo.sql')),
lambda u: (u, replace_config(u, full_refresh=True)),
lambda u: (u, replace_config(u, post_hook=['select 1 as id'])),
lambda u: (u, replace_config(u, pre_hook=['select 1 as id'])),
lambda u: (u, replace_config(u, quoting={'database': True, 'schema': False, 'identifier': False})),
lambda u: (u, replace_config(
u, quoting={'database': True, 'schema': False, 'identifier': False})),
# we changed persist_docs values
lambda u: (u, replace_config(u, persist_docs={'relation': True})),
lambda u: (u, replace_config(u, persist_docs={'columns': True})),
lambda u: (u, replace_config(u, persist_docs={'columns': True, 'relation': True})),
lambda u: (u, replace_config(u, persist_docs={
'columns': True, 'relation': True})),

# None -> False is a config change even though it's pretty much the same
lambda u: (u, replace_config(u, persist_docs={'relation': False})),
lambda u: (u, replace_config(u, persist_docs={'columns': False})),
# persist docs was true for the relation and we changed the model description
lambda u: (
replace_config(u, persist_docs={'relation': True}),
replace_config(u, persist_docs={'relation': True}, description='a model description'),
replace_config(u, persist_docs={
'relation': True}, description='a model description'),
),
# persist docs was true for columns and we changed the model description
lambda u: (
replace_config(u, persist_docs={'columns': True}),
replace_config(u, persist_docs={'columns': True}, columns={'a': ColumnInfo(name='a', description='a column description')})
replace_config(u, persist_docs={'columns': True}, columns={
'a': ColumnInfo(name='a', description='a column description')})
),
# changing alias/schema/database on the config level is a change
lambda u: (u, replace_config(u, database='nope')),
@@ -408,7 +419,7 @@ def basic_uncompiled_schema_test_dict():
'config': {
'column_types': {},
'enabled': True,
'materialized': 'view',
'materialized': 'test',
'persist_docs': {},
'post-hook': [],
'pre-hook': [],
@@ -457,7 +468,7 @@ def basic_compiled_schema_test_dict():
'config': {
'column_types': {},
'enabled': True,
'materialized': 'view',
'materialized': 'test',
'persist_docs': {},
'post-hook': [],
'pre-hook': [],
@@ -539,13 +550,15 @@ def test_invalid_resource_type_schema_test(minimal_schema_test_dict):
lambda u: replace_config(u, full_refresh=True),
lambda u: replace_config(u, post_hook=['select 1 as id']),
lambda u: replace_config(u, pre_hook=['select 1 as id']),
lambda u: replace_config(u, quoting={'database': True, 'schema': False, 'identifier': False}),
lambda u: replace_config(
u, quoting={'database': True, 'schema': False, 'identifier': False}),
]


changed_schema_tests = [
lambda u: None,
lambda u: u.replace(fqn=['test', 'models', 'subdir', 'foo'], original_file_path='models/subdir/foo.sql', path='/root/models/subdir/foo.sql'),
lambda u: u.replace(fqn=['test', 'models', 'subdir', 'foo'],
original_file_path='models/subdir/foo.sql', path='/root/models/subdir/foo.sql'),
lambda u: replace_config(u, severity='warn'),
# If we checked test metadata, these would count. But we don't, because these changes would all change the unique ID, so it's irrelevant.
# lambda u: u.replace(test_metadata=u.test_metadata.replace(namespace='something')),
@@ -572,5 +585,6 @@ def test_compare_to_compiled(basic_uncompiled_schema_test_node, basic_compiled_s
compiled = basic_compiled_schema_test_node
assert not uncompiled.same_contents(compiled)
fixed_config = compiled.config.replace(severity=uncompiled.config.severity)
fixed_compiled = compiled.replace(config=fixed_config, unrendered_config=uncompiled.unrendered_config)
fixed_compiled = compiled.replace(
config=fixed_config, unrendered_config=uncompiled.unrendered_config)
assert uncompiled.same_contents(fixed_compiled)
4 changes: 2 additions & 2 deletions test/unit/test_contracts_graph_parsed.py
@@ -982,7 +982,7 @@ def basic_parsed_schema_test_dict():
'config': {
'column_types': {},
'enabled': True,
'materialized': 'view',
'materialized': 'test',
'persist_docs': {},
'post-hook': [],
'pre-hook': [],
@@ -1137,7 +1137,7 @@ def test_basic_schema_test_node(minimal_parsed_schema_test_dict, basic_parsed_sc
assert node.empty is False
assert node.is_ephemeral is False
assert node.is_refable is False
assert node.get_materialization() == 'view'
assert node.get_materialization() == 'test'

assert_from_dict(node, minimum, ParsedSchemaTestNode)
pickle.loads(pickle.dumps(node))
(Diff for the sixth changed file did not load.)
