From 33836ea5f8e152625ecc53dde6ef6d3ec0ccc667 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 9 Dec 2019 11:56:24 -0700 Subject: [PATCH 1/4] Avoid overlapping validity timestamps, tests --- .../materializations/snapshot/strategies.sql | 22 +++- .../dbt/include/bigquery/macros/adapters.sql | 6 + .../dbt/include/postgres/macros/adapters.sql | 6 + .../dbt/include/redshift/macros/adapters.sql | 6 + .../dbt/include/snowflake/macros/adapters.sql | 7 ++ .../macros/test_no_overlaps.sql | 109 ++++++++++++++++++ .../models/schema.yml | 5 + .../test_simple_snapshot.py | 29 +++-- test/integration/base.py | 4 - 9 files changed, 178 insertions(+), 16 deletions(-) create mode 100644 test/integration/004_simple_snapshot_test/macros/test_no_overlaps.sql create mode 100644 test/integration/004_simple_snapshot_test/models/schema.yml diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql index fd7eb2b9624..7847117e005 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql @@ -82,10 +82,30 @@ {% endmacro %} +{% macro snapshot_string_as_time(timestamp) -%} + {{ adapter_macro('snapshot_string_as_time', timestamp) }} +{%- endmacro %} + + +{% macro default__snapshot_string_as_time(timestamp) %} + {% do exceptions.raise_not_implemented( + 'snapshot_string_as_time macro not implemented for adapter '+adapter.type() + ) %} +{% endmacro %} + {% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config, target_exists) %} {% set check_cols_config = config['check_cols'] %} {% set primary_key = config['unique_key'] %} - {% set updated_at = snapshot_get_time() %} + {% set select_current_time -%} + select {{ snapshot_get_time() }} as snapshot_start + {%- endset %} + + {# don't access the column by name, to avoid dealing with casing issues on snowflake #} + {%- set now = run_query(select_current_time)[0][0] -%} + {% if now is none or now is undefined -%} + {%- do exceptions.raise_compiler_error('Could not get a snapshot start time from the database') -%} + {%- endif %} + {% set updated_at = snapshot_string_as_time(now) %} {% if check_cols_config == 'all' %} {% set check_cols = get_columns_in_query(node['injected_sql']) %} diff --git a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql index 944127024f5..b683bbd26c2 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql @@ -103,6 +103,12 @@ {%- endmacro %} +{% macro bigquery__snapshot_string_as_time(timestamp) -%} + {%- set result = 'TIMESTAMP("' ~ timestamp ~ '")' -%} + {{ return(result) }} +{%- endmacro %} + + {% macro bigquery__list_schemas(database) -%} {{ return(adapter.list_schemas()) }} {% endmacro %} diff --git a/plugins/postgres/dbt/include/postgres/macros/adapters.sql b/plugins/postgres/dbt/include/postgres/macros/adapters.sql index 82484b34978..9f7ac647693 100644 --- a/plugins/postgres/dbt/include/postgres/macros/adapters.sql +++ b/plugins/postgres/dbt/include/postgres/macros/adapters.sql @@ -104,6 +104,12 @@ now() {%- endmacro %} +{% macro postgres__snapshot_string_as_time(timestamp) -%} + {%- set result = "'" ~ timestamp ~ "'::timestamp without time zone" -%} + {{ return(result) }} +{%- endmacro %} + + {% macro postgres__snapshot_get_time() -%} {{ current_timestamp() }}::timestamp without time zone {%- endmacro %} diff --git a/plugins/redshift/dbt/include/redshift/macros/adapters.sql b/plugins/redshift/dbt/include/redshift/macros/adapters.sql index fa75765c0a3..d274b9ba7d6 100644 --- a/plugins/redshift/dbt/include/redshift/macros/adapters.sql +++ b/plugins/redshift/dbt/include/redshift/macros/adapters.sql @@ -172,6 +172,12 @@ {{ current_timestamp() }}::timestamp {%- endmacro %} + +{% macro redshift__snapshot_string_as_time(timestamp) -%} + {%- set result = "'" ~ timestamp ~ "'::timestamp" -%} + {{ return(result) }} +{%- endmacro %} + {% macro redshift__make_temp_relation(base_relation, suffix) %} {% do return(postgres__make_temp_relation(base_relation, suffix)) %} {% endmacro %} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index 5cf8b7b63cd..8c7f48bcdd4 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -108,6 +108,13 @@ convert_timezone('UTC', current_timestamp()) {%- endmacro %} + +{% macro snowflake__snapshot_string_as_time(timestamp) -%} + {%- set result = "to_timestamp_ntz('" ~ timestamp ~ "')" -%} + {{ return(result) }} +{%- endmacro %} + + {% macro snowflake__snapshot_get_time() -%} to_timestamp_ntz({{ current_timestamp() }}) {%- endmacro %} diff --git a/test/integration/004_simple_snapshot_test/macros/test_no_overlaps.sql b/test/integration/004_simple_snapshot_test/macros/test_no_overlaps.sql new file mode 100644 index 00000000000..5a9b5c25af8 --- /dev/null +++ b/test/integration/004_simple_snapshot_test/macros/test_no_overlaps.sql @@ -0,0 +1,109 @@ +{% macro get_snapshot_unique_id() -%} + {{ return(adapter_macro('get_snapshot_unique_id', )) }} +{%- endmacro %} + +{% macro default__get_snapshot_unique_id() -%} + {% do return("id || '-' || first_name") %} +{%- endmacro %} + + +{% macro bigquery__get_snapshot_unique_id() -%} + {%- do return('concat(cast(id as string), "-", first_name)') -%} +{%- endmacro %} + + +{% macro test_no_overlaps(model, keys) %} + with base as ( + select + {{ get_snapshot_unique_id() }} as dbt_unique_id, + * + from {{ model }} + ), + matches as ( + select rel1.* from base rel1 + inner join base rel2 + using (dbt_unique_id) + where not ( + ((rel1.dbt_valid_from <= rel2.dbt_valid_to) or + (rel2.dbt_valid_to is null) + ) + and + ((rel1.dbt_valid_to >= rel2.dbt_valid_from) or + (rel1.dbt_valid_to is null)) + ) + ) + select count(*) from matches +{% endmacro %} + +{# + mostly copy+pasted from dbt_utils, but I removed some parameters and added + a query that calls get_snapshot_unique_id +#} +{% macro test_mutually_exclusive_ranges(model) %} + +with base as ( + select {{ get_snapshot_unique_id() }} as dbt_unique_id, + * + from {{ model }} +), +window_functions as ( + + select + dbt_valid_from as lower_bound, + coalesce(dbt_valid_to, '2099-1-1T00:00:01') as upper_bound, + + lead(dbt_valid_from) over ( + partition by dbt_unique_id + order by dbt_valid_from + ) as next_lower_bound, + + row_number() over ( + partition by dbt_unique_id + order by dbt_valid_from desc + ) = 1 as is_last_record + + from base + +), + +calc as ( + -- We want to return records where one of our assumptions fails, so we'll use + -- the `not` function with `and` statements so we can write our assumptions nore cleanly + select + *, + + -- For each record: lower_bound should be < upper_bound. + -- Coalesce it to return an error on the null case (implicit assumption + -- these columns are not_null) + coalesce( + lower_bound < upper_bound, + is_last_record + ) as lower_bound_less_than_upper_bound, + + -- For each record: upper_bound {{ allow_gaps_operator }} the next lower_bound. + -- Coalesce it to handle null cases for the last record. + coalesce( + upper_bound <= next_lower_bound, + is_last_record, + false + ) as upper_bound_less_than_or_equal_to_next_lower_bound + + from window_functions + +), + +validation_errors as ( + + select + * + from calc + + where not( + -- THE FOLLOWING SHOULD BE TRUE -- + lower_bound_less_than_upper_bound + and upper_bound_less_than_or_equal_to_next_lower_bound + ) +) + +select count(*) from validation_errors +{% endmacro %} diff --git a/test/integration/004_simple_snapshot_test/models/schema.yml b/test/integration/004_simple_snapshot_test/models/schema.yml new file mode 100644 index 00000000000..187a9d13ab2 --- /dev/null +++ b/test/integration/004_simple_snapshot_test/models/schema.yml @@ -0,0 +1,5 @@ +version: 2 +models: + - name: snapshot_actual + tests: + - mutually_exclusive_ranges diff --git a/test/integration/004_simple_snapshot_test/test_simple_snapshot.py b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py index 683e70afcef..b6a42bdc753 100644 --- a/test/integration/004_simple_snapshot_test/test_simple_snapshot.py +++ b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py @@ -33,6 +33,7 @@ def assert_case_tables_equal(self, actual, expected): self.assertTablesEqual(actual, expected) def assert_expected(self): + self.run_dbt(['test']) self.assert_case_tables_equal('snapshot_actual', 'snapshot_expected') @@ -42,6 +43,7 @@ def project_config(self): return { "data-paths": ['data'], "snapshot-paths": ['test-snapshots-pg'], + 'macro-paths': ['macros'], } @use_profile('postgres') @@ -98,7 +100,7 @@ class TestCustomSnapshotFiles(BaseSimpleSnapshotTest): def project_config(self): return { 'data-paths': ['data'], - 'macro-paths': ['custom-snapshot-macros'], + 'macro-paths': ['custom-snapshot-macros', 'macros'], 'snapshot-paths': ['test-snapshots-pg-custom'], } @@ -128,7 +130,7 @@ class TestNamespacedCustomSnapshotFiles(BaseSimpleSnapshotTest): def project_config(self): return { 'data-paths': ['data'], - 'macro-paths': ['custom-snapshot-macros'], + 'macro-paths': ['custom-snapshot-macros', 'macros'], 'snapshot-paths': ['test-snapshots-pg-custom-namespaced'], } @@ -152,7 +154,7 @@ class TestInvalidNamespacedCustomSnapshotFiles(BaseSimpleSnapshotTest): def project_config(self): return { 'data-paths': ['data'], - 'macro-paths': ['custom-snapshot-macros'], + 'macro-paths': ['custom-snapshot-macros', 'macros'], 'snapshot-paths': ['test-snapshots-pg-custom-invalid'], } @@ -179,6 +181,7 @@ def project_config(self): "data-paths": ['data'], "snapshot-paths": ['test-snapshots-select', 'test-snapshots-pg'], + 'macro-paths': ['macros'], } @use_profile('postgres') @@ -236,7 +239,8 @@ def project_config(self): 'strategy': 'timestamp', 'updated_at': 'updated_at', } - } + }, + 'macro-paths': ['macros'], } @@ -253,16 +257,15 @@ def models(self): def project_config(self): return { "snapshot-paths": ['test-snapshots-bq'], + 'macro-paths': ['macros'], } def assert_expected(self): + self.run_dbt(['test']) self.assertTablesEqual('snapshot_actual', 'snapshot_expected') @use_profile('bigquery') def test__bigquery__simple_snapshot(self): - self.use_default_project() - self.use_profile('bigquery') - self.run_sql_file("seed_bq.sql") self.run_dbt(["snapshot"]) @@ -276,11 +279,8 @@ def test__bigquery__simple_snapshot(self): self.assert_expected() - @use_profile('bigquery') def test__bigquery__snapshot_with_new_field(self): - self.use_default_project() - self.use_profile('bigquery') self.run_sql_file("seed_bq.sql") @@ -341,6 +341,7 @@ def project_config(self): paths = ['test-snapshots-bq'] return { 'snapshot-paths': paths, + 'macro-paths': ['macros'], } def run_snapshot(self): @@ -395,6 +396,7 @@ def project_config(self): paths = ['test-snapshots-pg'] return { 'snapshot-paths': paths, + 'macro-paths': ['macros'], } def target_schema(self): @@ -427,6 +429,7 @@ def models(self): def project_config(self): return { "snapshot-paths": ['test-snapshots-invalid'], + 'macro-paths': ['macros'], } @use_profile('postgres') @@ -456,6 +459,7 @@ def project_config(self): return { "data-paths": ['data'], "snapshot-paths": ['test-check-col-snapshots'], + 'macro-paths': ['macros'], } @@ -472,7 +476,8 @@ def project_config(self): "strategy": "check", "check_cols": ["email"], } - } + }, + 'macro-paths': ['macros'], } @@ -493,6 +498,7 @@ def project_config(self): return { "data-paths": ['data'], "snapshot-paths": ['test-check-col-snapshots-bq'], + 'macro-paths': ['macros'], } @use_profile('bigquery') @@ -562,6 +568,7 @@ def run_snapshot(self): def project_config(self): return { "snapshot-paths": ['test-snapshots-longtext'], + 'macro-paths': ['macros'], } @use_profile('postgres') diff --git a/test/integration/base.py b/test/integration/base.py index 3ebf503b4cb..f8546cd7061 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -545,10 +545,6 @@ def run_dbt_and_check(self, args=None, strict=True, parser=False, profiles_dir=T final_args.append('--log-cache-events') logger.info("Invoking dbt with {}".format(final_args)) - if args is None: - args = ["run"] - - logger.info("Invoking dbt with {}".format(args)) return dbt.handle_and_check(final_args) def run_sql_file(self, path, kwargs=None): From 382a993e688595b669cb7516f544791a94b5306f Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 9 Dec 2019 15:05:21 -0700 Subject: [PATCH 2/4] make the test even more strict --- .../macros/test_no_overlaps.sql | 30 ++----------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/test/integration/004_simple_snapshot_test/macros/test_no_overlaps.sql b/test/integration/004_simple_snapshot_test/macros/test_no_overlaps.sql index 5a9b5c25af8..136c6efe5c8 100644 --- a/test/integration/004_simple_snapshot_test/macros/test_no_overlaps.sql +++ b/test/integration/004_simple_snapshot_test/macros/test_no_overlaps.sql @@ -11,30 +11,6 @@ {%- do return('concat(cast(id as string), "-", first_name)') -%} {%- endmacro %} - -{% macro test_no_overlaps(model, keys) %} - with base as ( - select - {{ get_snapshot_unique_id() }} as dbt_unique_id, - * - from {{ model }} - ), - matches as ( - select rel1.* from base rel1 - inner join base rel2 - using (dbt_unique_id) - where not ( - ((rel1.dbt_valid_from <= rel2.dbt_valid_to) or - (rel2.dbt_valid_to is null) - ) - and - ((rel1.dbt_valid_to >= rel2.dbt_valid_from) or - (rel1.dbt_valid_to is null)) - ) - ) - select count(*) from matches -{% endmacro %} - {# mostly copy+pasted from dbt_utils, but I removed some parameters and added a query that calls get_snapshot_unique_id @@ -83,10 +59,10 @@ calc as ( -- For each record: upper_bound {{ allow_gaps_operator }} the next lower_bound. -- Coalesce it to handle null cases for the last record. coalesce( - upper_bound <= next_lower_bound, + upper_bound = next_lower_bound, is_last_record, false - ) as upper_bound_less_than_or_equal_to_next_lower_bound + ) as upper_bound_equal_to_next_lower_bound from window_functions @@ -101,7 +77,7 @@ validation_errors as ( where not( -- THE FOLLOWING SHOULD BE TRUE -- lower_bound_less_than_upper_bound - and upper_bound_less_than_or_equal_to_next_lower_bound + and upper_bound_equal_to_next_lower_bound ) ) From 9589e7aaa19b67459a30459d8c7f3be06415746a Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 11 Dec 2019 09:14:14 -0700 Subject: [PATCH 3/4] Fix snapshot check all strategy with added column --- .../materializations/snapshot/strategies.sql | 53 +++++++++++--- .../004_simple_snapshot_test/data/seed.csv | 4 + .../data/seed_newcol.csv | 4 + .../test-snapshots-checkall/snapshot.sql | 4 + .../test_simple_snapshot.py | 73 +++++++++++++++++++ test/integration/base.py | 2 +- 6 files changed, 128 insertions(+), 12 deletions(-) create mode 100644 test/integration/004_simple_snapshot_test/data/seed.csv create mode 100644 test/integration/004_simple_snapshot_test/data/seed_newcol.csv create mode 100644 test/integration/004_simple_snapshot_test/test-snapshots-checkall/snapshot.sql diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql index 7847117e005..108d3f01709 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql @@ -93,6 +93,32 @@ ) %} {% endmacro %} + +{% macro snapshot_check_all_get_existing_columns(node, target_exists) -%} + {%- set query_columns = get_columns_in_query(node['injected_sql']) -%} + {%- if not target_exists -%} + {# no table yet -> return whatever the query does #} + {{ return([false, query_columns]) }} + {%- endif -%} + {# handle any schema changes #} + {%- set target_table = node.get('alias', node.get('name')) -%} + {%- set target_relation = adapter.get_relation(database=node.database, schema=node.schema, identifier=target_table) -%} + {%- set existing_cols = get_columns_in_query('select * from ' ~ target_relation) -%} + {%- set ns = namespace() -%} {# handle for-loop scoping with a namespace #} + {%- set ns.column_added = false -%} + + {%- set intersection = [] -%} + {%- for col in query_columns -%} + {%- if col in existing_cols -%} + {%- do intersection.append(col) -%} + {%- else -%} + {% set ns.column_added = true %} + {%- endif -%} + {%- endfor -%} + {{ return([ns.column_added, intersection]) }} +{%- endmacro %} + + {% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config, target_exists) %} {% set check_cols_config = config['check_cols'] %} {% set primary_key = config['unique_key'] %} @@ -107,24 +133,29 @@ {%- endif %} {% set updated_at = snapshot_string_as_time(now) %} + {% set column_added = false %} + {% if check_cols_config == 'all' %} - {% set check_cols = get_columns_in_query(node['injected_sql']) %} + {% set column_added, check_cols = snapshot_check_all_get_existing_columns(node, target_exists) %} {% elif check_cols_config is iterable and (check_cols_config | length) > 0 %} {% set check_cols = check_cols_config %} {% else %} {% do exceptions.raise_compiler_error("Invalid value for 'check_cols': " ~ check_cols_config) %} {% endif %} - {% set row_changed_expr -%} - ( - {% for col in check_cols %} - {{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }} - or - ({{ snapshotted_rel }}.{{ col }} is null) != ({{ current_rel }}.{{ col }} is null) - {%- if not loop.last %} or {% endif %} - - {% endfor %} - ) + {%- set row_changed_expr -%} + ( + {%- if column_added -%} + TRUE + {%- else -%} + {%- for col in check_cols -%} + {{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }} + or + ({{ snapshotted_rel }}.{{ col }} is null) != ({{ current_rel }}.{{ col }} is null) + {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + {%- endif -%} + ) {%- endset %} {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %} diff --git a/test/integration/004_simple_snapshot_test/data/seed.csv b/test/integration/004_simple_snapshot_test/data/seed.csv new file mode 100644 index 00000000000..9da8d46ff03 --- /dev/null +++ b/test/integration/004_simple_snapshot_test/data/seed.csv @@ -0,0 +1,4 @@ +id,first_name +1,Judith +2,Arthur +3,Rachel diff --git a/test/integration/004_simple_snapshot_test/data/seed_newcol.csv b/test/integration/004_simple_snapshot_test/data/seed_newcol.csv new file mode 100644 index 00000000000..005517bdab9 --- /dev/null +++ b/test/integration/004_simple_snapshot_test/data/seed_newcol.csv @@ -0,0 +1,4 @@ +id,first_name,last_name +1,Judith,Kennedy +2,Arthur,Kelly +3,Rachel,Moreno diff --git a/test/integration/004_simple_snapshot_test/test-snapshots-checkall/snapshot.sql b/test/integration/004_simple_snapshot_test/test-snapshots-checkall/snapshot.sql new file mode 100644 index 00000000000..b9cd002ca1b --- /dev/null +++ b/test/integration/004_simple_snapshot_test/test-snapshots-checkall/snapshot.sql @@ -0,0 +1,4 @@ +{% snapshot my_snapshot %} + {{ config(check_cols='all', unique_key='id', strategy='check', target_database=database, target_schema=schema) }} + select * from {{ ref(var('seed_name', 'seed')) }} +{% endsnapshot %} diff --git a/test/integration/004_simple_snapshot_test/test_simple_snapshot.py b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py index b6a42bdc753..ce21887009f 100644 --- a/test/integration/004_simple_snapshot_test/test_simple_snapshot.py +++ b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py @@ -95,6 +95,79 @@ def test__redshift__simple_snapshot(self): self.assert_expected() +class TestSimpleColumnSnapshotFiles(DBTIntegrationTest): + + @property + def schema(self): + return "simple_snapshot_004" + + @property + def models(self): + return "models-checkall" + + @property + def project_config(self): + return { + 'data-paths': ['data'], + 'macro-paths': ['custom-snapshot-macros', 'macros'], + 'snapshot-paths': ['test-snapshots-checkall'], + 'seeds': { + 'quote_columns': False, + } + } + + def _run_snapshot_test(self): + self.run_dbt(['seed']) + self.run_dbt(['snapshot']) + database = self.default_database + if self.adapter_type == 'bigquery': + database = self.adapter.quote(database) + results = self.run_sql( + 'select * from {}.{}.my_snapshot'.format(database, self.unique_schema()), + fetch='all' + ) + self.assertEqual(len(results), 3) + for result in results: + self.assertEqual(len(result), 6) + + self.run_dbt(['snapshot', '--vars', '{seed_name: seed_newcol}']) + results = self.run_sql( + 'select * from {}.{}.my_snapshot where last_name is not NULL'.format(database, self.unique_schema()), + fetch='all' + ) + self.assertEqual(len(results), 3) + + for result in results: + # new column + self.assertEqual(len(result), 7) + self.assertIsNotNone(result[-1]) + + results = self.run_sql( + 'select * from {}.{}.my_snapshot where last_name is NULL'.format(database, self.unique_schema()), + fetch='all' + ) + self.assertEqual(len(results), 3) + for result in results: + # new column + self.assertEqual(len(result), 7) + + @use_profile('postgres') + def test_postgres_renamed_source(self): + self._run_snapshot_test() + + @use_profile('snowflake') + def test_snowflake_renamed_source(self): + self._run_snapshot_test() + + @use_profile('redshift') + def test_redshift_renamed_source(self): + self._run_snapshot_test() + + @use_profile('bigquery') + def test_bigquery_renamed_source(self): + self._run_snapshot_test() + + class TestCustomSnapshotFiles(BaseSimpleSnapshotTest): @property def project_config(self): diff --git a/test/integration/base.py b/test/integration/base.py index f8546cd7061..c5cbc902c72 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -619,7 +619,7 @@ def run_sql_common(self, sql, fetch, conn): else: return except BaseException as e: - if conn.handle and not conn.handle.is_closed(): + if conn.handle and not conn.handle.closed: conn.handle.rollback() print(sql) print(e) From ab4925f59f0bde0555d17f20940a6fd9576585e8 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 11 Dec 2019 13:30:04 -0700 Subject: [PATCH 4/4] this is ok now --- .../004_simple_snapshot_test/test_simple_snapshot.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/integration/004_simple_snapshot_test/test_simple_snapshot.py b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py index ce21887009f..1b86de9e21a 100644 --- a/test/integration/004_simple_snapshot_test/test_simple_snapshot.py +++ b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py @@ -592,10 +592,8 @@ def test__bigquery__snapshot_with_new_field(self): # This adds new fields to the source table, and updates the expected snapshot output accordingly self.run_sql_file("add_column_to_source_bq.sql") - # this should fail because `check="all"` will try to compare the nested field - self.run_dbt(['snapshot'], expect_pass=False) - - self.run_dbt(["snapshot", '--select', 'snapshot_actual']) + # check_cols='all' will replace the changed field + self.run_dbt(['snapshot']) # A more thorough test would assert that snapshotted == expected, but BigQuery does not support the # "EXCEPT DISTINCT" operator on nested fields! Instead, just check that schemas are congruent. @@ -610,9 +608,15 @@ def test__bigquery__snapshot_with_new_field(self): schema=self.unique_schema(), table='snapshot_actual' ) + snapshotted_all_cols = self.get_table_columns( + database=self.default_database, + schema=self.unique_schema(), + table='snapshot_checkall' + ) self.assertTrue(len(expected_cols) > 0, "source table does not exist -- bad test") self.assertEqual(len(expected_cols), len(snapshotted_cols), "actual and expected column lengths are different") + self.assertEqual(len(expected_cols), len(snapshotted_all_cols)) for (expected_col, actual_col) in zip(expected_cols, snapshotted_cols): expected_name, expected_type, _ = expected_col