From b047ed82b6519cd2bc82b2c67b5252b88015a91b Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 24 May 2019 12:33:25 -0600 Subject: [PATCH 01/31] detect duplicate archive and node names --- core/dbt/loader.py | 17 +++-- .../models-collision/archive_actual.sql | 1 + .../test_simple_archive.py | 70 +++++++++++++++++++ 3 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 test/integration/004_simple_archive_test/models-collision/archive_actual.sql diff --git a/core/dbt/loader.py b/core/dbt/loader.py index 7e98d0b29ac..dc34d890350 100644 --- a/core/dbt/loader.py +++ b/core/dbt/loader.py @@ -63,6 +63,18 @@ def _load_macros(self, internal_manifest=None): resource_type=NodeType.Macro, )) + def _load_archives_from_project(self): + archive_parser = ArchiveParser(self.root_project, self.all_projects, + self.macro_manifest) + for key, node in archive_parser.load_and_parse().items(): + # we have another archive parser, so we have to check for + # collisions + existing = self.nodes.get(key) + if existing: + dbt.exceptions.raise_duplicate_resource_name(existing, node) + else: + self.nodes[key] = node + def _load_seeds(self): parser = SeedParser(self.root_project, self.all_projects, self.macro_manifest) @@ -86,10 +98,7 @@ def _load_nodes(self): self.macro_manifest) self.nodes.update(hook_parser.load_and_parse()) - archive_parser = ArchiveParser(self.root_project, self.all_projects, - self.macro_manifest) - self.nodes.update(archive_parser.load_and_parse()) - + self._load_archives_from_project() self._load_seeds() def _load_docs(self): diff --git a/test/integration/004_simple_archive_test/models-collision/archive_actual.sql b/test/integration/004_simple_archive_test/models-collision/archive_actual.sql new file mode 100644 index 00000000000..43258a71464 --- /dev/null +++ b/test/integration/004_simple_archive_test/models-collision/archive_actual.sql @@ -0,0 +1 @@ +select 1 as id diff --git a/test/integration/004_simple_archive_test/test_simple_archive.py b/test/integration/004_simple_archive_test/test_simple_archive.py index 04d9bd3eed4..41c7b724367 100644 --- a/test/integration/004_simple_archive_test/test_simple_archive.py +++ b/test/integration/004_simple_archive_test/test_simple_archive.py @@ -405,6 +405,76 @@ def test__postgres__invalid(self): self.assertIn('target_database', str(exc.exception)) +class TestConflictArchive(DBTIntegrationTest): + @property + def schema(self): + return "simple_archive_004" + + @property + def models(self): + return "test/integration/004_simple_archive_test/models" + + @property + def project_config(self): + return {} + + + @use_profile('postgres') + def test__postgres_archive_block_archive_collision(self): + self.use_default_project({ + "archive-paths": ['test/integration/004_simple_archive_test/test-archives-pg'], + "archive": [ + { + "source_schema": self.unique_schema(), + "target_schema": self.unique_schema(), + "tables": [ + { + "source_table": "seed", + "target_table": "archive_actual", + "updated_at": 'updated_at', + "unique_key": '''id || '-' || first_name''' + }, + ], + }, + ], + }) + + with self.assertRaises(dbt.exceptions.CompilationException) as exc: + self.run_dbt(['compile'], expect_pass=False) + + def test__postgres_archive_block_model_collision(self): + self.use_default_project({ + "source-paths": ['test/integration/004_simple_archive_test/models-collision'], + "archive-paths": ['test/integration/004_simple_archive_test/test-archives-pg'], + "archive": [], + }) + + with self.assertRaises(dbt.exceptions.CompilationException) as exc: + 
self.run_dbt(['compile'], expect_pass=False) + + def test__postgres_archive_model_collision(self): + self.use_default_project({ + "source-paths": ['test/integration/004_simple_archive_test/models-collision'], + "archive": [ + { + "source_schema": self.unique_schema(), + "target_schema": self.unique_schema(), + "tables": [ + { + "source_table": "seed", + "target_table": "archive_actual", + "updated_at": 'updated_at', + "unique_key": '''id || '-' || first_name''' + }, + ], + }, + ], + }) + + with self.assertRaises(dbt.exceptions.CompilationException) as exc: + self.run_dbt(['compile'], expect_pass=False) + + class TestCheckCols(TestSimpleArchiveFiles): NUM_ARCHIVE_MODELS = 2 def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): From a164d83dad45d243b2b7c1b5e81fad402a367983 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 29 May 2019 13:08:33 -0600 Subject: [PATCH 02/31] in the concurrent transactions test, use a completely separate adapter for our goofy sql running --- .../test_concurrent_transaction.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py index 1b47ded1d10..d9cce7292eb 100644 --- a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py +++ b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py @@ -1,6 +1,12 @@ from test.integration.base import DBTIntegrationTest, use_profile import threading -from dbt.adapters.factory import get_adapter +from dbt.adapters.factory import ADAPTER_TYPES + + +def get_adapter_standalone(config): + cls = ADAPTER_TYPES[config.credentials.type] + return cls(config) + class BaseTestConcurrentTransaction(DBTIntegrationTest): @@ -12,8 +18,13 @@ def reset(self): def setUp(self): super(BaseTestConcurrentTransaction, self).setUp() + self._secret_adapter = get_adapter_standalone(self.config) self.reset() + def tearDown(self): + self._secret_adapter.cleanup_connections() + super(BaseTestConcurrentTransaction, self).tearDown() + @property def schema(self): return "concurrent_transaction_032" @@ -30,7 +41,7 @@ def project_config(self): def run_select_and_check(self, rel, sql): connection_name = '__test_{}'.format(id(threading.current_thread())) try: - with get_adapter(self.config).connection_named(connection_name) as conn: + with self._secret_adapter.connection_named(connection_name) as conn: res = self.run_sql_common(self.transform_sql(sql), 'one', conn) # The result is the output of f_sleep(), which is True From af13b2c74545fb393d0408fc24c726aaef22764f Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 29 May 2019 15:29:53 -0600 Subject: [PATCH 03/31] add a retry + sleep loop to registry calls --- core/dbt/clients/registry.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/core/dbt/clients/registry.py b/core/dbt/clients/registry.py index 4ba817d0fcf..809c3091dfd 100644 --- a/core/dbt/clients/registry.py +++ b/core/dbt/clients/registry.py @@ -5,6 +5,7 @@ from dbt.utils import memoized from dbt.logger import GLOBAL_LOGGER as logger import os +import time if os.getenv('DBT_PACKAGE_HUB_URL'): DEFAULT_REGISTRY_BASE_URL = os.getenv('DBT_PACKAGE_HUB_URL') @@ -22,11 +23,20 @@ def _get_url(url, registry_base_url=None): def _wrap_exceptions(fn): @wraps(fn) def wrapper(*args, **kwargs): - try: - return fn(*args, **kwargs) - except requests.exceptions.ConnectionError 
as e: - six.raise_from( - RegistryException('Unable to connect to registry hub'), e) + max_attempts = 5 + attempt = 0 + while True: + attempt += 1 + try: + return fn(*args, **kwargs) + except requests.exceptions.ConnectionError as exc: + if attempt < max_attempts: + time.sleep(1) + continue + six.raise_from( + RegistryException('Unable to connect to registry hub'), + exc + ) return wrapper From 7d490d4886daab2c16c2a1d517f86a27a8f5dac9 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Fri, 26 Apr 2019 14:00:42 -0400 Subject: [PATCH 04/31] Implement archival using a merge abstraction --- core/dbt/adapters/base/impl.py | 19 +- .../global_project/macros/adapters/common.sql | 40 +- .../global_project/macros/etc/datetime.sql | 4 + .../materializations/archive/archive.sql | 409 +++++++----------- .../archive/archive_merge.sql | 27 ++ .../materializations/archive/strategies.sql | 114 +++++ .../incremental/incremental.sql | 10 +- core/dbt/parser/archives.py | 22 +- .../bigquery/dbt/adapters/bigquery/impl.py | 16 - .../dbt/include/bigquery/macros/adapters.sql | 11 + .../macros/materializations/archive.sql | 16 +- .../dbt/include/postgres/macros/adapters.sql | 16 + .../macros/materializations/archive_merge.sql | 18 + .../dbt/include/redshift/macros/adapters.sql | 21 +- .../macros/materializations/archive_merge.sql | 4 + .../dbt/include/snowflake/macros/adapters.sql | 4 + .../macros/materializations/incremental.sql | 7 +- .../004_simple_archive_test/seed.sql | 13 +- .../test-archives-bq/archive.sql | 2 +- .../test-archives-invalid/archive.sql | 2 +- .../test-archives-longtext/longtext.sql | 2 +- .../test-archives-pg/archive.sql | 2 +- .../test-archives-select/archives.sql | 6 +- .../test-check-col-archives-bq/archive.sql | 4 +- .../test-check-col-archives/archive.sql | 4 +- .../test_concurrent_transaction.py | 7 +- .../033_event_tracking_test/test_events.py | 2 +- test/integration/base.py | 15 +- tox.ini | 40 +- 29 files changed, 488 insertions(+), 369 deletions(-) create mode 100644 core/dbt/include/global_project/macros/materializations/archive/archive_merge.sql create mode 100644 core/dbt/include/global_project/macros/materializations/archive/strategies.sql create mode 100644 plugins/postgres/dbt/include/postgres/macros/materializations/archive_merge.sql create mode 100644 plugins/redshift/dbt/include/redshift/macros/materializations/archive_merge.sql diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index e99f999f717..16d4bd300ac 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -387,6 +387,7 @@ def list_schemas(self, database): '`list_schemas` is not implemented for this adapter!' ) + @available.parse(lambda *a, **k: False) def check_schema_exists(self, database, schema): """Check if a schema exists. 
@@ -584,7 +585,14 @@ def valid_archive_target(self, relation): dbt.exceptions.raise_compiler_error(msg) @available.parse_none - def expand_target_column_types(self, temp_table, to_relation): + def expand_target_column_types(self, from_relation, to_relation): + if not isinstance(from_relation, self.Relation): + dbt.exceptions.invalid_type_error( + method_name='expand_target_column_types', + arg_name='from_relation', + got_value=from_relation, + expected_type=self.Relation) + if not isinstance(to_relation, self.Relation): dbt.exceptions.invalid_type_error( method_name='expand_target_column_types', @@ -592,14 +600,7 @@ def expand_target_column_types(self, temp_table, to_relation): got_value=to_relation, expected_type=self.Relation) - goal = self.Relation.create( - database=None, - schema=None, - identifier=temp_table, - type='table', - quote_policy=self.config.quoting - ) - self.expand_column_types(goal, to_relation) + self.expand_column_types(from_relation, to_relation) def list_relations(self, database, schema): if self._schema_is_cached(database, schema): diff --git a/core/dbt/include/global_project/macros/adapters/common.sql b/core/dbt/include/global_project/macros/adapters/common.sql index 2fde6e96115..c02239d6f49 100644 --- a/core/dbt/include/global_project/macros/adapters/common.sql +++ b/core/dbt/include/global_project/macros/adapters/common.sql @@ -28,6 +28,22 @@ {%- endif -%} {%- endmacro %} +{% macro get_columns_in_query(select_sql) -%} + {{ return(adapter_macro('get_columns_in_query', select_sql)) }} +{% endmacro %} + +{% macro default__get_columns_in_query(select_sql) %} + {% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%} + select * from ( + {{ select_sql }} + ) as __dbt_sbq + where false + limit 0 + {% endcall %} + + {{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }} +{% endmacro %} + {% macro create_schema(database_name, schema_name) -%} {{ adapter_macro('create_schema', database_name, schema_name) }} {% endmacro %} @@ -71,17 +87,6 @@ {% endmacro %} -{% macro create_archive_table(relation, columns) -%} - {{ adapter_macro('create_archive_table', relation, columns) }} -{%- endmacro %} - -{% macro default__create_archive_table(relation, columns) -%} - create table if not exists {{ relation }} ( - {{ column_list_for_create_table(columns) }} - ); -{% endmacro %} - - {% macro get_catalog(information_schemas) -%} {{ return(adapter_macro('get_catalog', information_schemas)) }} {%- endmacro %} @@ -249,3 +254,16 @@ {% endcall %} {{ return(load_result('check_schema_exists').table) }} {% endmacro %} + +{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %} + {{ return(adapter_macro('make_temp_relation', base_relation, suffix))}} +{% endmacro %} + +{% macro default__make_temp_relation(base_relation, suffix) %} + {% set tmp_identifier = base_relation.identifier ~ suffix %} + {% set tmp_relation = base_relation.incorporate( + path={"identifier": tmp_identifier}, + table_name=tmp_identifier) -%} + + {% do return(tmp_relation) %} +{% endmacro %} diff --git a/core/dbt/include/global_project/macros/etc/datetime.sql b/core/dbt/include/global_project/macros/etc/datetime.sql index 28a7654110b..f94e8251b2f 100644 --- a/core/dbt/include/global_project/macros/etc/datetime.sql +++ b/core/dbt/include/global_project/macros/etc/datetime.sql @@ -54,3 +54,7 @@ {{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }} {% endmacro %} +{% macro py_current_timestring() %} + {% set dt = 
modules.datetime.datetime.now() %} + {% do return(dt.strftime("%Y%m%d%H%M%S%f")) %} +{% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/archive/archive.sql b/core/dbt/include/global_project/macros/materializations/archive/archive.sql index ead07b657d7..a21bd27213b 100644 --- a/core/dbt/include/global_project/macros/materializations/archive/archive.sql +++ b/core/dbt/include/global_project/macros/materializations/archive/archive.sql @@ -1,26 +1,3 @@ -{# - Create SCD Hash SQL fields cross-db -#} - -{% macro archive_hash_arguments(args) %} - {{ adapter_macro('archive_hash_arguments', args) }} -{% endmacro %} - -{% macro default__archive_hash_arguments(args) %} - md5({% for arg in args %}coalesce(cast({{ arg }} as varchar ), '') {% if not loop.last %} || '|' || {% endif %}{% endfor %}) -{% endmacro %} - -{% macro create_temporary_table(sql, relation) %} - {{ return(adapter_macro('create_temporary_table', sql, relation)) }} -{% endmacro %} - -{% macro default__create_temporary_table(sql, relation) %} - {% call statement() %} - {{ create_table_as(True, relation, sql) }} - {% endcall %} - {{ return(relation) }} -{% endmacro %} - {# Add new columns to the table if applicable #} @@ -36,86 +13,41 @@ {% endfor %} {% endmacro %} -{# - Run the update part of an archive query. Different databases have - tricky differences in their `update` semantics. Table projection is - not allowed on Redshift/pg, but is effectively required on bq. -#} -{% macro archive_update(target_relation, tmp_relation) %} - {{ adapter_macro('archive_update', target_relation, tmp_relation) }} +{% macro post_archive(staging_relation) %} + {{ adapter_macro('post_archive', staging_relation) }} {% endmacro %} -{% macro default__archive_update(target_relation, tmp_relation) %} - update {{ target_relation }} - set dbt_valid_to = tmp.dbt_valid_to - from {{ tmp_relation }} as tmp - where tmp.dbt_scd_id = {{ target_relation }}.dbt_scd_id - and change_type = 'update'; +{% macro default__post_archive(staging_relation) %} + {# no-op #} {% endmacro %} -{% macro archive_get_time() -%} - {{ adapter_macro('archive_get_time') }} -{%- endmacro %} - -{% macro default__archive_get_time() -%} - {{ current_timestamp() }} -{%- endmacro %} +{% macro archive_staging_table_inserts(strategy, source_sql, target_relation) -%} -{% macro snowflake__archive_get_time() -%} - to_timestamp_ntz({{ current_timestamp() }}) -{%- endmacro %} + with archive_query as ( + {{ source_sql }} -{% macro archive_select_generic(source_sql, target_relation, transforms, scd_hash) -%} - with source as ( - {{ source_sql }} ), - {{ transforms }} - merged as ( - select *, 'update' as change_type from updates - union all - select *, 'insert' as change_type from insertions + source_data as ( - ) - - select *, - {{ scd_hash }} as dbt_scd_id - from merged - -{%- endmacro %} + select *, + {{ strategy.scd_id }} as dbt_scd_id, + {{ strategy.unique_key }} as dbt_unique_key, + {{ strategy.updated_at }} as dbt_updated_at, + {{ strategy.updated_at }} as dbt_valid_from, + nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to -{# - Cross-db compatible archival implementation -#} -{% macro archive_select_timestamp(source_sql, target_relation, source_columns, unique_key, updated_at) -%} - {% set timestamp_column = api.Column.create('_', 'timestamp') %} - {% set transforms -%} - current_data as ( - - select - {% for col in source_columns %} - {{ col.name }} {% if not loop.last %},{% endif %} - {% endfor %}, - {{ updated_at }} as dbt_updated_at, - 
{{ unique_key }} as dbt_pk, - {{ updated_at }} as dbt_valid_from, - {{ timestamp_column.literal('null') }} as tmp_valid_to - from source + from archive_query ), archived_data as ( - select - {% for col in source_columns %} - {{ col.name }}, - {% endfor %} - {{ updated_at }} as dbt_updated_at, - {{ unique_key }} as dbt_pk, - dbt_valid_from, - dbt_valid_to as tmp_valid_to + select *, + {{ strategy.unique_key }} as dbt_unique_key + from {{ target_relation }} ), @@ -123,125 +55,125 @@ insertions as ( select - current_data.*, - {{ timestamp_column.literal('null') }} as dbt_valid_to - from current_data - left outer join archived_data - on archived_data.dbt_pk = current_data.dbt_pk - where - archived_data.dbt_pk is null - or ( - archived_data.dbt_pk is not null - and archived_data.dbt_updated_at < current_data.dbt_updated_at - and archived_data.tmp_valid_to is null + 'insert' as dbt_change_type, + source_data.* + + from source_data + left outer join archived_data on archived_data.dbt_unique_key = source_data.dbt_unique_key + where archived_data.dbt_unique_key is null + or ( + archived_data.dbt_unique_key is not null + and archived_data.dbt_valid_to is null + and ( + {{ strategy.row_changed }} + ) ) - ), - updates as ( + ) + + select * from insertions - select - archived_data.*, - current_data.dbt_updated_at as dbt_valid_to - from current_data - left outer join archived_data - on archived_data.dbt_pk = current_data.dbt_pk - where archived_data.dbt_pk is not null - and archived_data.dbt_updated_at < current_data.dbt_updated_at - and archived_data.tmp_valid_to is null - ), - {%- endset %} - {%- set scd_hash = archive_hash_arguments(['dbt_pk', 'dbt_updated_at']) -%} - {{ archive_select_generic(source_sql, target_relation, transforms, scd_hash) }} {%- endmacro %} -{% macro archive_select_check_cols(source_sql, target_relation, source_columns, unique_key, check_cols) -%} - {%- set timestamp_column = api.Column.create('_', 'timestamp') -%} +{% macro archive_staging_table_updates(strategy, source_sql, target_relation) -%} - {# if we recognize the primary key, it's the newest record, and anything we care about has changed, it's an update candidate #} - {%- set update_candidate -%} - archived_data.dbt_pk is not null - and ( - {%- for col in check_cols %} - current_data.{{ col }} <> archived_data.{{ col }} - {%- if not loop.last %} or {% endif %} - {% endfor -%} - ) - and archived_data.tmp_valid_to is null - {%- endset %} + with archive_query as ( - {% set transforms -%} - current_data as ( + {{ source_sql }} - select - {% for col in source_columns %} - {{ col.name }} {% if not loop.last %},{% endif %} - {% endfor %}, - {{ archive_get_time() }} as dbt_updated_at, - {{ unique_key }} as dbt_pk, - {{ archive_get_time() }} as dbt_valid_from, - {{ timestamp_column.literal('null') }} as tmp_valid_to - from source ), - archived_data as ( + source_data as ( select - {% for col in source_columns %} - {{ col.name }}, - {% endfor %} - dbt_updated_at, - {{ unique_key }} as dbt_pk, - dbt_valid_from, - dbt_valid_to as tmp_valid_to - from {{ target_relation }} + *, + {{ strategy.scd_id }} as dbt_scd_id, + {{ strategy.unique_key }} as dbt_unique_key, + {{ strategy.updated_at }} as dbt_updated_at, + {{ strategy.updated_at }} as dbt_valid_from + from archive_query ), - insertions as ( + archived_data as ( + + select *, + {{ strategy.unique_key }} as dbt_unique_key + + from {{ target_relation }} - select - current_data.*, - {{ timestamp_column.literal('null') }} as dbt_valid_to - from current_data - left outer join 
archived_data - on archived_data.dbt_pk = current_data.dbt_pk - where - archived_data.dbt_pk is null - or ( {{ update_candidate }} ) ), updates as ( select - archived_data.*, - {{ archive_get_time() }} as dbt_valid_to - from current_data - left outer join archived_data - on archived_data.dbt_pk = current_data.dbt_pk - where {{ update_candidate }} - ), - {%- endset %} + 'update' as dbt_change_type, + archived_data.dbt_scd_id, + source_data.dbt_valid_from as dbt_valid_to + + from source_data + join archived_data on archived_data.dbt_unique_key = source_data.dbt_unique_key + where archived_data.dbt_valid_to is null + and ( + {{ strategy.row_changed }} + ) + + ) + + select * from updates - {%- set hash_components = ['dbt_pk'] %} - {%- do hash_components.extend(check_cols) -%} - {%- set scd_hash = archive_hash_arguments(hash_components) -%} - {{ archive_select_generic(source_sql, target_relation, transforms, scd_hash) }} {%- endmacro %} -{# this is gross #} -{% macro create_empty_table_as(sql) %} - {% set tmp_relation = api.Relation.create(identifier=model['name']+'_dbt_archival_view_tmp', type='view') %} - {% set limited_sql -%} - with cte as ( - {{ sql }} - ) - select * from cte limit 0 - {%- endset %} - {%- set tmp_relation = create_temporary_table(limited_sql, tmp_relation) -%} - {{ return(tmp_relation) }} +{% macro build_archive_table(strategy, sql) %} + + select *, + {{ strategy.scd_id }} as dbt_scd_id, + {{ strategy.updated_at }} as dbt_updated_at, + {{ strategy.updated_at }} as dbt_valid_from, + nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to + from ( + {{ sql }} + ) sbq + +{% endmacro %} + + +{% macro get_or_create_relation(database, schema, identifier, type) %} + {%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %} + {% if target_relation %} + {% do return([true, target_relation]) %} + {% endif %} + + {%- set new_relation = api.Relation.create( + database=database, + schema=schema, + identifier=identifier, + type=type + ) -%} + {% do return([false, new_relation]) %} +{% endmacro %} + +{% macro build_archive_staging_table(strategy, sql, target_relation) %} + {% set tmp_relation = make_temp_relation(target_relation) %} + + {% set inserts_select = archive_staging_table_inserts(strategy, sql, target_relation) %} + {% set updates_select = archive_staging_table_updates(strategy, sql, target_relation) %} + + {% call statement('build_archive_staging_relation_inserts') %} + {{ create_table_as(True, tmp_relation, inserts_select) }} + {% endcall %} + + {% call statement('build_archive_staging_relation_updates') %} + insert into {{ tmp_relation }} (dbt_change_type, dbt_scd_id, dbt_valid_to) + select dbt_change_type, dbt_scd_id, dbt_valid_to from ( + {{ updates_select }} + ) dbt_sbq; + {% endcall %} + + {% do return(tmp_relation) %} {% endmacro %} @@ -251,95 +183,74 @@ {%- set target_database = config.get('target_database') -%} {%- set target_schema = config.get('target_schema') -%} {%- set target_table = model.get('alias', model.get('name')) -%} - {%- set strategy = config.get('strategy') -%} - {% set information_schema = api.Relation.create( - database=target_database, - schema=target_schema, - identifier=target_table).information_schema() %} + {%- set strategy_name = config.get('strategy') -%} + {%- set unique_key = config.get('unique_key') %} - {% if not check_schema_exists(information_schema, target_schema) %} - {{ create_schema(target_database, target_schema) }} + {% if not 
adapter.check_schema_exists(target_database, target_schema) %} + {% do create_schema(target_database, target_schema) %} {% endif %} - {%- set target_relation = adapter.get_relation( - database=target_database, - schema=target_schema, - identifier=target_table) -%} - - {%- if target_relation is none -%} - {%- set target_relation = api.Relation.create( - database=target_database, - schema=target_schema, - identifier=target_table) -%} - {%- elif not target_relation.is_table -%} - {{ exceptions.relation_wrong_type(target_relation, 'table') }} - {%- endif -%} - - {% set source_info_model = create_empty_table_as(model['injected_sql']) %} - - {%- set source_columns = adapter.get_columns_in_relation(source_info_model) -%} + {% set target_relation_exists, target_relation = get_or_create_relation( + database=target_database, + schema=target_schema, + identifier=target_table, + type='table') -%} - {%- set unique_key = config.get('unique_key') -%} - {%- set dest_columns = source_columns + [ - api.Column.create('dbt_valid_from', 'timestamp'), - api.Column.create('dbt_valid_to', 'timestamp'), - api.Column.create('dbt_scd_id', 'string'), - api.Column.create('dbt_updated_at', 'timestamp'), - ] -%} + {%- if not target_relation.is_table -%} + {% do exceptions.relation_wrong_type(target_relation, 'table') %} + {%- endif -%} - {% call statement() %} - {{ create_archive_table(target_relation, dest_columns) }} - {% endcall %} + {% set strategy_macro = strategy_dispatch(strategy_name) %} + {% set strategy = strategy_macro(model, "archived_data", "source_data", config) %} - {% set missing_columns = adapter.get_missing_columns(source_info_model, target_relation) %} + {% if not target_relation_exists %} - {{ create_columns(target_relation, missing_columns) }} + {% set build_sql = build_archive_table(strategy, model['injected_sql']) %} + {% call statement('main') -%} + {{ create_table_as(False, target_relation, build_sql) }} + {% endcall %} - {{ adapter.valid_archive_target(target_relation) }} + {% else %} - {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = model['name'] + '__dbt_archival_tmp' -%} + {{ adapter.valid_archive_target(target_relation) }} - {% set tmp_table_sql -%} + {% set staging_table = build_archive_staging_table(strategy, sql, target_relation) %} - with dbt_archive_sbq as ( + {% do adapter.expand_target_column_types(from_relation=staging_table, + to_relation=target_relation) %} - {% if strategy == 'timestamp' %} - {%- set updated_at = config.get('updated_at') -%} - {{ archive_select_timestamp(model['injected_sql'], target_relation, source_columns, unique_key, updated_at) }} - {% elif strategy == 'check' %} - {%- set check_cols = config.get('check_cols') -%} - {% if check_cols == 'all' %} - {% set check_cols = source_columns | map(attribute='name') | list %} - {% endif %} - {{ archive_select_check_cols(model['injected_sql'], target_relation, source_columns, unique_key, check_cols)}} - {% else %} - {{ exceptions.raise_compiler_error('Got invalid strategy "{}"'.format(strategy)) }} - {% endif %} - ) - select * from dbt_archive_sbq + {% set missing_columns = adapter.get_missing_columns(staging_table, target_relation) + | rejectattr('name', 'equalto', 'dbt_change_type') + | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE') + | rejectattr('name', 'equalto', 'dbt_unique_key') + | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY') + | list %} - {%- endset %} + {% do create_columns(target_relation, missing_columns) %} - {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, 
type='table') -%} - {%- set tmp_relation = create_temporary_table(tmp_table_sql, tmp_relation) -%} + {% set source_columns = adapter.get_columns_in_relation(staging_table) + | rejectattr('name', 'equalto', 'dbt_change_type') + | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE') + | rejectattr('name', 'equalto', 'dbt_unique_key') + | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY') + | list %} - {{ adapter.expand_target_column_types(temp_table=tmp_identifier, - to_relation=target_relation) }} + {% call statement('main') %} + {{ archive_merge_sql( + target = target_relation, + source = staging_table, + insert_cols = source_columns + ) + }} + {% endcall %} - {% call statement('_') -%} - {{ archive_update(target_relation, tmp_relation) }} - {% endcall %} + {% endif %} - {% call statement('main') -%} + {{ adapter.commit() }} - insert into {{ target_relation }} ( - {{ column_list(dest_columns) }} - ) - select {{ column_list(dest_columns) }} from {{ tmp_relation }} - where change_type = 'insert'; - {% endcall %} + {% if staging_table is defined %} + {% do post_archive(staging_table) %} + {% endif %} - {{ adapter.commit() }} {% endmaterialization %} diff --git a/core/dbt/include/global_project/macros/materializations/archive/archive_merge.sql b/core/dbt/include/global_project/macros/materializations/archive/archive_merge.sql new file mode 100644 index 00000000000..9b7ae0d25b7 --- /dev/null +++ b/core/dbt/include/global_project/macros/materializations/archive/archive_merge.sql @@ -0,0 +1,27 @@ + +{% macro archive_merge_sql(target, source, insert_cols) -%} + {{ adapter_macro('archive_merge_sql', target, source, insert_cols) }} +{%- endmacro %} + + +{% macro default__archive_merge_sql(target, source, insert_cols) -%} + {%- set insert_cols_csv = insert_cols| map(attribute="name") | join(', ') -%} + + merge into {{ target }} as DBT_INTERNAL_DEST + using {{ source }} as DBT_INTERNAL_SOURCE + on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id + + when matched + and DBT_INTERNAL_DEST.dbt_valid_to is null + and DBT_INTERNAL_SOURCE.dbt_change_type = 'update' + then update + set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to + + when not matched + and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert' + then insert ({{ insert_cols_csv }}) + values ({{ insert_cols_csv }}) + ; +{% endmacro %} + + diff --git a/core/dbt/include/global_project/macros/materializations/archive/strategies.sql b/core/dbt/include/global_project/macros/materializations/archive/strategies.sql new file mode 100644 index 00000000000..3fc7db24eac --- /dev/null +++ b/core/dbt/include/global_project/macros/materializations/archive/strategies.sql @@ -0,0 +1,114 @@ +{# + Dispatch strategies by name, optionally qualified to a package +#} +{% macro strategy_dispatch(name) -%} +{% set original_name = name %} + {% if '.' 
in name %} + {% set package_name, name = name.split(".", 1) %} + {% else %} + {% set package_name = none %} + {% endif %} + + {% if package_name is none %} + {% set package_context = context %} + {% elif package_name in context %} + {% set package_context = context[package_name] %} + {% else %} + {% set error_msg %} + Could not find package '{{package_name}}', called with '{{original_name}}' + {% endset %} + {{ exceptions.raise_compiler_error(error_msg | trim) }} + {% endif %} + + {%- set search_name = 'archive_' ~ name ~ '_strategy' -%} + + {% if search_name not in package_context %} + {% set error_msg %} + The specified strategy macro '{{name}}' was not found in package '{{ package_name }}' + {% endset %} + {{ exceptions.raise_compiler_error(error_msg | trim) }} + {% endif %} + {{ return(package_context[search_name]) }} +{%- endmacro %} + + +{# + Create SCD Hash SQL fields cross-db +#} +{% macro archive_hash_arguments(args) %} + {{ adapter_macro('archive_hash_arguments', args) }} +{% endmacro %} + + +{% macro default__archive_hash_arguments(args) %} + md5({% for arg in args %} + coalesce(cast({{ arg }} as varchar ), '') {% if not loop.last %} || '|' || {% endif %} + {% endfor %}) +{% endmacro %} + + +{# + Get the current time cross-db +#} +{% macro archive_get_time() -%} + {{ adapter_macro('archive_get_time') }} +{%- endmacro %} + +{% macro default__archive_get_time() -%} + {{ current_timestamp() }} +{%- endmacro %} + + +{# + Core strategy definitions +#} +{% macro archive_timestamp_strategy(node, archived_rel, current_rel, config) %} + {% set primary_key = config['unique_key'] %} + {% set updated_at = config['updated_at'] %} + + {% set row_changed_expr -%} + ({{ archived_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }}) + {%- endset %} + + {% set scd_id_expr = archive_hash_arguments([primary_key, updated_at]) %} + + {% do return({ + "unique_key": primary_key, + "updated_at": updated_at, + "row_changed": row_changed_expr, + "scd_id": scd_id_expr + }) %} +{% endmacro %} + + +{% macro archive_check_strategy(node, archived_rel, current_rel, config) %} + {% set check_cols_config = config['check_cols'] %} + {% set primary_key = config['unique_key'] %} + {% set updated_at = archive_get_time() %} + + {% if check_cols_config == 'all' %} + {% set check_cols = get_columns_in_query(node['injected_sql']) %} + {% elif check_cols_config is iterable and (check_cols_config | length) > 0 %} + {% set check_cols = check_cols_config %} + {% else %} + {% do exceptions.raise_compiler_error("Invalid value for 'check_cols': " ~ check_cols_config) %} + {% endif %} + + {% set row_changed_expr -%} + ( + {% for col in check_cols %} + {{ archived_rel }}.{{ col }} != {{ current_rel }}.{{ col }} + {%- if not loop.last %} or {% endif %} + {% endfor %} + ) + {%- endset %} + + {% set scd_id_expr = archive_hash_arguments(check_cols) %} + + {% do return({ + "unique_key": primary_key, + "updated_at": updated_at, + "row_changed": row_changed_expr, + "scd_id": scd_id_expr + }) %} +{% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql b/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql index 0dd87ce10eb..66be151e444 100644 --- a/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql +++ b/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql @@ -15,13 +15,9 @@ {%- set unique_key = config.get('unique_key') -%} {%- set identifier = model['alias'] -%} - {%- set 
tmp_identifier = model['name'] + '__dbt_incremental_tmp' -%} - {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%} - {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, - schema=schema, - database=database, type='table') -%} + {%- set tmp_relation = make_temp_relation(target_relation) %} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} @@ -55,7 +51,7 @@ {%- endcall -%} - {{ adapter.expand_target_column_types(temp_table=tmp_identifier, + {{ adapter.expand_target_column_types(from_relation=tmp_relation, to_relation=target_relation) }} {%- call statement('main') -%} @@ -71,7 +67,7 @@ insert into {{ target_relation }} ({{ dest_cols_csv }}) ( select {{ dest_cols_csv }} - from {{ tmp_relation.include(schema=False, database=False) }} + from {{ tmp_relation }} ); {% endcall %} {%- endif %} diff --git a/core/dbt/parser/archives.py b/core/dbt/parser/archives.py index 981570a48da..fb285bac4fb 100644 --- a/core/dbt/parser/archives.py +++ b/core/dbt/parser/archives.py @@ -11,6 +11,19 @@ import os +def set_archive_attributes(node): + config_keys = { + 'target_database': 'database', + 'target_schema': 'schema' + } + + for config_key, node_key in config_keys.items(): + if config_key in node.config: + setattr(node, node_key, node.config[config_key]) + + return node + + class ArchiveParser(MacrosKnownParser): @classmethod def parse_archives_from_project(cls, config): @@ -87,12 +100,15 @@ def load_and_parse(self): archive.package_name, archive.name) - to_return[node_path] = self.parse_node( + parsed_node = self.parse_node( archive, node_path, self.all_projects.get(archive.package_name), archive_config=archive_config) + # TODO : Add tests for this + to_return[node_path] = set_archive_attributes(parsed_node) + return to_return @@ -138,7 +154,9 @@ def get_fqn(cls, node, package_project_config, extra=[]): def validate_archives(node): if node.resource_type == NodeType.Archive: try: - return ParsedArchiveNode(**node.to_shallow_dict()) + parsed_node = ParsedArchiveNode(**node.to_shallow_dict()) + return set_archive_attributes(parsed_node) + except dbt.exceptions.JSONValidationException as exc: raise dbt.exceptions.CompilationException(str(exc), node) else: diff --git a/plugins/bigquery/dbt/adapters/bigquery/impl.py b/plugins/bigquery/dbt/adapters/bigquery/impl.py index 8710ccb13bc..0e8046a2628 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/impl.py +++ b/plugins/bigquery/dbt/adapters/bigquery/impl.py @@ -343,22 +343,6 @@ def execute_model(self, model, materialization, sql_override=None, return res - @available.parse(_stub_relation) - def create_temporary_table(self, sql, **kwargs): - # BQ queries always return a temp table with their results - query_job, _ = self.connections.raw_execute(sql) - bq_table = query_job.destination - - return self.Relation.create( - database=bq_table.project, - schema=bq_table.dataset_id, - identifier=bq_table.table_id, - quote_policy={ - 'schema': True, - 'identifier': True - }, - type=BigQueryRelation.Table) - @available.parse_none def alter_table_add_columns(self, relation, columns): diff --git a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql index 1c87ce4dc18..83c7926ff34 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql @@ 
-34,6 +34,11 @@ create or replace table {{ relation }} {{ partition_by(raw_partition_by) }} {{ cluster_by(raw_cluster_by) }} + {% if temporary %} + OPTIONS( + expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 12 hour) + ) + {% endif %} as ( {{ sql }} ); @@ -54,6 +59,12 @@ {{ adapter.drop_schema(database_name, schema_name) }} {% endmacro %} +{% macro bigquery__drop_relation(relation) -%} + {% call statement('drop_relation') -%} + drop {{ relation.type }} if exists {{ relation }} + {%- endcall %} +{% endmacro %} + {% macro bigquery__get_columns_in_relation(relation) -%} {{ return(adapter.get_columns_in_relation(relation)) }} {% endmacro %} diff --git a/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql b/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql index 7a95f440f83..87b10589778 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql @@ -1,9 +1,3 @@ -{% macro bigquery__create_temporary_table(sql, relation) %} - {% set tmp_relation = adapter.create_temporary_table(sql) %} - {{ return(tmp_relation) }} -{% endmacro %} - - {% macro bigquery__archive_hash_arguments(args) %} to_hex(md5(concat({% for arg in args %}coalesce(cast({{ arg }} as string), ''){% if not loop.last %}, '|',{% endif %}{% endfor %}))) {% endmacro %} @@ -12,11 +6,7 @@ {{ adapter.alter_table_add_columns(relation, columns) }} {% endmacro %} - -{% macro bigquery__archive_update(target_relation, tmp_relation) %} - update {{ target_relation }} as dest - set dest.dbt_valid_to = tmp.dbt_valid_to - from {{ tmp_relation }} as tmp - where tmp.dbt_scd_id = dest.dbt_scd_id - and change_type = 'update'; +{% macro bigquery__post_archive(staging_relation) %} + -- Clean up the archive temp table + {% do drop_relation(staging_relation) %} {% endmacro %} diff --git a/plugins/postgres/dbt/include/postgres/macros/adapters.sql b/plugins/postgres/dbt/include/postgres/macros/adapters.sql index 0bda7fc9ad4..356b32eb66a 100644 --- a/plugins/postgres/dbt/include/postgres/macros/adapters.sql +++ b/plugins/postgres/dbt/include/postgres/macros/adapters.sql @@ -91,3 +91,19 @@ {% macro postgres__current_timestamp() -%} now() {%- endmacro %} + +{% macro postgres__archive_get_time() -%} + {{ current_timestamp() }}::timestamp without time zone +{%- endmacro %} + +{% macro postgres__make_temp_relation(base_relation, suffix) %} + {% set tmp_identifier = base_relation.identifier ~ suffix ~ py_current_timestring() %} + {% do return(base_relation.incorporate( + table_name=tmp_identifier, + path={ + "identifier": tmp_identifier, + "schema": none, + "database": none + })) -%} +{% endmacro %} + diff --git a/plugins/postgres/dbt/include/postgres/macros/materializations/archive_merge.sql b/plugins/postgres/dbt/include/postgres/macros/materializations/archive_merge.sql new file mode 100644 index 00000000000..9665dbd73ca --- /dev/null +++ b/plugins/postgres/dbt/include/postgres/macros/materializations/archive_merge.sql @@ -0,0 +1,18 @@ + +{% macro postgres__archive_merge_sql(target, source, insert_cols) -%} + {%- set insert_cols_csv = insert_cols | map(attribute="name") | join(', ') -%} + + update {{ target }} + set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to + from {{ source }} as DBT_INTERNAL_SOURCE + where DBT_INTERNAL_SOURCE.dbt_scd_id = {{ target }}.dbt_scd_id + and DBT_INTERNAL_SOURCE.dbt_change_type = 'update' + and {{ target }}.dbt_valid_to is null; + + insert into {{ target }} ({{ 
insert_cols_csv }}) + select {% for column in insert_cols -%} + DBT_INTERNAL_SOURCE.{{ column.name }} {%- if not loop.last %}, {%- endif %} + {%- endfor %} + from {{ source }} as DBT_INTERNAL_SOURCE + where DBT_INTERNAL_SOURCE.dbt_change_type = 'insert'; +{% endmacro %} diff --git a/plugins/redshift/dbt/include/redshift/macros/adapters.sql b/plugins/redshift/dbt/include/redshift/macros/adapters.sql index 29f6ad0b16f..37d79d3416f 100644 --- a/plugins/redshift/dbt/include/redshift/macros/adapters.sql +++ b/plugins/redshift/dbt/include/redshift/macros/adapters.sql @@ -1,3 +1,4 @@ + {% macro dist(dist) %} {%- if dist is not none -%} {%- set dist = dist.strip().lower() -%} @@ -57,15 +58,6 @@ {% endmacro %} -{% macro redshift__create_archive_table(relation, columns) -%} - create table if not exists {{ relation }} ( - {{ column_list_for_create_table(columns) }} - ) - {{ dist('dbt_updated_at') }} - {{ sort('compound', ['dbt_scd_id']) }}; -{%- endmacro %} - - {% macro redshift__create_schema(database_name, schema_name) -%} {{ postgres__create_schema(database_name, schema_name) }} {% endmacro %} @@ -171,10 +163,15 @@ {% macro redshift__check_schema_exists(information_schema, schema) -%} {{ return(postgres__check_schema_exists(information_schema, schema)) }} {%- endmacro %} -list_schemas - -%} {% macro redshift__current_timestamp() -%} getdate() {%- endmacro %} + +{% macro redshift__archive_get_time() -%} + {{ current_timestamp() }}::timestamp +{%- endmacro %} + +{% macro redshift__make_temp_relation(base_relation, suffix) %} + {% do return(postgres__make_temp_relation(base_relation, suffix)) %} +{% endmacro %} diff --git a/plugins/redshift/dbt/include/redshift/macros/materializations/archive_merge.sql b/plugins/redshift/dbt/include/redshift/macros/materializations/archive_merge.sql new file mode 100644 index 00000000000..efde2e8373c --- /dev/null +++ b/plugins/redshift/dbt/include/redshift/macros/materializations/archive_merge.sql @@ -0,0 +1,4 @@ + +{% macro redshift__archive_merge_sql(target, source, insert_cols) -%} + {{ postgres__archive_merge_sql(target, source, insert_cols) }} +{% endmacro %} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index 7057c6fd653..99eb9224dbd 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -79,6 +79,10 @@ convert_timezone('UTC', current_timestamp()) {%- endmacro %} +{% macro snowflake__archive_get_time() -%} + to_timestamp_ntz({{ current_timestamp() }}) +{%- endmacro %} + {% macro snowflake__rename_relation(from_relation, to_relation) -%} {% call statement('rename_relation') -%} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index ef7a2ec8e35..87be13a8190 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -11,10 +11,7 @@ identifier=identifier, type='table') -%} - {%- set tmp_relation = api.Relation.create(database=database, - schema=schema, - identifier=identifier ~ "__dbt_tmp", - type='table') -%} + {%- set tmp_relation = make_temp_relation(target_relation) %} -- setup {{ run_hooks(pre_hooks, inside_transaction=False) }} @@ -42,7 +39,7 @@ {{ create_table_as(true, tmp_relation, sql) }} {%- endcall -%} - {{ 
adapter.expand_target_column_types(temp_table=tmp_relation.identifier, + {{ adapter.expand_target_column_types(from_relation=tmp_relation, to_relation=target_relation) }} {% set incremental_sql %} ( diff --git a/test/integration/004_simple_archive_test/seed.sql b/test/integration/004_simple_archive_test/seed.sql index cee3748faa4..d0ee03181c5 100644 --- a/test/integration/004_simple_archive_test/seed.sql +++ b/test/integration/004_simple_archive_test/seed.sql @@ -1,4 +1,4 @@ - create table {database}.{schema}.seed ( +create table {database}.{schema}.seed ( id INTEGER, first_name VARCHAR(50), last_name VARCHAR(50), @@ -20,7 +20,7 @@ create table {database}.{schema}.archive_expected ( updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id VARCHAR(256), + dbt_scd_id VARCHAR(32), dbt_updated_at TIMESTAMP WITHOUT TIME ZONE ); @@ -79,8 +79,6 @@ select md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id from {database}.{schema}.seed; - - create table {database}.{schema}.archive_castillo_expected ( id INTEGER, first_name VARCHAR(50), @@ -93,8 +91,9 @@ create table {database}.{schema}.archive_castillo_expected ( updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id VARCHAR(256), + dbt_scd_id VARCHAR(32), dbt_updated_at TIMESTAMP WITHOUT TIME ZONE + ); -- one entry @@ -139,7 +138,7 @@ create table {database}.{schema}.archive_alvarez_expected ( updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id VARCHAR(256), + dbt_scd_id VARCHAR(32), dbt_updated_at TIMESTAMP WITHOUT TIME ZONE ); @@ -185,7 +184,7 @@ create table {database}.{schema}.archive_kelly_expected ( updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id VARCHAR(256), + dbt_scd_id VARCHAR(32), dbt_updated_at TIMESTAMP WITHOUT TIME ZONE ); diff --git a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-archives-bq/archive.sql index d7dec9d043e..aff119c410b 100644 --- a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql +++ b/test/integration/004_simple_archive_test/test-archives-bq/archive.sql @@ -9,6 +9,6 @@ updated_at='updated_at', ) }} - select * from `{{database}}`.`{{schema}}`.seed + select * from `{{target.database}}`.`{{schema}}`.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql b/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql index 3bbe49664c1..35340368781 100644 --- a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql +++ b/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql @@ -7,6 +7,6 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.seed + select * from {{target.database}}.{{schema}}.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql b/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql index d8c671cb1b9..c16ce784889 100644 --- a/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql +++ b/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql @@ -8,5 +8,5 @@ updated_at='updated_at', ) }} - select * from 
{{database}}.{{schema}}.super_long + select * from {{target.database}}.{{schema}}.super_long {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-pg/archive.sql b/test/integration/004_simple_archive_test/test-archives-pg/archive.sql index 9117a8df1a4..4810fd4a519 100644 --- a/test/integration/004_simple_archive_test/test-archives-pg/archive.sql +++ b/test/integration/004_simple_archive_test/test-archives-pg/archive.sql @@ -9,6 +9,6 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.seed + select * from {{target.database}}.{{schema}}.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-select/archives.sql b/test/integration/004_simple_archive_test/test-archives-select/archives.sql index 30e78fe720d..562ec89b3ce 100644 --- a/test/integration/004_simple_archive_test/test-archives-select/archives.sql +++ b/test/integration/004_simple_archive_test/test-archives-select/archives.sql @@ -9,7 +9,7 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.seed where last_name = 'Castillo' + select * from {{target.database}}.{{schema}}.seed where last_name = 'Castillo' {% endarchive %} @@ -24,7 +24,7 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.seed where last_name = 'Alvarez' + select * from {{target.database}}.{{schema}}.seed where last_name = 'Alvarez' {% endarchive %} @@ -40,6 +40,6 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.seed where last_name = 'Kelly' + select * from {{target.database}}.{{schema}}.seed where last_name = 'Kelly' {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql index 40a2563291f..50eece23b5f 100644 --- a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql +++ b/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql @@ -8,7 +8,7 @@ check_cols=('email',), ) }} - select * from `{{database}}`.`{{schema}}`.seed + select * from `{{target.database}}`.`{{schema}}`.seed {% endarchive %} @@ -23,5 +23,5 @@ check_cols='all', ) }} - select * from `{{database}}`.`{{schema}}`.seed + select * from `{{target.database}}`.`{{schema}}`.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql index c3ee6fe2038..314b227634a 100644 --- a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql +++ b/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql @@ -9,7 +9,7 @@ check_cols=['email'], ) }} - select * from {{database}}.{{schema}}.seed + select * from {{target.database}}.{{schema}}.seed {% endarchive %} @@ -24,5 +24,5 @@ check_cols='all', ) }} - select * from {{database}}.{{schema}}.seed + select * from {{target.database}}.{{schema}}.seed {% endarchive %} diff --git a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py index 1b47ded1d10..22498f2f36e 100644 --- a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py +++ b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py @@ -1,6 +1,7 @@ from test.integration.base import DBTIntegrationTest, use_profile -import threading +import threading, traceback from 
dbt.adapters.factory import get_adapter +from dbt.logger import GLOBAL_LOGGER as logger class BaseTestConcurrentTransaction(DBTIntegrationTest): @@ -31,7 +32,7 @@ def run_select_and_check(self, rel, sql): connection_name = '__test_{}'.format(id(threading.current_thread())) try: with get_adapter(self.config).connection_named(connection_name) as conn: - res = self.run_sql_common(self.transform_sql(sql), 'one', conn) + res = self.run_sql_common(self.transform_sql(sql), 'one', conn, verbose=True) # The result is the output of f_sleep(), which is True if res[0] == True: @@ -40,6 +41,8 @@ def run_select_and_check(self, rel, sql): self.query_state[rel] = 'bad' except Exception as e: + logger.info("Caught exception: {}".format(e)) + traceback.print_exc() if 'concurrent transaction' in str(e): self.query_state[rel] = 'error: {}'.format(e) else: diff --git a/test/integration/033_event_tracking_test/test_events.py b/test/integration/033_event_tracking_test/test_events.py index 03f9ad3a9d2..61900e36752 100644 --- a/test/integration/033_event_tracking_test/test_events.py +++ b/test/integration/033_event_tracking_test/test_events.py @@ -610,7 +610,7 @@ def test__event_tracking_archive(self): model_id='3cdcd0fef985948fd33af308468da3b9', index=1, total=1, - status='INSERT 0 1', + status='SELECT 1', materialization='archive' ), self.build_context('archive', 'end', result_type='ok') diff --git a/test/integration/base.py b/test/integration/base.py index c7c67edde2b..516b65f0800 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -129,7 +129,8 @@ def redshift_profile(self): 'user': os.getenv('REDSHIFT_TEST_USER'), 'pass': os.getenv('REDSHIFT_TEST_PASS'), 'dbname': os.getenv('REDSHIFT_TEST_DBNAME'), - 'schema': self.unique_schema() + 'schema': self.unique_schema(), + 'keepalives_idle': 5 } }, 'target': 'default2' @@ -534,10 +535,14 @@ def run_sql_presto(self, sql, fetch, conn): conn.handle.commit() conn.transaction_open = False - def run_sql_common(self, sql, fetch, conn): + def run_sql_common(self, sql, fetch, conn, verbose=False): with conn.handle.cursor() as cursor: try: + if verbose: + logger.debug('running sql: {}'.format(sql)) cursor.execute(sql) + if verbose: + logger.debug('result from sql: {}'.format(cursor.statusmessage)) conn.handle.commit() if fetch == 'one': return cursor.fetchone() @@ -546,9 +551,9 @@ def run_sql_common(self, sql, fetch, conn): else: return except BaseException as e: - conn.handle.rollback() print(sql) print(e) + conn.handle.rollback() raise e finally: conn.transaction_open = False @@ -1015,7 +1020,9 @@ def _assertTableColumnsEqual(self, relation_a, relation_b): text_types = {'text', 'character varying', 'character', 'varchar'} - self.assertEqual(len(table_a_result), len(table_b_result)) + self.assertEqual(len(table_a_result), len(table_b_result), + "{} vs. 
{}".format(table_a_result, table_b_result)) + for a_column, b_column in zip(table_a_result, table_b_result): a_name, a_type, a_size = a_column b_name, b_type, b_size = b_column diff --git a/tox.ini b/tox.ini index 2134d39fb3c..976a3fa25e5 100644 --- a/tox.ini +++ b/tox.ini @@ -10,14 +10,14 @@ deps = [testenv:unit-py27] basepython = python2.7 -commands = /bin/bash -c '{envpython} -m pytest -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt [testenv:unit-py36] basepython = python3.6 -commands = /bin/bash -c '{envpython} -m pytest -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -27,7 +27,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_postgres {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_postgres {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -38,7 +38,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/snowflake @@ -49,7 +49,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/bigquery @@ -60,7 +60,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -72,7 +72,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_presto {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_presto {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/presto @@ -83,7 +83,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_postgres --cov=dbt --cov-branch --cov-report html:htmlcov 
{posargs} test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_postgres --cov=dbt --cov-branch --cov-report html:htmlcov {posargs} test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -94,7 +94,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/snowflake @@ -105,7 +105,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/bigquery @@ -116,7 +116,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -128,7 +128,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_presto {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v -m profile_presto {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/presto @@ -139,7 +139,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v {posargs}' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v {posargs}' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -149,7 +149,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v {posargs}' +commands = /bin/bash -c '{envpython} -m pytest --durations 0 -v {posargs}' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -160,7 +160,7 @@ passenv = * setenv = DBT_CONFIG_DIR = ~/.dbt DBT_INVOCATION_ENV = ci-appveyor -commands = pytest -v -m 'profile_postgres or profile_snowflake or profile_bigquery or profile_redshift' --cov=dbt --cov-branch --cov-report html:htmlcov test/integration test/unit +commands = pytest --durations 0 -v -m 'profile_postgres or profile_snowflake or profile_bigquery or profile_redshift' --cov=dbt --cov-branch --cov-report html:htmlcov test/integration test/unit deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -171,7 +171,7 @@ passenv = * setenv = DBT_CONFIG_DIR = ~/.dbt DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit +commands = python -m pytest --durations 0 -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov 
test/unit deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -183,7 +183,7 @@ passenv = * setenv = DBT_CONFIG_DIR = ~/.dbt DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v -m profile_postgres {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration +commands = python -m pytest --durations 0 -v -m profile_postgres {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -196,7 +196,7 @@ passenv = * setenv = DBT_CONFIG_DIR = ~/.dbt DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration +commands = python -m pytest --durations 0 -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration deps = -e {toxinidir}/core -e {toxinidir}/plugins/snowflake @@ -209,7 +209,7 @@ passenv = * setenv = DBT_CONFIG_DIR = ~/.dbt DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration +commands = python -m pytest --durations 0 -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration deps = -e {toxinidir}/core -e {toxinidir}/plugins/bigquery @@ -222,7 +222,7 @@ passenv = * setenv = DBT_CONFIG_DIR = ~/.dbt DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration +commands = python -m pytest --durations 0 -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres From b98ea32add3292134dfd3bc9d22b2a090a338719 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 30 May 2019 12:06:01 -0400 Subject: [PATCH 05/31] code review --- core/dbt/parser/archives.py | 1 - .../test-archives-bq/archive.sql | 2 +- .../test-archives-invalid/archive.sql | 2 +- .../test-archives-longtext/longtext.sql | 2 +- .../test-archives-pg/archive.sql | 4 +-- .../test-archives-select/archives.sql | 4 +-- .../test-check-col-archives-bq/archive.sql | 4 +-- .../test-check-col-archives/archive.sql | 4 +-- .../test_simple_archive.py | 25 +++++++++++++++++++ 9 files changed, 36 insertions(+), 12 deletions(-) diff --git a/core/dbt/parser/archives.py b/core/dbt/parser/archives.py index fb285bac4fb..fd74cd7f2b4 100644 --- a/core/dbt/parser/archives.py +++ b/core/dbt/parser/archives.py @@ -106,7 +106,6 @@ def load_and_parse(self): self.all_projects.get(archive.package_name), archive_config=archive_config) - # TODO : Add tests for this to_return[node_path] = set_archive_attributes(parsed_node) return to_return diff --git a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-archives-bq/archive.sql index aff119c410b..d7dec9d043e 100644 --- a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql +++ b/test/integration/004_simple_archive_test/test-archives-bq/archive.sql @@ -9,6 +9,6 @@ updated_at='updated_at', ) }} - select * from `{{target.database}}`.`{{schema}}`.seed + select * from `{{database}}`.`{{schema}}`.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql b/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql index 35340368781..3bbe49664c1 100644 --- 
a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql +++ b/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql @@ -7,6 +7,6 @@ updated_at='updated_at', ) }} - select * from {{target.database}}.{{schema}}.seed + select * from {{database}}.{{schema}}.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql b/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql index c16ce784889..d8c671cb1b9 100644 --- a/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql +++ b/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql @@ -8,5 +8,5 @@ updated_at='updated_at', ) }} - select * from {{target.database}}.{{schema}}.super_long + select * from {{database}}.{{schema}}.super_long {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-pg/archive.sql b/test/integration/004_simple_archive_test/test-archives-pg/archive.sql index 4810fd4a519..133465078c1 100644 --- a/test/integration/004_simple_archive_test/test-archives-pg/archive.sql +++ b/test/integration/004_simple_archive_test/test-archives-pg/archive.sql @@ -3,12 +3,12 @@ {{ config( target_database=var('target_database', database), - target_schema=schema, + target_schema=var('target_schema', schema), unique_key='id || ' ~ "'-'" ~ ' || first_name', strategy='timestamp', updated_at='updated_at', ) }} - select * from {{target.database}}.{{schema}}.seed + select * from {{target.database}}.{{target.schema}}.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-select/archives.sql b/test/integration/004_simple_archive_test/test-archives-select/archives.sql index 562ec89b3ce..7250715e379 100644 --- a/test/integration/004_simple_archive_test/test-archives-select/archives.sql +++ b/test/integration/004_simple_archive_test/test-archives-select/archives.sql @@ -9,7 +9,7 @@ updated_at='updated_at', ) }} - select * from {{target.database}}.{{schema}}.seed where last_name = 'Castillo' + select * from {{database}}.{{schema}}.seed where last_name = 'Castillo' {% endarchive %} @@ -24,7 +24,7 @@ updated_at='updated_at', ) }} - select * from {{target.database}}.{{schema}}.seed where last_name = 'Alvarez' + select * from {{database}}.{{schema}}.seed where last_name = 'Alvarez' {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql index 50eece23b5f..40a2563291f 100644 --- a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql +++ b/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql @@ -8,7 +8,7 @@ check_cols=('email',), ) }} - select * from `{{target.database}}`.`{{schema}}`.seed + select * from `{{database}}`.`{{schema}}`.seed {% endarchive %} @@ -23,5 +23,5 @@ check_cols='all', ) }} - select * from `{{target.database}}`.`{{schema}}`.seed + select * from `{{database}}`.`{{schema}}`.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql index 314b227634a..c3ee6fe2038 100644 --- a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql +++ b/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql @@ -9,7 +9,7 @@ check_cols=['email'], ) }} - select * from 
{{target.database}}.{{schema}}.seed + select * from {{database}}.{{schema}}.seed {% endarchive %} @@ -24,5 +24,5 @@ check_cols='all', ) }} - select * from {{target.database}}.{{schema}}.seed + select * from {{database}}.{{schema}}.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test_simple_archive.py b/test/integration/004_simple_archive_test/test_simple_archive.py index 04d9bd3eed4..c7bf29c3ce5 100644 --- a/test/integration/004_simple_archive_test/test_simple_archive.py +++ b/test/integration/004_simple_archive_test/test_simple_archive.py @@ -382,6 +382,31 @@ def run_archive(self): return self.run_dbt(['archive', '--vars', '{{"target_database": {}}}'.format(self.alternative_database)]) +class TestCrossSchemaArchiveFiles(TestSimpleArchive): + @property + def project_config(self): + paths = ['test/integration/004_simple_archive_test/test-archives-pg'] + return { + 'archive-paths': paths, + } + + def target_schema(self): + return "{}_archived".format(self.unique_schema()) + + def run_archive(self): + return self.run_dbt(['archive', '--vars', '{{"target_schema": {}}}'.format(self.target_schema())]) + + @use_profile('postgres') + def test__postgres_ref_archive_cross_schema(self): + self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') + + results = self.run_archive() + self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) + + results = self.run_dbt(['run', '--vars', '{{"target_schema": {}}}'.format(self.target_schema())]) + self.assertEqual(len(results), 1) + + class TestBadArchive(DBTIntegrationTest): @property def schema(self): From 81f4c1bd7cff93dd67ee9ea0f67d69f87a0f7631 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 30 May 2019 12:18:58 -0400 Subject: [PATCH 06/31] cleanup merge --- .../test_concurrent_transaction.py | 2 -- test/integration/base.py | 15 ++++----------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py index 25ec97c3812..d9cce7292eb 100644 --- a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py +++ b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py @@ -51,8 +51,6 @@ def run_select_and_check(self, rel, sql): self.query_state[rel] = 'bad' except Exception as e: - logger.info("Caught exception: {}".format(e)) - traceback.print_exc() if 'concurrent transaction' in str(e): self.query_state[rel] = 'error: {}'.format(e) else: diff --git a/test/integration/base.py b/test/integration/base.py index 516b65f0800..c7c67edde2b 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -129,8 +129,7 @@ def redshift_profile(self): 'user': os.getenv('REDSHIFT_TEST_USER'), 'pass': os.getenv('REDSHIFT_TEST_PASS'), 'dbname': os.getenv('REDSHIFT_TEST_DBNAME'), - 'schema': self.unique_schema(), - 'keepalives_idle': 5 + 'schema': self.unique_schema() } }, 'target': 'default2' @@ -535,14 +534,10 @@ def run_sql_presto(self, sql, fetch, conn): conn.handle.commit() conn.transaction_open = False - def run_sql_common(self, sql, fetch, conn, verbose=False): + def run_sql_common(self, sql, fetch, conn): with conn.handle.cursor() as cursor: try: - if verbose: - logger.debug('running sql: {}'.format(sql)) cursor.execute(sql) - if verbose: - logger.debug('result from sql: {}'.format(cursor.statusmessage)) conn.handle.commit() if fetch == 'one': return cursor.fetchone() @@ -551,9 +546,9 @@ def 
run_sql_common(self, sql, fetch, conn, verbose=False): else: return except BaseException as e: + conn.handle.rollback() print(sql) print(e) - conn.handle.rollback() raise e finally: conn.transaction_open = False @@ -1020,9 +1015,7 @@ def _assertTableColumnsEqual(self, relation_a, relation_b): text_types = {'text', 'character varying', 'character', 'varchar'} - self.assertEqual(len(table_a_result), len(table_b_result), - "{} vs. {}".format(table_a_result, table_b_result)) - + self.assertEqual(len(table_a_result), len(table_b_result)) for a_column, b_column in zip(table_a_result, table_b_result): a_name, a_type, a_size = a_column b_name, b_type, b_size = b_column From 69621fe6f9c1b1d0b21df1572392fe4e061013e3 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 30 May 2019 12:21:42 -0400 Subject: [PATCH 07/31] cleanup tests --- .../004_simple_archive_test/test-archives-select/archives.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/004_simple_archive_test/test-archives-select/archives.sql b/test/integration/004_simple_archive_test/test-archives-select/archives.sql index 7250715e379..30e78fe720d 100644 --- a/test/integration/004_simple_archive_test/test-archives-select/archives.sql +++ b/test/integration/004_simple_archive_test/test-archives-select/archives.sql @@ -40,6 +40,6 @@ updated_at='updated_at', ) }} - select * from {{target.database}}.{{schema}}.seed where last_name = 'Kelly' + select * from {{database}}.{{schema}}.seed where last_name = 'Kelly' {% endarchive %} From 75c8f3218613e487d7d86f2b59a53b4e2d232609 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 30 May 2019 10:54:47 -0600 Subject: [PATCH 08/31] Render source_description fields in sources, fix tests to make sure we actually do that... --- core/dbt/parser/util.py | 20 ++++++++++++++++++- .../test_docs_generate.py | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/core/dbt/parser/util.py b/core/dbt/parser/util.py index c5b4a46ae67..e05c13f3f7d 100644 --- a/core/dbt/parser/util.py +++ b/core/dbt/parser/util.py @@ -1,6 +1,7 @@ import dbt.exceptions import dbt.utils +from dbt.node_types import NodeType def docs(node, manifest, config, column_name=None): @@ -146,10 +147,27 @@ def process_docs_for_node(cls, manifest, current_project, node): else: column['description'] = description + @classmethod + def process_docs_for_source(cls, manifest, current_project, source): + context = { + 'doc': docs(source, manifest, current_project), + } + table_description = source.get('description', '') + source_description = source.get('source_description', '') + table_description = dbt.clients.jinja.get_rendered(table_description, + context) + source_description = dbt.clients.jinja.get_rendered(source_description, + context) + source.set('description', table_description) + source.set('source_description', source_description) + @classmethod def process_docs(cls, manifest, current_project): for node in manifest.nodes.values(): - cls.process_docs_for_node(manifest, current_project, node) + if node.resource_type == NodeType.Source: + cls.process_docs_for_source(manifest, current_project, node) + else: + cls.process_docs_for_node(manifest, current_project, node) return manifest @classmethod diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 966f2070c4c..187d6304470 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ 
b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -1266,7 +1266,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'resource_type': 'source', 'root_path': os.getcwd(), 'schema': my_schema_name, - 'source_description': "{{ doc('source_info') }}", + 'source_description': 'My source', 'source_name': 'my_source', 'unique_id': 'source.test.my_source.my_table' } From 94ae9fd4a7904f288895e90faff7db9b80c9b283 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 30 May 2019 13:08:27 -0400 Subject: [PATCH 09/31] fix test --- test/integration/004_simple_archive_test/test_simple_archive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/004_simple_archive_test/test_simple_archive.py b/test/integration/004_simple_archive_test/test_simple_archive.py index 4786352439b..b58e15ece78 100644 --- a/test/integration/004_simple_archive_test/test_simple_archive.py +++ b/test/integration/004_simple_archive_test/test_simple_archive.py @@ -397,7 +397,7 @@ def run_archive(self): return self.run_dbt(['archive', '--vars', '{{"target_schema": {}}}'.format(self.target_schema())]) @use_profile('postgres') - def test__postgres_ref_archive_cross_schema(self): + def test__postgres__simple_archive(self): self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') results = self.run_archive() From 82793a02d3d8c987b26a10577e41e2f8ff975b62 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sat, 1 Jun 2019 11:40:14 -0400 Subject: [PATCH 10/31] fix for tests in different logical databases --- .../test-archives-bq/archive.sql | 2 +- .../test-archives-invalid/archive.sql | 2 +- .../test-archives-longtext/longtext.sql | 2 +- .../test-archives-select/archives.sql | 6 +++--- .../test-check-col-archives-bq/archive.sql | 4 ++-- .../test-check-col-archives/archive.sql | 4 ++-- .../004_simple_archive_test/test_simple_archive.py | 14 ++++++++++++-- 7 files changed, 22 insertions(+), 12 deletions(-) diff --git a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-archives-bq/archive.sql index d7dec9d043e..aff119c410b 100644 --- a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql +++ b/test/integration/004_simple_archive_test/test-archives-bq/archive.sql @@ -9,6 +9,6 @@ updated_at='updated_at', ) }} - select * from `{{database}}`.`{{schema}}`.seed + select * from `{{target.database}}`.`{{schema}}`.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql b/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql index 3bbe49664c1..35340368781 100644 --- a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql +++ b/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql @@ -7,6 +7,6 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.seed + select * from {{target.database}}.{{schema}}.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql b/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql index d8c671cb1b9..c16ce784889 100644 --- a/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql +++ b/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql @@ -8,5 +8,5 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.super_long + select * from {{target.database}}.{{schema}}.super_long {% endarchive %} 
diff --git a/test/integration/004_simple_archive_test/test-archives-select/archives.sql b/test/integration/004_simple_archive_test/test-archives-select/archives.sql index 30e78fe720d..562ec89b3ce 100644 --- a/test/integration/004_simple_archive_test/test-archives-select/archives.sql +++ b/test/integration/004_simple_archive_test/test-archives-select/archives.sql @@ -9,7 +9,7 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.seed where last_name = 'Castillo' + select * from {{target.database}}.{{schema}}.seed where last_name = 'Castillo' {% endarchive %} @@ -24,7 +24,7 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.seed where last_name = 'Alvarez' + select * from {{target.database}}.{{schema}}.seed where last_name = 'Alvarez' {% endarchive %} @@ -40,6 +40,6 @@ updated_at='updated_at', ) }} - select * from {{database}}.{{schema}}.seed where last_name = 'Kelly' + select * from {{target.database}}.{{schema}}.seed where last_name = 'Kelly' {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql index 40a2563291f..50eece23b5f 100644 --- a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql +++ b/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql @@ -8,7 +8,7 @@ check_cols=('email',), ) }} - select * from `{{database}}`.`{{schema}}`.seed + select * from `{{target.database}}`.`{{schema}}`.seed {% endarchive %} @@ -23,5 +23,5 @@ check_cols='all', ) }} - select * from `{{database}}`.`{{schema}}`.seed + select * from `{{target.database}}`.`{{schema}}`.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql index c3ee6fe2038..314b227634a 100644 --- a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql +++ b/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql @@ -9,7 +9,7 @@ check_cols=['email'], ) }} - select * from {{database}}.{{schema}}.seed + select * from {{target.database}}.{{schema}}.seed {% endarchive %} @@ -24,5 +24,5 @@ check_cols='all', ) }} - select * from {{database}}.{{schema}}.seed + select * from {{target.database}}.{{schema}}.seed {% endarchive %} diff --git a/test/integration/004_simple_archive_test/test_simple_archive.py b/test/integration/004_simple_archive_test/test_simple_archive.py index b58e15ece78..b6e92a7fa40 100644 --- a/test/integration/004_simple_archive_test/test_simple_archive.py +++ b/test/integration/004_simple_archive_test/test_simple_archive.py @@ -382,7 +382,17 @@ def run_archive(self): return self.run_dbt(['archive', '--vars', '{{"target_database": {}}}'.format(self.alternative_database)]) -class TestCrossSchemaArchiveFiles(TestSimpleArchive): +class TestCrossSchemaArchiveFiles(DBTIntegrationTest): + NUM_ARCHIVE_MODELS = 1 + + @property + def schema(self): + return "simple_archive_004" + + @property + def models(self): + return "test/integration/004_simple_archive_test/models" + @property def project_config(self): paths = ['test/integration/004_simple_archive_test/test-archives-pg'] @@ -397,7 +407,7 @@ def run_archive(self): return self.run_dbt(['archive', '--vars', '{{"target_schema": {}}}'.format(self.target_schema())]) @use_profile('postgres') - def test__postgres__simple_archive(self): + def test__postgres__cross_schema_archive(self): 
self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') results = self.run_archive() From f26948dde20e04032c58a751019ebe6a1a1c8358 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 3 Jun 2019 15:28:00 -0600 Subject: [PATCH 11/31] remove archive blocks --- core/dbt/config/runtime.py | 15 +- core/dbt/contracts/project.py | 2 +- core/dbt/loader.py | 17 +- core/dbt/parser/__init__.py | 2 - core/dbt/parser/archives.py | 93 +----- .../test_simple_archive.py | 302 +++++------------- test/unit/test_config.py | 76 ++++- 7 files changed, 164 insertions(+), 343 deletions(-) diff --git a/core/dbt/config/runtime.py b/core/dbt/config/runtime.py index 23a0e4c81d5..397d3a74f87 100644 --- a/core/dbt/config/runtime.py +++ b/core/dbt/config/runtime.py @@ -64,7 +64,7 @@ def __init__(self, project_name, version, project_root, source_paths, self.validate() @classmethod - def from_parts(cls, project, profile, args): + def from_parts(cls, project, profile, args, allow_archive_blocks=False): """Instantiate a RuntimeConfig from its components. :param profile Profile: A parsed dbt Profile. @@ -77,6 +77,12 @@ def from_parts(cls, project, profile, args): .DEFAULTS['quote_policy'] ) quoting.update(project.quoting) + if project.archive and not allow_archive_blocks: + # if the user has an `archive` section, raise an error + raise DbtProjectError( + 'Invalid project configuration: "archive" is not allowed' + ) + return cls( project_name=project.project_name, version=project.version, @@ -163,12 +169,14 @@ def validate(self): self.validate_version() @classmethod - def from_args(cls, args): + def from_args(cls, args, allow_archive_blocks=False): """Given arguments, read in dbt_project.yml from the current directory, read in packages.yml if it exists, and use them to find the profile to load. :param args argparse.Namespace: The arguments as parsed from the cli. + :param allow_archive_blocks bool: If True, ignore archive blocks in + configs. This flag exists to enable archive migration. :raises DbtProjectError: If the project is invalid or missing. :raises DbtProfileError: If the profile is invalid or missing. :raises ValidationException: If the cli variables are invalid. @@ -185,5 +193,6 @@ def from_args(cls, args): return cls.from_parts( project=project, profile=profile, - args=args + args=args, + allow_archive_blocks=allow_archive_blocks ) diff --git a/core/dbt/contracts/project.py b/core/dbt/contracts/project.py index 3bda33e2500..3c04f80ee4c 100644 --- a/core/dbt/contracts/project.py +++ b/core/dbt/contracts/project.py @@ -2,7 +2,7 @@ from dbt.logger import GLOBAL_LOGGER as logger # noqa from dbt.utils import deep_merge -# TODO: add description fields. 
+ ARCHIVE_TABLE_CONFIG_CONTRACT = { 'type': 'object', 'additionalProperties': False, diff --git a/core/dbt/loader.py b/core/dbt/loader.py index dc34d890350..021c0b515d5 100644 --- a/core/dbt/loader.py +++ b/core/dbt/loader.py @@ -10,8 +10,8 @@ from dbt.utils import timestring from dbt.parser import MacroParser, ModelParser, SeedParser, AnalysisParser, \ - DocumentationParser, DataTestParser, HookParser, ArchiveParser, \ - SchemaParser, ParserUtils, ArchiveBlockParser + DocumentationParser, DataTestParser, HookParser, SchemaParser, \ + ParserUtils, ArchiveBlockParser from dbt.contracts.project import ProjectList @@ -63,18 +63,6 @@ def _load_macros(self, internal_manifest=None): resource_type=NodeType.Macro, )) - def _load_archives_from_project(self): - archive_parser = ArchiveParser(self.root_project, self.all_projects, - self.macro_manifest) - for key, node in archive_parser.load_and_parse().items(): - # we have another archive parser, so we have to check for - # collisions - existing = self.nodes.get(key) - if existing: - dbt.exceptions.raise_duplicate_resource_name(existing, node) - else: - self.nodes[key] = node - def _load_seeds(self): parser = SeedParser(self.root_project, self.all_projects, self.macro_manifest) @@ -98,7 +86,6 @@ def _load_nodes(self): self.macro_manifest) self.nodes.update(hook_parser.load_and_parse()) - self._load_archives_from_project() self._load_seeds() def _load_docs(self): diff --git a/core/dbt/parser/__init__.py b/core/dbt/parser/__init__.py index 5363aa29400..43cadd8e907 100644 --- a/core/dbt/parser/__init__.py +++ b/core/dbt/parser/__init__.py @@ -1,6 +1,5 @@ from .analysis import AnalysisParser -from .archives import ArchiveParser from .archives import ArchiveBlockParser from .data_test import DataTestParser from .docs import DocumentationParser @@ -14,7 +13,6 @@ __all__ = [ 'AnalysisParser', - 'ArchiveParser', 'ArchiveBlockParser', 'DataTestParser', 'DocumentationParser', diff --git a/core/dbt/parser/archives.py b/core/dbt/parser/archives.py index fd74cd7f2b4..e8cbcc15901 100644 --- a/core/dbt/parser/archives.py +++ b/core/dbt/parser/archives.py @@ -1,15 +1,11 @@ -from dbt.contracts.graph.unparsed import UnparsedNode + from dbt.contracts.graph.parsed import ParsedArchiveNode from dbt.node_types import NodeType -from dbt.parser.base import MacrosKnownParser from dbt.parser.base_sql import BaseSqlParser, SQLParseResult -from dbt.adapters.factory import get_adapter import dbt.clients.jinja import dbt.exceptions import dbt.utils -import os - def set_archive_attributes(node): config_keys = { @@ -24,93 +20,6 @@ def set_archive_attributes(node): return node -class ArchiveParser(MacrosKnownParser): - @classmethod - def parse_archives_from_project(cls, config): - archives = [] - archive_configs = config.archive - - for archive_config in archive_configs: - tables = archive_config.get('tables') - - if tables is None: - continue - - for table in tables: - cfg = table.copy() - source_database = archive_config.get( - 'source_database', - config.credentials.database - ) - cfg['target_database'] = archive_config.get( - 'target_database', - config.credentials.database - ) - - source_schema = archive_config['source_schema'] - cfg['target_schema'] = archive_config.get('target_schema') - # project-defined archives always use the 'timestamp' strategy. 
- cfg['strategy'] = 'timestamp' - - fake_path = [cfg['target_database'], cfg['target_schema'], - cfg['target_table']] - - relation = get_adapter(config).Relation.create( - database=source_database, - schema=source_schema, - identifier=table['source_table'], - type='table' - ) - - raw_sql = '{{ config(materialized="archive") }}' + \ - 'select * from {!s}'.format(relation) - - archives.append({ - 'name': table.get('target_table'), - 'root_path': config.project_root, - 'resource_type': NodeType.Archive, - 'path': os.path.join('archive', *fake_path), - 'original_file_path': 'dbt_project.yml', - 'package_name': config.project_name, - 'config': cfg, - 'raw_sql': raw_sql - }) - - return archives - - def load_and_parse(self): - """Load and parse archives in a list of projects. Returns a dict - that maps unique ids onto ParsedNodes""" - - archives = [] - to_return = {} - - for name, project in self.all_projects.items(): - archives = archives + self.parse_archives_from_project(project) - - # We're going to have a similar issue with parsed nodes, if we want to - # make parse_node return those. - for a in archives: - # archives have a config, but that would make for an invalid - # UnparsedNode, so remove it and pass it along to parse_node as an - # argument. - archive_config = a.pop('config') - archive = UnparsedNode(**a) - node_path = self.get_path(archive.resource_type, - archive.package_name, - archive.name) - - parsed_node = self.parse_node( - archive, - node_path, - self.all_projects.get(archive.package_name), - archive_config=archive_config) - - to_return[node_path] = set_archive_attributes(parsed_node) - - return to_return - - class ArchiveBlockParser(BaseSqlParser): def parse_archives_from_file(self, file_node, tags=None): # the file node has a 'raw_sql' field that contains the jinja data with diff --git a/test/integration/004_simple_archive_test/test_simple_archive.py b/test/integration/004_simple_archive_test/test_simple_archive.py index b6e92a7fa40..fe451b69958 100644 --- a/test/integration/004_simple_archive_test/test_simple_archive.py +++ b/test/integration/004_simple_archive_test/test_simple_archive.py @@ -2,7 +2,7 @@ import dbt.exceptions -class TestSimpleArchive(DBTIntegrationTest): +class TestSimpleArchiveFiles(DBTIntegrationTest): NUM_ARCHIVE_MODELS = 1 @property @@ -18,29 +18,17 @@ def run_archive(self): @property def project_config(self): - source_table = 'seed' - - if self.adapter_type == 'snowflake': - source_table = source_table.upper() - return { "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive": [ - { - "source_schema": self.unique_schema(), - "target_schema": self.unique_schema(), - "tables": [ - { - "source_table": source_table, - "target_table": "archive_actual", - "updated_at": 'updated_at', - "unique_key": '''id || '-' || first_name''' - }, - ], - }, - ], + "archive-paths": ['test/integration/004_simple_archive_test/test-archives-pg'], } + @use_profile('postgres') + def test__postgres_ref_archive(self): + self.dbt_run_seed_archive() + results = self.run_dbt(['run']) + self.assertEqual(len(results), 1) + def dbt_run_seed_archive(self): if self.adapter_type == 'postgres': self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') @@ -112,8 +100,66 @@ def test__presto__simple_archive_disabled(self): self.assertIn('not implemented for presto', results[0].error) -class TestSimpleArchiveBigquery(DBTIntegrationTest): +class TestSimpleArchiveFileSelects(DBTIntegrationTest): + @property + def schema(self): + return "simple_archive_004" + + 
@property + def models(self): + return "test/integration/004_simple_archive_test/models" + + @property + def project_config(self): + return { + "data-paths": ['test/integration/004_simple_archive_test/data'], + "archive-paths": ['test/integration/004_simple_archive_test/test-archives-select', + 'test/integration/004_simple_archive_test/test-archives-pg'], + } + + @use_profile('postgres') + def test__postgres__select_archives(self): + self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') + + results = self.run_dbt(['archive']) + self.assertEqual(len(results), 4) + self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') + self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') + self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') + self.assertTablesEqual('archive_actual', 'archive_expected') + + self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") + self.run_sql_file("test/integration/004_simple_archive_test/update.sql") + + results = self.run_dbt(['archive']) + self.assertEqual(len(results), 4) + self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') + self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') + self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') + self.assertTablesEqual('archive_actual', 'archive_expected') + + @use_profile('postgres') + def test__postgres_exclude_archives(self): + self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') + results = self.run_dbt(['archive', '--exclude', 'archive_castillo']) + self.assertEqual(len(results), 3) + self.assertTableDoesNotExist('archive_castillo') + self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') + self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') + self.assertTablesEqual('archive_actual', 'archive_expected') + + @use_profile('postgres') + def test__postgres_select_archives(self): + self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') + results = self.run_dbt(['archive', '--models', 'archive_castillo']) + self.assertEqual(len(results), 1) + self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') + self.assertTableDoesNotExist('archive_alvarez') + self.assertTableDoesNotExist('archive_kelly') + self.assertTableDoesNotExist('archive_actual') + +class TestSimpleArchiveFilesBigquery(DBTIntegrationTest): @property def schema(self): return "simple_archive_004" @@ -125,20 +171,7 @@ def models(self): @property def project_config(self): return { - "archive": [ - { - "source_schema": self.unique_schema(), - "target_schema": self.unique_schema(), - "tables": [ - { - "source_table": 'seed', - "target_table": "archive_actual", - "updated_at": 'updated_at', - "unique_key": "concat(cast(id as string) , '-', first_name)" - } - ] - } - ] + "archive-paths": ['test/integration/004_simple_archive_test/test-archives-bq'], } def assert_expected(self): @@ -209,7 +242,7 @@ def test__bigquery__archive_with_new_field(self): self.assertEqual(expected_type, actual_type, "data types are different") -class TestCrossDBArchive(DBTIntegrationTest): +class TestCrossDBArchiveFiles(DBTIntegrationTest): setup_alternate_db = True @property def schema(self): @@ -220,37 +253,17 @@ def models(self): return "test/integration/004_simple_archive_test/models" @property - def archive_project_config(self): + def project_config(self): if self.adapter_type == 'snowflake': - return { - "source_table": 'SEED', - "target_table": "archive_actual", - 
"updated_at": 'updated_at', - "unique_key": '''id || '-' || first_name''' - } + paths = ['test/integration/004_simple_archive_test/test-archives-pg'] else: - return { - "source_table": 'seed', - "target_table": "archive_actual", - "updated_at": 'updated_at', - "unique_key": "concat(cast(id as string) , '-', first_name)" - } - - @property - def project_config(self): + paths = ['test/integration/004_simple_archive_test/test-archives-bq'] return { - "archive": [ - { - 'target_database': self.alternative_database, - "source_schema": self.unique_schema(), - "target_schema": self.unique_schema(), - "tables": [self.archive_project_config] - } - ] + 'archive-paths': paths, } def run_archive(self): - return self.run_dbt(['archive']) + return self.run_dbt(['archive', '--vars', '{{"target_database": {}}}'.format(self.alternative_database)]) @use_profile('snowflake') def test__snowflake__cross_archive(self): @@ -285,103 +298,6 @@ def test__bigquery__cross_archive(self): self.assertTablesEqual("archive_expected", "archive_actual", table_b_db=self.alternative_database) -class TestSimpleArchiveFiles(TestSimpleArchive): - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-pg'], - } - - @use_profile('postgres') - def test__postgres_ref_archive(self): - self.dbt_run_seed_archive() - results = self.run_dbt(['run']) - self.assertEqual(len(results), 1) - - -class TestSimpleArchiveFileSelects(DBTIntegrationTest): - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-select', - 'test/integration/004_simple_archive_test/test-archives-pg'], - } - - @use_profile('postgres') - def test__postgres__select_archives(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') - - results = self.run_dbt(['archive']) - self.assertEqual(len(results), 4) - self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') - self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') - self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') - self.assertTablesEqual('archive_actual', 'archive_expected') - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - - results = self.run_dbt(['archive']) - self.assertEqual(len(results), 4) - self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') - self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') - self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') - self.assertTablesEqual('archive_actual', 'archive_expected') - - @use_profile('postgres') - def test__postgres_exclude_archives(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') - results = self.run_dbt(['archive', '--exclude', 'archive_castillo']) - self.assertEqual(len(results), 3) - self.assertTableDoesNotExist('archive_castillo') - self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') - self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') - self.assertTablesEqual('archive_actual', 'archive_expected') - - @use_profile('postgres') - def 
test__postgres_select_archives(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') - results = self.run_dbt(['archive', '--models', 'archive_castillo']) - self.assertEqual(len(results), 1) - self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') - self.assertTableDoesNotExist('archive_alvarez') - self.assertTableDoesNotExist('archive_kelly') - self.assertTableDoesNotExist('archive_actual') - - -class TestSimpleArchiveFilesBigquery(TestSimpleArchiveBigquery): - @property - def project_config(self): - return { - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-bq'], - } - - -class TestCrossDBArchiveFiles(TestCrossDBArchive): - @property - def project_config(self): - if self.adapter_type == 'snowflake': - paths = ['test/integration/004_simple_archive_test/test-archives-pg'] - else: - paths = ['test/integration/004_simple_archive_test/test-archives-bq'] - return { - 'archive-paths': paths, - } - - def run_archive(self): - return self.run_dbt(['archive', '--vars', '{{"target_database": {}}}'.format(self.alternative_database)]) - - class TestCrossSchemaArchiveFiles(DBTIntegrationTest): NUM_ARCHIVE_MODELS = 1 @@ -440,76 +356,6 @@ def test__postgres__invalid(self): self.assertIn('target_database', str(exc.exception)) -class TestConflictArchive(DBTIntegrationTest): - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - @property - def project_config(self): - return {} - - - @use_profile('postgres') - def test__postgres_archive_block_archive_collision(self): - self.use_default_project({ - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-pg'], - "archive": [ - { - "source_schema": self.unique_schema(), - "target_schema": self.unique_schema(), - "tables": [ - { - "source_table": "seed", - "target_table": "archive_actual", - "updated_at": 'updated_at', - "unique_key": '''id || '-' || first_name''' - }, - ], - }, - ], - }) - - with self.assertRaises(dbt.exceptions.CompilationException) as exc: - self.run_dbt(['compile'], expect_pass=False) - - def test__postgres_archive_block_model_collision(self): - self.use_default_project({ - "source-paths": ['test/integration/004_simple_archive_test/models-collision'], - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-pg'], - "archive": [], - }) - - with self.assertRaises(dbt.exceptions.CompilationException) as exc: - self.run_dbt(['compile'], expect_pass=False) - - def test__postgres_archive_model_collision(self): - self.use_default_project({ - "source-paths": ['test/integration/004_simple_archive_test/models-collision'], - "archive": [ - { - "source_schema": self.unique_schema(), - "target_schema": self.unique_schema(), - "tables": [ - { - "source_table": "seed", - "target_table": "archive_actual", - "updated_at": 'updated_at', - "unique_key": '''id || '-' || first_name''' - }, - ], - }, - ], - }) - - with self.assertRaises(dbt.exceptions.CompilationException) as exc: - self.run_dbt(['compile'], expect_pass=False) - - class TestCheckCols(TestSimpleArchiveFiles): NUM_ARCHIVE_MODELS = 2 def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): diff --git a/test/unit/test_config.py b/test/unit/test_config.py index c2c70dba04b..628618d94d2 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -1049,6 +1049,45 @@ def test_impossible_version_range(self): raised = self.from_parts(dbt.exceptions.DbtProjectError) 
self.assertIn('The package version requirement can never be satisfied', str(raised.exception)) + def test_archive_not_allowed(self): + self.default_project_data['archive'] = [{ + "source_schema": 'a', + "target_schema": 'b', + "tables": [ + { + "source_table": "seed", + "target_table": "archive_actual", + "updated_at": 'updated_at', + "unique_key": '''id || '-' || first_name''' + }, + ], + }] + project = self.get_project() + profile = self.get_profile() + with self.assertRaises(dbt.exceptions.DbtProjectError) as raised: + dbt.config.RuntimeConfig.from_parts(project, profile, self.args) + self.assertIn('Invalid project configuration: "archive" is not allowed', str(raised.exception)) + + def test_archive_allowed(self): + archive_cfg = { + "source_schema": 'a', + "target_schema": 'b', + "tables": [ + { + "source_table": "seed", + "target_table": "archive_actual", + "updated_at": 'updated_at', + "unique_key": '''id || '-' || first_name''' + }, + ], + } + self.default_project_data['archive'] = [archive_cfg] + project = self.get_project() + profile = self.get_profile() + + cfg = dbt.config.RuntimeConfig.from_parts(project, profile, self.args, + allow_archive_blocks=True) + self.assertEqual(cfg.archive, [archive_cfg]) class TestRuntimeConfigFiles(BaseFileTest): @@ -1062,7 +1101,6 @@ def setUp(self): def test_from_args(self): with temp_cd(self.project_dir): config = dbt.config.RuntimeConfig.from_args(self.args) - self.assertEqual(config.project_name, 'my_test_project') self.assertEqual(config.version, '0.0.1') self.assertEqual(config.profile_name, 'default') # on osx, for example, these are not necessarily equal due to /private @@ -1085,6 +1123,41 @@ def test_from_args(self): self.assertEqual(config.archive, []) self.assertEqual(config.seeds, {}) self.assertEqual(config.packages, PackageConfig(packages=[])) + self.assertEqual(config.project_name, 'my_test_project') + + +class TestRuntimeConfigFilesWithArchive(BaseFileTest): + def setUp(self): + super(TestRuntimeConfigFilesWithArchive, self).setUp() + self.default_project_data['archive'] = [ + { + "source_schema": 'a', + "target_schema": 'b', + "tables": [ + { + "source_table": "c", + "target_table": "d", + "updated_at": 'date_field', + "unique_key": 'id', + }, + ], + } + ] + self.write_profile(self.default_profile_data) + self.write_project(self.default_project_data) + # and after the fact, add the project root + self.default_project_data['project-root'] = self.project_dir + + def test_archive_ok_from_args(self): + with temp_cd(self.project_dir): + config = dbt.config.RuntimeConfig.from_args(self.args, allow_archive_blocks=True) + + self.assertEqual(config.archive, self.default_project_data['archive']) + + def test_archive_error(self): + with temp_cd(self.project_dir), self.assertRaises(dbt.exceptions.DbtProjectError) as raised: + dbt.config.RuntimeConfig.from_args(self.args) + self.assertIn('Invalid project configuration: "archive" is not allowed', str(raised.exception)) class TestVariableRuntimeConfigFiles(BaseFileTest): @@ -1136,4 +1209,3 @@ def test_cli_and_env_vars(self): self.assertEqual(config.models['bar']['materialized'], 'blah') self.assertEqual(config.seeds['foo']['post-hook'], "{{ env_var('env_value_target') }}") self.assertEqual(config.seeds['bar']['materialized'], 'blah') - From 704ee588469848c432c4566a1a78f7fd757fff04 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 3 Jun 2019 16:17:21 -0600 Subject: [PATCH 12/31] fix the exit code tests too --- .../023_exit_codes_test/archives-bad/b.sql | 4 +++ 
.../023_exit_codes_test/archives-good/g.sql | 4 +++ .../023_exit_codes_test/test_exit_codes.py | 31 ++----------------- 3 files changed, 11 insertions(+), 28 deletions(-) create mode 100644 test/integration/023_exit_codes_test/archives-bad/b.sql create mode 100644 test/integration/023_exit_codes_test/archives-good/g.sql diff --git a/test/integration/023_exit_codes_test/archives-bad/b.sql b/test/integration/023_exit_codes_test/archives-bad/b.sql new file mode 100644 index 00000000000..dba11123afc --- /dev/null +++ b/test/integration/023_exit_codes_test/archives-bad/b.sql @@ -0,0 +1,4 @@ +{% archive good_archive %} + {{ config(target_schema=schema, target_database=database, strategy='timestamp', unique_key='id', updated_at='updated_at_not_real')}} + select * from {{ schema }}.good +{% endarchive %} diff --git a/test/integration/023_exit_codes_test/archives-good/g.sql b/test/integration/023_exit_codes_test/archives-good/g.sql new file mode 100644 index 00000000000..fcce3ae911e --- /dev/null +++ b/test/integration/023_exit_codes_test/archives-good/g.sql @@ -0,0 +1,4 @@ +{% archive good_archive %} + {{ config(target_schema=schema, target_database=database, strategy='timestamp', unique_key='id', updated_at='updated_at')}} + select * from {{ schema }}.good +{% endarchive %} diff --git a/test/integration/023_exit_codes_test/test_exit_codes.py b/test/integration/023_exit_codes_test/test_exit_codes.py index 88e16e48e06..dc5236286db 100644 --- a/test/integration/023_exit_codes_test/test_exit_codes.py +++ b/test/integration/023_exit_codes_test/test_exit_codes.py @@ -13,23 +13,11 @@ def schema(self): def models(self): return "test/integration/023_exit_codes_test/models" + @property def project_config(self): return { - "archive": [ - { - "source_schema": self.unique_schema(), - "target_schema": self.unique_schema(), - "tables": [ - { - "source_table": "good", - "target_table": "good_archive", - "updated_at": 'updated_at', - "unique_key": 'id' - } - ] - } - ] + "archive-paths": ['test/integration/023_exit_codes_test/archives-good'], } @use_profile('postgres') @@ -91,20 +79,7 @@ def models(self): @property def project_config(self): return { - "archive": [ - { - "source_schema": self.unique_schema(), - "target_schema": self.unique_schema(), - "tables": [ - { - "source_table": "good", - "target_table": "good_archive", - "updated_at": 'updated_at_not_real', - "unique_key": 'id' - } - ] - } - ] + "archive-paths": ['test/integration/023_exit_codes_test/archives-bad'], } @use_profile('postgres') From 248ca3ff76615f1c4b3369f2a1011f6ec61ce43d Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 3 Jun 2019 16:20:15 -0600 Subject: [PATCH 13/31] fix more tests --- .../033_event_tracking_test/archives/a.sql | 4 ++++ .../033_event_tracking_test/test_events.py | 15 +-------------- 2 files changed, 5 insertions(+), 14 deletions(-) create mode 100644 test/integration/033_event_tracking_test/archives/a.sql diff --git a/test/integration/033_event_tracking_test/archives/a.sql b/test/integration/033_event_tracking_test/archives/a.sql new file mode 100644 index 00000000000..90e4ce9c9a7 --- /dev/null +++ b/test/integration/033_event_tracking_test/archives/a.sql @@ -0,0 +1,4 @@ +{% archive archived %} + {{ config(target_schema=schema, target_database=database, strategy='timestamp', unique_key='id', updated_at='updated_at')}} + select * from {{ schema }}.archivable +{% endarchive %} diff --git a/test/integration/033_event_tracking_test/test_events.py b/test/integration/033_event_tracking_test/test_events.py index 
61900e36752..55e325521e6 100644 --- a/test/integration/033_event_tracking_test/test_events.py +++ b/test/integration/033_event_tracking_test/test_events.py @@ -561,20 +561,7 @@ class TestEventTrackingArchive(TestEventTracking): @property def project_config(self): return { - "archive": [ - { - "source_schema": self.unique_schema(), - "target_schema": self.unique_schema(), - "tables": [ - { - "source_table": "archivable", - "target_table": "archived", - "updated_at": '"updated_at"', - "unique_key": '"id"' - } - ] - } - ] + "archive-paths": ['test/integration/033_event_tracking_test/archives'] } @use_profile("postgres") From 001b9abce9aa0c5622f30825ba41fa8352e1298c Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 28 May 2019 13:57:04 -0400 Subject: [PATCH 14/31] flip around generate_alias_name args --- .../include/global_project/macros/etc/get_custom_alias.sql | 5 +++-- core/dbt/parser/base.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/dbt/include/global_project/macros/etc/get_custom_alias.sql b/core/dbt/include/global_project/macros/etc/get_custom_alias.sql index 7a382a58310..1f851097ca8 100644 --- a/core/dbt/include/global_project/macros/etc/get_custom_alias.sql +++ b/core/dbt/include/global_project/macros/etc/get_custom_alias.sql @@ -2,16 +2,17 @@ {# Renders a alias name given a custom alias name. If the custom alias name is none, then the resulting alias is just the filename of the - model. If a alias override is specified, then that is used. + model. If an alias override is specified, then that is used. This macro can be overriden in projects to define different semantics for rendering a alias name. Arguments: custom_alias_name: The custom alias name specified for a model, or none + node: The available node that an alias is being generated for, or none #} -{% macro generate_alias_name(node, custom_alias_name=none) -%} +{% macro generate_alias_name(custom_alias_name=none, node=none) -%} {%- if custom_alias_name is none -%} diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 3c709f3e459..8c890b92049 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -210,7 +210,7 @@ def _update_parsed_node_info(self, parsed_node, config): alias_override = config.config.get('alias') get_alias = self.get_alias_func() - parsed_node.alias = get_alias(parsed_node, alias_override).strip() + parsed_node.alias = get_alias(alias_override, parsed_node).strip() parsed_node.database = config.config.get( 'database', self.default_database From 99f62e850f8bc0775cab054d8f9fbdf56a435bce Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 28 May 2019 14:24:58 -0400 Subject: [PATCH 15/31] fix tests --- core/dbt/parser/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 8c890b92049..e823d2a4c6e 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -118,7 +118,7 @@ def get_alias_func(self): GLOBAL_PROJECT_NAME ) if get_alias_macro is None: - def get_alias(node, custom_alias_name=None): + def get_alias(custom_alias_name=None, node=None): if custom_alias_name is None: return node.name else: From 679784735e725d876d316cb40a9d6bbe1fddde1f Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 29 May 2019 15:06:47 -0600 Subject: [PATCH 16/31] Swap aliases ordering and add node parameter to generate_schema_name Fix many tests Support single-arg generate_schema_name macros Add repeat flag to warn_or_error to suppress duplicate warnings Add a warning if a user's macro does 
not take a second argument --- core/dbt/exceptions.py | 8 +++- .../macros/etc/get_custom_schema.sql | 8 ++-- core/dbt/parser/base.py | 23 +++++++-- .../schema_override_legacy_macros/schema.sql | 6 +++ .../schema_override_macros/schema.sql | 4 +- .../test_local_dependency.py | 48 ++++++++++++++++++- .../043_custom_aliases_test/macros/macros.sql | 2 +- 7 files changed, 87 insertions(+), 12 deletions(-) create mode 100644 test/integration/006_simple_dependency_test/schema_override_legacy_macros/schema.sql diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index a2034ac6f7e..d3dd355f647 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -652,10 +652,16 @@ def raise_not_implemented(msg): raise NotImplementedException(msg) -def warn_or_error(msg, node=None, log_fmt=None): +_REPEAT_MESSAGE_CACHE = set() + + +def warn_or_error(msg, node=None, log_fmt=None, repeat=True): if dbt.flags.WARN_ERROR: raise_compiler_error(msg, node) else: + if not repeat and msg in _REPEAT_MESSAGE_CACHE: + return + _REPEAT_MESSAGE_CACHE.add(msg) if log_fmt is not None: msg = log_fmt.format(msg) logger.warning(msg) diff --git a/core/dbt/include/global_project/macros/etc/get_custom_schema.sql b/core/dbt/include/global_project/macros/etc/get_custom_schema.sql index f48fd8113d4..9cc6c72bb4e 100644 --- a/core/dbt/include/global_project/macros/etc/get_custom_schema.sql +++ b/core/dbt/include/global_project/macros/etc/get_custom_schema.sql @@ -3,7 +3,7 @@ Renders a schema name given a custom schema name. If the custom schema name is none, then the resulting schema is just the "schema" value in the specified target. If a schema override is specified, then - the resulting schema is the default schema concatenated with the + the resulting schema is the default schema concatenated with the custom schema. This macro can be overriden in projects to define different semantics @@ -11,9 +11,10 @@ Arguments: custom_schema_name: The custom schema name specified for a model, or none + node: The node the schema is being generated for #} -{% macro generate_schema_name(custom_schema_name=none) -%} +{% macro generate_schema_name(custom_schema_name, node) -%} {%- set default_schema = target.schema -%} {%- if custom_schema_name is none -%} @@ -36,9 +37,10 @@ Arguments: custom_schema_name: The custom schema name specified for a model, or none + node: The node the schema is being generated for #} -{% macro generate_schema_name_for_env(custom_schema_name=none) -%} +{% macro generate_schema_name_for_env(custom_schema_name, node) -%} {%- set default_schema = target.schema -%} {%- if target.name == 'prod' and custom_schema_name is not none -%} diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index e823d2a4c6e..39cb9d0887d 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -82,8 +82,9 @@ def get_schema_func(self): 'generate_schema_name', GLOBAL_PROJECT_NAME ) + # this is only true in tests! 
if get_schema_macro is None: - def get_schema(_): + def get_schema(custom_schema_name=None, node=None): return self.default_schema else: root_context = dbt.context.parser.generate_macro( @@ -117,8 +118,10 @@ def get_alias_func(self): 'generate_alias_name', GLOBAL_PROJECT_NAME ) + + # the generate_alias_name macro might not exist if get_alias_macro is None: - def get_alias(custom_alias_name=None, node=None): + def get_alias(custom_alias_name, node): if custom_alias_name is None: return node.name else: @@ -206,7 +209,21 @@ def _update_parsed_node_info(self, parsed_node, config): # definition, not the current package schema_override = config.config.get('schema') get_schema = self.get_schema_func() - parsed_node.schema = get_schema(schema_override).strip() + try: + schema = get_schema(schema_override, parsed_node) + except dbt.exceptions.CompilationException as exc: + too_many_args = ( + "macro 'dbt_macro__generate_schema_name' takes not more than " + "1 argument(s)" + ) + if too_many_args not in str(exc): + raise + msg = ('The generate_schema_name macro does not accept a second ' + 'argument. This form is deprecated as of 0.14.0') + dbt.exceptions.warn_or_error(msg, node=parsed_node, repeat=False, + log_fmt='WARNING: {}') + schema = get_schema(schema_override) + parsed_node.schema = schema.strip() alias_override = config.config.get('alias') get_alias = self.get_alias_func() diff --git a/test/integration/006_simple_dependency_test/schema_override_legacy_macros/schema.sql b/test/integration/006_simple_dependency_test/schema_override_legacy_macros/schema.sql new file mode 100644 index 00000000000..b2456437d31 --- /dev/null +++ b/test/integration/006_simple_dependency_test/schema_override_legacy_macros/schema.sql @@ -0,0 +1,6 @@ + +{% macro generate_schema_name(schema_name) -%} + + {{ schema_name }}_{{ target.schema }}_macro + +{%- endmacro %} diff --git a/test/integration/006_simple_dependency_test/schema_override_macros/schema.sql b/test/integration/006_simple_dependency_test/schema_override_macros/schema.sql index b2456437d31..615c3fd3b1a 100644 --- a/test/integration/006_simple_dependency_test/schema_override_macros/schema.sql +++ b/test/integration/006_simple_dependency_test/schema_override_macros/schema.sql @@ -1,6 +1,6 @@ -{% macro generate_schema_name(schema_name) -%} +{% macro generate_schema_name(schema_name, node) -%} - {{ schema_name }}_{{ target.schema }}_macro + {{ schema_name }}_{{ node.schema }}_macro {%- endmacro %} diff --git a/test/integration/006_simple_dependency_test/test_local_dependency.py b/test/integration/006_simple_dependency_test/test_local_dependency.py index 27b7a9719a5..6c2a5ca20bd 100644 --- a/test/integration/006_simple_dependency_test/test_local_dependency.py +++ b/test/integration/006_simple_dependency_test/test_local_dependency.py @@ -7,8 +7,7 @@ import dbt.exceptions -class TestSimpleDependency(DBTIntegrationTest): - +class BaseDependencyTest(DBTIntegrationTest): @property def schema(self): return "local_dependency_006" @@ -17,6 +16,12 @@ def schema(self): def models(self): return "test/integration/006_simple_dependency_test/local_models" + def base_schema(self): + return self.unique_schema() + + def configured_schema(self): + return self.unique_schema() + '_configured' + @property def packages_config(self): return { @@ -27,6 +32,17 @@ def packages_config(self): ] } + +class TestSimpleDependency(BaseDependencyTest): + + @property + def schema(self): + return "local_dependency_006" + + @property + def models(self): + return 
"test/integration/006_simple_dependency_test/local_models" + def base_schema(self): return self.unique_schema() @@ -100,6 +116,34 @@ def test_postgres_local_dependency_out_of_date_no_check(self, mock_get): self.assertEqual(len(results), 3) +class TestDependencyWithLegacySchema(BaseDependencyTest): + @property + def project_config(self): + return { + 'macro-paths': ['test/integration/006_simple_dependency_test/schema_override_legacy_macros'], + 'models': { + 'schema': 'dbt_test', + } + } + + def base_schema(self): + return 'dbt_test_{}_macro'.format(self.unique_schema()) + + def configured_schema(self): + return 'configured_{}_macro'.format(self.unique_schema()) + + @use_profile('postgres') + @mock.patch('dbt.config.project.get_installed_version') + def test_postgres_local_dependency_out_of_date_no_check_no_strict(self, mock_get): + mock_get.return_value = dbt.semver.VersionSpecifier.from_version_string('0.0.1') + self.run_dbt(['deps']) + results = self.run_dbt(['run', '--no-version-check'], strict=False) + self.assertEqual(len(results), 3) + + with self.assertRaises(dbt.exceptions.CompilationException): + results = self.run_dbt(['run', '--no-version-check']) + + class TestSimpleDependencyHooks(DBTIntegrationTest): @property def schema(self): diff --git a/test/integration/043_custom_aliases_test/macros/macros.sql b/test/integration/043_custom_aliases_test/macros/macros.sql index 4bc6a3990e0..17fa7f1ca5e 100644 --- a/test/integration/043_custom_aliases_test/macros/macros.sql +++ b/test/integration/043_custom_aliases_test/macros/macros.sql @@ -1,5 +1,5 @@ -{% macro generate_alias_name(node, custom_alias_name=none) -%} +{% macro generate_alias_name(custom_alias_name, node) -%} {%- if custom_alias_name is none -%} {{ node.name }} {%- else -%} From 0d49295b94f26d5cae0a714393815203c49c4c92 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 30 May 2019 11:11:55 -0600 Subject: [PATCH 17/31] tests, my ancient nemesis --- test/integration/024_custom_schema_test/macros/schema.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/024_custom_schema_test/macros/schema.sql b/test/integration/024_custom_schema_test/macros/schema.sql index 327da5585d3..19f1919a7ed 100644 --- a/test/integration/024_custom_schema_test/macros/schema.sql +++ b/test/integration/024_custom_schema_test/macros/schema.sql @@ -1,5 +1,5 @@ -{% macro generate_schema_name(schema_name) %} +{% macro generate_schema_name(schema_name, node) %} {{ schema_name }}_{{ target.schema }}_macro From 28dc10ed98b5427556d17c4674cb4ccdadbed281 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 3 Jun 2019 14:10:52 -0600 Subject: [PATCH 18/31] PR feedback --- core/dbt/deprecations.py | 12 +++++++ core/dbt/exceptions.py | 8 +---- core/dbt/parser/base.py | 6 ++-- .../schema_override_legacy_macros/schema.sql | 6 ---- .../test_local_dependency.py | 28 --------------- .../boring-models/boring.sql | 1 + .../012_deprecation_tests/macros/schema.sql | 7 ++++ .../test_deprecations.py | 34 ++++++++++++++++--- 8 files changed, 53 insertions(+), 49 deletions(-) delete mode 100644 test/integration/006_simple_dependency_test/schema_override_legacy_macros/schema.sql create mode 100644 test/integration/012_deprecation_tests/boring-models/boring.sql create mode 100644 test/integration/012_deprecation_tests/macros/schema.sql diff --git a/core/dbt/deprecations.py b/core/dbt/deprecations.py index baf92043cf5..047453f0d20 100644 --- a/core/dbt/deprecations.py +++ b/core/dbt/deprecations.py @@ -36,6 +36,17 @@ class 
SeedDropExistingDeprecation(DBTDeprecation): will be removed in a future version of dbt.""" +class GenerateSchemaNameSingleArgDeprecated(DBTDeprecation): + name = 'generate-schema-name-single-arg' + description = '''As of dbt v0.14.0, the `generate_schema_name` macro + accepts a second "node" argument. The one-argument form of `generate_schema_name` + is deprecated, and will become unsupported in a future release. + + For more information, see: + https://docs.getdbt.com/v0.14/docs/upgrading-to-014 + ''' # noqa + + _adapter_renamed_description = """\ The adapter function `adapter.{old_name}` is deprecated and will be removed in a future release of dbt. Please use `adapter.{new_name}` instead. @@ -72,6 +83,7 @@ def warn(name, *args, **kwargs): deprecations_list = [ DBTRepositoriesDeprecation(), SeedDropExistingDeprecation(), + GenerateSchemaNameSingleArgDeprecated(), ] deprecations = {d.name: d for d in deprecations_list} diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index d3dd355f647..a2034ac6f7e 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -652,16 +652,10 @@ def raise_not_implemented(msg): raise NotImplementedException(msg) -_REPEAT_MESSAGE_CACHE = set() - - -def warn_or_error(msg, node=None, log_fmt=None, repeat=True): +def warn_or_error(msg, node=None, log_fmt=None): if dbt.flags.WARN_ERROR: raise_compiler_error(msg, node) else: - if not repeat and msg in _REPEAT_MESSAGE_CACHE: - return - _REPEAT_MESSAGE_CACHE.add(msg) if log_fmt is not None: msg = log_fmt.format(msg) logger.warning(msg) diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 39cb9d0887d..ab6c8990bb0 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -13,6 +13,7 @@ from dbt.logger import GLOBAL_LOGGER as logger from dbt.contracts.graph.parsed import ParsedNode from dbt.parser.source_config import SourceConfig +from dbt import deprecations class BaseParser(object): @@ -218,10 +219,7 @@ def _update_parsed_node_info(self, parsed_node, config): ) if too_many_args not in str(exc): raise - msg = ('The generate_schema_name macro does not accept a second ' - 'argument. 
This form is deprecated as of 0.14.0') - dbt.exceptions.warn_or_error(msg, node=parsed_node, repeat=False, - log_fmt='WARNING: {}') + deprecations.warn('generate-schema-name-single-arg') schema = get_schema(schema_override) parsed_node.schema = schema.strip() diff --git a/test/integration/006_simple_dependency_test/schema_override_legacy_macros/schema.sql b/test/integration/006_simple_dependency_test/schema_override_legacy_macros/schema.sql deleted file mode 100644 index b2456437d31..00000000000 --- a/test/integration/006_simple_dependency_test/schema_override_legacy_macros/schema.sql +++ /dev/null @@ -1,6 +0,0 @@ - -{% macro generate_schema_name(schema_name) -%} - - {{ schema_name }}_{{ target.schema }}_macro - -{%- endmacro %} diff --git a/test/integration/006_simple_dependency_test/test_local_dependency.py b/test/integration/006_simple_dependency_test/test_local_dependency.py index 6c2a5ca20bd..e98075b2c24 100644 --- a/test/integration/006_simple_dependency_test/test_local_dependency.py +++ b/test/integration/006_simple_dependency_test/test_local_dependency.py @@ -116,34 +116,6 @@ def test_postgres_local_dependency_out_of_date_no_check(self, mock_get): self.assertEqual(len(results), 3) -class TestDependencyWithLegacySchema(BaseDependencyTest): - @property - def project_config(self): - return { - 'macro-paths': ['test/integration/006_simple_dependency_test/schema_override_legacy_macros'], - 'models': { - 'schema': 'dbt_test', - } - } - - def base_schema(self): - return 'dbt_test_{}_macro'.format(self.unique_schema()) - - def configured_schema(self): - return 'configured_{}_macro'.format(self.unique_schema()) - - @use_profile('postgres') - @mock.patch('dbt.config.project.get_installed_version') - def test_postgres_local_dependency_out_of_date_no_check_no_strict(self, mock_get): - mock_get.return_value = dbt.semver.VersionSpecifier.from_version_string('0.0.1') - self.run_dbt(['deps']) - results = self.run_dbt(['run', '--no-version-check'], strict=False) - self.assertEqual(len(results), 3) - - with self.assertRaises(dbt.exceptions.CompilationException): - results = self.run_dbt(['run', '--no-version-check']) - - class TestSimpleDependencyHooks(DBTIntegrationTest): @property def schema(self): diff --git a/test/integration/012_deprecation_tests/boring-models/boring.sql b/test/integration/012_deprecation_tests/boring-models/boring.sql new file mode 100644 index 00000000000..43258a71464 --- /dev/null +++ b/test/integration/012_deprecation_tests/boring-models/boring.sql @@ -0,0 +1 @@ +select 1 as id diff --git a/test/integration/012_deprecation_tests/macros/schema.sql b/test/integration/012_deprecation_tests/macros/schema.sql new file mode 100644 index 00000000000..d3884257ad6 --- /dev/null +++ b/test/integration/012_deprecation_tests/macros/schema.sql @@ -0,0 +1,7 @@ +{% macro generate_schema_name(schema_name) -%} + {%- if schema_name is none -%} + {{ target.schema }} + {%- else -%} + {{ schema_name }} + {%- endif -%} +{%- endmacro %} diff --git a/test/integration/012_deprecation_tests/test_deprecations.py b/test/integration/012_deprecation_tests/test_deprecations.py index d1a75d1c0bd..7a9d6249270 100644 --- a/test/integration/012_deprecation_tests/test_deprecations.py +++ b/test/integration/012_deprecation_tests/test_deprecations.py @@ -4,9 +4,9 @@ import dbt.exceptions -class TestDeprecations(DBTIntegrationTest): +class BaseTestDeprecations(DBTIntegrationTest): def setUp(self): - super(TestDeprecations, self).setUp() + super(BaseTestDeprecations, self).setUp() 
deprecations.reset_deprecations() @property @@ -21,6 +21,8 @@ def dir(path): def models(self): return self.dir("models") + +class TestDeprecations(BaseTestDeprecations): @use_profile('postgres') def test_postgres_deprecations_fail(self): self.run_dbt(strict=True, expect_pass=False) @@ -29,5 +31,29 @@ def test_postgres_deprecations_fail(self): def test_postgres_deprecations(self): self.assertEqual(deprecations.active_deprecations, set()) self.run_dbt(strict=False) - self.assertEqual({'adapter:already_exists'}, - deprecations.active_deprecations) + expected = {'adapter:already_exists'} + self.assertEqual(expected, deprecations.active_deprecations) + + +class TestMacroDeprecations(BaseTestDeprecations): + @property + def models(self): + return self.dir('boring-models') + + @property + def project_config(self): + return { + 'macro-paths': [self.dir('macros')], + } + + @use_profile('postgres') + def test_postgres_deprecations_fail(self): + with self.assertRaises(dbt.exceptions.CompilationException): + self.run_dbt(strict=True) + + @use_profile('postgres') + def test_postgres_deprecations(self): + self.assertEqual(deprecations.active_deprecations, set()) + self.run_dbt(strict=False) + expected = {'generate-schema-name-single-arg'} + self.assertEqual(expected, deprecations.active_deprecations) From 1c3a02b2c8cba899b156d34926527d96c83c1410 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 4 Jun 2019 07:20:55 -0600 Subject: [PATCH 19/31] PR feedback --- core/dbt/config/runtime.py | 25 +++++++++++++++++-------- test/unit/test_config.py | 8 ++++---- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/core/dbt/config/runtime.py b/core/dbt/config/runtime.py index 397d3a74f87..21b978e1f1e 100644 --- a/core/dbt/config/runtime.py +++ b/core/dbt/config/runtime.py @@ -12,6 +12,15 @@ from .project import Project +_ARCHIVE_REMOVED_MESSAGE = ''' +The `archive` section in `dbt_project.yml` is no longer supported. Please use a +`snapshot` block instead. For more information on snapshot blocks and a script +to help migrate these archives, please consult the 0.14.0 migration guide: + +https://docs.getdbt.com/v0.14/docs/upgrading-to-014 +'''.strip() + + class RuntimeConfig(Project, Profile): """The runtime configuration, as constructed from its components. There's a lot because there is a lot of stuff! @@ -64,12 +73,14 @@ def __init__(self, project_name, version, project_root, source_paths, self.validate() @classmethod - def from_parts(cls, project, profile, args, allow_archive_blocks=False): + def from_parts(cls, project, profile, args, allow_archive_configs=False): """Instantiate a RuntimeConfig from its components. :param profile Profile: A parsed dbt Profile. :param project Project: A parsed dbt Project. :param args argparse.Namespace: The parsed command-line arguments. + :param allow_archive_configs bool: If True, ignore archive blocks in + configs. This flag exists to enable archive migration. :returns RuntimeConfig: The new configuration. 
""" quoting = deepcopy( @@ -77,11 +88,9 @@ def from_parts(cls, project, profile, args, allow_archive_blocks=False): .DEFAULTS['quote_policy'] ) quoting.update(project.quoting) - if project.archive and not allow_archive_blocks: + if project.archive and not allow_archive_configs: # if the user has an `archive` section, raise an error - raise DbtProjectError( - 'Invalid project configuration: "archive" is not allowed' - ) + raise DbtProjectError(_ARCHIVE_REMOVED_MESSAGE) return cls( project_name=project.project_name, @@ -169,13 +178,13 @@ def validate(self): self.validate_version() @classmethod - def from_args(cls, args, allow_archive_blocks=False): + def from_args(cls, args, allow_archive_configs=False): """Given arguments, read in dbt_project.yml from the current directory, read in packages.yml if it exists, and use them to find the profile to load. :param args argparse.Namespace: The arguments as parsed from the cli. - :param allow_archive_blocks bool: If True, ignore archive blocks in + :param allow_archive_configs bool: If True, ignore archive blocks in configs. This flag exists to enable archive migration. :raises DbtProjectError: If the project is invalid or missing. :raises DbtProfileError: If the profile is invalid or missing. @@ -194,5 +203,5 @@ def from_args(cls, args, allow_archive_blocks=False): project=project, profile=profile, args=args, - allow_archive_blocks=allow_archive_blocks + allow_archive_configs=allow_archive_configs ) diff --git a/test/unit/test_config.py b/test/unit/test_config.py index 628618d94d2..cae2d126ef4 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -1066,7 +1066,7 @@ def test_archive_not_allowed(self): profile = self.get_profile() with self.assertRaises(dbt.exceptions.DbtProjectError) as raised: dbt.config.RuntimeConfig.from_parts(project, profile, self.args) - self.assertIn('Invalid project configuration: "archive" is not allowed', str(raised.exception)) + self.assertIn('The `archive` section in `dbt_project.yml` is no longer supported', str(raised.exception)) def test_archive_allowed(self): archive_cfg = { @@ -1086,7 +1086,7 @@ def test_archive_allowed(self): profile = self.get_profile() cfg = dbt.config.RuntimeConfig.from_parts(project, profile, self.args, - allow_archive_blocks=True) + allow_archive_configs=True) self.assertEqual(cfg.archive, [archive_cfg]) @@ -1150,14 +1150,14 @@ def setUp(self): def test_archive_ok_from_args(self): with temp_cd(self.project_dir): - config = dbt.config.RuntimeConfig.from_args(self.args, allow_archive_blocks=True) + config = dbt.config.RuntimeConfig.from_args(self.args, allow_archive_configs=True) self.assertEqual(config.archive, self.default_project_data['archive']) def test_archive_error(self): with temp_cd(self.project_dir), self.assertRaises(dbt.exceptions.DbtProjectError) as raised: dbt.config.RuntimeConfig.from_args(self.args) - self.assertIn('Invalid project configuration: "archive" is not allowed', str(raised.exception)) + self.assertIn('The `archive` section in `dbt_project.yml` is no longer supported', str(raised.exception)) class TestVariableRuntimeConfigFiles(BaseFileTest): From 788507e0462dbe7a0403e353b49ebe13e660c332 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 17 May 2019 12:04:04 -0600 Subject: [PATCH 20/31] Get windows up and running for RPC Refactor process bootstrapping to a function - avoid trying to pickle "RequestTaskHandler"s on windows Move user config consequences out of main - We need this for RPC stuff too Reset profile values and plugins on windows process 
start Disable "kill" command on Windows --- core/dbt/config/profile.py | 14 ++++++ core/dbt/main.py | 11 +---- core/dbt/rpc.py | 92 ++++++++++++++++++++++++-------------- 3 files changed, 73 insertions(+), 44 deletions(-) diff --git a/core/dbt/config/profile.py b/core/dbt/config/profile.py index d48c16b295f..f5d7c05d6e6 100644 --- a/core/dbt/config/profile.py +++ b/core/dbt/config/profile.py @@ -11,6 +11,8 @@ from dbt.exceptions import RuntimeException from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import parse_cli_vars +from dbt import tracking +from dbt.ui import printer from .renderer import ConfigRenderer @@ -95,6 +97,18 @@ def from_directory(cls, directory): user_cfg = profile.get('config', {}) return cls.from_dict(user_cfg) + def set_values(self, cookie_dir): + if self.send_anonymous_usage_stats: + tracking.initialize_tracking(cookie_dir) + else: + tracking.do_not_track() + + if self.use_colors: + printer.use_colors() + + if self.printer_width: + printer.printer_width(self.printer_width) + class Profile(object): def __init__(self, profile_name, target_name, config, threads, diff --git a/core/dbt/main.py b/core/dbt/main.py index 34907f196e1..d2d483713f9 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -126,16 +126,7 @@ def initialize_config_values(parsed): except RuntimeException: cfg = UserConfig.from_dict(None) - if cfg.send_anonymous_usage_stats: - dbt.tracking.initialize_tracking(parsed.profiles_dir) - else: - dbt.tracking.do_not_track() - - if cfg.use_colors: - dbt.ui.printer.use_colors() - - if cfg.printer_width: - dbt.ui.printer.printer_width(cfg.printer_width) + cfg.set_values(parsed.profiles_dir) def handle_and_check(args): diff --git a/core/dbt/rpc.py b/core/dbt/rpc.py index 9335fc3051e..2e77563bbdc 100644 --- a/core/dbt/rpc.py +++ b/core/dbt/rpc.py @@ -16,9 +16,10 @@ import time from collections import namedtuple +from dbt.adapters.factory import load_plugin +from dbt.compat import QueueEmpty from dbt.logger import RPC_LOGGER as logger from dbt.logger import add_queue_handler -from dbt.compat import QueueEmpty import dbt.exceptions @@ -130,6 +131,54 @@ def __getitem__(self, key): return self.rpc_factory(task) +def _nt_setup(config, args): + """ + On windows, we have to do a some things that dbt does dynamically at + process load. + + These things are inherited automatically on posix, where fork() keeps + everything in memory. + """ + # reload the active plugin + load_plugin(config.credentials.type) + + # reset tracking, etc + config.config.set_values(args.profiles_dir) + + +def _task_bootstrap(task, queue, kwargs): + signal.signal(signal.SIGTERM, sigterm_handler) + # the first thing we do in a new process: start logging + add_queue_handler(queue) + # on windows, we need to reload our plugins because of how it starts new + # processes. At this point there are no adapter plugins loaded! + if os.name == 'nt': + _nt_setup(task.config, task.args) + + error = None + result = None + try: + result = task.handle_request(**kwargs) + except RPCException as exc: + error = exc + except dbt.exceptions.RPCKilledException as exc: + # do NOT log anything here, you risk triggering a deadlock on the + # queue handler we inserted above + error = dbt_error(exc) + except dbt.exceptions.Exception as exc: + logger.debug('dbt runtime exception', exc_info=True) + error = dbt_error(exc) + except Exception as exc: + logger.debug('uncaught python exception', exc_info=True) + error = server_error(exc) + + # put whatever result we got onto the queue as well. 
+ if error is not None: + queue.put([QueueMessageType.Error, error.error]) + else: + queue.put([QueueMessageType.Result, result]) + + class RequestTaskHandler(object): def __init__(self, task, http_request, json_rpc_request): self.task = task @@ -211,44 +260,17 @@ def get_result(self): result['logs'] = self.logs return result - def task_bootstrap(self, kwargs): - signal.signal(signal.SIGTERM, sigterm_handler) - # the first thing we do in a new process: start logging - add_queue_handler(self.queue) - - error = None - result = None - try: - result = self.task.handle_request(**kwargs) - except RPCException as exc: - error = exc - except dbt.exceptions.RPCKilledException as exc: - # do NOT log anything here, you risk triggering a deadlock on the - # queue handler we inserted above - error = dbt_error(exc) - except dbt.exceptions.Exception as exc: - logger.debug('dbt runtime exception', exc_info=True) - error = dbt_error(exc) - except Exception as exc: - logger.debug('uncaught python exception', exc_info=True) - error = server_error(exc) - - # put whatever result we got onto the queue as well. - if error is not None: - self.queue.put([QueueMessageType.Error, error.error]) - else: - self.queue.put([QueueMessageType.Result, result]) - def handle(self, kwargs): self.started = time.time() self.timeout = kwargs.pop('timeout', None) self.queue = multiprocessing.Queue() self.process = multiprocessing.Process( - target=self.task_bootstrap, - args=(kwargs,) + target=_task_bootstrap, + args=(self.task, self.queue, kwargs) ) self.process.start() - return self.get_result() + result = self.get_result() + return result @property def state(self): @@ -347,7 +369,7 @@ def process_kill(self, task_id): def rpc_builtin(self, method_name): if method_name == 'ps': return self.process_listing - if method_name == 'kill': + if method_name == 'kill' and os.name != 'nt': return self.process_kill return None @@ -360,7 +382,9 @@ def mark_done(self, request_handler): self.completed[task_id] = self.tasks.pop(task_id) def methods(self): - rpc_builtin_methods = ['ps', 'kill'] + rpc_builtin_methods = ['ps'] + if os.name != 'nt': + rpc_builtin_methods.append('kill') return list(self._rpc_task_map) + rpc_builtin_methods From 2a9ae83270474bb7f383544cf3586caa67000714 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 20 May 2019 12:35:30 -0600 Subject: [PATCH 21/31] Re-enable windows tests Disable kill tests Don't expect logs on timed-out windows tests (windows is slow!) 
--- test/integration/042_sources_test/test_sources.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/integration/042_sources_test/test_sources.py b/test/integration/042_sources_test/test_sources.py index b5ec108e289..475e3243677 100644 --- a/test/integration/042_sources_test/test_sources.py +++ b/test/integration/042_sources_test/test_sources.py @@ -9,6 +9,7 @@ from datetime import datetime, timedelta import requests +from pytest import mark from dbt.exceptions import CompilationException from test.integration.base import DBTIntegrationTest, use_profile, AnyFloat, \ @@ -364,7 +365,6 @@ def wait_result(self): )select * from __dbt__CTE__ephemeral_model''' -@unittest.skipIf(os.name == 'nt', 'Windows not supported for now') class TestRPCServer(BaseSourcesTest): def setUp(self): super(TestRPCServer, self).setUp() @@ -709,6 +709,7 @@ def test_run_postgres(self): table={'column_names': ['id'], 'rows': [[1.0]]} ) + @mark.skipif(os.name == 'nt', reason='"kill" not supported on windows') @use_profile('postgres') def test_ps_kill_postgres(self): done_query = self.query('compile', 'select 1 as id', name='done').json() @@ -794,6 +795,7 @@ def _get_sleep_query(self): self.assertTrue(False, 'request ID never found running!') + @mark.skipif(os.name == 'nt', reason='"kill" not supported on windows') @use_profile('postgres') def test_ps_kill_longwait_postgres(self): pg_sleeper, sleep_task_id, request_id = self._get_sleep_query() @@ -881,4 +883,6 @@ def test_timeout_postgres(self): self.assertIn('message', error_data) self.assertEqual(error_data['message'], 'RPC timed out after 1s') self.assertIn('logs', error_data) - self.assertTrue(len(error_data['logs']) > 0) + # on windows, process start is so slow that frequently we won't have collected any logs + if os.name != 'nt': + self.assertTrue(len(error_data['logs']) > 0) From 963b0e23ee9a62fee053d6f32b63d95b4c9b10e9 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 4 Jun 2019 07:59:41 -0600 Subject: [PATCH 22/31] remove drop_existing flag + its deprecation warning, make flags update themselves, reset flags from args on windows processes --- core/dbt/deprecations.py | 8 -------- core/dbt/flags.py | 14 ++++++++++++++ core/dbt/main.py | 27 +-------------------------- core/dbt/rpc.py | 3 +++ 4 files changed, 18 insertions(+), 34 deletions(-) diff --git a/core/dbt/deprecations.py b/core/dbt/deprecations.py index 047453f0d20..8d0a7db8bf9 100644 --- a/core/dbt/deprecations.py +++ b/core/dbt/deprecations.py @@ -29,13 +29,6 @@ class DBTRepositoriesDeprecation(DBTDeprecation): """ -class SeedDropExistingDeprecation(DBTDeprecation): - name = 'drop-existing' - description = """The --drop-existing argument to `dbt seed` has been - deprecated. Please use --full-refresh instead. 
The --drop-existing option - will be removed in a future version of dbt.""" - - class GenerateSchemaNameSingleArgDeprecated(DBTDeprecation): name = 'generate-schema-name-single-arg' description = '''As of dbt v0.14.0, the `generate_schema_name` macro @@ -82,7 +75,6 @@ def warn(name, *args, **kwargs): deprecations_list = [ DBTRepositoriesDeprecation(), - SeedDropExistingDeprecation(), GenerateSchemaNameSingleArgDeprecated(), ] diff --git a/core/dbt/flags.py b/core/dbt/flags.py index f1fccd40e07..9b27ca4207c 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -13,3 +13,17 @@ def reset(): USE_CACHE = True WARN_ERROR = False TEST_NEW_PARSER = False + + +def set_from_args(args): + global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER + USE_CACHE = getattr(args, 'use_cache', True) + + FULL_REFRESH = getattr(args, 'full_refresh', False) + STRICT_MODE = getattr(args, 'strict', False) + WARN_ERROR = ( + STRICT_MODE or + getattr(args, 'warn_error', False) + ) + + TEST_NEW_PARSER = getattr(args, 'test_new_parser', False) diff --git a/core/dbt/main.py b/core/dbt/main.py index d2d483713f9..4b7fd2fb67b 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -176,7 +176,7 @@ def track_run(task): def run_from_args(parsed): log_cache_events(getattr(parsed, 'log_cache_events', False)) - update_flags(parsed) + flags.set_from_args(parsed) parsed.cls.pre_init_hook() logger.info("Running with dbt{}".format(dbt.version.installed)) @@ -199,26 +199,6 @@ def run_from_args(parsed): return task, results -def update_flags(parsed): - flags.USE_CACHE = getattr(parsed, 'use_cache', True) - - arg_drop_existing = getattr(parsed, 'drop_existing', False) - arg_full_refresh = getattr(parsed, 'full_refresh', False) - flags.STRICT_MODE = getattr(parsed, 'strict', False) - flags.WARN_ERROR = ( - flags.STRICT_MODE or - getattr(parsed, 'warn_error', False) - ) - - if arg_drop_existing: - dbt.deprecations.warn('drop-existing') - flags.FULL_REFRESH = True - elif arg_full_refresh: - flags.FULL_REFRESH = True - - flags.TEST_NEW_PARSER = getattr(parsed, 'test_new_parser', False) - - def _build_base_subparser(): base_subparser = argparse.ArgumentParser(add_help=False) @@ -451,11 +431,6 @@ def _build_seed_subparser(subparsers, base_subparser): 'seed', parents=[base_subparser], help="Load data from csv files into your data warehouse.") - seed_sub.add_argument( - '--drop-existing', - action='store_true', - help='(DEPRECATED) Use --full-refresh instead.' - ) seed_sub.add_argument( '--full-refresh', action='store_true', diff --git a/core/dbt/rpc.py b/core/dbt/rpc.py index 2e77563bbdc..87305d89ee7 100644 --- a/core/dbt/rpc.py +++ b/core/dbt/rpc.py @@ -18,6 +18,7 @@ from dbt.adapters.factory import load_plugin from dbt.compat import QueueEmpty +from dbt import flags from dbt.logger import RPC_LOGGER as logger from dbt.logger import add_queue_handler import dbt.exceptions @@ -139,6 +140,8 @@ def _nt_setup(config, args): These things are inherited automatically on posix, where fork() keeps everything in memory. 
""" + # reset flags + flags.set_from_args(args) # reload the active plugin load_plugin(config.credentials.type) From f48f78fc58ea42a8d66bfa6af36d3493cc447d0c Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 3 Jun 2019 14:53:37 -0600 Subject: [PATCH 23/31] rename a couple things, this will not work atm --- core/dbt/compilation.py | 2 +- core/dbt/node_runners.py | 4 ++-- core/dbt/node_types.py | 6 +++--- core/dbt/ui/printer.py | 4 ++-- test/unit/test_jinja.py | 18 +++++++++--------- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/core/dbt/compilation.py b/core/dbt/compilation.py index 86d2fc23fae..4b10ddfd61c 100644 --- a/core/dbt/compilation.py +++ b/core/dbt/compilation.py @@ -27,7 +27,7 @@ def print_compile_stats(stats): names = { NodeType.Model: 'models', NodeType.Test: 'tests', - NodeType.Archive: 'archives', + NodeType.Snapshot: 'snapshots', NodeType.Analysis: 'analyses', NodeType.Macro: 'macros', NodeType.Operation: 'operations', diff --git a/core/dbt/node_runners.py b/core/dbt/node_runners.py index bba860c9356..09d43e55974 100644 --- a/core/dbt/node_runners.py +++ b/core/dbt/node_runners.py @@ -488,12 +488,12 @@ class ArchiveRunner(ModelRunner): def describe_node(self): cfg = self.node.get('config', {}) return ( - "archive {name} --> {target_database}.{target_schema}.{name}" + "snapshot {name} --> {target_database}.{target_schema}.{name}" .format(name=self.node.name, **cfg) ) def print_result_line(self, result): - dbt.ui.printer.print_archive_result_line(result, self.node_index, + dbt.ui.printer.print_snapshot_result_line(result, self.node_index, self.num_nodes) diff --git a/core/dbt/node_types.py b/core/dbt/node_types.py index d0a94404ae0..a153d8587a0 100644 --- a/core/dbt/node_types.py +++ b/core/dbt/node_types.py @@ -4,7 +4,7 @@ class NodeType(object): Model = 'model' Analysis = 'analysis' Test = 'test' - Archive = 'archive' + Snapshot = 'snapshot' Macro = 'macro' Operation = 'operation' Seed = 'seed' @@ -17,7 +17,7 @@ def executable(cls): return [ cls.Model, cls.Test, - cls.Archive, + cls.Snapshot, cls.Analysis, cls.Operation, cls.Seed, @@ -30,7 +30,7 @@ def refable(cls): return [ cls.Model, cls.Seed, - cls.Archive, + cls.Snapshot, ] diff --git a/core/dbt/ui/printer.py b/core/dbt/ui/printer.py index a862c8e4c49..f12636617ee 100644 --- a/core/dbt/ui/printer.py +++ b/core/dbt/ui/printer.py @@ -185,10 +185,10 @@ def print_model_result_line(result, description, index, total): result.execution_time) -def print_archive_result_line(result, index, total): +def print_snapshot_result_line(result, index, total): model = result.node - info, status = get_printable_result(result, 'archived', 'archiving') + info, status = get_printable_result(result, 'snapshotted', 'snapshotting') cfg = model.get('config', {}) msg = "{info} {name} --> {target_database}.{target_schema}.{name}".format( diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py index 5bcab016666..6c718f43073 100644 --- a/test/unit/test_jinja.py +++ b/test/unit/test_jinja.py @@ -77,7 +77,7 @@ def test_nested_comments(self): self.assertEqual(blocks[0].full_block, block_data) def test_complex_file(self): - all_blocks = extract_toplevel_blocks(complex_archive_file) + all_blocks = extract_toplevel_blocks(complex_snapshot_file) blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] self.assertEqual(len(blocks), 3) self.assertEqual(blocks[0].block_type_name, 'mytype') @@ -252,10 +252,10 @@ def test_awful_jinja(self): self.assertEqual(blocks[0].full_block, '''{% do\n set('foo="bar"')\n%}''') 
self.assertEqual(blocks[1].block_type_name, 'set') self.assertEqual(blocks[1].full_block, '''{% set x = ("100" + "hello'" + '%}') %}''') - self.assertEqual(blocks[2].block_type_name, 'archive') + self.assertEqual(blocks[2].block_type_name, 'snapshot') self.assertEqual(blocks[2].contents, '\n '.join([ - '''{% set x = ("{% endarchive %}" + (40 * '%})')) %}''', - '{# {% endarchive %} #}', + '''{% set x = ("{% endsnapshot %}" + (40 * '%})')) %}''', + '{# {% endsnapshot %} #}', '{% embedded %}', ' some block data right here', '{% endembedded %}' @@ -293,7 +293,7 @@ def test_quoted_endblock_within_block(self): {% endmyothertype %} ''' -complex_archive_file = ''' +complex_snapshot_file = ''' {#some stuff {% mytype foo %} #} {% mytype foo %} some stuff {% endmytype %} @@ -306,13 +306,13 @@ def test_quoted_endblock_within_block(self): set('foo="bar"') %} {% set x = ("100" + "hello'" + '%}') %} -{% archive something -%} - {% set x = ("{% endarchive %}" + (40 * '%})')) %} - {# {% endarchive %} #} +{% snapshot something -%} + {% set x = ("{% endsnapshot %}" + (40 * '%})')) %} + {# {% endsnapshot %} #} {% embedded %} some block data right here {% endembedded %} -{%- endarchive %} +{%- endsnapshot %} {% raw %} {% set x = SYNTAX ERROR} From f3701ab837add35e4adfc40eb656e19b32e1d731 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 5 Jun 2019 08:40:37 -0600 Subject: [PATCH 24/31] archives -> snapshots, except legacy stuff --- core/dbt/adapters/base/impl.py | 8 +- core/dbt/compilation.py | 6 +- core/dbt/config/project.py | 10 +- core/dbt/config/runtime.py | 6 +- core/dbt/contracts/graph/parsed.py | 12 +- core/dbt/contracts/graph/unparsed.py | 6 +- core/dbt/contracts/project.py | 2 +- core/dbt/deprecations.py | 12 + .../archive.sql => snapshot/snapshot.sql} | 62 +-- .../snapshot_merge.sql} | 6 +- .../{archive => snapshot}/strategies.sql | 28 +- core/dbt/loader.py | 6 +- core/dbt/main.py | 73 +-- core/dbt/node_runners.py | 4 +- core/dbt/parser/__init__.py | 4 +- core/dbt/parser/base.py | 10 +- core/dbt/parser/{archives.py => snapshots.py} | 36 +- core/dbt/parser/source_config.py | 6 +- core/dbt/task/archive.py | 19 - core/dbt/task/list.py | 2 +- core/dbt/task/snapshot.py | 25 + .../{archive.sql => snapshot.sql} | 6 +- .../dbt/include/postgres/macros/adapters.sql | 2 +- .../{archive_merge.sql => snapshot_merge.sql} | 2 +- .../dbt/include/redshift/macros/adapters.sql | 2 +- .../macros/materializations/archive_merge.sql | 4 - .../materializations/snapshot_merge.sql | 4 + .../dbt/include/snowflake/macros/adapters.sql | 2 +- .../models/ref_archive.sql | 1 - .../test_simple_archive.py | 489 ------------------ .../add_column_to_source_bq.sql | 4 +- .../data/seed.csv | 0 .../invalidate_bigquery.sql | 2 +- .../invalidate_postgres.sql | 8 +- .../invalidate_snowflake.sql | 2 +- .../models-collision/snapshot_actual.sql} | 0 .../models/.gitkeep | 0 .../models/ref_snapshot.sql | 1 + .../seed.sql | 34 +- .../seed_bq.sql | 10 +- .../seed_longtext.sql | 0 .../seed_pg.sql | 34 +- .../test-check-col-snapshots-bq/snapshot.sql} | 8 +- .../test-check-col-snapshots/snapshot.sql} | 8 +- .../test-snapshots-bq/snapshot.sql} | 4 +- .../test-snapshots-invalid/snapshot.sql} | 4 +- .../test-snapshots-longtext/snapshot.sql} | 4 +- .../test-snapshots-pg/snapshot.sql} | 4 +- .../test-snapshots-select/snapshot.sql} | 12 +- .../test_simple_snapshot.py | 489 ++++++++++++++++++ .../update.sql | 36 +- .../update_bq.sql | 10 +- .../{archives-bad => snapshots-bad}/b.sql | 4 +- .../{archives-good => snapshots-good}/g.sql | 4 +- 
.../023_exit_codes_test/test_exit_codes.py | 18 +- .../{archivable.sql => snapshottable.sql} | 0 .../{archives => snapshots}/a.sql | 6 +- .../033_event_tracking_test/test_events.py | 18 +- .../archive.sql => snapshots/snapshot.sql} | 4 +- test/integration/047_dbt_ls_test/test_ls.py | 24 +- 60 files changed, 815 insertions(+), 792 deletions(-) rename core/dbt/include/global_project/macros/materializations/{archive/archive.sql => snapshot/snapshot.sql} (76%) rename core/dbt/include/global_project/macros/materializations/{archive/archive_merge.sql => snapshot/snapshot_merge.sql} (76%) rename core/dbt/include/global_project/macros/materializations/{archive => snapshot}/strategies.sql (75%) rename core/dbt/parser/{archives.py => snapshots.py} (69%) delete mode 100644 core/dbt/task/archive.py create mode 100644 core/dbt/task/snapshot.py rename plugins/bigquery/dbt/include/bigquery/macros/materializations/{archive.sql => snapshot.sql} (70%) rename plugins/postgres/dbt/include/postgres/macros/materializations/{archive_merge.sql => snapshot_merge.sql} (90%) delete mode 100644 plugins/redshift/dbt/include/redshift/macros/materializations/archive_merge.sql create mode 100644 plugins/redshift/dbt/include/redshift/macros/materializations/snapshot_merge.sql delete mode 100644 test/integration/004_simple_archive_test/models/ref_archive.sql delete mode 100644 test/integration/004_simple_archive_test/test_simple_archive.py rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/add_column_to_source_bq.sql (90%) rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/data/seed.csv (100%) rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/invalidate_bigquery.sql (87%) rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/invalidate_postgres.sql (76%) rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/invalidate_snowflake.sql (86%) rename test/integration/{004_simple_archive_test/models-collision/archive_actual.sql => 004_simple_snapshot_test/models-collision/snapshot_actual.sql} (100%) rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/models/.gitkeep (100%) create mode 100644 test/integration/004_simple_snapshot_test/models/ref_snapshot.sql rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/seed.sql (88%) rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/seed_bq.sql (94%) rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/seed_longtext.sql (100%) rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/seed_pg.sql (88%) rename test/integration/{004_simple_archive_test/test-check-col-archives-bq/archive.sql => 004_simple_snapshot_test/test-check-col-snapshots-bq/snapshot.sql} (87%) rename test/integration/{004_simple_archive_test/test-check-col-archives/archive.sql => 004_simple_snapshot_test/test-check-col-snapshots/snapshot.sql} (86%) rename test/integration/{004_simple_archive_test/test-archives-bq/archive.sql => 004_simple_snapshot_test/test-snapshots-bq/snapshot.sql} (87%) rename test/integration/{004_simple_archive_test/test-archives-invalid/archive.sql => 004_simple_snapshot_test/test-snapshots-invalid/snapshot.sql} (83%) rename test/integration/{004_simple_archive_test/test-archives-longtext/longtext.sql => 004_simple_snapshot_test/test-snapshots-longtext/snapshot.sql} (85%) rename test/integration/{004_simple_archive_test/test-archives-pg/archive.sql 
=> 004_simple_snapshot_test/test-snapshots-pg/snapshot.sql} (87%) rename test/integration/{004_simple_archive_test/test-archives-select/archives.sql => 004_simple_snapshot_test/test-snapshots-select/snapshot.sql} (87%) create mode 100644 test/integration/004_simple_snapshot_test/test_simple_snapshot.py rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/update.sql (86%) rename test/integration/{004_simple_archive_test => 004_simple_snapshot_test}/update_bq.sql (91%) rename test/integration/023_exit_codes_test/{archives-bad => snapshots-bad}/b.sql (78%) rename test/integration/023_exit_codes_test/{archives-good => snapshots-good}/g.sql (77%) rename test/integration/033_event_tracking_test/models/{archivable.sql => snapshottable.sql} (100%) rename test/integration/033_event_tracking_test/{archives => snapshots}/a.sql (58%) rename test/integration/047_dbt_ls_test/{archives/archive.sql => snapshots/snapshot.sql} (86%) diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index 16d4bd300ac..9ad85f1e0a5 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -543,7 +543,7 @@ def get_missing_columns(self, from_relation, to_relation): ] @available.parse_none - def valid_archive_target(self, relation): + def valid_snapshot_target(self, relation): """Ensure that the target relation is valid, by making sure it has the expected columns. @@ -553,7 +553,7 @@ def valid_archive_target(self, relation): """ if not isinstance(relation, self.Relation): dbt.exceptions.invalid_type_error( - method_name='is_existing_old_style_archive', + method_name='valid_snapshot_target', arg_name='relation', got_value=relation, expected_type=self.Relation) @@ -573,13 +573,13 @@ def valid_archive_target(self, relation): if missing: if extra: msg = ( - 'Archive target has ("{}") but not ("{}") - is it an ' + 'Snapshot target has ("{}") but not ("{}") - is it an ' 'unmigrated previous version archive?' .format('", "'.join(extra), '", "'.join(missing)) ) else: msg = ( - 'Archive target is not an archive table (missing "{}")' + 'Snapshot target is not a snapshot table (missing "{}")' .format('", "'.join(missing)) ) dbt.exceptions.raise_compiler_error(msg) diff --git a/core/dbt/compilation.py b/core/dbt/compilation.py index 4b10ddfd61c..0ab041948a1 100644 --- a/core/dbt/compilation.py +++ b/core/dbt/compilation.py @@ -140,9 +140,9 @@ def compile_node(self, node, manifest, extra_context=None): # don't wrap schema tests or analyses. injected_node.wrapped_sql = injected_node.injected_sql - elif is_type(injected_node, NodeType.Archive): + elif is_type(injected_node, NodeType.Snapshot): # unfortunately we do everything automagically for - # archives. in the future it'd be nice to generate + # snapshots. in the future it'd be nice to generate # the SQL at the parser level. 
pass @@ -209,7 +209,7 @@ def _is_writable(node): if not node.injected_sql: return False - if dbt.utils.is_type(node, NodeType.Archive): + if dbt.utils.is_type(node, NodeType.Snapshot): return False return True diff --git a/core/dbt/config/project.py b/core/dbt/config/project.py index 17729196cd3..584fcd20c66 100644 --- a/core/dbt/config/project.py +++ b/core/dbt/config/project.py @@ -143,7 +143,7 @@ def _parse_versions(versions): class Project(object): def __init__(self, project_name, version, project_root, profile_name, source_paths, macro_paths, data_paths, test_paths, - analysis_paths, docs_paths, target_path, archive_paths, + analysis_paths, docs_paths, target_path, snapshot_paths, clean_targets, log_path, modules_path, quoting, models, on_run_start, on_run_end, archive, seeds, dbt_version, packages): @@ -158,7 +158,7 @@ def __init__(self, project_name, version, project_root, profile_name, self.analysis_paths = analysis_paths self.docs_paths = docs_paths self.target_path = target_path - self.archive_paths = archive_paths + self.snapshot_paths = snapshot_paths self.clean_targets = clean_targets self.log_path = log_path self.modules_path = modules_path @@ -241,7 +241,7 @@ def from_project_config(cls, project_dict, packages_dict=None): analysis_paths = project_dict.get('analysis-paths', []) docs_paths = project_dict.get('docs-paths', source_paths[:]) target_path = project_dict.get('target-path', 'target') - archive_paths = project_dict.get('archive-paths', ['archives']) + snapshot_paths = project_dict.get('snapshot-paths', ['snapshots']) # should this also include the modules path by default? clean_targets = project_dict.get('clean-targets', [target_path]) log_path = project_dict.get('log-path', 'logs') @@ -275,7 +275,7 @@ def from_project_config(cls, project_dict, packages_dict=None): analysis_paths=analysis_paths, docs_paths=docs_paths, target_path=target_path, - archive_paths=archive_paths, + snapshot_paths=snapshot_paths, clean_targets=clean_targets, log_path=log_path, modules_path=modules_path, @@ -323,7 +323,7 @@ def to_project_config(self, with_packages=False): 'analysis-paths': self.analysis_paths, 'docs-paths': self.docs_paths, 'target-path': self.target_path, - 'archive-paths': self.archive_paths, + 'snapshot-paths': self.snapshot_paths, 'clean-targets': self.clean_targets, 'log-path': self.log_path, 'quoting': self.quoting, diff --git a/core/dbt/config/runtime.py b/core/dbt/config/runtime.py index 21b978e1f1e..7d0f3d2bd35 100644 --- a/core/dbt/config/runtime.py +++ b/core/dbt/config/runtime.py @@ -27,7 +27,7 @@ class RuntimeConfig(Project, Profile): """ def __init__(self, project_name, version, project_root, source_paths, macro_paths, data_paths, test_paths, analysis_paths, - docs_paths, target_path, archive_paths, clean_targets, + docs_paths, target_path, snapshot_paths, clean_targets, log_path, modules_path, quoting, models, on_run_start, on_run_end, archive, seeds, dbt_version, profile_name, target_name, config, threads, credentials, packages, args): @@ -48,7 +48,7 @@ def __init__(self, project_name, version, project_root, source_paths, analysis_paths=analysis_paths, docs_paths=docs_paths, target_path=target_path, - archive_paths=archive_paths, + snapshot_paths=snapshot_paths, clean_targets=clean_targets, log_path=log_path, modules_path=modules_path, @@ -103,7 +103,7 @@ def from_parts(cls, project, profile, args, allow_archive_configs=False): analysis_paths=project.analysis_paths, docs_paths=project.docs_paths, target_path=project.target_path, - 
archive_paths=project.archive_paths, + snapshot_paths=project.snapshot_paths, clean_targets=project.clean_targets, log_path=project.log_path, modules_path=project.modules_path, diff --git a/core/dbt/contracts/graph/parsed.py b/core/dbt/contracts/graph/parsed.py index 8647d83741b..62f3f65730b 100644 --- a/core/dbt/contracts/graph/parsed.py +++ b/core/dbt/contracts/graph/parsed.py @@ -444,7 +444,7 @@ def config(self, value): self._contents['config'] = value -ARCHIVE_CONFIG_CONTRACT = { +SNAPSHOT_CONFIG_CONTRACT = { 'properties': { 'target_database': { 'type': 'string', @@ -500,21 +500,21 @@ def config(self, value): } -PARSED_ARCHIVE_NODE_CONTRACT = deep_merge( +PARSED_SNAPSHOT_NODE_CONTRACT = deep_merge( PARSED_NODE_CONTRACT, { 'properties': { - 'config': ARCHIVE_CONFIG_CONTRACT, + 'config': SNAPSHOT_CONFIG_CONTRACT, 'resource_type': { - 'enum': [NodeType.Archive], + 'enum': [NodeType.Snapshot], }, }, } ) -class ParsedArchiveNode(ParsedNode): - SCHEMA = PARSED_ARCHIVE_NODE_CONTRACT +class ParsedSnapshotNode(ParsedNode): + SCHEMA = PARSED_SNAPSHOT_NODE_CONTRACT # The parsed node update is only the 'patch', not the test. The test became a diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index c6e6bbbd75b..4b16f7e6146 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -39,7 +39,7 @@ 'type': 'string', 'description': ( 'For nodes defined in SQL files, this is just the contents ' - 'of that file. For schema tests, archives, etc. this is ' + 'of that file. For schema tests, snapshots, etc. this is ' 'generated by dbt.'), }, 'index': { @@ -73,8 +73,8 @@ NodeType.Analysis, NodeType.Operation, NodeType.Seed, - # we need this if parse_node is going to handle archives. - NodeType.Archive, + # we need this if parse_node is going to handle snapshots. + NodeType.Snapshot, NodeType.RPCCall, ] }, diff --git a/core/dbt/contracts/project.py b/core/dbt/contracts/project.py index 3c04f80ee4c..206f82cda17 100644 --- a/core/dbt/contracts/project.py +++ b/core/dbt/contracts/project.py @@ -91,7 +91,7 @@ 'target-path': { 'type': 'string', }, - 'archive-paths': { + 'snapshot-paths': { 'type': 'array', 'items': {'type': 'string'}, }, diff --git a/core/dbt/deprecations.py b/core/dbt/deprecations.py index 8d0a7db8bf9..ddac04c0f63 100644 --- a/core/dbt/deprecations.py +++ b/core/dbt/deprecations.py @@ -40,6 +40,17 @@ class GenerateSchemaNameSingleArgDeprecated(DBTDeprecation): ''' # noqa +class ArchiveDeprecated(DBTDeprecation): + name = 'archives' + description = '''As of dbt v0.14.0, the `dbt archive` command is renamed to + `dbt snapshot` and "archives" are "snapshots". The `dbt archive` command will + be removed in a future release. + + For more information, see: + https://docs.getdbt.com/v0.14/docs/upgrading-to-014 + ''' + + _adapter_renamed_description = """\ The adapter function `adapter.{old_name}` is deprecated and will be removed in a future release of dbt. Please use `adapter.{new_name}` instead. 
@@ -76,6 +87,7 @@ def warn(name, *args, **kwargs): deprecations_list = [ DBTRepositoriesDeprecation(), GenerateSchemaNameSingleArgDeprecated(), + ArchiveDeprecated(), ] deprecations = {d.name: d for d in deprecations_list} diff --git a/core/dbt/include/global_project/macros/materializations/archive/archive.sql b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql similarity index 76% rename from core/dbt/include/global_project/macros/materializations/archive/archive.sql rename to core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql index a21bd27213b..2e8fcaaac2d 100644 --- a/core/dbt/include/global_project/macros/materializations/archive/archive.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql @@ -14,18 +14,18 @@ {% endmacro %} -{% macro post_archive(staging_relation) %} - {{ adapter_macro('post_archive', staging_relation) }} +{% macro post_snapshot(staging_relation) %} + {{ adapter_macro('post_snapshot', staging_relation) }} {% endmacro %} -{% macro default__post_archive(staging_relation) %} +{% macro default__post_snapshot(staging_relation) %} {# no-op #} {% endmacro %} -{% macro archive_staging_table_inserts(strategy, source_sql, target_relation) -%} +{% macro snapshot_staging_table_inserts(strategy, source_sql, target_relation) -%} - with archive_query as ( + with snapshot_query as ( {{ source_sql }} @@ -40,10 +40,10 @@ {{ strategy.updated_at }} as dbt_valid_from, nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to - from archive_query + from snapshot_query ), - archived_data as ( + snapshotted_data as ( select *, {{ strategy.unique_key }} as dbt_unique_key @@ -59,11 +59,11 @@ source_data.* from source_data - left outer join archived_data on archived_data.dbt_unique_key = source_data.dbt_unique_key - where archived_data.dbt_unique_key is null + left outer join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key + where snapshotted_data.dbt_unique_key is null or ( - archived_data.dbt_unique_key is not null - and archived_data.dbt_valid_to is null + snapshotted_data.dbt_unique_key is not null + and snapshotted_data.dbt_valid_to is null and ( {{ strategy.row_changed }} ) @@ -76,9 +76,9 @@ {%- endmacro %} -{% macro archive_staging_table_updates(strategy, source_sql, target_relation) -%} +{% macro snapshot_staging_table_updates(strategy, source_sql, target_relation) -%} - with archive_query as ( + with snapshot_query as ( {{ source_sql }} @@ -93,10 +93,10 @@ {{ strategy.updated_at }} as dbt_updated_at, {{ strategy.updated_at }} as dbt_valid_from - from archive_query + from snapshot_query ), - archived_data as ( + snapshotted_data as ( select *, {{ strategy.unique_key }} as dbt_unique_key @@ -109,12 +109,12 @@ select 'update' as dbt_change_type, - archived_data.dbt_scd_id, + snapshotted_data.dbt_scd_id, source_data.dbt_valid_from as dbt_valid_to from source_data - join archived_data on archived_data.dbt_unique_key = source_data.dbt_unique_key - where archived_data.dbt_valid_to is null + join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key + where snapshotted_data.dbt_valid_to is null and ( {{ strategy.row_changed }} ) @@ -126,7 +126,7 @@ {%- endmacro %} -{% macro build_archive_table(strategy, sql) %} +{% macro build_snapshot_table(strategy, sql) %} select *, {{ strategy.scd_id }} as dbt_scd_id, @@ -156,17 +156,17 @@ {% do return([false, new_relation]) %} {% endmacro %} -{% macro build_archive_staging_table(strategy, 
sql, target_relation) %} +{% macro build_snapshot_staging_table(strategy, sql, target_relation) %} {% set tmp_relation = make_temp_relation(target_relation) %} - {% set inserts_select = archive_staging_table_inserts(strategy, sql, target_relation) %} - {% set updates_select = archive_staging_table_updates(strategy, sql, target_relation) %} + {% set inserts_select = snapshot_staging_table_inserts(strategy, sql, target_relation) %} + {% set updates_select = snapshot_staging_table_updates(strategy, sql, target_relation) %} - {% call statement('build_archive_staging_relation_inserts') %} + {% call statement('build_snapshot_staging_relation_inserts') %} {{ create_table_as(True, tmp_relation, inserts_select) }} {% endcall %} - {% call statement('build_archive_staging_relation_updates') %} + {% call statement('build_snapshot_staging_relation_updates') %} insert into {{ tmp_relation }} (dbt_change_type, dbt_scd_id, dbt_valid_to) select dbt_change_type, dbt_scd_id, dbt_valid_to from ( {{ updates_select }} @@ -177,7 +177,7 @@ {% endmacro %} -{% materialization archive, default %} +{% materialization snapshot, default %} {%- set config = model['config'] -%} {%- set target_database = config.get('target_database') -%} @@ -202,20 +202,20 @@ {%- endif -%} {% set strategy_macro = strategy_dispatch(strategy_name) %} - {% set strategy = strategy_macro(model, "archived_data", "source_data", config) %} + {% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config) %} {% if not target_relation_exists %} - {% set build_sql = build_archive_table(strategy, model['injected_sql']) %} + {% set build_sql = build_snapshot_table(strategy, model['injected_sql']) %} {% call statement('main') -%} {{ create_table_as(False, target_relation, build_sql) }} {% endcall %} {% else %} - {{ adapter.valid_archive_target(target_relation) }} + {{ adapter.valid_snapshot_target(target_relation) }} - {% set staging_table = build_archive_staging_table(strategy, sql, target_relation) %} + {% set staging_table = build_snapshot_staging_table(strategy, sql, target_relation) %} {% do adapter.expand_target_column_types(from_relation=staging_table, to_relation=target_relation) %} @@ -237,7 +237,7 @@ | list %} {% call statement('main') %} - {{ archive_merge_sql( + {{ snapshot_merge_sql( target = target_relation, source = staging_table, insert_cols = source_columns @@ -250,7 +250,7 @@ {{ adapter.commit() }} {% if staging_table is defined %} - {% do post_archive(staging_table) %} + {% do post_snapshot(staging_table) %} {% endif %} {% endmaterialization %} diff --git a/core/dbt/include/global_project/macros/materializations/archive/archive_merge.sql b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot_merge.sql similarity index 76% rename from core/dbt/include/global_project/macros/materializations/archive/archive_merge.sql rename to core/dbt/include/global_project/macros/materializations/snapshot/snapshot_merge.sql index 9b7ae0d25b7..4a22df603c2 100644 --- a/core/dbt/include/global_project/macros/materializations/archive/archive_merge.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot_merge.sql @@ -1,10 +1,10 @@ -{% macro archive_merge_sql(target, source, insert_cols) -%} - {{ adapter_macro('archive_merge_sql', target, source, insert_cols) }} +{% macro snapshot_merge_sql(target, source, insert_cols) -%} + {{ adapter_macro('snapshot_merge_sql', target, source, insert_cols) }} {%- endmacro %} -{% macro default__archive_merge_sql(target, source, insert_cols) -%} +{% macro 
default__snapshot_merge_sql(target, source, insert_cols) -%} {%- set insert_cols_csv = insert_cols| map(attribute="name") | join(', ') -%} merge into {{ target }} as DBT_INTERNAL_DEST diff --git a/core/dbt/include/global_project/macros/materializations/archive/strategies.sql b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql similarity index 75% rename from core/dbt/include/global_project/macros/materializations/archive/strategies.sql rename to core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql index 3fc7db24eac..304b7c5d42c 100644 --- a/core/dbt/include/global_project/macros/materializations/archive/strategies.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql @@ -20,7 +20,7 @@ {{ exceptions.raise_compiler_error(error_msg | trim) }} {% endif %} - {%- set search_name = 'archive_' ~ name ~ '_strategy' -%} + {%- set search_name = 'snapshot_' ~ name ~ '_strategy' -%} {% if search_name not in package_context %} {% set error_msg %} @@ -35,12 +35,12 @@ {# Create SCD Hash SQL fields cross-db #} -{% macro archive_hash_arguments(args) %} - {{ adapter_macro('archive_hash_arguments', args) }} +{% macro snapshot_hash_arguments(args) %} + {{ adapter_macro('snapshot_hash_arguments', args) }} {% endmacro %} -{% macro default__archive_hash_arguments(args) %} +{% macro default__snapshot_hash_arguments(args) %} md5({% for arg in args %} coalesce(cast({{ arg }} as varchar ), '') {% if not loop.last %} || '|' || {% endif %} {% endfor %}) @@ -50,11 +50,11 @@ {# Get the current time cross-db #} -{% macro archive_get_time() -%} - {{ adapter_macro('archive_get_time') }} +{% macro snapshot_get_time() -%} + {{ adapter_macro('snapshot_get_time') }} {%- endmacro %} -{% macro default__archive_get_time() -%} +{% macro default__snapshot_get_time() -%} {{ current_timestamp() }} {%- endmacro %} @@ -62,15 +62,15 @@ {# Core strategy definitions #} -{% macro archive_timestamp_strategy(node, archived_rel, current_rel, config) %} +{% macro snapshot_timestamp_strategy(node, snapshotted_rel, current_rel, config) %} {% set primary_key = config['unique_key'] %} {% set updated_at = config['updated_at'] %} {% set row_changed_expr -%} - ({{ archived_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }}) + ({{ snapshotted_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }}) {%- endset %} - {% set scd_id_expr = archive_hash_arguments([primary_key, updated_at]) %} + {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %} {% do return({ "unique_key": primary_key, @@ -81,10 +81,10 @@ {% endmacro %} -{% macro archive_check_strategy(node, archived_rel, current_rel, config) %} +{% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config) %} {% set check_cols_config = config['check_cols'] %} {% set primary_key = config['unique_key'] %} - {% set updated_at = archive_get_time() %} + {% set updated_at = snapshot_get_time() %} {% if check_cols_config == 'all' %} {% set check_cols = get_columns_in_query(node['injected_sql']) %} @@ -97,13 +97,13 @@ {% set row_changed_expr -%} ( {% for col in check_cols %} - {{ archived_rel }}.{{ col }} != {{ current_rel }}.{{ col }} + {{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }} {%- if not loop.last %} or {% endif %} {% endfor %} ) {%- endset %} - {% set scd_id_expr = archive_hash_arguments(check_cols) %} + {% set scd_id_expr = snapshot_hash_arguments(check_cols) %} {% do return({ "unique_key": primary_key, diff --git a/core/dbt/loader.py 
b/core/dbt/loader.py index 021c0b515d5..5f87a19b539 100644 --- a/core/dbt/loader.py +++ b/core/dbt/loader.py @@ -11,7 +11,7 @@ from dbt.parser import MacroParser, ModelParser, SeedParser, AnalysisParser, \ DocumentationParser, DataTestParser, HookParser, SchemaParser, \ - ParserUtils, ArchiveBlockParser + ParserUtils, SnapshotParser from dbt.contracts.project import ProjectList @@ -75,8 +75,8 @@ def _load_seeds(self): def _load_nodes(self): self._load_sql_nodes(ModelParser, NodeType.Model, 'source_paths') - self._load_sql_nodes(ArchiveBlockParser, NodeType.Archive, - 'archive_paths') + self._load_sql_nodes(SnapshotParser, NodeType.Snapshot, + 'snapshot_paths') self._load_sql_nodes(AnalysisParser, NodeType.Analysis, 'analysis_paths') self._load_sql_nodes(DataTestParser, NodeType.Test, 'test_paths', diff --git a/core/dbt/main.py b/core/dbt/main.py index 4b7fd2fb67b..67592aa5e45 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -17,7 +17,7 @@ import dbt.task.init as init_task import dbt.task.seed as seed_task import dbt.task.test as test_task -import dbt.task.archive as archive_task +import dbt.task.snapshot as snapshot_task import dbt.task.generate as generate_task import dbt.task.serve as serve_task import dbt.task.freshness as freshness_task @@ -318,9 +318,9 @@ def _build_deps_subparser(subparsers, base_subparser): return sub -def _build_archive_subparser(subparsers, base_subparser): +def _build_snapshot_subparser(subparsers, base_subparser, which='snapshot'): sub = subparsers.add_parser( - 'archive', + which, parents=[base_subparser], help="Record changes to a mutable table over time." "\nMust be configured in your dbt_project.yml.") @@ -329,11 +329,11 @@ def _build_archive_subparser(subparsers, base_subparser): type=int, required=False, help=""" - Specify number of threads to use while archiving tables. Overrides + Specify number of threads to use while snapshotting tables. Overrides settings in profiles.yml. """ ) - sub.set_defaults(cls=archive_task.ArchiveTask, which='archive') + sub.set_defaults(cls=snapshot_task.SnapshotTask, which=which) return sub @@ -587,6 +587,36 @@ def _build_list_subparser(subparsers, base_subparser): return sub +def _build_run_operation_subparser(subparsers, base_subparser): + sub = subparsers.add_parser( + 'run-operation', + parents=[base_subparser], + help=""" + (beta) Run the named macro with any supplied arguments. This + subcommand is unstable and subject to change in a future release + of dbt. Please use it with caution""" + ) + sub.add_argument( + '--macro', + required=True, + help=""" + Specify the macro to invoke. dbt will call this macro with the + supplied arguments and then exit""" + ) + sub.add_argument( + '--args', + type=str, + default='{}', + help=""" + Supply arguments to the macro. This dictionary will be mapped + to the keyword arguments defined in the selected macro. This + argument should be a YAML string, eg. 
'{my_variable: my_value}'""" + ) + sub.set_defaults(cls=run_operation_task.RunOperationTask, + which='run-operation') + return sub + + def parse_args(args): p = DBTArgumentParser( prog='dbt: data build tool', @@ -673,7 +703,8 @@ def parse_args(args): _build_deps_subparser(subs, base_subparser) _build_list_subparser(subs, base_subparser) - archive_sub = _build_archive_subparser(subs, base_subparser) + snapshot_sub = _build_snapshot_subparser(subs, base_subparser) + archive_sub = _build_snapshot_subparser(subs, base_subparser, 'archive') rpc_sub = _build_rpc_subparser(subs, base_subparser) run_sub = _build_run_subparser(subs, base_subparser) compile_sub = _build_compile_subparser(subs, base_subparser) @@ -684,40 +715,14 @@ def parse_args(args): rpc_sub) # --models, --exclude _add_selection_arguments(run_sub, compile_sub, generate_sub, test_sub, - archive_sub) + archive_sub, snapshot_sub) # --full-refresh _add_table_mutability_arguments(run_sub, compile_sub) _build_seed_subparser(subs, base_subparser) _build_docs_serve_subparser(docs_subs, base_subparser) _build_source_snapshot_freshness_subparser(source_subs, base_subparser) - - sub = subs.add_parser( - 'run-operation', - parents=[base_subparser], - help=""" - (beta) Run the named macro with any supplied arguments. This - subcommand is unstable and subject to change in a future release - of dbt. Please use it with caution""" - ) - sub.add_argument( - '--macro', - required=True, - help=""" - Specify the macro to invoke. dbt will call this macro with the - supplied arguments and then exit""" - ) - sub.add_argument( - '--args', - type=str, - default='{}', - help=""" - Supply arguments to the macro. This dictionary will be mapped - to the keyword arguments defined in the selected macro. This - argument should be a YAML string, eg. 
'{my_variable: my_value}'""" - sub.set_defaults(cls=run_operation_task.RunOperationTask, - which='run-operation') + _build_run_operation_subparser(subs, base_subparser) if len(args) == 0: p.print_help() diff --git a/core/dbt/node_runners.py b/core/dbt/node_runners.py index 09d43e55974..70014053b4c 100644 --- a/core/dbt/node_runners.py +++ b/core/dbt/node_runners.py @@ -484,7 +484,7 @@ def after_execute(self, result): self.print_result_line(result) -class ArchiveRunner(ModelRunner): +class SnapshotRunner(ModelRunner): def describe_node(self): cfg = self.node.get('config', {}) return ( @@ -494,7 +494,7 @@ def describe_node(self): def print_result_line(self, result): dbt.ui.printer.print_snapshot_result_line(result, self.node_index, - self.num_nodes) + self.num_nodes) class SeedRunner(ModelRunner): diff --git a/core/dbt/parser/__init__.py b/core/dbt/parser/__init__.py index 43cadd8e907..cbecdaa1201 100644 --- a/core/dbt/parser/__init__.py +++ b/core/dbt/parser/__init__.py @@ -1,6 +1,6 @@ from .analysis import AnalysisParser -from .archives import ArchiveBlockParser +from .snapshots import SnapshotParser from .data_test import DataTestParser from .docs import DocumentationParser from .hooks import HookParser @@ -13,7 +13,7 @@ __all__ = [ 'AnalysisParser', - 'ArchiveBlockParser', + 'SnapshotParser', 'DataTestParser', 'DocumentationParser', 'HookParser', diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index ab6c8990bb0..632fa60d58b 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -139,7 +139,7 @@ def get_alias(custom_alias_name, node): def _build_intermediate_node_dict(self, config, node_dict, node_path, package_project_config, tags, fqn, - agate_table, archive_config, + agate_table, snapshot_config, column_name): """Update the unparsed node dictionary and build the basis for an intermediate ParsedNode that will be passed into the renderer @@ -153,7 +153,7 @@ def _build_intermediate_node_dict(self, config, node_dict, node_path, # been called from jinja yet). But the Var() call below needs info # about project level configs b/c they might contain refs. # TODO: Restructure this? - config_dict = coalesce(archive_config, {}) + config_dict = coalesce(snapshot_config, {}) config_dict.update(config.config) empty = ( @@ -246,11 +246,11 @@ def _update_parsed_node_info(self, parsed_node, config): def parse_node(self, node, node_path, package_project_config, tags=None, fqn_extra=None, fqn=None, agate_table=None, - archive_config=None, column_name=None): + snapshot_config=None, column_name=None): """Parse a node, given an UnparsedNode and any other required information. agate_table should be set if the node came from a seed file. - archive_config should be set if the node is an Archive node. + snapshot_config should be set if the node is a Snapshot node. column_name should be set if the node is a Test node associated with a particular column.
""" @@ -270,7 +270,7 @@ def parse_node(self, node, node_path, package_project_config, tags=None, parsed_dict = self._build_intermediate_node_dict( config, node.serialize(), node_path, config, tags, fqn, - agate_table, archive_config, column_name + agate_table, snapshot_config, column_name ) parsed_node = ParsedNode(**parsed_dict) diff --git a/core/dbt/parser/archives.py b/core/dbt/parser/snapshots.py similarity index 69% rename from core/dbt/parser/archives.py rename to core/dbt/parser/snapshots.py index e8cbcc15901..84e3d6b8c90 100644 --- a/core/dbt/parser/archives.py +++ b/core/dbt/parser/snapshots.py @@ -1,5 +1,5 @@ -from dbt.contracts.graph.parsed import ParsedArchiveNode +from dbt.contracts.graph.parsed import ParsedSnapshotNode from dbt.node_types import NodeType from dbt.parser.base_sql import BaseSqlParser, SQLParseResult import dbt.clients.jinja @@ -7,7 +7,7 @@ import dbt.utils -def set_archive_attributes(node): +def set_snapshot_attributes(node): config_keys = { 'target_database': 'database', 'target_schema': 'schema' @@ -20,10 +20,10 @@ def set_archive_attributes(node): return node -class ArchiveBlockParser(BaseSqlParser): - def parse_archives_from_file(self, file_node, tags=None): +class SnapshotParser(BaseSqlParser): + def parse_snapshots_from_file(self, file_node, tags=None): # the file node has a 'raw_sql' field that contains the jinja data with - # (we hope!) `archive` blocks + # (we hope!) `snapshot` blocks try: blocks = dbt.clients.jinja.extract_toplevel_blocks( file_node['raw_sql'] @@ -33,8 +33,8 @@ def parse_archives_from_file(self, file_node, tags=None): exc.node = file_node raise for block in blocks: - if block.block_type_name != NodeType.Archive: - # non-archive blocks are just ignored + if block.block_type_name != NodeType.Snapshot: + # non-snapshot blocks are just ignored continue name = block.block_name raw_sql = block.contents @@ -59,11 +59,11 @@ def get_fqn(cls, node, package_project_config, extra=[]): return fqn @staticmethod - def validate_archives(node): - if node.resource_type == NodeType.Archive: + def validate_snapshots(node): + if node.resource_type == NodeType.Snapshot: try: - parsed_node = ParsedArchiveNode(**node.to_shallow_dict()) - return set_archive_attributes(parsed_node) + parsed_node = ParsedSnapshotNode(**node.to_shallow_dict()) + return set_snapshot_attributes(parsed_node) except dbt.exceptions.JSONValidationException as exc: raise dbt.exceptions.CompilationException(str(exc), node) @@ -76,17 +76,17 @@ def parse_sql_nodes(self, nodes, tags=None): results = SQLParseResult() - # in archives, we have stuff in blocks. + # in snapshots, we have stuff in blocks. for file_node in nodes: - archive_nodes = list( - self.parse_archives_from_file(file_node, tags=tags) + snapshot_nodes = list( + self.parse_snapshots_from_file(file_node, tags=tags) ) - found = super(ArchiveBlockParser, self).parse_sql_nodes( - nodes=archive_nodes, tags=tags + found = super(SnapshotParser, self).parse_sql_nodes( + nodes=snapshot_nodes, tags=tags ) - # make sure our blocks are going to work when we try to archive + # make sure our blocks are going to work when we try to snapshot # them! 
- found.parsed = {k: self.validate_archives(v) for + found.parsed = {k: self.validate_snapshots(v) for k, v in found.parsed.items()} results.update(found) diff --git a/core/dbt/parser/source_config.py b/core/dbt/parser/source_config.py index 119ccc99948..1d8c6b9ba7c 100644 --- a/core/dbt/parser/source_config.py +++ b/core/dbt/parser/source_config.py @@ -66,8 +66,8 @@ def config(self): if self.node_type == NodeType.Seed: defaults['materialized'] = 'seed' - elif self.node_type == NodeType.Archive: - defaults['materialized'] = 'archive' + elif self.node_type == NodeType.Snapshot: + defaults['materialized'] = 'snapshot' if self.node_type == NodeType.Test: defaults['severity'] = 'ERROR' @@ -155,7 +155,7 @@ def get_project_config(self, runtime_config): if self.node_type == NodeType.Seed: model_configs = runtime_config.seeds - elif self.node_type == NodeType.Archive: + elif self.node_type == NodeType.Snapshot: model_configs = {} else: model_configs = runtime_config.models diff --git a/core/dbt/task/archive.py b/core/dbt/task/archive.py deleted file mode 100644 index 0cfbd82aa5c..00000000000 --- a/core/dbt/task/archive.py +++ /dev/null @@ -1,19 +0,0 @@ -from dbt.node_runners import ArchiveRunner -from dbt.node_types import NodeType -from dbt.task.run import RunTask - - -class ArchiveTask(RunTask): - def raise_on_first_error(self): - return False - - def build_query(self): - return { - "include": self.args.models, - "exclude": self.args.exclude, - "resource_types": [NodeType.Archive], - "tags": [], - } - - def get_runner_type(self): - return ArchiveRunner diff --git a/core/dbt/task/list.py b/core/dbt/task/list.py index 55204454268..c900106110f 100644 --- a/core/dbt/task/list.py +++ b/core/dbt/task/list.py @@ -12,7 +12,7 @@ class ListTask(GraphRunnableTask): DEFAULT_RESOURCE_VALUES = frozenset(( NodeType.Model, - NodeType.Archive, + NodeType.Snapshot, NodeType.Seed, NodeType.Test, NodeType.Source, diff --git a/core/dbt/task/snapshot.py b/core/dbt/task/snapshot.py new file mode 100644 index 00000000000..a6a9ca11e37 --- /dev/null +++ b/core/dbt/task/snapshot.py @@ -0,0 +1,25 @@ +from dbt.node_runners import SnapshotRunner +from dbt.node_types import NodeType +from dbt.task.run import RunTask +from dbt.deprecations import warn + + +class SnapshotTask(RunTask): + def __init__(self, args, config): + super(SnapshotTask, self).__init__(args, config) + if args.which == 'archive': + warn('archives') + + def raise_on_first_error(self): + return False + + def build_query(self): + return { + "include": self.args.models, + "exclude": self.args.exclude, + "resource_types": [NodeType.Snapshot], + "tags": [], + } + + def get_runner_type(self): + return SnapshotRunner diff --git a/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql b/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql similarity index 70% rename from plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql rename to plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql index 87b10589778..4cd5a04467c 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/materializations/snapshot.sql @@ -1,4 +1,4 @@ -{% macro bigquery__archive_hash_arguments(args) %} +{% macro bigquery__snapshot_hash_arguments(args) %} to_hex(md5(concat({% for arg in args %}coalesce(cast({{ arg }} as string), ''){% if not loop.last %}, '|',{% endif %}{% endfor %}))) {% endmacro %} @@ -6,7 +6,7 @@ {{ 
adapter.alter_table_add_columns(relation, columns) }} {% endmacro %} -{% macro bigquery__post_archive(staging_relation) %} - -- Clean up the archive temp table +{% macro bigquery__post_snapshot(staging_relation) %} + -- Clean up the snapshot temp table {% do drop_relation(staging_relation) %} {% endmacro %} diff --git a/plugins/postgres/dbt/include/postgres/macros/adapters.sql b/plugins/postgres/dbt/include/postgres/macros/adapters.sql index 356b32eb66a..c087a17667c 100644 --- a/plugins/postgres/dbt/include/postgres/macros/adapters.sql +++ b/plugins/postgres/dbt/include/postgres/macros/adapters.sql @@ -92,7 +92,7 @@ now() {%- endmacro %} -{% macro postgres__archive_get_time() -%} +{% macro postgres__snapshot_get_time() -%} {{ current_timestamp() }}::timestamp without time zone {%- endmacro %} diff --git a/plugins/postgres/dbt/include/postgres/macros/materializations/archive_merge.sql b/plugins/postgres/dbt/include/postgres/macros/materializations/snapshot_merge.sql similarity index 90% rename from plugins/postgres/dbt/include/postgres/macros/materializations/archive_merge.sql rename to plugins/postgres/dbt/include/postgres/macros/materializations/snapshot_merge.sql index 9665dbd73ca..5efccf16610 100644 --- a/plugins/postgres/dbt/include/postgres/macros/materializations/archive_merge.sql +++ b/plugins/postgres/dbt/include/postgres/macros/materializations/snapshot_merge.sql @@ -1,5 +1,5 @@ -{% macro postgres__archive_merge_sql(target, source, insert_cols) -%} +{% macro postgres__snapshot_merge_sql(target, source, insert_cols) -%} {%- set insert_cols_csv = insert_cols | map(attribute="name") | join(', ') -%} update {{ target }} diff --git a/plugins/redshift/dbt/include/redshift/macros/adapters.sql b/plugins/redshift/dbt/include/redshift/macros/adapters.sql index 37d79d3416f..ed593cd5f60 100644 --- a/plugins/redshift/dbt/include/redshift/macros/adapters.sql +++ b/plugins/redshift/dbt/include/redshift/macros/adapters.sql @@ -168,7 +168,7 @@ getdate() {%- endmacro %} -{% macro redshift__archive_get_time() -%} +{% macro redshift__snapshot_get_time() -%} {{ current_timestamp() }}::timestamp {%- endmacro %} diff --git a/plugins/redshift/dbt/include/redshift/macros/materializations/archive_merge.sql b/plugins/redshift/dbt/include/redshift/macros/materializations/archive_merge.sql deleted file mode 100644 index efde2e8373c..00000000000 --- a/plugins/redshift/dbt/include/redshift/macros/materializations/archive_merge.sql +++ /dev/null @@ -1,4 +0,0 @@ - -{% macro redshift__archive_merge_sql(target, source, insert_cols) -%} - {{ postgres__archive_merge_sql(target, source, insert_cols) }} -{% endmacro %} diff --git a/plugins/redshift/dbt/include/redshift/macros/materializations/snapshot_merge.sql b/plugins/redshift/dbt/include/redshift/macros/materializations/snapshot_merge.sql new file mode 100644 index 00000000000..eda31472733 --- /dev/null +++ b/plugins/redshift/dbt/include/redshift/macros/materializations/snapshot_merge.sql @@ -0,0 +1,4 @@ + +{% macro redshift__snapshot_merge_sql(target, source, insert_cols) -%} + {{ postgres__snapshot_merge_sql(target, source, insert_cols) }} +{% endmacro %} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index 99eb9224dbd..63dcb21f0af 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -79,7 +79,7 @@ convert_timezone('UTC', current_timestamp()) {%- endmacro %} -{% macro 
snowflake__archive_get_time() -%} +{% macro snowflake__snapshot_get_time() -%} to_timestamp_ntz({{ current_timestamp() }}) {%- endmacro %} diff --git a/test/integration/004_simple_archive_test/models/ref_archive.sql b/test/integration/004_simple_archive_test/models/ref_archive.sql deleted file mode 100644 index 5e92ea1da6e..00000000000 --- a/test/integration/004_simple_archive_test/models/ref_archive.sql +++ /dev/null @@ -1 +0,0 @@ -select * from {{ ref('archive_actual') }} diff --git a/test/integration/004_simple_archive_test/test_simple_archive.py b/test/integration/004_simple_archive_test/test_simple_archive.py deleted file mode 100644 index fe451b69958..00000000000 --- a/test/integration/004_simple_archive_test/test_simple_archive.py +++ /dev/null @@ -1,489 +0,0 @@ -from test.integration.base import DBTIntegrationTest, use_profile -import dbt.exceptions - - -class TestSimpleArchiveFiles(DBTIntegrationTest): - NUM_ARCHIVE_MODELS = 1 - - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - def run_archive(self): - return self.run_dbt(['archive']) - - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-pg'], - } - - @use_profile('postgres') - def test__postgres_ref_archive(self): - self.dbt_run_seed_archive() - results = self.run_dbt(['run']) - self.assertEqual(len(results), 1) - - def dbt_run_seed_archive(self): - if self.adapter_type == 'postgres': - self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') - else: - self.run_sql_file('test/integration/004_simple_archive_test/seed.sql') - - results = self.run_archive() - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - - def assert_case_tables_equal(self, actual, expected): - if self.adapter_type == 'snowflake': - actual = actual.upper() - expected = expected.upper() - - self.assertTablesEqual(actual, expected) - - def assert_expected(self): - self.assert_case_tables_equal('archive_actual', 'archive_expected') - - @use_profile('postgres') - def test__postgres__simple_archive(self): - self.dbt_run_seed_archive() - - self.assert_expected() - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - - results = self.run_archive() - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - - self.assert_expected() - - @use_profile('snowflake') - def test__snowflake__simple_archive(self): - self.dbt_run_seed_archive() - - self.assert_expected() - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_snowflake.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - - results = self.run_archive() - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - - self.assert_expected() - - @use_profile('redshift') - def test__redshift__simple_archive(self): - self.dbt_run_seed_archive() - - self.assert_expected() - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - - results = self.run_archive() - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - - self.assert_expected() - - @use_profile('presto') - def test__presto__simple_archive_disabled(self): - results = self.run_dbt(["seed"]) - self.assertEqual(len(results), 
self.NUM_ARCHIVE_MODELS) - # presto does not run archives - results = self.run_dbt(["archive"], expect_pass=False) - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - self.assertIn('not implemented for presto', results[0].error) - - -class TestSimpleArchiveFileSelects(DBTIntegrationTest): - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-select', - 'test/integration/004_simple_archive_test/test-archives-pg'], - } - - @use_profile('postgres') - def test__postgres__select_archives(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') - - results = self.run_dbt(['archive']) - self.assertEqual(len(results), 4) - self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') - self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') - self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') - self.assertTablesEqual('archive_actual', 'archive_expected') - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - - results = self.run_dbt(['archive']) - self.assertEqual(len(results), 4) - self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') - self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') - self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') - self.assertTablesEqual('archive_actual', 'archive_expected') - - @use_profile('postgres') - def test__postgres_exclude_archives(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') - results = self.run_dbt(['archive', '--exclude', 'archive_castillo']) - self.assertEqual(len(results), 3) - self.assertTableDoesNotExist('archive_castillo') - self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') - self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') - self.assertTablesEqual('archive_actual', 'archive_expected') - - @use_profile('postgres') - def test__postgres_select_archives(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') - results = self.run_dbt(['archive', '--models', 'archive_castillo']) - self.assertEqual(len(results), 1) - self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') - self.assertTableDoesNotExist('archive_alvarez') - self.assertTableDoesNotExist('archive_kelly') - self.assertTableDoesNotExist('archive_actual') - - -class TestSimpleArchiveFilesBigquery(DBTIntegrationTest): - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - @property - def project_config(self): - return { - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-bq'], - } - - def assert_expected(self): - self.assertTablesEqual('archive_actual', 'archive_expected') - - @use_profile('bigquery') - def test__bigquery__simple_archive(self): - self.use_default_project() - self.use_profile('bigquery') - - self.run_sql_file("test/integration/004_simple_archive_test/seed_bq.sql") - - self.run_dbt(["archive"]) - - self.assert_expected() - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") - 
self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") - - self.run_dbt(["archive"]) - - self.assert_expected() - - - @use_profile('bigquery') - def test__bigquery__archive_with_new_field(self): - self.use_default_project() - self.use_profile('bigquery') - - self.run_sql_file("test/integration/004_simple_archive_test/seed_bq.sql") - - self.run_dbt(["archive"]) - - self.assertTablesEqual("archive_expected", "archive_actual") - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") - - # This adds new fields to the source table, and updates the expected archive output accordingly - self.run_sql_file("test/integration/004_simple_archive_test/add_column_to_source_bq.sql") - - self.run_dbt(["archive"]) - - # A more thorough test would assert that archived == expected, but BigQuery does not support the - # "EXCEPT DISTINCT" operator on nested fields! Instead, just check that schemas are congruent. - - expected_cols = self.get_table_columns( - database=self.default_database, - schema=self.unique_schema(), - table='archive_expected' - ) - archived_cols = self.get_table_columns( - database=self.default_database, - schema=self.unique_schema(), - table='archive_actual' - ) - - self.assertTrue(len(expected_cols) > 0, "source table does not exist -- bad test") - self.assertEqual(len(expected_cols), len(archived_cols), "actual and expected column lengths are different") - - for (expected_col, actual_col) in zip(expected_cols, archived_cols): - expected_name, expected_type, _ = expected_col - actual_name, actual_type, _ = actual_col - self.assertTrue(expected_name is not None) - self.assertTrue(expected_type is not None) - - self.assertEqual(expected_name, actual_name, "names are different") - self.assertEqual(expected_type, actual_type, "data types are different") - - -class TestCrossDBArchiveFiles(DBTIntegrationTest): - setup_alternate_db = True - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - @property - def project_config(self): - if self.adapter_type == 'snowflake': - paths = ['test/integration/004_simple_archive_test/test-archives-pg'] - else: - paths = ['test/integration/004_simple_archive_test/test-archives-bq'] - return { - 'archive-paths': paths, - } - - def run_archive(self): - return self.run_dbt(['archive', '--vars', '{{"target_database": {}}}'.format(self.alternative_database)]) - - @use_profile('snowflake') - def test__snowflake__cross_archive(self): - self.run_sql_file("test/integration/004_simple_archive_test/seed.sql") - - results = self.run_archive() - self.assertEqual(len(results), 1) - - self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL", table_b_db=self.alternative_database) - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_snowflake.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - - results = self.run_archive() - self.assertEqual(len(results), 1) - - self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL", table_b_db=self.alternative_database) - - @use_profile('bigquery') - def test__bigquery__cross_archive(self): - self.run_sql_file("test/integration/004_simple_archive_test/seed_bq.sql") - - self.run_archive() - - self.assertTablesEqual("archive_expected", "archive_actual", table_b_db=self.alternative_database) - - 
self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") - - self.run_archive() - - self.assertTablesEqual("archive_expected", "archive_actual", table_b_db=self.alternative_database) - - -class TestCrossSchemaArchiveFiles(DBTIntegrationTest): - NUM_ARCHIVE_MODELS = 1 - - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - @property - def project_config(self): - paths = ['test/integration/004_simple_archive_test/test-archives-pg'] - return { - 'archive-paths': paths, - } - - def target_schema(self): - return "{}_archived".format(self.unique_schema()) - - def run_archive(self): - return self.run_dbt(['archive', '--vars', '{{"target_schema": {}}}'.format(self.target_schema())]) - - @use_profile('postgres') - def test__postgres__cross_schema_archive(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed_pg.sql') - - results = self.run_archive() - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - - results = self.run_dbt(['run', '--vars', '{{"target_schema": {}}}'.format(self.target_schema())]) - self.assertEqual(len(results), 1) - - -class TestBadArchive(DBTIntegrationTest): - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - @property - def project_config(self): - return { - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-invalid'], - } - - @use_profile('postgres') - def test__postgres__invalid(self): - with self.assertRaises(dbt.exceptions.CompilationException) as exc: - self.run_dbt(['compile'], expect_pass=False) - - self.assertIn('target_database', str(exc.exception)) - - -class TestCheckCols(TestSimpleArchiveFiles): - NUM_ARCHIVE_MODELS = 2 - def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): - # When building the equality tests, only test columns that don't start - # with 'dbt_', because those are time-sensitive - if columns is None: - columns = [c for c in self.get_relation_columns(relation_a) if not c[0].lower().startswith('dbt_')] - return super(TestCheckCols, self)._assertTablesEqualSql( - relation_a, - relation_b, - columns=columns - ) - - def assert_expected(self): - super(TestCheckCols, self).assert_expected() - self.assert_case_tables_equal('archive_checkall', 'archive_expected') - - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": ['test/integration/004_simple_archive_test/test-check-col-archives'], - } - - -class TestCheckColsBigquery(TestSimpleArchiveFilesBigquery): - def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): - # When building the equality tests, only test columns that don't start - # with 'dbt_', because those are time-sensitive - if columns is None: - columns = [c for c in self.get_relation_columns(relation_a) if not c[0].lower().startswith('dbt_')] - return super(TestCheckColsBigquery, self)._assertTablesEqualSql( - relation_a, - relation_b, - columns=columns - ) - - def assert_expected(self): - super(TestCheckColsBigquery, self).assert_expected() - self.assertTablesEqual('archive_checkall', 'archive_expected') - - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": 
['test/integration/004_simple_archive_test/test-check-col-archives-bq'], - } - - @use_profile('bigquery') - def test__bigquery__archive_with_new_field(self): - self.use_default_project() - self.use_profile('bigquery') - - self.run_sql_file("test/integration/004_simple_archive_test/seed_bq.sql") - - self.run_dbt(["archive"]) - - self.assertTablesEqual("archive_expected", "archive_actual") - self.assertTablesEqual("archive_expected", "archive_checkall") - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") - - # This adds new fields to the source table, and updates the expected archive output accordingly - self.run_sql_file("test/integration/004_simple_archive_test/add_column_to_source_bq.sql") - - # this should fail because `check="all"` will try to compare the nested field - self.run_dbt(['archive'], expect_pass=False) - - self.run_dbt(["archive", '-m', 'archive_actual']) - - # A more thorough test would assert that archived == expected, but BigQuery does not support the - # "EXCEPT DISTINCT" operator on nested fields! Instead, just check that schemas are congruent. - - expected_cols = self.get_table_columns( - database=self.default_database, - schema=self.unique_schema(), - table='archive_expected' - ) - archived_cols = self.get_table_columns( - database=self.default_database, - schema=self.unique_schema(), - table='archive_actual' - ) - - self.assertTrue(len(expected_cols) > 0, "source table does not exist -- bad test") - self.assertEqual(len(expected_cols), len(archived_cols), "actual and expected column lengths are different") - - for (expected_col, actual_col) in zip(expected_cols, archived_cols): - expected_name, expected_type, _ = expected_col - actual_name, actual_type, _ = actual_col - self.assertTrue(expected_name is not None) - self.assertTrue(expected_type is not None) - - self.assertEqual(expected_name, actual_name, "names are different") - self.assertEqual(expected_type, actual_type, "data types are different") - - -class TestLongText(DBTIntegrationTest): - - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - def run_archive(self): - return self.run_dbt(['archive']) - - @property - def project_config(self): - return { - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-longtext'], - } - - @use_profile('postgres') - def test__postgres__long_text(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed_longtext.sql') - results = self.run_dbt(['archive']) - self.assertEqual(len(results), 1) - - with self.adapter.connection_named('test'): - status, results = self.adapter.execute( - 'select * from {}.{}.archive_actual'.format(self.default_database, self.unique_schema()), - fetch=True - ) - self.assertEqual(len(results), 2) - got_names = set(r.get('longstring') for r in results) - self.assertEqual(got_names, {'a' * 500, 'short'}) diff --git a/test/integration/004_simple_archive_test/add_column_to_source_bq.sql b/test/integration/004_simple_snapshot_test/add_column_to_source_bq.sql similarity index 90% rename from test/integration/004_simple_archive_test/add_column_to_source_bq.sql rename to test/integration/004_simple_snapshot_test/add_column_to_source_bq.sql index 895d54eebab..e1babb82c88 100644 --- a/test/integration/004_simple_archive_test/add_column_to_source_bq.sql +++ 
b/test/integration/004_simple_snapshot_test/add_column_to_source_bq.sql @@ -27,7 +27,7 @@ create or replace table {schema}.seed as ( ); -create or replace table {schema}.archive_expected as ( +create or replace table {schema}.snapshot_expected as ( select *, [ @@ -51,6 +51,6 @@ create or replace table {schema}.archive_expected as ( 2 ] as repeated_field - from {schema}.archive_expected + from {schema}.snapshot_expected ); diff --git a/test/integration/004_simple_archive_test/data/seed.csv b/test/integration/004_simple_snapshot_test/data/seed.csv similarity index 100% rename from test/integration/004_simple_archive_test/data/seed.csv rename to test/integration/004_simple_snapshot_test/data/seed.csv diff --git a/test/integration/004_simple_archive_test/invalidate_bigquery.sql b/test/integration/004_simple_snapshot_test/invalidate_bigquery.sql similarity index 87% rename from test/integration/004_simple_archive_test/invalidate_bigquery.sql rename to test/integration/004_simple_snapshot_test/invalidate_bigquery.sql index 32ac3f0eceb..ac703d5ee49 100644 --- a/test/integration/004_simple_archive_test/invalidate_bigquery.sql +++ b/test/integration/004_simple_snapshot_test/invalidate_bigquery.sql @@ -7,6 +7,6 @@ where id >= 10 and id <= 20; -- invalidate records 11 - 21 -update {database}.{schema}.archive_expected set +update {database}.{schema}.snapshot_expected set dbt_valid_to = timestamp_add(updated_at, interval 1 hour) where id >= 10 and id <= 20; diff --git a/test/integration/004_simple_archive_test/invalidate_postgres.sql b/test/integration/004_simple_snapshot_test/invalidate_postgres.sql similarity index 76% rename from test/integration/004_simple_archive_test/invalidate_postgres.sql rename to test/integration/004_simple_snapshot_test/invalidate_postgres.sql index 491afccfac2..11178a71a9c 100644 --- a/test/integration/004_simple_archive_test/invalidate_postgres.sql +++ b/test/integration/004_simple_snapshot_test/invalidate_postgres.sql @@ -7,21 +7,21 @@ where id >= 10 and id <= 20; -- invalidate records 11 - 21 -update {schema}.archive_expected set +update {schema}.snapshot_expected set dbt_valid_to = updated_at + interval '1 hour' where id >= 10 and id <= 20; -update {schema}.archive_castillo_expected set +update {schema}.snapshot_castillo_expected set dbt_valid_to = updated_at + interval '1 hour' where id >= 10 and id <= 20; -update {schema}.archive_alvarez_expected set +update {schema}.snapshot_alvarez_expected set dbt_valid_to = updated_at + interval '1 hour' where id >= 10 and id <= 20; -update {schema}.archive_kelly_expected set +update {schema}.snapshot_kelly_expected set dbt_valid_to = updated_at + interval '1 hour' where id >= 10 and id <= 20; diff --git a/test/integration/004_simple_archive_test/invalidate_snowflake.sql b/test/integration/004_simple_snapshot_test/invalidate_snowflake.sql similarity index 86% rename from test/integration/004_simple_archive_test/invalidate_snowflake.sql rename to test/integration/004_simple_snapshot_test/invalidate_snowflake.sql index 86e3a3c7a40..da3974f6d51 100644 --- a/test/integration/004_simple_archive_test/invalidate_snowflake.sql +++ b/test/integration/004_simple_snapshot_test/invalidate_snowflake.sql @@ -7,6 +7,6 @@ where id >= 10 and id <= 20; -- invalidate records 11 - 21 -update {database}.{schema}.archive_expected set +update {database}.{schema}.snapshot_expected set dbt_valid_to = DATEADD(hour, 1, updated_at) where id >= 10 and id <= 20; diff --git a/test/integration/004_simple_archive_test/models-collision/archive_actual.sql 
b/test/integration/004_simple_snapshot_test/models-collision/snapshot_actual.sql similarity index 100% rename from test/integration/004_simple_archive_test/models-collision/archive_actual.sql rename to test/integration/004_simple_snapshot_test/models-collision/snapshot_actual.sql diff --git a/test/integration/004_simple_archive_test/models/.gitkeep b/test/integration/004_simple_snapshot_test/models/.gitkeep similarity index 100% rename from test/integration/004_simple_archive_test/models/.gitkeep rename to test/integration/004_simple_snapshot_test/models/.gitkeep diff --git a/test/integration/004_simple_snapshot_test/models/ref_snapshot.sql b/test/integration/004_simple_snapshot_test/models/ref_snapshot.sql new file mode 100644 index 00000000000..c453929cebb --- /dev/null +++ b/test/integration/004_simple_snapshot_test/models/ref_snapshot.sql @@ -0,0 +1 @@ +select * from {{ ref('snapshot_actual') }} diff --git a/test/integration/004_simple_archive_test/seed.sql b/test/integration/004_simple_snapshot_test/seed.sql similarity index 88% rename from test/integration/004_simple_archive_test/seed.sql rename to test/integration/004_simple_snapshot_test/seed.sql index d0ee03181c5..3f814a3cb66 100644 --- a/test/integration/004_simple_archive_test/seed.sql +++ b/test/integration/004_simple_snapshot_test/seed.sql @@ -8,7 +8,7 @@ create table {database}.{schema}.seed ( updated_at TIMESTAMP WITHOUT TIME ZONE ); -create table {database}.{schema}.archive_expected ( +create table {database}.{schema}.snapshot_expected ( id INTEGER, first_name VARCHAR(50), last_name VARCHAR(50), @@ -16,7 +16,7 @@ create table {database}.{schema}.archive_expected ( gender VARCHAR(50), ip_address VARCHAR(20), - -- archival fields + -- snapshotting fields updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, @@ -49,8 +49,8 @@ insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, (20, 'Phyllis', 'Fox', 'pfoxj@creativecommons.org', 'Female', '163.191.232.95', '2016-08-21 10:35:19'); --- populate archive table -insert into {database}.{schema}.archive_expected ( +-- populate snapshot table +insert into {database}.{schema}.snapshot_expected ( id, first_name, last_name, @@ -72,14 +72,14 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id from {database}.{schema}.seed; -create table {database}.{schema}.archive_castillo_expected ( +create table {database}.{schema}.snapshot_castillo_expected ( id INTEGER, first_name VARCHAR(50), last_name VARCHAR(50), @@ -87,7 +87,7 @@ create table {database}.{schema}.archive_castillo_expected ( gender VARCHAR(50), ip_address VARCHAR(20), - -- archival fields + -- snapshotting fields updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, @@ -97,7 +97,7 @@ create table {database}.{schema}.archive_castillo_expected ( ); -- one entry -insert into {database}.{schema}.archive_castillo_expected ( +insert into {database}.{schema}.snapshot_castillo_expected ( id, first_name, last_name, @@ -119,14 +119,14 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, md5(id || '-' || first_name || 
'|' || updated_at::text) as dbt_scd_id from {database}.{schema}.seed where last_name = 'Castillo'; -create table {database}.{schema}.archive_alvarez_expected ( +create table {database}.{schema}.snapshot_alvarez_expected ( id INTEGER, first_name VARCHAR(50), last_name VARCHAR(50), @@ -134,7 +134,7 @@ create table {database}.{schema}.archive_alvarez_expected ( gender VARCHAR(50), ip_address VARCHAR(20), - -- archival fields + -- snapshotting fields updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, @@ -143,7 +143,7 @@ create table {database}.{schema}.archive_alvarez_expected ( ); -- 0 entries -insert into {database}.{schema}.archive_alvarez_expected ( +insert into {database}.{schema}.snapshot_alvarez_expected ( id, first_name, last_name, @@ -165,14 +165,14 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id from {database}.{schema}.seed where last_name = 'Alvarez'; -create table {database}.{schema}.archive_kelly_expected ( +create table {database}.{schema}.snapshot_kelly_expected ( id INTEGER, first_name VARCHAR(50), last_name VARCHAR(50), @@ -180,7 +180,7 @@ create table {database}.{schema}.archive_kelly_expected ( gender VARCHAR(50), ip_address VARCHAR(20), - -- archival fields + -- snapshotting fields updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, @@ -190,7 +190,7 @@ create table {database}.{schema}.archive_kelly_expected ( -- 2 entries -insert into {database}.{schema}.archive_kelly_expected ( +insert into {database}.{schema}.snapshot_kelly_expected ( id, first_name, last_name, @@ -212,7 +212,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, diff --git a/test/integration/004_simple_archive_test/seed_bq.sql b/test/integration/004_simple_snapshot_test/seed_bq.sql similarity index 94% rename from test/integration/004_simple_archive_test/seed_bq.sql rename to test/integration/004_simple_snapshot_test/seed_bq.sql index 9ef91799604..1b75c525f0d 100644 --- a/test/integration/004_simple_archive_test/seed_bq.sql +++ b/test/integration/004_simple_snapshot_test/seed_bq.sql @@ -8,7 +8,7 @@ create table {database}.{schema}.seed ( updated_at TIMESTAMP ); -create table {database}.{schema}.archive_expected ( +create table {database}.{schema}.snapshot_expected ( id INT64, first_name STRING, last_name STRING, @@ -16,7 +16,7 @@ create table {database}.{schema}.archive_expected ( gender STRING, ip_address STRING, - -- archival fields + -- snapshotting fields updated_at TIMESTAMP, dbt_valid_from TIMESTAMP, dbt_valid_to TIMESTAMP, @@ -49,8 +49,8 @@ insert {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_ad (20, 'Phyllis', 'Fox', 'pfoxj@creativecommons.org', 'Female', '163.191.232.95', '2016-08-21 10:35:19'); --- populate archive table -insert {database}.{schema}.archive_expected ( +-- populate snapshot table +insert {database}.{schema}.snapshot_expected ( id, first_name, last_name, @@ -72,7 +72,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, cast(null as timestamp) as dbt_valid_to, 
updated_at as dbt_updated_at, diff --git a/test/integration/004_simple_archive_test/seed_longtext.sql b/test/integration/004_simple_snapshot_test/seed_longtext.sql similarity index 100% rename from test/integration/004_simple_archive_test/seed_longtext.sql rename to test/integration/004_simple_snapshot_test/seed_longtext.sql diff --git a/test/integration/004_simple_archive_test/seed_pg.sql b/test/integration/004_simple_snapshot_test/seed_pg.sql similarity index 88% rename from test/integration/004_simple_archive_test/seed_pg.sql rename to test/integration/004_simple_snapshot_test/seed_pg.sql index 5538245caa4..7abc1d02477 100644 --- a/test/integration/004_simple_archive_test/seed_pg.sql +++ b/test/integration/004_simple_snapshot_test/seed_pg.sql @@ -8,7 +8,7 @@ updated_at TIMESTAMP WITHOUT TIME ZONE ); -create table {database}.{schema}.archive_expected ( +create table {database}.{schema}.snapshot_expected ( id INTEGER, first_name VARCHAR(50), last_name VARCHAR(50), @@ -16,7 +16,7 @@ create table {database}.{schema}.archive_expected ( gender VARCHAR(50), ip_address VARCHAR(20), - -- archival fields + -- snapshotting fields updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, @@ -49,8 +49,8 @@ insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, (20, 'Phyllis', 'Fox', 'pfoxj@creativecommons.org', 'Female', '163.191.232.95', '2016-08-21 10:35:19'); --- populate archive table -insert into {database}.{schema}.archive_expected ( +-- populate snapshot table +insert into {database}.{schema}.snapshot_expected ( id, first_name, last_name, @@ -72,7 +72,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, @@ -81,7 +81,7 @@ from {database}.{schema}.seed; -create table {database}.{schema}.archive_castillo_expected ( +create table {database}.{schema}.snapshot_castillo_expected ( id INTEGER, first_name VARCHAR(50), last_name VARCHAR(50), @@ -89,7 +89,7 @@ create table {database}.{schema}.archive_castillo_expected ( gender VARCHAR(50), ip_address VARCHAR(20), - -- archival fields + -- snapshotting fields updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, @@ -98,7 +98,7 @@ create table {database}.{schema}.archive_castillo_expected ( ); -- one entry -insert into {database}.{schema}.archive_castillo_expected ( +insert into {database}.{schema}.snapshot_castillo_expected ( id, first_name, last_name, @@ -120,14 +120,14 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id from {database}.{schema}.seed where last_name = 'Castillo'; -create table {database}.{schema}.archive_alvarez_expected ( +create table {database}.{schema}.snapshot_alvarez_expected ( id INTEGER, first_name VARCHAR(50), last_name VARCHAR(50), @@ -135,7 +135,7 @@ create table {database}.{schema}.archive_alvarez_expected ( gender VARCHAR(50), ip_address VARCHAR(20), - -- archival fields + -- snapshotting fields updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, @@ -144,7 +144,7 @@ create table {database}.{schema}.archive_alvarez_expected ( ); -- 
0 entries -insert into {database}.{schema}.archive_alvarez_expected ( +insert into {database}.{schema}.snapshot_alvarez_expected ( id, first_name, last_name, @@ -166,14 +166,14 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id from {database}.{schema}.seed where last_name = 'Alvarez'; -create table {database}.{schema}.archive_kelly_expected ( +create table {database}.{schema}.snapshot_kelly_expected ( id INTEGER, first_name VARCHAR(50), last_name VARCHAR(50), @@ -181,7 +181,7 @@ create table {database}.{schema}.archive_kelly_expected ( gender VARCHAR(50), ip_address VARCHAR(20), - -- archival fields + -- snapshotting fields updated_at TIMESTAMP WITHOUT TIME ZONE, dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, @@ -191,7 +191,7 @@ create table {database}.{schema}.archive_kelly_expected ( -- 2 entries -insert into {database}.{schema}.archive_kelly_expected ( +insert into {database}.{schema}.snapshot_kelly_expected ( id, first_name, last_name, @@ -213,7 +213,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, diff --git a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql b/test/integration/004_simple_snapshot_test/test-check-col-snapshots-bq/snapshot.sql similarity index 87% rename from test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql rename to test/integration/004_simple_snapshot_test/test-check-col-snapshots-bq/snapshot.sql index 50eece23b5f..33c3e4e5fff 100644 --- a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql +++ b/test/integration/004_simple_snapshot_test/test-check-col-snapshots-bq/snapshot.sql @@ -1,4 +1,4 @@ -{% archive archive_actual %} +{% snapshot snapshot_actual %} {{ config( target_database=var('target_database', database), @@ -9,11 +9,11 @@ ) }} select * from `{{target.database}}`.`{{schema}}`.seed -{% endarchive %} +{% endsnapshot %} {# This should be exactly the same #} -{% archive archive_checkall %} +{% snapshot snapshot_checkall %} {{ config( target_database=var('target_database', database), @@ -24,4 +24,4 @@ ) }} select * from `{{target.database}}`.`{{schema}}`.seed -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql b/test/integration/004_simple_snapshot_test/test-check-col-snapshots/snapshot.sql similarity index 86% rename from test/integration/004_simple_archive_test/test-check-col-archives/archive.sql rename to test/integration/004_simple_snapshot_test/test-check-col-snapshots/snapshot.sql index 314b227634a..dd85ed7535a 100644 --- a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql +++ b/test/integration/004_simple_snapshot_test/test-check-col-snapshots/snapshot.sql @@ -1,4 +1,4 @@ -{% archive archive_actual %} +{% snapshot snapshot_actual %} {{ config( @@ -11,10 +11,10 @@ }} select * from {{target.database}}.{{schema}}.seed -{% endarchive %} +{% endsnapshot %} {# This should be exactly the same #} -{% archive archive_checkall %} +{% snapshot snapshot_checkall %} {{ config( target_database=var('target_database', database), @@ -25,4 +25,4 @@ ) }} select * from 
{{target.database}}.{{schema}}.seed -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql b/test/integration/004_simple_snapshot_test/test-snapshots-bq/snapshot.sql similarity index 87% rename from test/integration/004_simple_archive_test/test-archives-bq/archive.sql rename to test/integration/004_simple_snapshot_test/test-snapshots-bq/snapshot.sql index aff119c410b..639e65904b7 100644 --- a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql +++ b/test/integration/004_simple_snapshot_test/test-snapshots-bq/snapshot.sql @@ -1,4 +1,4 @@ -{% archive archive_actual %} +{% snapshot snapshot_actual %} {{ config( @@ -11,4 +11,4 @@ }} select * from `{{target.database}}`.`{{schema}}`.seed -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql b/test/integration/004_simple_snapshot_test/test-snapshots-invalid/snapshot.sql similarity index 83% rename from test/integration/004_simple_archive_test/test-archives-invalid/archive.sql rename to test/integration/004_simple_snapshot_test/test-snapshots-invalid/snapshot.sql index 35340368781..0e39d9aa739 100644 --- a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql +++ b/test/integration/004_simple_snapshot_test/test-snapshots-invalid/snapshot.sql @@ -1,4 +1,4 @@ -{% archive no_target_database %} +{% snapshot no_target_database %} {{ config( target_schema=schema, @@ -9,4 +9,4 @@ }} select * from {{target.database}}.{{schema}}.seed -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql b/test/integration/004_simple_snapshot_test/test-snapshots-longtext/snapshot.sql similarity index 85% rename from test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql rename to test/integration/004_simple_snapshot_test/test-snapshots-longtext/snapshot.sql index c16ce784889..35a563e3f75 100644 --- a/test/integration/004_simple_archive_test/test-archives-longtext/longtext.sql +++ b/test/integration/004_simple_snapshot_test/test-snapshots-longtext/snapshot.sql @@ -1,4 +1,4 @@ -{% archive archive_actual %} +{% snapshot snapshot_actual %} {{ config( target_database=var('target_database', database), @@ -9,4 +9,4 @@ ) }} select * from {{target.database}}.{{schema}}.super_long -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/004_simple_archive_test/test-archives-pg/archive.sql b/test/integration/004_simple_snapshot_test/test-snapshots-pg/snapshot.sql similarity index 87% rename from test/integration/004_simple_archive_test/test-archives-pg/archive.sql rename to test/integration/004_simple_snapshot_test/test-snapshots-pg/snapshot.sql index 133465078c1..70be6d4ed89 100644 --- a/test/integration/004_simple_archive_test/test-archives-pg/archive.sql +++ b/test/integration/004_simple_snapshot_test/test-snapshots-pg/snapshot.sql @@ -1,4 +1,4 @@ -{% archive archive_actual %} +{% snapshot snapshot_actual %} {{ config( @@ -11,4 +11,4 @@ }} select * from {{target.database}}.{{target.schema}}.seed -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/004_simple_archive_test/test-archives-select/archives.sql b/test/integration/004_simple_snapshot_test/test-snapshots-select/snapshot.sql similarity index 87% rename from test/integration/004_simple_archive_test/test-archives-select/archives.sql rename to test/integration/004_simple_snapshot_test/test-snapshots-select/snapshot.sql index 
562ec89b3ce..afbf39a4745 100644 --- a/test/integration/004_simple_archive_test/test-archives-select/archives.sql +++ b/test/integration/004_simple_snapshot_test/test-snapshots-select/snapshot.sql @@ -1,4 +1,4 @@ -{% archive archive_castillo %} +{% snapshot snapshot_castillo %} {{ config( @@ -11,9 +11,9 @@ }} select * from {{target.database}}.{{schema}}.seed where last_name = 'Castillo' -{% endarchive %} +{% endsnapshot %} -{% archive archive_alvarez %} +{% snapshot snapshot_alvarez %} {{ config( @@ -26,10 +26,10 @@ }} select * from {{target.database}}.{{schema}}.seed where last_name = 'Alvarez' -{% endarchive %} +{% endsnapshot %} -{% archive archive_kelly %} +{% snapshot snapshot_kelly %} {{ config( @@ -42,4 +42,4 @@ }} select * from {{target.database}}.{{schema}}.seed where last_name = 'Kelly' -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/004_simple_snapshot_test/test_simple_snapshot.py b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py new file mode 100644 index 00000000000..0ea53ef688f --- /dev/null +++ b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py @@ -0,0 +1,489 @@ +from test.integration.base import DBTIntegrationTest, use_profile +import dbt.exceptions + + +class TestSimpleSnapshotFiles(DBTIntegrationTest): + NUM_SNAPSHOT_MODELS = 1 + + @property + def schema(self): + return "simple_snapshot_004" + + @property + def models(self): + return "test/integration/004_simple_snapshot_test/models" + + def run_snapshot(self): + return self.run_dbt(['snapshot']) + + @property + def project_config(self): + return { + "data-paths": ['test/integration/004_simple_snapshot_test/data'], + "snapshot-paths": ['test/integration/004_simple_snapshot_test/test-snapshots-pg'], + } + + @use_profile('postgres') + def test__postgres_ref_snapshot(self): + self.dbt_run_seed_snapshot() + results = self.run_dbt(['run']) + self.assertEqual(len(results), 1) + + def dbt_run_seed_snapshot(self): + if self.adapter_type == 'postgres': + self.run_sql_file('test/integration/004_simple_snapshot_test/seed_pg.sql') + else: + self.run_sql_file('test/integration/004_simple_snapshot_test/seed.sql') + + results = self.run_snapshot() + self.assertEqual(len(results), self.NUM_SNAPSHOT_MODELS) + + def assert_case_tables_equal(self, actual, expected): + if self.adapter_type == 'snowflake': + actual = actual.upper() + expected = expected.upper() + + self.assertTablesEqual(actual, expected) + + def assert_expected(self): + self.assert_case_tables_equal('snapshot_actual', 'snapshot_expected') + + @use_profile('postgres') + def test__postgres__simple_snapshot(self): + self.dbt_run_seed_snapshot() + + self.assert_expected() + + self.run_sql_file("test/integration/004_simple_snapshot_test/invalidate_postgres.sql") + self.run_sql_file("test/integration/004_simple_snapshot_test/update.sql") + + results = self.run_snapshot() + self.assertEqual(len(results), self.NUM_SNAPSHOT_MODELS) + + self.assert_expected() + + @use_profile('snowflake') + def test__snowflake__simple_snapshot(self): + self.dbt_run_seed_snapshot() + + self.assert_expected() + + self.run_sql_file("test/integration/004_simple_snapshot_test/invalidate_snowflake.sql") + self.run_sql_file("test/integration/004_simple_snapshot_test/update.sql") + + results = self.run_snapshot() + self.assertEqual(len(results), self.NUM_SNAPSHOT_MODELS) + + self.assert_expected() + + @use_profile('redshift') + def test__redshift__simple_snapshot(self): + self.dbt_run_seed_snapshot() + + self.assert_expected() + + 
self.run_sql_file("test/integration/004_simple_snapshot_test/invalidate_postgres.sql") + self.run_sql_file("test/integration/004_simple_snapshot_test/update.sql") + + results = self.run_snapshot() + self.assertEqual(len(results), self.NUM_SNAPSHOT_MODELS) + + self.assert_expected() + + @use_profile('presto') + def test__presto__simple_snapshot_disabled(self): + results = self.run_dbt(["seed"]) + self.assertEqual(len(results), self.NUM_SNAPSHOT_MODELS) + # presto does not run snapshots + results = self.run_dbt(["snapshot"], expect_pass=False) + self.assertEqual(len(results), self.NUM_SNAPSHOT_MODELS) + self.assertIn('not implemented for presto', results[0].error) + + +class TestSimpleSnapshotFileSelects(DBTIntegrationTest): + @property + def schema(self): + return "simple_snapshot_004" + + @property + def models(self): + return "test/integration/004_simple_snapshot_test/models" + + @property + def project_config(self): + return { + "data-paths": ['test/integration/004_simple_snapshot_test/data'], + "snapshot-paths": ['test/integration/004_simple_snapshot_test/test-snapshots-select', + 'test/integration/004_simple_snapshot_test/test-snapshots-pg'], + } + + @use_profile('postgres') + def test__postgres__select_snapshots(self): + self.run_sql_file('test/integration/004_simple_snapshot_test/seed_pg.sql') + + results = self.run_dbt(['snapshot']) + self.assertEqual(len(results), 4) + self.assertTablesEqual('snapshot_castillo', 'snapshot_castillo_expected') + self.assertTablesEqual('snapshot_alvarez', 'snapshot_alvarez_expected') + self.assertTablesEqual('snapshot_kelly', 'snapshot_kelly_expected') + self.assertTablesEqual('snapshot_actual', 'snapshot_expected') + + self.run_sql_file("test/integration/004_simple_snapshot_test/invalidate_postgres.sql") + self.run_sql_file("test/integration/004_simple_snapshot_test/update.sql") + + results = self.run_dbt(['snapshot']) + self.assertEqual(len(results), 4) + self.assertTablesEqual('snapshot_castillo', 'snapshot_castillo_expected') + self.assertTablesEqual('snapshot_alvarez', 'snapshot_alvarez_expected') + self.assertTablesEqual('snapshot_kelly', 'snapshot_kelly_expected') + self.assertTablesEqual('snapshot_actual', 'snapshot_expected') + + @use_profile('postgres') + def test__postgres_exclude_snapshots(self): + self.run_sql_file('test/integration/004_simple_snapshot_test/seed_pg.sql') + results = self.run_dbt(['snapshot', '--exclude', 'snapshot_castillo']) + self.assertEqual(len(results), 3) + self.assertTableDoesNotExist('snapshot_castillo') + self.assertTablesEqual('snapshot_alvarez', 'snapshot_alvarez_expected') + self.assertTablesEqual('snapshot_kelly', 'snapshot_kelly_expected') + self.assertTablesEqual('snapshot_actual', 'snapshot_expected') + + @use_profile('postgres') + def test__postgres_select_snapshots(self): + self.run_sql_file('test/integration/004_simple_snapshot_test/seed_pg.sql') + results = self.run_dbt(['snapshot', '--models', 'snapshot_castillo']) + self.assertEqual(len(results), 1) + self.assertTablesEqual('snapshot_castillo', 'snapshot_castillo_expected') + self.assertTableDoesNotExist('snapshot_alvarez') + self.assertTableDoesNotExist('snapshot_kelly') + self.assertTableDoesNotExist('snapshot_actual') + + +class TestSimpleSnapshotFilesBigquery(DBTIntegrationTest): + @property + def schema(self): + return "simple_snapshot_004" + + @property + def models(self): + return "test/integration/004_simple_snapshot_test/models" + + @property + def project_config(self): + return { + "snapshot-paths": 
['test/integration/004_simple_snapshot_test/test-snapshots-bq'], + } + + def assert_expected(self): + self.assertTablesEqual('snapshot_actual', 'snapshot_expected') + + @use_profile('bigquery') + def test__bigquery__simple_snapshot(self): + self.use_default_project() + self.use_profile('bigquery') + + self.run_sql_file("test/integration/004_simple_snapshot_test/seed_bq.sql") + + self.run_dbt(["snapshot"]) + + self.assert_expected() + + self.run_sql_file("test/integration/004_simple_snapshot_test/invalidate_bigquery.sql") + self.run_sql_file("test/integration/004_simple_snapshot_test/update_bq.sql") + + self.run_dbt(["snapshot"]) + + self.assert_expected() + + + @use_profile('bigquery') + def test__bigquery__snapshot_with_new_field(self): + self.use_default_project() + self.use_profile('bigquery') + + self.run_sql_file("test/integration/004_simple_snapshot_test/seed_bq.sql") + + self.run_dbt(["snapshot"]) + + self.assertTablesEqual("snapshot_expected", "snapshot_actual") + + self.run_sql_file("test/integration/004_simple_snapshot_test/invalidate_bigquery.sql") + self.run_sql_file("test/integration/004_simple_snapshot_test/update_bq.sql") + + # This adds new fields to the source table, and updates the expected snapshot output accordingly + self.run_sql_file("test/integration/004_simple_snapshot_test/add_column_to_source_bq.sql") + + self.run_dbt(["snapshot"]) + + # A more thorough test would assert that snapshotted == expected, but BigQuery does not support the + # "EXCEPT DISTINCT" operator on nested fields! Instead, just check that schemas are congruent. + + expected_cols = self.get_table_columns( + database=self.default_database, + schema=self.unique_schema(), + table='snapshot_expected' + ) + snapshotted_cols = self.get_table_columns( + database=self.default_database, + schema=self.unique_schema(), + table='snapshot_actual' + ) + + self.assertTrue(len(expected_cols) > 0, "source table does not exist -- bad test") + self.assertEqual(len(expected_cols), len(snapshotted_cols), "actual and expected column lengths are different") + + for (expected_col, actual_col) in zip(expected_cols, snapshotted_cols): + expected_name, expected_type, _ = expected_col + actual_name, actual_type, _ = actual_col + self.assertTrue(expected_name is not None) + self.assertTrue(expected_type is not None) + + self.assertEqual(expected_name, actual_name, "names are different") + self.assertEqual(expected_type, actual_type, "data types are different") + + +class TestCrossDBSnapshotFiles(DBTIntegrationTest): + setup_alternate_db = True + @property + def schema(self): + return "simple_snapshot_004" + + @property + def models(self): + return "test/integration/004_simple_snapshot_test/models" + + @property + def project_config(self): + if self.adapter_type == 'snowflake': + paths = ['test/integration/004_simple_snapshot_test/test-snapshots-pg'] + else: + paths = ['test/integration/004_simple_snapshot_test/test-snapshots-bq'] + return { + 'snapshot-paths': paths, + } + + def run_snapshot(self): + return self.run_dbt(['snapshot', '--vars', '{{"target_database": {}}}'.format(self.alternative_database)]) + + @use_profile('snowflake') + def test__snowflake__cross_snapshot(self): + self.run_sql_file("test/integration/004_simple_snapshot_test/seed.sql") + + results = self.run_snapshot() + self.assertEqual(len(results), 1) + + self.assertTablesEqual("SNAPSHOT_EXPECTED", "SNAPSHOT_ACTUAL", table_b_db=self.alternative_database) + + self.run_sql_file("test/integration/004_simple_snapshot_test/invalidate_snowflake.sql") + 
self.run_sql_file("test/integration/004_simple_snapshot_test/update.sql") + + results = self.run_snapshot() + self.assertEqual(len(results), 1) + + self.assertTablesEqual("SNAPSHOT_EXPECTED", "SNAPSHOT_ACTUAL", table_b_db=self.alternative_database) + + @use_profile('bigquery') + def test__bigquery__cross_snapshot(self): + self.run_sql_file("test/integration/004_simple_snapshot_test/seed_bq.sql") + + self.run_snapshot() + + self.assertTablesEqual("snapshot_expected", "snapshot_actual", table_b_db=self.alternative_database) + + self.run_sql_file("test/integration/004_simple_snapshot_test/invalidate_bigquery.sql") + self.run_sql_file("test/integration/004_simple_snapshot_test/update_bq.sql") + + self.run_snapshot() + + self.assertTablesEqual("snapshot_expected", "snapshot_actual", table_b_db=self.alternative_database) + + +class TestCrossSchemaSnapshotFiles(DBTIntegrationTest): + NUM_SNAPSHOT_MODELS = 1 + + @property + def schema(self): + return "simple_snapshot_004" + + @property + def models(self): + return "test/integration/004_simple_snapshot_test/models" + + @property + def project_config(self): + paths = ['test/integration/004_simple_snapshot_test/test-snapshots-pg'] + return { + 'snapshot-paths': paths, + } + + def target_schema(self): + return "{}_snapshotted".format(self.unique_schema()) + + def run_snapshot(self): + return self.run_dbt(['snapshot', '--vars', '{{"target_schema": {}}}'.format(self.target_schema())]) + + @use_profile('postgres') + def test__postgres__cross_schema_snapshot(self): + self.run_sql_file('test/integration/004_simple_snapshot_test/seed_pg.sql') + + results = self.run_snapshot() + self.assertEqual(len(results), self.NUM_SNAPSHOT_MODELS) + + results = self.run_dbt(['run', '--vars', '{{"target_schema": {}}}'.format(self.target_schema())]) + self.assertEqual(len(results), 1) + + +class TestBadSnapshot(DBTIntegrationTest): + @property + def schema(self): + return "simple_snapshot_004" + + @property + def models(self): + return "test/integration/004_simple_snapshot_test/models" + + @property + def project_config(self): + return { + "snapshot-paths": ['test/integration/004_simple_snapshot_test/test-snapshots-invalid'], + } + + @use_profile('postgres') + def test__postgres__invalid(self): + with self.assertRaises(dbt.exceptions.CompilationException) as exc: + self.run_dbt(['compile'], expect_pass=False) + + self.assertIn('target_database', str(exc.exception)) + + +class TestCheckCols(TestSimpleSnapshotFiles): + NUM_SNAPSHOT_MODELS = 2 + def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): + # When building the equality tests, only test columns that don't start + # with 'dbt_', because those are time-sensitive + if columns is None: + columns = [c for c in self.get_relation_columns(relation_a) if not c[0].lower().startswith('dbt_')] + return super(TestCheckCols, self)._assertTablesEqualSql( + relation_a, + relation_b, + columns=columns + ) + + def assert_expected(self): + super(TestCheckCols, self).assert_expected() + self.assert_case_tables_equal('snapshot_checkall', 'snapshot_expected') + + @property + def project_config(self): + return { + "data-paths": ['test/integration/004_simple_snapshot_test/data'], + "snapshot-paths": ['test/integration/004_simple_snapshot_test/test-check-col-snapshots'], + } + + +class TestCheckColsBigquery(TestSimpleSnapshotFilesBigquery): + def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): + # When building the equality tests, only test columns that don't start + # with 'dbt_', because those are 
time-sensitive + if columns is None: + columns = [c for c in self.get_relation_columns(relation_a) if not c[0].lower().startswith('dbt_')] + return super(TestCheckColsBigquery, self)._assertTablesEqualSql( + relation_a, + relation_b, + columns=columns + ) + + def assert_expected(self): + super(TestCheckColsBigquery, self).assert_expected() + self.assertTablesEqual('snapshot_checkall', 'snapshot_expected') + + @property + def project_config(self): + return { + "data-paths": ['test/integration/004_simple_snapshot_test/data'], + "snapshot-paths": ['test/integration/004_simple_snapshot_test/test-check-col-snapshots-bq'], + } + + @use_profile('bigquery') + def test__bigquery__snapshot_with_new_field(self): + self.use_default_project() + self.use_profile('bigquery') + + self.run_sql_file("test/integration/004_simple_snapshot_test/seed_bq.sql") + + self.run_dbt(["snapshot"]) + + self.assertTablesEqual("snapshot_expected", "snapshot_actual") + self.assertTablesEqual("snapshot_expected", "snapshot_checkall") + + self.run_sql_file("test/integration/004_simple_snapshot_test/invalidate_bigquery.sql") + self.run_sql_file("test/integration/004_simple_snapshot_test/update_bq.sql") + + # This adds new fields to the source table, and updates the expected snapshot output accordingly + self.run_sql_file("test/integration/004_simple_snapshot_test/add_column_to_source_bq.sql") + + # this should fail because `check="all"` will try to compare the nested field + self.run_dbt(['snapshot'], expect_pass=False) + + self.run_dbt(["snapshot", '-m', 'snapshot_actual']) + + # A more thorough test would assert that snapshotted == expected, but BigQuery does not support the + # "EXCEPT DISTINCT" operator on nested fields! Instead, just check that schemas are congruent. + + expected_cols = self.get_table_columns( + database=self.default_database, + schema=self.unique_schema(), + table='snapshot_expected' + ) + snapshotted_cols = self.get_table_columns( + database=self.default_database, + schema=self.unique_schema(), + table='snapshot_actual' + ) + + self.assertTrue(len(expected_cols) > 0, "source table does not exist -- bad test") + self.assertEqual(len(expected_cols), len(snapshotted_cols), "actual and expected column lengths are different") + + for (expected_col, actual_col) in zip(expected_cols, snapshotted_cols): + expected_name, expected_type, _ = expected_col + actual_name, actual_type, _ = actual_col + self.assertTrue(expected_name is not None) + self.assertTrue(expected_type is not None) + + self.assertEqual(expected_name, actual_name, "names are different") + self.assertEqual(expected_type, actual_type, "data types are different") + + +class TestLongText(DBTIntegrationTest): + + @property + def schema(self): + return "simple_snapshot_004" + + @property + def models(self): + return "test/integration/004_simple_snapshot_test/models" + + def run_snapshot(self): + return self.run_dbt(['snapshot']) + + @property + def project_config(self): + return { + "snapshot-paths": ['test/integration/004_simple_snapshot_test/test-snapshots-longtext'], + } + + @use_profile('postgres') + def test__postgres__long_text(self): + self.run_sql_file('test/integration/004_simple_snapshot_test/seed_longtext.sql') + results = self.run_dbt(['snapshot']) + self.assertEqual(len(results), 1) + + with self.adapter.connection_named('test'): + status, results = self.adapter.execute( + 'select * from {}.{}.snapshot_actual'.format(self.default_database, self.unique_schema()), + fetch=True + ) + self.assertEqual(len(results), 2) + got_names = 
set(r.get('longstring') for r in results) + self.assertEqual(got_names, {'a' * 500, 'short'}) diff --git a/test/integration/004_simple_archive_test/update.sql b/test/integration/004_simple_snapshot_test/update.sql similarity index 86% rename from test/integration/004_simple_archive_test/update.sql rename to test/integration/004_simple_snapshot_test/update.sql index 0959cf9fa3f..5787dd2d9f6 100644 --- a/test/integration/004_simple_archive_test/update.sql +++ b/test/integration/004_simple_snapshot_test/update.sql @@ -1,6 +1,6 @@ -- insert v2 of the 11 - 21 records -insert into {database}.{schema}.archive_expected ( +insert into {database}.{schema}.snapshot_expected ( id, first_name, last_name, @@ -22,7 +22,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, @@ -31,7 +31,7 @@ from {database}.{schema}.seed where id >= 10 and id <= 20; -insert into {database}.{schema}.archive_castillo_expected ( +insert into {database}.{schema}.snapshot_castillo_expected ( id, first_name, last_name, @@ -53,7 +53,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, @@ -62,7 +62,7 @@ from {database}.{schema}.seed where id >= 10 and id <= 20 and last_name = 'Castillo'; -insert into {database}.{schema}.archive_alvarez_expected ( +insert into {database}.{schema}.snapshot_alvarez_expected ( id, first_name, last_name, @@ -84,7 +84,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, @@ -93,7 +93,7 @@ from {database}.{schema}.seed where id >= 10 and id <= 20 and last_name = 'Alvarez'; -insert into {database}.{schema}.archive_kelly_expected ( +insert into {database}.{schema}.snapshot_kelly_expected ( id, first_name, last_name, @@ -115,7 +115,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, @@ -137,8 +137,8 @@ insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, (30, 'Carl', 'Coleman', 'ccolemant@wikipedia.org', 'Male', '82.227.154.83', '2016-05-26 16:46:40'); --- add these new records to the archive table -insert into {database}.{schema}.archive_expected ( +-- add these new records to the snapshot table +insert into {database}.{schema}.snapshot_expected ( id, first_name, last_name, @@ -160,7 +160,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, @@ -169,8 +169,8 @@ from {database}.{schema}.seed where id > 20; --- add these new records to the archive table -insert into {database}.{schema}.archive_castillo_expected ( +-- add these new records to the snapshot table +insert into {database}.{schema}.snapshot_castillo_expected ( id, first_name, last_name, @@ -192,7 +192,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, @@ -200,7 +200,7 @@ select from {database}.{schema}.seed where id > 20 and last_name = 'Castillo'; 
-insert into {database}.{schema}.archive_alvarez_expected ( +insert into {database}.{schema}.snapshot_alvarez_expected ( id, first_name, last_name, @@ -222,7 +222,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, @@ -230,7 +230,7 @@ select from {database}.{schema}.seed where id > 20 and last_name = 'Alvarez'; -insert into {database}.{schema}.archive_kelly_expected ( +insert into {database}.{schema}.snapshot_kelly_expected ( id, first_name, last_name, @@ -252,7 +252,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, null::timestamp as dbt_valid_to, updated_at as dbt_updated_at, diff --git a/test/integration/004_simple_archive_test/update_bq.sql b/test/integration/004_simple_snapshot_test/update_bq.sql similarity index 91% rename from test/integration/004_simple_archive_test/update_bq.sql rename to test/integration/004_simple_snapshot_test/update_bq.sql index aa56fb839a9..5c972d8af5c 100644 --- a/test/integration/004_simple_archive_test/update_bq.sql +++ b/test/integration/004_simple_snapshot_test/update_bq.sql @@ -1,6 +1,6 @@ -- insert v2 of the 11 - 21 records -insert {database}.{schema}.archive_expected ( +insert {database}.{schema}.snapshot_expected ( id, first_name, last_name, @@ -22,7 +22,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, cast(null as timestamp) as dbt_valid_to, updated_at as dbt_updated_at, @@ -45,8 +45,8 @@ insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, (30, 'Carl', 'Coleman', 'ccolemant@wikipedia.org', 'Male', '82.227.154.83', '2016-05-26 16:46:40'); --- add these new records to the archive table -insert {database}.{schema}.archive_expected ( +-- add these new records to the snapshot table +insert {database}.{schema}.snapshot_expected ( id, first_name, last_name, @@ -68,7 +68,7 @@ select gender, ip_address, updated_at, - -- fields added by archival + -- fields added by snapshotting updated_at as dbt_valid_from, cast(null as timestamp) as dbt_valid_to, updated_at as dbt_updated_at, diff --git a/test/integration/023_exit_codes_test/archives-bad/b.sql b/test/integration/023_exit_codes_test/snapshots-bad/b.sql similarity index 78% rename from test/integration/023_exit_codes_test/archives-bad/b.sql rename to test/integration/023_exit_codes_test/snapshots-bad/b.sql index dba11123afc..52425b7c9bc 100644 --- a/test/integration/023_exit_codes_test/archives-bad/b.sql +++ b/test/integration/023_exit_codes_test/snapshots-bad/b.sql @@ -1,4 +1,4 @@ -{% archive good_archive %} +{% snapshot good_snapshot %} {{ config(target_schema=schema, target_database=database, strategy='timestamp', unique_key='id', updated_at='updated_at_not_real')}} select * from {{ schema }}.good -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/023_exit_codes_test/archives-good/g.sql b/test/integration/023_exit_codes_test/snapshots-good/g.sql similarity index 77% rename from test/integration/023_exit_codes_test/archives-good/g.sql rename to test/integration/023_exit_codes_test/snapshots-good/g.sql index fcce3ae911e..0c1205d9441 100644 --- a/test/integration/023_exit_codes_test/archives-good/g.sql +++ b/test/integration/023_exit_codes_test/snapshots-good/g.sql @@ -1,4 +1,4 @@ -{% archive good_archive %} +{% snapshot good_snapshot %} {{ 
config(target_schema=schema, target_database=database, strategy='timestamp', unique_key='id', updated_at='updated_at')}} select * from {{ schema }}.good -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/023_exit_codes_test/test_exit_codes.py b/test/integration/023_exit_codes_test/test_exit_codes.py index dc5236286db..cffd169c86d 100644 --- a/test/integration/023_exit_codes_test/test_exit_codes.py +++ b/test/integration/023_exit_codes_test/test_exit_codes.py @@ -17,7 +17,7 @@ def models(self): @property def project_config(self): return { - "archive-paths": ['test/integration/023_exit_codes_test/archives-good'], + "snapshot-paths": ['test/integration/023_exit_codes_test/snapshots-good'], } @use_profile('postgres') @@ -59,14 +59,14 @@ def test___compile(self): self.assertTrue(success) @use_profile('postgres') - def test___archive_pass(self): + def test___snapshot_pass(self): self.run_dbt_and_check(['run', '--model', 'good']) - results, success = self.run_dbt_and_check(['archive']) + results, success = self.run_dbt_and_check(['snapshot']) self.assertEqual(len(results), 1) - self.assertTableDoesExist('good_archive') + self.assertTableDoesExist('good_snapshot') self.assertTrue(success) -class TestExitCodesArchiveFail(DBTIntegrationTest): +class TestExitCodesSnapshotFail(DBTIntegrationTest): @property def schema(self): @@ -79,18 +79,18 @@ def models(self): @property def project_config(self): return { - "archive-paths": ['test/integration/023_exit_codes_test/archives-bad'], + "snapshot-paths": ['test/integration/023_exit_codes_test/snapshots-bad'], } @use_profile('postgres') - def test___archive_fail(self): + def test___snapshot_fail(self): results, success = self.run_dbt_and_check(['run', '--model', 'good']) self.assertTrue(success) self.assertEqual(len(results), 1) - results, success = self.run_dbt_and_check(['archive']) + results, success = self.run_dbt_and_check(['snapshot']) self.assertEqual(len(results), 1) - self.assertTableDoesNotExist('good_archive') + self.assertTableDoesNotExist('good_snapshot') self.assertFalse(success) class TestExitCodesDeps(DBTIntegrationTest): diff --git a/test/integration/033_event_tracking_test/models/archivable.sql b/test/integration/033_event_tracking_test/models/snapshottable.sql similarity index 100% rename from test/integration/033_event_tracking_test/models/archivable.sql rename to test/integration/033_event_tracking_test/models/snapshottable.sql diff --git a/test/integration/033_event_tracking_test/archives/a.sql b/test/integration/033_event_tracking_test/snapshots/a.sql similarity index 58% rename from test/integration/033_event_tracking_test/archives/a.sql rename to test/integration/033_event_tracking_test/snapshots/a.sql index 90e4ce9c9a7..dd90278e560 100644 --- a/test/integration/033_event_tracking_test/archives/a.sql +++ b/test/integration/033_event_tracking_test/snapshots/a.sql @@ -1,4 +1,4 @@ -{% archive archived %} +{% snapshot snapshotted %} {{ config(target_schema=schema, target_database=database, strategy='timestamp', unique_key='id', updated_at='updated_at')}} - select * from {{ schema }}.archivable -{% endarchive %} + select * from {{ schema }}.snapshottable +{% endsnapshot %} diff --git a/test/integration/033_event_tracking_test/test_events.py b/test/integration/033_event_tracking_test/test_events.py index 55e325521e6..1f7f8d69b97 100644 --- a/test/integration/033_event_tracking_test/test_events.py +++ b/test/integration/033_event_tracking_test/test_events.py @@ -557,16 +557,16 @@ def 
test__event_tracking_unable_to_connect(self): ) -class TestEventTrackingArchive(TestEventTracking): +class TestEventTrackingSnapshot(TestEventTracking): @property def project_config(self): return { - "archive-paths": ['test/integration/033_event_tracking_test/archives'] + "snapshot-paths": ['test/integration/033_event_tracking_test/snapshots'] } @use_profile("postgres") - def test__event_tracking_archive(self): - self.run_dbt(["run", "--models", "archivable"]) + def test__event_tracking_snapshot(self): + self.run_dbt(["run", "--models", "snapshottable"]) expected_calls = [ call( @@ -591,20 +591,20 @@ def test__event_tracking_archive(self): # the model here has a raw_sql that contains the schema, which changes expected_contexts = [ - self.build_context('archive', 'start'), + self.build_context('snapshot', 'start'), self.run_context( hashed_contents=ANY, - model_id='3cdcd0fef985948fd33af308468da3b9', + model_id='820793a4def8d8a38d109a9709374849', index=1, total=1, status='SELECT 1', - materialization='archive' + materialization='snapshot' ), - self.build_context('archive', 'end', result_type='ok') + self.build_context('snapshot', 'end', result_type='ok') ] self.run_event_test( - ["archive"], + ["snapshot"], expected_calls, expected_contexts ) diff --git a/test/integration/047_dbt_ls_test/archives/archive.sql b/test/integration/047_dbt_ls_test/snapshots/snapshot.sql similarity index 86% rename from test/integration/047_dbt_ls_test/archives/archive.sql rename to test/integration/047_dbt_ls_test/snapshots/snapshot.sql index c609604eec6..60d803dfbb4 100644 --- a/test/integration/047_dbt_ls_test/archives/archive.sql +++ b/test/integration/047_dbt_ls_test/snapshots/snapshot.sql @@ -1,4 +1,4 @@ -{% archive my_archive %} +{% snapshot my_snapshot %} {{ config( target_database=var('target_database', database), @@ -9,4 +9,4 @@ ) }} select * from {{database}}.{{schema}}.seed -{% endarchive %} +{% endsnapshot %} diff --git a/test/integration/047_dbt_ls_test/test_ls.py b/test/integration/047_dbt_ls_test/test_ls.py index b31ed4688bd..33b2fe4db87 100644 --- a/test/integration/047_dbt_ls_test/test_ls.py +++ b/test/integration/047_dbt_ls_test/test_ls.py @@ -23,7 +23,7 @@ def models(self): def project_config(self): return { 'analysis-paths': [self.dir('analyses')], - 'archive-paths': [self.dir('archives')], + 'snapshot-paths': [self.dir('snapshots')], 'macro-paths': [self.dir('macros')], 'data-paths': [self.dir('data')], } @@ -55,18 +55,18 @@ def expect_given_output(self, args, expectations): else: self.assertEqual(got, expected) - def expect_archive_output(self): + def expect_snapshot_output(self): expectations = { - 'name': 'my_archive', - 'selector': 'archive.test.my_archive', + 'name': 'my_snapshot', + 'selector': 'snapshot.test.my_snapshot', 'json': { - 'name': 'my_archive', + 'name': 'my_snapshot', 'package_name': 'test', 'depends_on': {'nodes': [], 'macros': []}, 'tags': [], 'config': { 'enabled': True, - 'materialized': 'archive', + 'materialized': 'snapshot', 'post-hook': [], 'tags': [], 'pre-hook': [], @@ -79,12 +79,12 @@ def expect_archive_output(self): 'strategy': 'timestamp', 'updated_at': 'updated_at' }, - 'alias': 'my_archive', - 'resource_type': 'archive', + 'alias': 'my_snapshot', + 'resource_type': 'snapshot', }, - 'path': self.dir('archives/archive.sql'), + 'path': self.dir('snapshots/snapshot.sql'), } - self.expect_given_output(['--resource-type', 'archive'], expectations) + self.expect_given_output(['--resource-type', 'snapshot'], expectations) def expect_analyses_output(self): 
expectations = { @@ -251,7 +251,7 @@ def expect_test_output(self): def expect_all_output(self): expected_default = { - 'archive.test.my_archive', + 'snapshot.test.my_snapshot', 'model.test.inner', 'model.test.outer', 'seed.test.seed', @@ -289,7 +289,7 @@ def expect_select(self): @use_profile('postgres') def test_postgres_ls(self): - self.expect_archive_output() + self.expect_snapshot_output() self.expect_analyses_output() self.expect_model_output() self.expect_source_output() From ab59ebe4f2ab702bc96cc893cb75f0dea1f37ab6 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 5 Jun 2019 13:39:45 -0600 Subject: [PATCH 25/31] Fix docs blocks parsing issues Rename documentation node type to docs so we can filter on it (is this breaking?) Fix block extractor bug with macros/docs that contain quotes Fix block extractor bug with expressions --- core/dbt/clients/_jinja_blocks.py | 43 ++++++++++++++++++++++++++++--- core/dbt/node_types.py | 2 +- core/dbt/parser/docs.py | 34 +++++++++++++++++------- test/unit/test_docs_blocks.py | 7 ++--- test/unit/test_jinja.py | 22 ++++++++++++++++ 5 files changed, 89 insertions(+), 19 deletions(-) diff --git a/core/dbt/clients/_jinja_blocks.py b/core/dbt/clients/_jinja_blocks.py index 6207ab4956c..cb97044348b 100644 --- a/core/dbt/clients/_jinja_blocks.py +++ b/core/dbt/clients/_jinja_blocks.py @@ -52,6 +52,8 @@ def end_pat(self): RAW_START_PATTERN = regex( r'(?:\s*\{\%\-|\{\%)\s*(?P(raw))\s*(?:\-\%\}\s*|\%\})' ) +EXPR_START_PATTERN = regex(r'(?P(\{\{\s*))') +EXPR_END_PATTERN = regex(r'(?P(\s*\}\}))') BLOCK_START_PATTERN = regex(''.join(( r'(?:\s*\{\%\-|\{\%)\s*', @@ -92,6 +94,8 @@ def end_pat(self): r'"([^"\\]*(?:\\.[^"\\]*)*)"))' ) +QUOTE_START_PATTERN = regex(r'''(?P(['"]))''') + # any number of non-quote characters, followed by: # - quote: a quote mark indicating start of a string (you'll want to backtrack # the regex end on quotes and then match with the string pattern) @@ -179,6 +183,31 @@ def _expect_match(self, expected_name, *patterns, **kwargs): dbt.exceptions.raise_compiler_error(msg) return match + def handle_expr(self): + """Handle an expression. At this point we're at a string like: + {{ 1 + 2 }} + ^ right here + + We expect to find a `}}`, but we might find one in a string before + that. Imagine the case of `{{ 2 * "}}" }}`... + + You're not allowed to have blocks or comments inside an expr so it is + pretty straightforward, I hope: only strings can get in the way. + """ + while True: + match = self._expect_match('}}', + EXPR_END_PATTERN, + QUOTE_START_PATTERN) + if match.groupdict().get('expr_end') is not None: + break + else: + # it's a quote. we haven't advanced for this match yet, so + # just slurp up the whole string, no need to rewind. + match = self._expect_match('string', STRING_PATTERN) + self.advance(match.end()) + + self.advance(match.end()) + def handle_block(self, match, block_start=None): """Handle a block. The current state of the parser should be after the open block is completed: @@ -197,12 +226,18 @@ def handle_block(self, match, block_start=None): self._block_contents = '' + search = [found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN, + EXPR_START_PATTERN] + + # docs and macros do not honor embedded quotes + if found.block_type_name not in ('docs', 'macro'): + # is this right? + search.append(QUOTE_START_PATTERN) + # you can have as many comments in your block as you'd like! 
while True: match = self._expect_match( - '"{}"'.format(found.end_block_type_name), - found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN, - regex('''(?P(['"]))''') + '"{}"'.format(found.end_block_type_name), *search ) groups = match.groupdict() if groups.get('endblock') is not None: @@ -218,6 +253,8 @@ def handle_block(self, match, block_start=None): self.rewind() match = self._expect_match('any string', STRING_PATTERN) self.advance(match.end()) + elif groups.get('expr_start') is not None: + self.handle_expr() else: raise dbt.exceptions.InternalException( 'unhandled regex in handle_block, no match: {}' diff --git a/core/dbt/node_types.py b/core/dbt/node_types.py index d0a94404ae0..4b43cae0dbb 100644 --- a/core/dbt/node_types.py +++ b/core/dbt/node_types.py @@ -8,7 +8,7 @@ class NodeType(object): Macro = 'macro' Operation = 'operation' Seed = 'seed' - Documentation = 'documentation' + Documentation = 'docs' Source = 'source' RPCCall = 'rpc' diff --git a/core/dbt/parser/docs.py b/core/dbt/parser/docs.py index 840ecdb1a9f..5d22aa526d7 100644 --- a/core/dbt/parser/docs.py +++ b/core/dbt/parser/docs.py @@ -3,6 +3,8 @@ from dbt.parser.base import BaseParser from dbt.contracts.graph.unparsed import UnparsedDocumentationFile from dbt.contracts.graph.parsed import ParsedDocumentation +from dbt.clients.jinja import extract_toplevel_blocks, get_template +from dbt.clients import system import jinja2.runtime import os @@ -16,14 +18,12 @@ def load_file(cls, package_name, root_dir, relative_dirs): """ extension = "[!.#~]*.md" - file_matches = dbt.clients.system.find_matching( - root_dir, - relative_dirs, - extension) + file_matches = system.find_matching(root_dir, relative_dirs, extension) for file_match in file_matches: - file_contents = dbt.clients.system.load_file_contents( - file_match.get('absolute_path'), strip=False) + file_contents = system.load_file_contents( + file_match.get('absolute_path'), + strip=False) parts = dbt.utils.split_path(file_match.get('relative_path', '')) name, _ = os.path.splitext(parts[-1]) @@ -44,12 +44,26 @@ def load_file(cls, package_name, root_dir, relative_dirs): def parse(self, docfile): try: - template = dbt.clients.jinja.get_template(docfile.file_contents, - {}) - except dbt.exceptions.CompilationException as e: - e.node = docfile + blocks = extract_toplevel_blocks(docfile.file_contents) + except dbt.exceptions.CompilationException as exc: + if exc.node is None: + exc.node = docfile raise + for block in blocks: + if block.block_type_name != NodeType.Documentation: + continue + + try: + template = get_template(block.full_block, {}) + except dbt.exceptions.CompilationException as e: + e.node = docfile + raise + # in python 3.x this can just be "yield from" isntead of a loop + for d in self._parse_template_docs(template, docfile): + yield d + + def _parse_template_docs(self, template, docfile): for key, item in template.module.__dict__.items(): if type(item) != jinja2.runtime.Macro: continue diff --git a/test/unit/test_docs_blocks.py b/test/unit/test_docs_blocks.py index 104ae251af4..3da26948250 100644 --- a/test/unit/test_docs_blocks.py +++ b/test/unit/test_docs_blocks.py @@ -2,16 +2,12 @@ import mock import unittest -from dbt.config import RuntimeConfig from dbt.node_types import NodeType -import dbt.utils from dbt.parser import docs from dbt.contracts.graph.unparsed import UnparsedDocumentationFile from .utils import config_from_parts_or_dicts -#DocumentationParser - SNOWPLOW_SESSIONS_DOCS = r''' This table contains one record for every session recorded by 
Snowplow. @@ -98,7 +94,8 @@ def setUp(self): self.subdir_project_config = config_from_parts_or_dicts( project=subdir_project, profile=profile_data ) - @mock.patch('dbt.clients.system') + + @mock.patch('dbt.parser.docs.system') def test_load_file(self, system): system.load_file_contents.return_value = TEST_DOCUMENTATION_FILE system.find_matching.return_value = [{ diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py index 5bcab016666..a76b8f88d3b 100644 --- a/test/unit/test_jinja.py +++ b/test/unit/test_jinja.py @@ -271,6 +271,28 @@ def test_quoted_endblock_within_block(self): self.assertEqual(blocks[0].block_type_name, 'myblock') self.assertEqual(blocks[0].contents, '{% set x = ("{% endmyblock %}") %} ') + def test_docs_block(self): + body = '{% docs __my_doc__ %} asdf {# nope {% enddocs %}} #} {% enddocs %} {% docs __my_other_doc__ %} asdf "{% enddocs %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].block_type_name, 'docs') + self.assertEqual(blocks[0].contents, ' asdf {# nope {% enddocs %}} #} ') + self.assertEqual(blocks[0].block_name, '__my_doc__') + self.assertEqual(blocks[1].block_type_name, 'docs') + self.assertEqual(blocks[1].contents, ' asdf "') + self.assertEqual(blocks[1].block_name, '__my_other_doc__') + + def test_docs_block_expr(self): + body = '{% docs more_doc %} asdf {{ "{% enddocs %}" ~ "}}" }}{% enddocs %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].block_type_name, 'docs') + self.assertEqual(blocks[0].contents, ' asdf {{ "{% enddocs %}" ~ "}}" }}') + self.assertEqual(blocks[0].block_name, 'more_doc') + + bar_block = '''{% mytype bar %} {# a comment that inside it has From 1489393489dd96260652a5b47123c6b3a7abf34c Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 6 Jun 2019 10:59:29 -0600 Subject: [PATCH 26/31] documentation -> docs --- .../029_docs_generate_tests/test_docs_generate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 187d6304470..5271c654eaa 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -1282,7 +1282,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'original_file_path': docs_path, 'package_name': 'test', 'path': 'docs.md', - 'resource_type': 'documentation', + 'resource_type': 'docs', 'root_path': os.getcwd(), 'unique_id': 'test.ephemeral_summary' }, @@ -1293,7 +1293,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'original_file_path': docs_path, 'package_name': 'test', 'path': 'docs.md', - 'resource_type': 'documentation', + 'resource_type': 'docs', 'root_path': os.getcwd(), 'unique_id': 'test.source_info', }, @@ -1304,7 +1304,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'original_file_path': docs_path, 'package_name': 'test', 'path': 'docs.md', - 'resource_type': 'documentation', + 'resource_type': 'docs', 'root_path': os.getcwd(), 'unique_id': 'test.summary_count' }, @@ -1315,7 +1315,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'original_file_path': docs_path, 'package_name': 'test', 'path': 
'docs.md', - 'resource_type': 'documentation', + 'resource_type': 'docs', 'root_path': os.getcwd(), 'unique_id': 'test.summary_first_name' }, @@ -1326,7 +1326,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'original_file_path': docs_path, 'package_name': 'test', 'path': 'docs.md', - 'resource_type': 'documentation', + 'resource_type': 'docs', 'root_path': os.getcwd(), 'unique_id': 'test.table_info' }, @@ -1340,7 +1340,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'original_file_path': docs_path, 'package_name': 'test', 'path': 'docs.md', - 'resource_type': 'documentation', + 'resource_type': 'docs', 'root_path': os.getcwd(), 'unique_id': 'test.view_summary' }, From a4a9221d959b16e7cbd3ebee654dd7e5f98e623a Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 6 Jun 2019 12:04:53 -0600 Subject: [PATCH 27/31] add a missing test file --- test/integration/029_docs_generate_tests/models/readme.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 test/integration/029_docs_generate_tests/models/readme.md diff --git a/test/integration/029_docs_generate_tests/models/readme.md b/test/integration/029_docs_generate_tests/models/readme.md new file mode 100644 index 00000000000..d59a7f44724 --- /dev/null +++ b/test/integration/029_docs_generate_tests/models/readme.md @@ -0,0 +1 @@ +This is a readme.md file with {{ invalid-ish jinja }} in it From 03f50f560b84409d8aa3371ea7f3bc91dcafebf5 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 6 Jun 2019 14:35:22 -0600 Subject: [PATCH 28/31] PR feedback, fix "usage" line --- core/dbt/main.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/core/dbt/main.py b/core/dbt/main.py index 67592aa5e45..107ff234b8e 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -319,11 +319,18 @@ def _build_deps_subparser(subparsers, base_subparser): def _build_snapshot_subparser(subparsers, base_subparser, which='snapshot'): + if which == 'archive': + helpmsg = ( + 'DEPRECATED: This command is deprecated and will\n' + 'be removed in a future release. Use dbt snapshot instead.' + ) + else: + helpmsg = 'Execute snapshots defined in your project' + sub = subparsers.add_parser( which, parents=[base_subparser], - help="Record changes to a mutable table over time." - "\nMust be configured in your dbt_project.yml.") + help=helpmsg) sub.add_argument( '--threads', type=int, @@ -619,7 +626,7 @@ def _build_run_operation_subparser(subparsers, base_subparser): def parse_args(args): p = DBTArgumentParser( - prog='dbt: data build tool', + prog='dbt', formatter_class=argparse.RawTextHelpFormatter, description="An ELT tool for managing your SQL " "transformations and data models." 
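
The main.py change above reuses one subparser builder for both the new snapshot command and its deprecated archive alias, varying only the help text on the `which` argument. A minimal standalone argparse sketch of that pattern follows; the prog name, the shortened help strings, and the set_defaults wiring are illustrative assumptions rather than dbt's actual CLI module.

    import argparse


    def build_snapshot_subparser(subparsers, base_subparser, which='snapshot'):
        # The deprecated alias keeps parsing, but its help text points users at
        # the replacement command.
        if which == 'archive':
            helpmsg = 'DEPRECATED: use "dbt snapshot" instead.'
        else:
            helpmsg = 'Execute snapshots defined in your project'

        sub = subparsers.add_parser(which, parents=[base_subparser], help=helpmsg)
        sub.add_argument('--threads', type=int, required=False)
        sub.set_defaults(which=which)
        return sub


    base = argparse.ArgumentParser(add_help=False)
    parser = argparse.ArgumentParser(prog='dbt-sketch',
                                     formatter_class=argparse.RawTextHelpFormatter)
    subparsers = parser.add_subparsers(dest='command')
    build_snapshot_subparser(subparsers, base)             # canonical command
    build_snapshot_subparser(subparsers, base, 'archive')  # deprecated alias

    args = parser.parse_args(['archive', '--threads', '4'])
    print(args.which, args.threads)  # -> archive 4

Both spellings parse identically, so existing archive invocations keep working while the help output steers users toward snapshot.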
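The docs block fixes a few patches back (handle_expr plus the quote handling in handle_block) come down to one rule: while scanning for a closing tag, quoted strings must be consumed whole, otherwise a literal "{% enddocs %}" inside an expression ends the block early. Below is a stripped-down sketch of that rule using the body from the new test_docs_block_expr case; the find_enddocs helper is invented for illustration and handles far less than the real _jinja_blocks.py tokenizer (no comments, raw blocks, nested blocks, or escaped quotes).

    import re

    BODY = '{% docs more_doc %} asdf {{ "{% enddocs %}" ~ "}}" }}{% enddocs %}'

    # Naive scan: the first {% enddocs %} it finds sits inside a string, so the
    # captured block contents are cut short.
    naive = re.search(r'\{%\s*docs\s+(\w+)\s*%\}(.*?)\{%\s*enddocs\s*%\}', BODY, re.S)
    print(repr(naive.group(2)))  # ' asdf {{ "'


    def find_enddocs(text, pos):
        # String-aware scan: quoted strings match as single tokens, so the end
        # tag embedded in the expression is skipped and only the real one hits.
        token = re.compile(r'''\{%\s*enddocs\s*%\}|"[^"]*"|'[^']*'|\s+|\S''')
        while pos < len(text):
            match = token.match(text, pos)
            if match.group(0).startswith('{%'):
                return match.start()
            pos = match.end()
        raise ValueError('reached EOF, expected {% enddocs %}')


    opening = re.match(r'\{%\s*docs\s+(\w+)\s*%\}', BODY)
    end = find_enddocs(BODY, opening.end())
    print(repr(BODY[opening.end():end]))  # ' asdf {{ "{% enddocs %}" ~ "}}" }}'

The same idea drives handle_expr: once the scanner is inside {{ ... }}, only string literals can hide a premature closing }} before the real one.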
From eb12ef1dcd50aa5f58e0727831650a757e86488f Mon Sep 17 00:00:00 2001 From: "tom.bescherer" Date: Thu, 6 Jun 2019 16:40:32 -0400 Subject: [PATCH 29/31] move target_model vars inside loop to avoid reuse on subsequent refs --- core/dbt/parser/util.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/core/dbt/parser/util.py b/core/dbt/parser/util.py index e05c13f3f7d..2d9b2c146b4 100644 --- a/core/dbt/parser/util.py +++ b/core/dbt/parser/util.py @@ -173,11 +173,10 @@ def process_docs(cls, manifest, current_project): @classmethod def process_refs_for_node(cls, manifest, current_project, node): """Given a manifest and a node in that manifest, process its refs""" - target_model = None - target_model_name = None - target_model_package = None - for ref in node.refs: + target_model = None + target_model_name = None + target_model_package = None if len(ref) == 1: target_model_name = ref[0] elif len(ref) == 2: From ca31b79cc007a264d54b915a78f14879d0652825 Mon Sep 17 00:00:00 2001 From: "tom.bescherer" Date: Fri, 7 Jun 2019 11:57:19 -0400 Subject: [PATCH 30/31] add a newline for readability --- core/dbt/parser/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core/dbt/parser/util.py b/core/dbt/parser/util.py index 2d9b2c146b4..a17a7f87bd9 100644 --- a/core/dbt/parser/util.py +++ b/core/dbt/parser/util.py @@ -177,6 +177,7 @@ def process_refs_for_node(cls, manifest, current_project, node): target_model = None target_model_name = None target_model_package = None + if len(ref) == 1: target_model_name = ref[0] elif len(ref) == 2: From 0ca602612471ea3b1fa8cdf6fad7d9805a5575c6 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 10 Jun 2019 07:13:46 -0600 Subject: [PATCH 31/31] increase tracking timeout 2s -> 5s --- core/dbt/tracking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/tracking.py b/core/dbt/tracking.py index c9524bf857b..c1376bd70f6 100644 --- a/core/dbt/tracking.py +++ b/core/dbt/tracking.py @@ -44,7 +44,7 @@ def handle_failure(num_ok, unsent): def http_get(self, payload): sp_logger.info("Sending GET request to %s..." % self.endpoint) sp_logger.debug("Payload: %s" % payload) - r = requests.get(self.endpoint, params=payload, timeout=2.0) + r = requests.get(self.endpoint, params=payload, timeout=5.0) msg = "GET request finished with status code: " + str(r.status_code) if self.is_good_status_code(r.status_code):
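
The core/dbt/parser/util.py change above (patches 29 and 30) fixes a classic loop-state bug: with target_model_package initialized once outside the loop, a two-part ref leaves its package behind and a later one-part ref silently resolves against that stale package. A minimal reproduction of the before and after behaviour follows; the resolve_refs_* helpers and the example ref names are illustrative only, not dbt's actual resolution code.

    def resolve_refs_buggy(refs):
        # Before the fix: the per-ref variables are initialized once, so state
        # from one ref leaks into the next.
        resolved = []
        target_model_package = None
        target_model_name = None
        for ref in refs:
            if len(ref) == 1:
                target_model_name = ref[0]
            elif len(ref) == 2:
                target_model_package, target_model_name = ref
            resolved.append((target_model_package, target_model_name))
        return resolved


    def resolve_refs_fixed(refs):
        # After the fix: the variables are reset on every iteration.
        resolved = []
        for ref in refs:
            target_model_package = None
            target_model_name = None
            if len(ref) == 1:
                target_model_name = ref[0]
            elif len(ref) == 2:
                target_model_package, target_model_name = ref
            resolved.append((target_model_package, target_model_name))
        return resolved


    refs = [['snowplow', 'sessions'], ['orders']]
    print(resolve_refs_buggy(refs))  # [('snowplow', 'sessions'), ('snowplow', 'orders')]
    print(resolve_refs_fixed(refs))  # [('snowplow', 'sessions'), (None, 'orders')]

In the buggy version a plain ref('orders') would be looked up in the snowplow package simply because a packaged ref happened to appear earlier in the same node.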