From acd978e054e5d0e432f4881b404f81a405b55e55 Mon Sep 17 00:00:00 2001
From: Jacob Beck
Date: Wed, 29 Apr 2020 10:20:08 -0600
Subject: [PATCH 1/3] Column and table comments

- add some table comment framework stuff
- have redshift/postgres catalogs include table comments
- have redshift/postgres add comments to columns/tables/views
- push some bigquery-specific formatting into bigquery
- add tests for table comments
---
 CHANGELOG.md                                  |   1 +
 .../global_project/macros/adapters/common.sql |  39 ++++++
 .../macros/etc/get_relation_comment.sql       |  22 +++-
 .../materializations/snapshot/snapshot.sql    |  22 ++--
 .../dbt/include/bigquery/macros/adapters.sql  |  11 +-
 .../dbt/include/postgres/macros/adapters.sql  |  41 ++++++
 .../dbt/include/postgres/macros/catalog.sql   |   6 +-
 .../dbt/include/redshift/macros/adapters.sql  |  26 +++-
 .../dbt/include/redshift/macros/catalog.sql   |  59 ++++-----
 .../test_docs_generate.py                     |   8 +-
 .../models/my_fun_docs.md                     |  10 ++
 .../060_persist_docs_tests/models/schema.yml  |  45 +++++++
 .../models/table_model.sql                    |   2 +
 .../models/view_model.sql                     |   2 +
 .../test_persist_docs.py                      | 124 ++++++++++++++++++
 15 files changed, 361 insertions(+), 57 deletions(-)
 create mode 100644 test/integration/060_persist_docs_tests/models/my_fun_docs.md
 create mode 100644 test/integration/060_persist_docs_tests/models/schema.yml
 create mode 100644 test/integration/060_persist_docs_tests/models/table_model.sql
 create mode 100644 test/integration/060_persist_docs_tests/models/view_model.sql
 create mode 100644 test/integration/060_persist_docs_tests/test_persist_docs.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9515b19a65..f0c5506b4ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@
 - Sources (and therefore freshness tests) can be enabled and disabled via dbt_project.yml ([#2283](https://github.com/fishtown-analytics/dbt/issues/2283), [#2312](https://github.com/fishtown-analytics/dbt/pull/2312), [#2357](https://github.com/fishtown-analytics/dbt/pull/2357))
 - schema.yml files are now fully rendered in a context that is aware of vars declared in from dbt_project.yml files ([#2269](https://github.com/fishtown-analytics/dbt/issues/2269), [#2357](https://github.com/fishtown-analytics/dbt/pull/2357))
 - Sources from dependencies can be overridden in schema.yml files ([#2287](https://github.com/fishtown-analytics/dbt/issues/2287), [#2357](https://github.com/fishtown-analytics/dbt/pull/2357))
+- Implement persist_docs for both `relation` and `columns` on postgres and redshift, and extract them when getting the catalog. ([#2333](https://github.com/fishtown-analytics/dbt/issues/2333), [#2378](https://github.com/fishtown-analytics/dbt/pull/2378))

 ### Fixes
 - When a jinja value is undefined, give a helpful error instead of failing with cryptic "cannot pickle ParserMacroCapture" errors ([#2110](https://github.com/fishtown-analytics/dbt/issues/2110), [#2184](https://github.com/fishtown-analytics/dbt/pull/2184))
diff --git a/core/dbt/include/global_project/macros/adapters/common.sql b/core/dbt/include/global_project/macros/adapters/common.sql
index 97a17599939..ea347b63f3b 100644
--- a/core/dbt/include/global_project/macros/adapters/common.sql
+++ b/core/dbt/include/global_project/macros/adapters/common.sql
@@ -78,6 +78,7 @@
   as (
     {{ sql }}
   );
+
 {% endmacro %}

 {% macro create_view_as(relation, sql) -%}
@@ -280,3 +281,41 @@
 {% macro set_sql_header(config) -%}
   {{ config.set('sql_header', caller()) }}
 {%- endmacro %}
+
+
+{%- macro set_relation_comment(relation) -%}
+  {%- set raw_persist_docs = config.get('persist_docs', {}) -%}
+  {%- set comment = get_relation_comment(raw_persist_docs, model) -%}
+  {%- if comment is not none -%}
+    {{ alter_relation_comment(relation, comment) }}
+  {%- endif -%}
+{%- endmacro -%}
+
+
+{%- macro set_column_comments(relation) -%}
+  {%- set raw_persist_docs = config.get('persist_docs', {}) -%}
+  {%- set column_dict = get_relation_column_comments(raw_persist_docs, model) -%}
+  {%- if column_dict is not none -%}
+    {{ alter_column_comment(relation, column_dict) }}
+  {%- endif -%}
+{%- endmacro -%}
+
+
+{# copy+pasted from the snowflake PR - delete these 4 on merge #}
+{% macro alter_column_comment(relation, column_dict) -%}
+  {{ return(adapter_macro('alter_column_comment', relation, column_dict)) }}
+{% endmacro %}
+
+{% macro default__alter_column_comment(relation, column_dict) -%}
+  {{ exceptions.raise_not_implemented(
+    'alter_column_comment macro not implemented for adapter '+adapter.type()) }}
+{% endmacro %}
+
+{% macro alter_relation_comment(relation, relation_comment) -%}
+  {{ return(adapter_macro('alter_relation_comment', relation, relation_comment)) }}
+{% endmacro %}
+
+{% macro default__alter_relation_comment(relation, relation_comment) -%}
+  {{ exceptions.raise_not_implemented(
+    'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}
+{% endmacro %}
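
NOTE (editorial, not part of the patch): the two new entry points above only do anything when the model's persist_docs config asks for it; they delegate the "should we persist anything?" decision to get_relation_comment / get_relation_column_comments in the next file. A minimal Python sketch of that decision logic, assuming a model dict shaped like dbt's model context variable (the function names here are illustrative, not dbt APIs):

    # Sketch of the decisions made by set_relation_comment / set_column_comments.
    def relation_comment(persist_docs, model):
        if not isinstance(persist_docs, dict):
            raise ValueError(f"Invalid value provided for 'persist_docs': {persist_docs!r}")
        # Only persist the relation-level comment when 'relation': True is configured.
        return model.get('description') if persist_docs.get('relation', False) else None

    def column_comments(persist_docs, model):
        if not isinstance(persist_docs, dict):
            raise ValueError(f"Invalid value provided for 'persist_docs': {persist_docs!r}")
        columns = model.get('columns') or {}
        # Only persist column comments when 'columns': True is configured and columns exist.
        return columns if persist_docs.get('columns', False) and columns else None

    model = {'description': 'my table', 'columns': {'id': {'description': 'the id'}}}
    print(relation_comment({'relation': True, 'columns': True}, model))  # 'my table'
    print(column_comments({}, model))                                    # None

When the result is not none, the jinja macros dispatch through adapter_macro to the adapter-specific alter_relation_comment / alter_column_comment implementations added later in this patch.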
diff --git a/core/dbt/include/global_project/macros/etc/get_relation_comment.sql b/core/dbt/include/global_project/macros/etc/get_relation_comment.sql
index 2c9baa30782..6f20e85f46f 100644
--- a/core/dbt/include/global_project/macros/etc/get_relation_comment.sql
+++ b/core/dbt/include/global_project/macros/etc/get_relation_comment.sql
@@ -1,8 +1,3 @@
-{% macro table_options() %}
-  {%- set raw_persist_docs = config.get('persist_docs', {}) -%}
-
-{%- endmacro -%}
-
 {% macro get_relation_comment(persist_docs, model) %}

   {%- if persist_docs is not mapping -%}
@@ -10,7 +5,22 @@
   {% endif %}

   {% if persist_docs.get('relation', false) %}
-    {{ return((model.description | tojson)[1:-1]) }}
+    {{ return(model.description) }}
+  {%- else -%}
+    {{ return(none) }}
+  {% endif %}
+
+{% endmacro %}
+
+
+{# copy+pasted from the snowflake PR - delete this on merge #}
+{% macro get_relation_column_comments(persist_docs, model) %}
+  {%- if persist_docs is not mapping -%}
+    {{ exceptions.raise_compiler_error("Invalid value provided for 'persist_docs'. Expected dict but got value: " ~ persist_docs) }}
+  {% endif %}
+
+  {% if persist_docs.get('columns', false) and model.columns|length != 0 %}
+    {{ return(model.columns) }}
   {%- else -%}
     {{ return(none) }}
   {% endif %}
diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql
index a0166931f34..d43388ff5f2 100644
--- a/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql
+++ b/core/dbt/include/global_project/macros/materializations/snapshot/snapshot.sql
@@ -210,9 +210,7 @@
   {% if not target_relation_exists %}

       {% set build_sql = build_snapshot_table(strategy, model['injected_sql']) %}
-      {% call statement('main') -%}
-          {{ create_table_as(False, target_relation, build_sql) }}
-      {% endcall %}
+      {% set final_sql = create_table_as(False, target_relation, build_sql) %}

   {% else %}

@@ -245,17 +243,19 @@
       {% do quoted_source_columns.append(adapter.quote(column.name)) %}
     {% endfor %}

-      {% call statement('main') %}
-          {{ snapshot_merge_sql(
-              target = target_relation,
-              source = staging_table,
-              insert_cols = quoted_source_columns
-             )
-          }}
-      {% endcall %}
+      {% set final_sql = snapshot_merge_sql(
+              target = target_relation,
+              source = staging_table,
+              insert_cols = quoted_source_columns
+           )
+      %}

   {% endif %}

+  {% call statement('main') %}
+      {{ final_sql }}
+  {% endcall %}
+
   {{ run_hooks(post_hooks, inside_transaction=True) }}

   {{ adapter.commit() }}
diff --git a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql
index a318257e74b..78549c4d086 100644
--- a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql
+++ b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql
@@ -27,12 +27,21 @@
 {%- endmacro -%}

+
+{%- macro bigquery_escape_comment(comment) -%}
+  {%- if comment is not string -%}
+    {%- do exceptions.raise_compiler_error('cannot escape a non-string: ' ~ comment) -%}
+  {%- endif -%}
+  {%- do return((comment | tojson)[1:-1]) -%}
+{%- endmacro -%}
+
+
 {% macro bigquery_table_options(persist_docs, temporary, kms_key_name, labels) %}
   {% set opts = {} -%}

   {%- set description = get_relation_comment(persist_docs, model) -%}
   {%- if description is not none -%}
-    {%- do opts.update({'description': "'" ~ description ~ "'"}) -%}
+    {%- do opts.update({'description': "'" ~ bigquery_escape_comment(description) ~ "'"}) -%}
   {%- endif -%}

   {%- if temporary -%}
     {% do opts.update({'expiration_timestamp': 'TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 12 hour)'}) %}
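
NOTE (editorial, not part of the patch): the (comment | tojson)[1:-1] trick above is the BigQuery-specific formatting being pushed down from the shared macro. It JSON-escapes the description (newlines, backslashes, double quotes) and then strips the surrounding double quotes that tojson adds, so a multi-line description survives being embedded in the quoted description option. A rough plain-Python equivalent of the same escaping, for illustration only:

    import json

    description = 'Line one\nLine two with "double quotes"'
    # json.dumps escapes the string and wraps it in double quotes; [1:-1] drops the quotes.
    escaped = json.dumps(description)[1:-1]
    print(escaped)                              # Line one\nLine two with \"double quotes\"
    print("description = '" + escaped + "'")   # roughly what lands in the table options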
diff --git a/plugins/postgres/dbt/include/postgres/macros/adapters.sql b/plugins/postgres/dbt/include/postgres/macros/adapters.sql
index c95cb4b0b2c..f9be5d302aa 100644
--- a/plugins/postgres/dbt/include/postgres/macros/adapters.sql
+++ b/plugins/postgres/dbt/include/postgres/macros/adapters.sql
@@ -12,8 +12,18 @@
   as (
     {{ sql }}
   );
+
+  {{ set_relation_comment(relation) }}
+  {{ set_column_comments(relation) }}
 {%- endmacro %}

+
+{% macro postgres__create_view_as(relation, sql) %}
+  {{ default__create_view_as(relation, sql) }}
+  {{ set_relation_comment(relation) }}
+  {{ set_column_comments(relation) }}
+{% endmacro %}
+
 {% macro postgres__create_schema(database_name, schema_name) -%}
   {% if database_name -%}
     {{ adapter.verify_database(database_name) }}
@@ -127,3 +137,34 @@
     }))
   -%}
 {% endmacro %}
+
+{#
+  By using dollar-quoting like this, users can embed anything they want into their comments
+  (including nested dollar-quoting), as long as they don't use this exact dollar-quoting
+  label. It would be nice to just pick a new one but eventually you do have to give up.
+#}
+{% macro postgres_escape_comment(comment) -%}
+  {% if comment is not string %}
+    {% do exceptions.raise_compiler_error('cannot escape a non-string: ' ~ comment) %}
+  {% endif %}
+  {%- set magic = '$dbt_comment_literal_block$' -%}
+  {%- if magic in comment -%}
+    {%- do exceptions.raise_compiler_error('The string ' ~ magic ~ ' is not allowed in comments.') -%}
+  {%- endif -%}
+  {{ magic }}{{ comment }}{{ magic }}
+{%- endmacro %}
+
+
+{% macro postgres__alter_relation_comment(relation, comment) %}
+  {% set escaped_comment = postgres_escape_comment(comment) %}
+  comment on {{ relation.type }} {{ relation }} is {{ escaped_comment }};
+{% endmacro %}
+
+
+{% macro postgres__alter_column_comment(relation, column_dict) %}
+  {% for column_name in column_dict %}
+    {% set comment = column_dict[column_name]['description'] %}
+    {% set escaped_comment = postgres_escape_comment(comment) %}
+    comment on column {{ relation }}.{{ column_name }} is {{ escaped_comment }};
+  {% endfor %}
+{% endmacro %}
diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql
index 377fdbd9ae9..505af99f793 100644
--- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql
+++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql
@@ -17,16 +17,18 @@
             when 'v' then 'VIEW'
             else 'BASE TABLE'
         end as table_type,
-        null::text as table_comment,
+        tbl_desc.description as table_comment,
         col.attname as column_name,
         col.attnum as column_index,
         pg_catalog.format_type(col.atttypid, col.atttypmod) as column_type,
-        null::text as column_comment,
+        col_desc.description as column_comment,
         pg_get_userbyid(tbl.relowner) as table_owner

     from pg_catalog.pg_namespace sch
     join pg_catalog.pg_class tbl on tbl.relnamespace = sch.oid
     join pg_catalog.pg_attribute col on col.attrelid = tbl.oid
+    left outer join pg_catalog.pg_description tbl_desc on (tbl_desc.objoid = tbl.oid and tbl_desc.objsubid = 0)
+    left outer join pg_catalog.pg_description col_desc on (col_desc.objoid = tbl.oid and col_desc.objsubid = col.attnum)

     where (
        {%- for schema in schemas -%}
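
NOTE (editorial, not part of the patch): the dollar-quoting in postgres_escape_comment above means the comment body needs no per-character escaping at all; the only forbidden substring is the quoting label itself. A rough Python equivalent of the macro plus the statement it feeds, shown purely as an illustration (the table name below is made up):

    MAGIC = '$dbt_comment_literal_block$'

    def escape_comment(comment: str) -> str:
        if not isinstance(comment, str):
            raise TypeError(f'cannot escape a non-string: {comment!r}')
        if MAGIC in comment:
            raise ValueError(f'The string {MAGIC} is not allowed in comments.')
        # Dollar-quoting: postgres treats everything between the two labels literally.
        return f'{MAGIC}{comment}{MAGIC}'

    comment = "has 'quotes', $$ dollar quotes $$ and -- comment markers\nacross lines"
    print(f'comment on table "analytics"."table_model" is {escape_comment(comment)};')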
diff --git a/plugins/redshift/dbt/include/redshift/macros/adapters.sql b/plugins/redshift/dbt/include/redshift/macros/adapters.sql
index eef21c7e46c..9342a733832 100644
--- a/plugins/redshift/dbt/include/redshift/macros/adapters.sql
+++ b/plugins/redshift/dbt/include/redshift/macros/adapters.sql
@@ -49,12 +49,16 @@
   as (
     {{ sql }}
   );
+
+  {{ set_relation_comment(relation) }}
+  {{ set_column_comments(relation) }}
 {%- endmacro %}


 {% macro redshift__create_view_as(relation, sql) -%}
+  {%- set binding = config.get('bind', default=True) -%}

-  {% set bind_qualifier = '' if config.get('bind', default=True) else 'with no schema binding' %}
+  {% set bind_qualifier = '' if binding else 'with no schema binding' %}
   {%- set sql_header = config.get('sql_header', none) -%}

   {{ sql_header if sql_header is not none }}
@@ -62,6 +66,15 @@
   create view {{ relation }} as (
     {{ sql }}
   ) {{ bind_qualifier }};
+
+  {#
+    For late-binding views, it's possible to set comments on the view (though they don't seem to end up anywhere).
+    Unfortunately, setting comments on columns just results in an error.
+  #}
+  {{ set_relation_comment(relation) }}
+  {% if binding %}
+    {{ set_column_comments(relation) }}
+  {% endif %}
 {% endmacro %}

@@ -188,3 +201,14 @@
 {% macro redshift__make_temp_relation(base_relation, suffix) %}
     {% do return(postgres__make_temp_relation(base_relation, suffix)) %}
 {% endmacro %}
+
+
+{% macro redshift__alter_relation_comment(relation, comment) %}
+  {% do return(postgres__alter_relation_comment(relation, comment)) %}
+{% endmacro %}
+
+
+{% macro redshift__alter_column_comment(relation, column_dict) %}
+  {% do return(postgres__alter_column_comment(relation, column_dict)) %}
+{% endmacro %}
+
diff --git a/plugins/redshift/dbt/include/redshift/macros/catalog.sql b/plugins/redshift/dbt/include/redshift/macros/catalog.sql
index 83935f67a35..2eadc87d67c 100644
--- a/plugins/redshift/dbt/include/redshift/macros/catalog.sql
+++ b/plugins/redshift/dbt/include/redshift/macros/catalog.sql
@@ -23,6 +23,32 @@
         order by "column_index"
     ),

+    early_binding as (
+        select
+            '{{ database }}'::varchar as table_database,
+            sch.nspname as table_schema,
+            tbl.relname as table_name,
+            case tbl.relkind
+                when 'v' then 'VIEW'
+                else 'BASE TABLE'
+            end as table_type,
+            tbl_desc.description as table_comment,
+            col.attname as column_name,
+            col.attnum as column_index,
+            pg_catalog.format_type(col.atttypid, col.atttypmod) as column_type,
+            col_desc.description as column_comment
+
+        from pg_catalog.pg_namespace sch
+        join pg_catalog.pg_class tbl on tbl.relnamespace = sch.oid
+        join pg_catalog.pg_attribute col on col.attrelid = tbl.oid
+        left outer join pg_catalog.pg_description tbl_desc on (tbl_desc.objoid = tbl.oid and tbl_desc.objsubid = 0)
+        left outer join pg_catalog.pg_description col_desc on (col_desc.objoid = tbl.oid and col_desc.objsubid = col.attnum)
+        where upper(sch.nspname) = upper('{{ schema }}')
+          and tbl.relkind in ('r', 'v', 'f', 'p')
+          and col.attnum > 0
+          and not col.attisdropped
+    ),
+
     table_owners as (

         select
@@ -45,41 +71,10 @@

     ),

-    tables as (
-
-        select
-            table_catalog as table_database,
-            table_schema,
-            table_name,
-            table_type
-
-        from information_schema.tables
-
-    ),
-
-    table_columns as (
-
-        select
-            '{{ database }}'::varchar as table_database,
-            table_schema,
-            table_name,
-            null::varchar as table_comment,
-
-            column_name,
-            ordinal_position as column_index,
-            data_type as column_type,
-            null::varchar as column_comment
-
-
-        from information_schema."columns"
-
-    ),
-
     unioned as (

         select *
-        from tables
-        join table_columns using (table_database, table_schema, table_name)
+        from early_binding

         union all

diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py
index b6d3bfe1ce9..2b2506ed1c8 100644
--- a/test/integration/029_docs_generate_tests/test_docs_generate.py
+++ b/test/integration/029_docs_generate_tests/test_docs_generate.py
@@ -708,7 +708,7 @@ def expected_bigquery_complex_catalog(self):
     def expected_redshift_catalog(self):
         return self._expected_catalog(
             id_type='integer',
-            text_type='character varying',
+            text_type=AnyStringWith('character varying'),
             time_type='timestamp without time zone',
             view_type='VIEW',
             table_type='BASE TABLE',
@@ -787,19 +787,19 @@ def expected_redshift_incremental_catalog(self):
                 'first_name': {
                     'name': 'first_name',
                     'index': 2,
-                    'type': 'character varying',
+                    'type': 'character varying(5)',
                     'comment': None,
                 },
                 'email': {
                     'name': 'email',
                     'index': 3,
-                    'type': 'character varying',
+                    'type': 'character varying(23)',
                     'comment': None,
                 },
                 'ip_address': {
                     'name': 'ip_address',
                     'index': 4,
-                    'type': 'character varying',
+                    'type': 'character varying(14)',
                     'comment': None,
                 },
                 'updated_at': {
diff --git a/test/integration/060_persist_docs_tests/models/my_fun_docs.md b/test/integration/060_persist_docs_tests/models/my_fun_docs.md
new file mode 100644
index 00000000000..f3c0fbf55ec
--- /dev/null
+++ b/test/integration/060_persist_docs_tests/models/my_fun_docs.md
@@ -0,0 +1,10 @@
+{% docs my_fun_doc %}
+name Column description "with double quotes"
+and with 'single quotes' as welll as other;
+'''abc123'''
+reserved -- characters
+--
+/* comment */
+Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+
+{% enddocs %}
diff --git a/test/integration/060_persist_docs_tests/models/schema.yml b/test/integration/060_persist_docs_tests/models/schema.yml
new file mode 100644
index 00000000000..74dfa0a3378
--- /dev/null
+++ b/test/integration/060_persist_docs_tests/models/schema.yml
@@ -0,0 +1,45 @@
+version: 2
+
+models:
+  - name: table_model
+    description: |
+      Table model description "with double quotes"
+      and with 'single quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+    columns:
+      - name: id
+        description: |
+          id Column description "with double quotes"
+          and with 'single quotes' as welll as other;
+          '''abc123'''
+          reserved -- characters
+          --
+          /* comment */
+          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+      - name: name
+        description: |
+          Some stuff here and then a call to
+          {{ doc('my_fun_doc')}}
+  - name: view_model
+    description: |
+      View model description "with double quotes"
+      and with 'single quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+    columns:
+      - name: id
+        description: |
+          id Column description "with double quotes"
+          and with 'single quotes' as welll as other;
+          '''abc123'''
+          reserved -- characters
+          --
+          /* comment */
+          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
diff --git a/test/integration/060_persist_docs_tests/models/table_model.sql b/test/integration/060_persist_docs_tests/models/table_model.sql
new file mode 100644
index 00000000000..c0e93c3f307
--- /dev/null
+++ b/test/integration/060_persist_docs_tests/models/table_model.sql
@@ -0,0 +1,2 @@
+{{ config(materialized='table') }}
+select 1 as id, 'Joe' as name
diff --git a/test/integration/060_persist_docs_tests/models/view_model.sql b/test/integration/060_persist_docs_tests/models/view_model.sql
new file mode 100644
index 00000000000..a6f96a16d5d
--- /dev/null
+++ b/test/integration/060_persist_docs_tests/models/view_model.sql
@@ -0,0 +1,2 @@
+{{ config(materialized='view') }}
+select 2 as id, 'Bob' as name
diff --git a/test/integration/060_persist_docs_tests/test_persist_docs.py b/test/integration/060_persist_docs_tests/test_persist_docs.py
new file mode 100644
index 00000000000..f5890741c5f
--- /dev/null
+++ b/test/integration/060_persist_docs_tests/test_persist_docs.py
@@ -0,0 +1,124 @@
+from test.integration.base import DBTIntegrationTest, use_profile
+
+import json
+
+
+class BasePersistDocsTest(DBTIntegrationTest):
+    @property
+    def schema(self):
+        return "persist_docs_060"
+
+    @property
+    def models(self):
+        return "models"
+
+    def _assert_common_comments(self, *comments):
+        for comment in comments:
+            assert '"with double quotes"' in comment
+            assert """'''abc123'''""" in comment
+            assert '\n' in comment
+            assert 'Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting' in comment
+            assert '/* comment */' in comment
+            assert '--\n' in comment
+
+    def _assert_has_table_comments(self, table_node):
+        table_comment = table_node['metadata']['comment']
+        assert table_comment.startswith('Table model description')
+
+        table_id_comment = table_node['columns']['id']['comment']
+        assert table_id_comment.startswith('id Column description')
+
+        table_name_comment = table_node['columns']['name']['comment']
+        assert table_name_comment.startswith('Some stuff here and then a call to')
+
+        self._assert_common_comments(
+            table_comment, table_id_comment, table_name_comment
+        )
+
+    def _assert_has_view_comments(self, view_node, has_node_comments=True, has_column_comments=True):
+        view_comment = view_node['metadata']['comment']
+        if has_node_comments:
+            assert view_comment.startswith('View model description')
+            self._assert_common_comments(view_comment)
+        else:
+            assert view_comment is None
+
+        view_id_comment = view_node['columns']['id']['comment']
+        if has_column_comments:
+            assert view_id_comment.startswith('id Column description')
+            self._assert_common_comments(view_id_comment)
+        else:
+            assert view_id_comment is None
+
+        view_name_comment = view_node['columns']['name']['comment']
+        assert view_name_comment is None
+
+
+class TestPersistDocs(BasePersistDocsTest):
+    @property
+    def project_config(self):
+        return {
+            'config-version': 2,
+            'models': {
+                'test': {
+                    '+persist_docs': {
+                        "relation": True,
+                        "columns": True,
+                    },
+                }
+            }
+        }
+
+    def run_has_comments_pglike(self):
+        self.run_dbt()
+        self.run_dbt(['docs', 'generate'])
+        with open('target/catalog.json') as fp:
+            catalog_data = json.load(fp)
+        assert 'nodes' in catalog_data
+        assert len(catalog_data['nodes']) == 2
+        table_node = catalog_data['nodes']['model.test.table_model']
+        self._assert_has_table_comments(table_node)
+
+        view_node = catalog_data['nodes']['model.test.view_model']
+        self._assert_has_view_comments(view_node)
+
+    @use_profile('postgres')
+    def test_postgres_comments(self):
+        self.run_has_comments_pglike()
+
+    @use_profile('redshift')
+    def test_redshift_comments(self):
+        self.run_has_comments_pglike()
+
+
+class TestPersistDocsLateBinding(BasePersistDocsTest):
+    @property
+    def project_config(self):
+        return {
+            'config-version': 2,
+            'models': {
+                'test': {
+                    '+persist_docs': {
+                        "relation": True,
+                        "columns": True,
+                    },
+                    'view_model': {
+                        'bind': False,
+                    }
+                }
+            }
+        }
+
+    @use_profile('redshift')
+    def test_redshift_late_binding_view(self):
+        self.run_dbt()
+        self.run_dbt(['docs', 'generate'])
+        with open('target/catalog.json') as fp:
+            catalog_data = json.load(fp)
+        assert 'nodes' in catalog_data
+        assert len(catalog_data['nodes']) == 2
+        table_node = catalog_data['nodes']['model.test.table_model']
+        self._assert_has_table_comments(table_node)
+
+        view_node = catalog_data['nodes']['model.test.view_model']
+        self._assert_has_view_comments(view_node, False, False)
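
NOTE (editorial, not part of the patch): the tests above work end to end: the comments written by the adapter macros are read back by the new catalog queries and land in target/catalog.json, which is what the assertions inspect. A quick way to eyeball the same data locally, assuming a project where `dbt docs generate` has already been run:

    import json

    with open('target/catalog.json') as fp:
        catalog = json.load(fp)

    for unique_id, node in catalog['nodes'].items():
        # Relation-level comment persisted when persist_docs has 'relation': True
        print(unique_id, '->', node['metadata']['comment'])
        for name, col in node['columns'].items():
            # Column-level comments persisted when persist_docs has 'columns': True
            print('   ', name, '->', col['comment'])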
From d17e706351e8d0e7944d4e39104baa23a0c9b02a Mon Sep 17 00:00:00 2001
From: Jacob Beck
Date: Thu, 30 Apr 2020 08:39:51 -0600
Subject: [PATCH 2/3] improve parsing errors, include line:char instead of absolute character index

---
 core/dbt/clients/_jinja_blocks.py | 26 +++++++++++++++++++++++---
 core/dbt/parser/macros.py         | 24 ++++++++++++++----------
 test/unit/test_jinja.py           | 10 ++++++++--
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/core/dbt/clients/_jinja_blocks.py b/core/dbt/clients/_jinja_blocks.py
index 16cb459a6c1..8a5a1dae948 100644
--- a/core/dbt/clients/_jinja_blocks.py
+++ b/core/dbt/clients/_jinja_blocks.py
@@ -92,6 +92,18 @@ def __init__(self, data):
         self._parenthesis_stack = []
         self.pos = 0

+    def linepos(self, end=None) -> str:
+        """Given an absolute position in the input data, return a pair of
+        line number + relative position to the start of the line.
+        """
+        end_val: int = self.pos if end is None else end
+        data = self.data[:end_val]
+        # if not found, rfind returns -1, and -1+1=0, which is perfect!
+        last_line_start = data.rfind('\n') + 1
+        # it's easy to forget this, but line numbers are 1-indexed
+        line_number = data.count('\n') + 1
+        return f'{line_number}:{end_val - last_line_start}'
+
     def advance(self, new_position):
         self.pos = new_position

@@ -320,20 +332,28 @@ def find_blocks(self, allowed_blocks=None, collect_raw_data=True):
                     dbt.exceptions.raise_compiler_error((
                         'Got an unexpected control flow end tag, got {} but '
                         'never saw a preceeding {} (@ {})'
-                    ).format(tag.block_type_name, expected, tag.start))
+                    ).format(
+                        tag.block_type_name,
+                        expected,
+                        self.tag_parser.linepos(tag.start)
+                    ))
                 expected = _CONTROL_FLOW_TAGS[found]
                 if expected != tag.block_type_name:
                     dbt.exceptions.raise_compiler_error((
                         'Got an unexpected control flow end tag, got {} but '
                         'expected {} next (@ {})'
-                    ).format(tag.block_type_name, expected, tag.start))
+                    ).format(
+                        tag.block_type_name,
+                        expected,
+                        self.tag_parser.linepos(tag.start)
+                    ))

             if tag.block_type_name in allowed_blocks:
                 if self.stack:
                     dbt.exceptions.raise_compiler_error((
                         'Got a block definition inside control flow at {}. '
                         'All dbt block definitions must be at the top level'
-                    ).format(tag.start))
+                    ).format(self.tag_parser.linepos(tag.start)))
                 if self.current is not None:
                     dbt.exceptions.raise_compiler_error(
                         duplicate_tags.format(outer=self.current, inner=tag)
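
NOTE (editorial, not part of the patch): linepos above converts an absolute character offset into a 1-indexed line number plus a 0-indexed column within that line. A standalone sketch of the same arithmetic, using the body from the existing test_endif test:

    def linepos(data: str, pos: int) -> str:
        # rfind returns -1 when there is no newline, and -1 + 1 == 0: start of line 1.
        last_line_start = data[:pos].rfind('\n') + 1
        line_number = data[:pos].count('\n') + 1   # line numbers are 1-indexed
        return f'{line_number}:{pos - last_line_start}'

    body = '{% snapshot foo %}select * from thing{% endsnapshot%}{% endif %}'
    print(linepos(body, 53))   # 1:53 -- the endif tag starts at offset 53, on line 1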
diff --git a/core/dbt/parser/macros.py b/core/dbt/parser/macros.py
index a3495c33ec6..7f796106b63 100644
--- a/core/dbt/parser/macros.py
+++ b/core/dbt/parser/macros.py
@@ -48,22 +48,26 @@ def parse_macro(
     def parse_unparsed_macros(
         self, base_node: UnparsedMacro
     ) -> Iterable[ParsedMacro]:
-        blocks: List[jinja.BlockTag] = [
-            t for t in
-            jinja.extract_toplevel_blocks(
-                base_node.raw_sql,
-                allowed_blocks={'macro', 'materialization'},
-                collect_raw_data=False,
-            )
-            if isinstance(t, jinja.BlockTag)
-        ]
+        try:
+            blocks: List[jinja.BlockTag] = [
+                t for t in
+                jinja.extract_toplevel_blocks(
+                    base_node.raw_sql,
+                    allowed_blocks={'macro', 'materialization'},
+                    collect_raw_data=False,
+                )
+                if isinstance(t, jinja.BlockTag)
+            ]
+        except CompilationException as exc:
+            exc.add_node(base_node)
+            raise

         for block in blocks:
             try:
                 ast = jinja.parse(block.full_block)
             except CompilationException as e:
                 e.add_node(base_node)
-                raise e
+                raise

             macro_nodes = list(ast.find_all(jinja2.nodes.Macro))

diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py
index 10ff0ab9ae9..5d10eaf61ca 100644
--- a/test/unit/test_jinja.py
+++ b/test/unit/test_jinja.py
@@ -313,13 +313,19 @@ def test_endif(self):
         body = '{% snapshot foo %}select * from thing{% endsnapshot%}{% endif %}'
         with self.assertRaises(CompilationException) as err:
             extract_toplevel_blocks(body)
-        self.assertIn('Got an unexpected control flow end tag, got endif but never saw a preceeding if (@ 53)', str(err.exception))
+        self.assertIn('Got an unexpected control flow end tag, got endif but never saw a preceeding if (@ 1:53)', str(err.exception))

     def test_if_endfor(self):
         body = '{% if x %}...{% endfor %}{% endif %}'
         with self.assertRaises(CompilationException) as err:
             extract_toplevel_blocks(body)
-        self.assertIn('Got an unexpected control flow end tag, got endfor but expected endif next (@ 13)', str(err.exception))
+        self.assertIn('Got an unexpected control flow end tag, got endfor but expected endif next (@ 1:13)', str(err.exception))
+
+    def test_if_endfor_newlines(self):
+        body = '{% if x %}\n    ...\n    {% endfor %}\n{% endif %}'
+        with self.assertRaises(CompilationException) as err:
+            extract_toplevel_blocks(body)
+        self.assertIn('Got an unexpected control flow end tag, got endfor but expected endif next (@ 3:4)', str(err.exception))


 bar_block = '''{% mytype bar %}

From c6603be194f8fa93242fa455f03e68fc71c021ce Mon Sep 17 00:00:00 2001
From: Jacob Beck
Date: Thu, 30 Apr 2020 14:05:36 -0600
Subject: [PATCH 3/3] Handle funky incremental behavior in case existing materializations use it

- also fixed the default incremental to include that it is creating a table
---
 .../macros/materializations/incremental/incremental.sql   | 2 +-
 plugins/postgres/dbt/include/postgres/macros/adapters.sql | 2 ++
 plugins/redshift/dbt/include/redshift/macros/adapters.sql | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql b/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql
index 7ac4a2ae289..e185d8205d8 100644
--- a/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql
+++ b/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql
@@ -4,7 +4,7 @@
   {% set unique_key = config.get('unique_key') %}
   {% set full_refresh_mode = flags.FULL_REFRESH %}

-  {% set target_relation = this %}
+  {% set target_relation = this.incorporate(type='table') %}
   {% set existing_relation = load_relation(this) %}
   {% set tmp_relation = make_temp_relation(this) %}

diff --git a/plugins/postgres/dbt/include/postgres/macros/adapters.sql b/plugins/postgres/dbt/include/postgres/macros/adapters.sql
index f9be5d302aa..54bb1e13410 100644
--- a/plugins/postgres/dbt/include/postgres/macros/adapters.sql
+++ b/plugins/postgres/dbt/include/postgres/macros/adapters.sql
@@ -13,6 +13,7 @@
     {{ sql }}
   );

+  {% set relation = relation.incorporate(type='table') %}
   {{ set_relation_comment(relation) }}
   {{ set_column_comments(relation) }}
 {%- endmacro %}
@@ -20,6 +21,7 @@

 {% macro postgres__create_view_as(relation, sql) %}
   {{ default__create_view_as(relation, sql) }}
+  {%- set relation = relation.incorporate(type='view') -%}
   {{ set_relation_comment(relation) }}
   {{ set_column_comments(relation) }}
 {% endmacro %}
diff --git a/plugins/redshift/dbt/include/redshift/macros/adapters.sql b/plugins/redshift/dbt/include/redshift/macros/adapters.sql
index 9342a733832..beace1e0294 100644
--- a/plugins/redshift/dbt/include/redshift/macros/adapters.sql
+++ b/plugins/redshift/dbt/include/redshift/macros/adapters.sql
@@ -50,6 +50,7 @@
     {{ sql }}
   );

+  {% set relation = relation.incorporate(type='table') %}
   {{ set_relation_comment(relation) }}
   {{ set_column_comments(relation) }}
 {%- endmacro %}
@@ -71,6 +72,7 @@
     For late-binding views, it's possible to set comments on the view (though they don't seem to end up anywhere).
     Unfortunately, setting comments on columns just results in an error.
   #}
+  {% set relation = relation.incorporate(type='view') %}
   {{ set_relation_comment(relation) }}
   {% if binding %}
     {{ set_column_comments(relation) }}
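
NOTE (editorial, not part of the patches): the incorporate(type=...) calls in PATCH 3 appear to matter because postgres__alter_relation_comment from PATCH 1 interpolates relation.type straight into the SQL ('comment on table ...' vs 'comment on view ...'), and a relation handed in by an existing materialization may not carry a type yet. A toy illustration of the difference, using a stand-in Relation class rather than dbt's real one:

    from dataclasses import dataclass, replace
    from typing import Optional

    @dataclass(frozen=True)
    class Relation:
        schema: str
        identifier: str
        type: Optional[str] = None  # 'table' or 'view'

        def incorporate(self, **kwargs):
            # Like dbt's incorporate(): return a copy with extra fields filled in.
            return replace(self, **kwargs)

        def render(self):
            return f'"{self.schema}"."{self.identifier}"'

    rel = Relation('analytics', 'my_model')           # type is None here
    rel = rel.incorporate(type='table')               # now 'comment on table ...' renders correctly
    print(f'comment on {rel.type} {rel.render()} is '
          f'$dbt_comment_literal_block$hello$dbt_comment_literal_block$;')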