From 046942bc1290e6d2116738f6a6e43319f10a6d37 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 11 Jul 2018 09:13:00 -0400 Subject: [PATCH 01/18] Write out some SQL --- .../global_project/macros/adapters/common.sql | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/dbt/include/global_project/macros/adapters/common.sql b/dbt/include/global_project/macros/adapters/common.sql index 7b09ba1bbbb..2da58fc5e4a 100644 --- a/dbt/include/global_project/macros/adapters/common.sql +++ b/dbt/include/global_project/macros/adapters/common.sql @@ -124,3 +124,42 @@ {# str() on all returns. To get the results, you'll need to use #} {# context['load_result']('catalog') #} {%- endmacro %} + + +{# The only difference between snowflake and postgres is casing... #} +{% macro snowflake__get_catalog() -%} + {%- call statement('catalog', fetch_result=True) -%} + with tables as ( + select + table_schema, + table_name, + table_type + + from information_schema.tables + + ), + + columns as ( + + select + table_schema, + table_name, + null as table_comment, + + column_name, + ordinal_position as column_index, + data_type as column_type, + null as column_comment + + + from information_schema.columns + + ) + + select * + from tables + join columns using (table_schema, table_name) + + where table_schema != 'INFORMATION_SCHEMA' + {%- endcall -%} +{%- endmacro %} From 22d8ad68e54409ed5c11f7840f55bd49a5b6c9d0 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 12 Jul 2018 15:42:53 -0400 Subject: [PATCH 02/18] Add snowflake tests, rework existing postgres to use dbt seed + dbt run + dbt docs generate --- .../029_docs_generate_tests/models/.gitkeep | 0 .../029_docs_generate_tests/models/model.sql | 7 + .../models/view_summary.sql | 18 -- .../029_docs_generate_tests/seed/seed.csv | 2 + .../{seed.sql => seed_pg.sql} | 4 + .../seed_snowflake.sql | 35 ++++ .../test_docs_generate.py | 160 ++++++++++-------- 7 files changed, 138 insertions(+), 88 deletions(-) delete mode 100644 test/integration/029_docs_generate_tests/models/.gitkeep create mode 100644 test/integration/029_docs_generate_tests/models/model.sql delete mode 100644 test/integration/029_docs_generate_tests/models/view_summary.sql create mode 100644 test/integration/029_docs_generate_tests/seed/seed.csv rename test/integration/029_docs_generate_tests/{seed.sql => seed_pg.sql} (92%) create mode 100644 test/integration/029_docs_generate_tests/seed_snowflake.sql diff --git a/test/integration/029_docs_generate_tests/models/.gitkeep b/test/integration/029_docs_generate_tests/models/.gitkeep deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/test/integration/029_docs_generate_tests/models/model.sql b/test/integration/029_docs_generate_tests/models/model.sql new file mode 100644 index 00000000000..f96ac3f33c8 --- /dev/null +++ b/test/integration/029_docs_generate_tests/models/model.sql @@ -0,0 +1,7 @@ +{{ + config( + materialized='view' + ) +}} + +select 1 as id diff --git a/test/integration/029_docs_generate_tests/models/view_summary.sql b/test/integration/029_docs_generate_tests/models/view_summary.sql deleted file mode 100644 index 4ff38b12de7..00000000000 --- a/test/integration/029_docs_generate_tests/models/view_summary.sql +++ /dev/null @@ -1,18 +0,0 @@ - -{{ - config( - materialized='view' - ) -}} - - -with t as ( - - select * from {{ ref('view_model') }} - -) - -select date_trunc('year', updated_at) as year, - count(*) -from t -group by 1 diff --git a/test/integration/029_docs_generate_tests/seed/seed.csv b/test/integration/029_docs_generate_tests/seed/seed.csv new file mode 100644 index 00000000000..ef154f552c9 --- /dev/null +++ b/test/integration/029_docs_generate_tests/seed/seed.csv @@ -0,0 +1,2 @@ +id,first_name,email,ip_address,updated_at +1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31 diff --git a/test/integration/029_docs_generate_tests/seed.sql b/test/integration/029_docs_generate_tests/seed_pg.sql similarity index 92% rename from test/integration/029_docs_generate_tests/seed.sql rename to test/integration/029_docs_generate_tests/seed_pg.sql index 5af712a2c0f..9c372019e31 100644 --- a/test/integration/029_docs_generate_tests/seed.sql +++ b/test/integration/029_docs_generate_tests/seed_pg.sql @@ -29,3 +29,7 @@ INSERT INTO {schema}.seed_summary VALUES ('2008-01-01 00:00:00',6); + +create view {schema}.test_view as ( + select 1 as id +); diff --git a/test/integration/029_docs_generate_tests/seed_snowflake.sql b/test/integration/029_docs_generate_tests/seed_snowflake.sql new file mode 100644 index 00000000000..2fb8f77eca0 --- /dev/null +++ b/test/integration/029_docs_generate_tests/seed_snowflake.sql @@ -0,0 +1,35 @@ +create table {schema}.seed ( + "id" INTEGER, + "first_name" VARCHAR(11), + "email" VARCHAR(31), + "ip_address" VARCHAR(15), + "updated_at" TIMESTAMP WITHOUT TIME ZONE +); + + +INSERT INTO {schema}.seed + ("id","first_name","email","ip_address","updated_at") +VALUES + (1,'Larry','lking0@miitbeian.gov.cn','69.135.206.194','2008-09-12 19:08:31'); + +create table {schema}.seed_config_expected_1 as ( + + select *, 'default'::text as "c1", 'default'::text as "c2", 'was true'::text as "some_bool" from {schema}.seed + +); + + +create table {schema}.seed_summary ( + "year" timestamp without time zone, + "count" bigint +); + +INSERT INTO {schema}.seed_summary + ("year","count") +VALUES + ('2008-01-01 00:00:00',6); + + +create view {schema}.test_view as ( + select 1 as "id" +); diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 25bb443283b..438492ec17f 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -7,30 +7,37 @@ class TestDocsGenerate(DBTIntegrationTest): - def setUp(self): - super(TestDocsGenerate, self).setUp() - self.run_sql_file("test/integration/029_docs_generate_tests/seed.sql") - @property def schema(self): - return "simple_dependency_029" + return 'simple_dependency_029' + + @staticmethod + def dir(path): + return "test/integration/029_docs_generate_tests/" + path.lstrip("/") @property def models(self): - return "test/integration/029_docs_generate_tests/models" + return self.dir("models") @property def project_config(self): return { - "repositories": [ + 'repositories': [ 'https://github.com/fishtown-analytics/dbt-integration-project' - ] + ], + 'quoting': { + 'identifier': False + } } @attr(type='postgres') - def test_simple_generate(self): - self.run_dbt(["deps"]) - self.run_dbt(["docs", "generate"]) + def test__postgres__simple_generate(self): + self.use_profile('postgres') + self.use_default_project({"data-paths": [self.dir("seed")]}) + + self.assertEqual(len(self.run_dbt(["seed"])), 1) + self.assertEqual(len(self.run_dbt()), 1) + self.run_dbt(['docs', 'generate']) self.assertTrue(os.path.exists('./target/catalog.json')) with open('./target/catalog.json') as fp: @@ -40,54 +47,30 @@ def test_simple_generate(self): self.assertIn(my_schema_name, data) my_schema = data[my_schema_name] expected = { - 'seed': { + 'model': { 'metadata': { 'schema': my_schema_name, - 'name': 'seed', - 'type': 'BASE TABLE', - 'comment': None + 'name': 'model', + 'type': 'VIEW', + 'comment': None, }, 'columns': [ { 'name': 'id', 'index': 1, 'type': 'integer', - 'comment': None - }, - { - 'name': 'first_name', - 'index': 2, - 'type': 'character varying', - 'comment': None - }, - { - 'name': 'email', 'index': 3, - 'type': 'character varying', 'comment': None, }, - { - 'name': 'ip_address', - 'index': 4, - 'type': 'character varying', - 'comment': None - }, - { - 'name': 'updated_at', - 'index': 5, - 'type': 'timestamp without time zone', - 'comment': None - }, ], }, - 'seed_config_expected_1': - { - 'metadata': { + 'seed': { + 'metadata': { 'schema': my_schema_name, - 'name': 'seed_config_expected_1', + 'name': 'seed', 'type': 'BASE TABLE', 'comment': None, }, - 'columns': [ + 'columns': [ { 'name': 'id', 'index': 1, @@ -97,19 +80,19 @@ def test_simple_generate(self): { 'name': 'first_name', 'index': 2, - 'type': 'character varying', + 'type': 'text', 'comment': None, }, { 'name': 'email', 'index': 3, - 'type': 'character varying', + 'type': 'text', 'comment': None, }, { 'name': 'ip_address', 'index': 4, - 'type': 'character varying', + 'type': 'text', 'comment': None, }, { @@ -118,48 +101,85 @@ def test_simple_generate(self): 'type': 'timestamp without time zone', 'comment': None, }, + ], + }, + } + + self.assertEqual(expected, my_schema) + + @attr(type='snowflake') + def test__snowflake__simple_generate(self): + self.use_profile('snowflake') + self.use_default_project({"data-paths": [self.dir("seed")]}) + + self.assertEqual(len(self.run_dbt(["seed"])), 1) + self.assertEqual(len(self.run_dbt()), 1) + self.run_dbt(['docs', 'generate']) + self.assertTrue(os.path.exists('./target/catalog.json')) + + with open('./target/catalog.json') as fp: + data = json.load(fp) + + my_schema_name = self.unique_schema() + self.assertIn(my_schema_name, data) + my_schema = data[my_schema_name] + expected = { + 'MODEL': { + 'metadata': { + 'schema': my_schema_name, + 'name': 'MODEL', + 'type': 'VIEW', + 'comment': None, + }, + 'columns': [ { - 'name': 'c1', - 'index': 6, - 'type': 'text', - 'comment': None, - }, - { - 'name': 'c2', - 'index': 7, - 'type': 'text', - 'comment': None, - }, - { - 'name': 'some_bool', - 'index': 8, - 'type': 'text', + 'name': 'ID', + 'index': 1, + 'type': 'NUMBER', 'comment': None, }, ], }, - 'seed_summary': { + 'SEED': { 'metadata': { 'schema': my_schema_name, - 'name': 'seed_summary', + 'name': 'SEED', 'type': 'BASE TABLE', - 'comment': None + 'comment': None, }, 'columns': [ { - 'name': 'year', + 'name': 'ID', 'index': 1, - 'type': 'timestamp without time zone', + 'type': 'NUMBER', 'comment': None, }, { - 'name': 'count', + 'name': 'FIRST_NAME', 'index': 2, - 'type': 'bigint', + 'type': 'TEXT', 'comment': None, }, - ] - } + { + 'name': 'EMAIL', + 'index': 3, + 'type': 'TEXT', + 'comment': None, + }, + { + 'name': 'IP_ADDRESS', + 'index': 4, + 'type': 'TEXT', + 'comment': None, + }, + { + 'name': 'UPDATED_AT', + 'index': 5, + 'type': 'TIMESTAMP_NTZ', + 'comment': None, + }, + ], + }, } self.assertEqual(expected, my_schema) From 5d97937ff440b2f5d1b68412957d519f8c3a213c Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 12 Jul 2018 16:33:33 -0400 Subject: [PATCH 03/18] Add tests for dbt run generating a manifest --- .../029_docs_generate_tests/models/model.sql | 2 +- .../029_docs_generate_tests/seed_pg.sql | 35 -- .../seed_snowflake.sql | 35 -- .../test_docs_generate.py | 306 ++++++++++++------ 4 files changed, 203 insertions(+), 175 deletions(-) delete mode 100644 test/integration/029_docs_generate_tests/seed_pg.sql delete mode 100644 test/integration/029_docs_generate_tests/seed_snowflake.sql diff --git a/test/integration/029_docs_generate_tests/models/model.sql b/test/integration/029_docs_generate_tests/models/model.sql index f96ac3f33c8..e831275ddc1 100644 --- a/test/integration/029_docs_generate_tests/models/model.sql +++ b/test/integration/029_docs_generate_tests/models/model.sql @@ -4,4 +4,4 @@ ) }} -select 1 as id +select * from {{ ref('seed') }} diff --git a/test/integration/029_docs_generate_tests/seed_pg.sql b/test/integration/029_docs_generate_tests/seed_pg.sql deleted file mode 100644 index 9c372019e31..00000000000 --- a/test/integration/029_docs_generate_tests/seed_pg.sql +++ /dev/null @@ -1,35 +0,0 @@ -create table {schema}.seed ( - id INTEGER, - first_name VARCHAR(11), - email VARCHAR(31), - ip_address VARCHAR(15), - updated_at TIMESTAMP WITHOUT TIME ZONE -); - - -INSERT INTO {schema}.seed - ("id","first_name","email","ip_address","updated_at") -VALUES - (1,'Larry','lking0@miitbeian.gov.cn','69.135.206.194','2008-09-12 19:08:31'); - -create table {schema}.seed_config_expected_1 as ( - - select *, 'default'::text as c1, 'default'::text as c2, 'was true'::text as some_bool from {schema}.seed - -); - - -create table {schema}.seed_summary ( - year timestamp without time zone, - count bigint -); - -INSERT INTO {schema}.seed_summary - ("year","count") -VALUES - ('2008-01-01 00:00:00',6); - - -create view {schema}.test_view as ( - select 1 as id -); diff --git a/test/integration/029_docs_generate_tests/seed_snowflake.sql b/test/integration/029_docs_generate_tests/seed_snowflake.sql deleted file mode 100644 index 2fb8f77eca0..00000000000 --- a/test/integration/029_docs_generate_tests/seed_snowflake.sql +++ /dev/null @@ -1,35 +0,0 @@ -create table {schema}.seed ( - "id" INTEGER, - "first_name" VARCHAR(11), - "email" VARCHAR(31), - "ip_address" VARCHAR(15), - "updated_at" TIMESTAMP WITHOUT TIME ZONE -); - - -INSERT INTO {schema}.seed - ("id","first_name","email","ip_address","updated_at") -VALUES - (1,'Larry','lking0@miitbeian.gov.cn','69.135.206.194','2008-09-12 19:08:31'); - -create table {schema}.seed_config_expected_1 as ( - - select *, 'default'::text as "c1", 'default'::text as "c2", 'was true'::text as "some_bool" from {schema}.seed - -); - - -create table {schema}.seed_summary ( - "year" timestamp without time zone, - "count" bigint -); - -INSERT INTO {schema}.seed_summary - ("year","count") -VALUES - ('2008-01-01 00:00:00',6); - - -create view {schema}.test_view as ( - select 1 as "id" -); diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 438492ec17f..719bbf808fb 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -30,23 +30,174 @@ def project_config(self): } } - @attr(type='postgres') - def test__postgres__simple_generate(self): - self.use_profile('postgres') + def run_and_generate(self): self.use_default_project({"data-paths": [self.dir("seed")]}) self.assertEqual(len(self.run_dbt(["seed"])), 1) self.assertEqual(len(self.run_dbt()), 1) self.run_dbt(['docs', 'generate']) + + def verify_catalog(self, expected): self.assertTrue(os.path.exists('./target/catalog.json')) with open('./target/catalog.json') as fp: - data = json.load(fp) + catalog = json.load(fp) + + my_schema_name = self.unique_schema() + self.assertIn(my_schema_name, catalog) + my_schema = catalog[my_schema_name] + self.assertEqual(expected, my_schema) + + def verify_manifest_macros(self, manifest): + # just test a known global macro to avoid having to update this every + # time they change. + self.assertIn('macro.dbt.column_list', manifest['macros']) + macro = manifest['macros']['macro.dbt.column_list'] + self.assertEqual( + set(macro), + { + 'path', 'original_file_path', 'package_name', 'raw_sql', + 'root_path', 'name', 'unique_id', 'tags', 'resource_type', + 'depends_on' + } + ) + # Don't compare the sql, just make sure it exists + self.assertTrue(len(macro['raw_sql']) > 10) + without_sql = {k: v for k, v in macro.items() if k != 'raw_sql'} + self.assertEqual( + without_sql, + { + 'path': 'materializations/helpers.sql', + 'original_file_path': 'materializations/helpers.sql', + 'package_name': 'dbt', + 'root_path': '/usr/src/app/dbt/include/global_project', + 'name': 'column_list', + 'unique_id': 'macro.dbt.column_list', + 'tags': [], + 'resource_type': 'macro', + 'depends_on': {'macros': []} + } + ) + + def verify_manifest(self): + self.assertTrue(os.path.exists('./target/manifest.json')) + + with open('./target/manifest.json') as fp: + manifest = json.load(fp) + + self.assertEqual( + set(manifest), + {'nodes', 'macros', 'parent_map', 'child_map'} + ) + + self.verify_manifest_macros(manifest) + manifest_without_macros = { + k: v for k, v in manifest.items() if k != 'macros' + } + # the manifest should be consistent across DBs for this test + model_sql_path = self.dir('models/model.sql') + my_schema_name = self.unique_schema() + expected_manifest = { + 'nodes': { + 'model.test.model': { + 'name': 'model', + 'root_path': '/usr/src/app', + 'resource_type': 'model', + 'path': 'model.sql', + 'original_file_path': model_sql_path, + 'package_name': 'test', + 'raw_sql': open(model_sql_path).read().rstrip('\n'), + 'refs': [['seed']], + 'depends_on': {'nodes': ['seed.test.seed'], 'macros': []}, + 'unique_id': 'model.test.model', + 'empty': False, + 'fqn': ['test', 'model'], + 'tags': [], + 'config': {'enabled': True, + 'materialized': 'view', + 'pre-hook': [], + 'post-hook': [], + 'vars': {}, + 'column_types': {}, + 'quoting': {}}, + 'schema': my_schema_name, + 'alias': 'model' + }, + 'seed.test.seed': { + 'path': 'seed.csv', + 'name': 'seed', + 'root_path': '/usr/src/app', + 'resource_type': 'seed', + 'raw_sql': '-- csv --', + 'package_name': 'test', + 'original_file_path': self.dir('seed/seed.csv'), + 'refs': [], + 'depends_on': {'nodes': [], 'macros': []}, + 'unique_id': 'seed.test.seed', + 'empty': False, + 'fqn': ['test', 'seed'], + 'tags': [], + 'config': {'enabled': True, + 'materialized': 'seed', + 'pre-hook': [], + 'post-hook': [], + 'vars': {}, + 'column_types': {}, + 'quoting': {}}, + 'schema': my_schema_name, + 'alias': 'seed' + }, + }, + 'parent_map': { + 'model.test.model': ['seed.test.seed'], + 'seed.test.seed': [], + }, + 'child_map': { + 'model.test.model': [], + 'seed.test.seed': ['model.test.model'], + }, + } + self.assertEqual(manifest_without_macros, expected_manifest) + + @attr(type='postgres') + def test__postgres__run_and_generate(self): + self.use_profile('postgres') + self.run_and_generate() my_schema_name = self.unique_schema() - self.assertIn(my_schema_name, data) - my_schema = data[my_schema_name] - expected = { + expected_cols = [ + { + 'name': 'id', + 'index': 1, + 'type': 'integer', + 'comment': None, + }, + { + 'name': 'first_name', + 'index': 2, + 'type': 'text', + 'comment': None, + }, + { + 'name': 'email', + 'index': 3, + 'type': 'text', + 'comment': None, + }, + { + 'name': 'ip_address', + 'index': 4, + 'type': 'text', + 'comment': None, + }, + { + 'name': 'updated_at', + 'index': 5, + 'type': 'timestamp without time zone', + 'comment': None, + }, + ] + expected_catalog = { 'model': { 'metadata': { 'schema': my_schema_name, @@ -54,14 +205,7 @@ def test__postgres__simple_generate(self): 'type': 'VIEW', 'comment': None, }, - 'columns': [ - { - 'name': 'id', - 'index': 1, - 'type': 'integer', - 'comment': None, - }, - ], + 'columns': expected_cols, }, 'seed': { 'metadata': { @@ -70,60 +214,51 @@ def test__postgres__simple_generate(self): 'type': 'BASE TABLE', 'comment': None, }, - 'columns': [ - { - 'name': 'id', - 'index': 1, - 'type': 'integer', - 'comment': None, - }, - { - 'name': 'first_name', - 'index': 2, - 'type': 'text', - 'comment': None, - }, - { - 'name': 'email', - 'index': 3, - 'type': 'text', - 'comment': None, - }, - { - 'name': 'ip_address', - 'index': 4, - 'type': 'text', - 'comment': None, - }, - { - 'name': 'updated_at', - 'index': 5, - 'type': 'timestamp without time zone', - 'comment': None, - }, - ], + 'columns': expected_cols, }, } - - self.assertEqual(expected, my_schema) + self.verify_catalog(expected_catalog) + model_sql_path = self.dir('models/model.sql') + self.verify_manifest() @attr(type='snowflake') - def test__snowflake__simple_generate(self): + def test__snowflake__run_and_generate(self): self.use_profile('snowflake') - self.use_default_project({"data-paths": [self.dir("seed")]}) - - self.assertEqual(len(self.run_dbt(["seed"])), 1) - self.assertEqual(len(self.run_dbt()), 1) - self.run_dbt(['docs', 'generate']) - self.assertTrue(os.path.exists('./target/catalog.json')) - - with open('./target/catalog.json') as fp: - data = json.load(fp) - + self.run_and_generate() my_schema_name = self.unique_schema() - self.assertIn(my_schema_name, data) - my_schema = data[my_schema_name] - expected = { + expected_cols = [ + { + 'name': 'ID', + 'index': 1, + 'type': 'NUMBER', + 'comment': None, + }, + { + 'name': 'FIRST_NAME', + 'index': 2, + 'type': 'TEXT', + 'comment': None, + }, + { + 'name': 'EMAIL', + 'index': 3, + 'type': 'TEXT', + 'comment': None, + }, + { + 'name': 'IP_ADDRESS', + 'index': 4, + 'type': 'TEXT', + 'comment': None, + }, + { + 'name': 'UPDATED_AT', + 'index': 5, + 'type': 'TIMESTAMP_NTZ', + 'comment': None, + }, + ] + expected_catalog = { 'MODEL': { 'metadata': { 'schema': my_schema_name, @@ -131,14 +266,7 @@ def test__snowflake__simple_generate(self): 'type': 'VIEW', 'comment': None, }, - 'columns': [ - { - 'name': 'ID', - 'index': 1, - 'type': 'NUMBER', - 'comment': None, - }, - ], + 'columns': expected_cols, }, 'SEED': { 'metadata': { @@ -147,39 +275,9 @@ def test__snowflake__simple_generate(self): 'type': 'BASE TABLE', 'comment': None, }, - 'columns': [ - { - 'name': 'ID', - 'index': 1, - 'type': 'NUMBER', - 'comment': None, - }, - { - 'name': 'FIRST_NAME', - 'index': 2, - 'type': 'TEXT', - 'comment': None, - }, - { - 'name': 'EMAIL', - 'index': 3, - 'type': 'TEXT', - 'comment': None, - }, - { - 'name': 'IP_ADDRESS', - 'index': 4, - 'type': 'TEXT', - 'comment': None, - }, - { - 'name': 'UPDATED_AT', - 'index': 5, - 'type': 'TIMESTAMP_NTZ', - 'comment': None, - }, - ], + 'columns': expected_cols, }, } - self.assertEqual(expected, my_schema) + self.verify_catalog(expected_catalog) + self.verify_manifest() From 7832322927e09763fbef27e21ba167e2c16ebe1f Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 12 Jul 2018 16:48:24 -0400 Subject: [PATCH 04/18] Implement get_catalog for snowflake, move adapter-side logic into the default one --- dbt/adapters/default/impl.py | 11 +++++-- dbt/adapters/postgres/impl.py | 13 -------- .../global_project/macros/adapters/common.sql | 33 +++++++++++-------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/dbt/adapters/default/impl.py b/dbt/adapters/default/impl.py index f6489416f3b..c8a7eea3f64 100644 --- a/dbt/adapters/default/impl.py +++ b/dbt/adapters/default/impl.py @@ -17,6 +17,9 @@ from dbt.adapters.default.relation import DefaultRelation +GET_CATALOG_OPERATION_NAME = 'get_catalog_data' +GET_CATALOG_RESULT_KEY = 'catalog' # defined in get_catalog() macro + lock = multiprocessing.Lock() connections_in_use = {} connections_available = [] @@ -809,5 +812,9 @@ def run_operation(cls, profile, project_cfg, manifest, operation_name, ### @classmethod def get_catalog(cls, profile, project_cfg, manifest): - raise dbt.exceptions.NotImplementedException( - '`get_catalog` is not implemented for this adapter!') + results = cls.run_operation(profile, project_cfg, manifest, + GET_CATALOG_OPERATION_NAME, + GET_CATALOG_RESULT_KEY) + schemas = cls.get_existing_schemas(profile, project_cfg) + results = results.table.where(lambda r: r['table_schema'] in schemas) + return results diff --git a/dbt/adapters/postgres/impl.py b/dbt/adapters/postgres/impl.py index 89fa0ea538b..eaaee37a55f 100644 --- a/dbt/adapters/postgres/impl.py +++ b/dbt/adapters/postgres/impl.py @@ -9,9 +9,6 @@ from dbt.logger import GLOBAL_LOGGER as logger -GET_CATALOG_OPERATION_NAME = 'get_catalog_data' -GET_CATALOG_RESULT_KEY = 'catalog' # defined in get_catalog() macro - class PostgresAdapter(dbt.adapters.default.DefaultAdapter): @@ -221,13 +218,3 @@ def convert_date_type(cls, agate_table, col_idx): def convert_time_type(cls, agate_table, col_idx): return "time" - @classmethod - def get_catalog(cls, profile, project_cfg, manifest): - results = cls.run_operation(profile, project_cfg, manifest, - GET_CATALOG_OPERATION_NAME, - GET_CATALOG_RESULT_KEY) - - schemas = cls.get_existing_schemas(profile, project_cfg) - results = results.table.where(lambda r: r['table_schema'] in schemas) - - return results diff --git a/dbt/include/global_project/macros/adapters/common.sql b/dbt/include/global_project/macros/adapters/common.sql index 2da58fc5e4a..db0462d134c 100644 --- a/dbt/include/global_project/macros/adapters/common.sql +++ b/dbt/include/global_project/macros/adapters/common.sql @@ -131,25 +131,32 @@ {%- call statement('catalog', fetch_result=True) -%} with tables as ( select - table_schema, - table_name, - table_type + table_schema as "table_schema", + table_name as "table_name", + table_type as "table_type" from information_schema.tables + union + + select + table_schema as "table_schema", + table_name as "table_name", + 'VIEW' as "table_type" + from information_schema.views ), columns as ( select - table_schema, - table_name, - null as table_comment, + table_schema as "table_schema", + table_name as "table_name", + null as "table_comment", - column_name, - ordinal_position as column_index, - data_type as column_type, - null as column_comment + column_name as "column_name", + ordinal_position as "column_index", + data_type as "column_type", + null as "column_comment" from information_schema.columns @@ -158,8 +165,8 @@ select * from tables - join columns using (table_schema, table_name) - - where table_schema != 'INFORMATION_SCHEMA' + join columns using ("table_schema", "table_name") + where "table_schema" != 'INFORMATION_SCHEMA' + order by "column_index" {%- endcall -%} {%- endmacro %} From b1ab19ff60cb616c6a609bc664ca742e7caf1a3b Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 12 Jul 2018 16:52:01 -0400 Subject: [PATCH 05/18] I never remember to run pep8 --- dbt/adapters/postgres/impl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/adapters/postgres/impl.py b/dbt/adapters/postgres/impl.py index eaaee37a55f..24ed054f79c 100644 --- a/dbt/adapters/postgres/impl.py +++ b/dbt/adapters/postgres/impl.py @@ -217,4 +217,3 @@ def convert_date_type(cls, agate_table, col_idx): @classmethod def convert_time_type(cls, agate_table, col_idx): return "time" - From 3bfa6bb90a4cf05b886cc05ed284af1ac362825c Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 13 Jul 2018 09:27:43 -0400 Subject: [PATCH 06/18] Fix up paths so they make work in CI as well asl ocally --- .../029_docs_generate_tests/test_docs_generate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 719bbf808fb..0e3f78ac4e0 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -70,7 +70,7 @@ def verify_manifest_macros(self, manifest): 'path': 'materializations/helpers.sql', 'original_file_path': 'materializations/helpers.sql', 'package_name': 'dbt', - 'root_path': '/usr/src/app/dbt/include/global_project', + 'root_path': os.path.join(os.getcwd(), 'dbt/include/global_project'), 'name': 'column_list', 'unique_id': 'macro.dbt.column_list', 'tags': [], @@ -101,7 +101,7 @@ def verify_manifest(self): 'nodes': { 'model.test.model': { 'name': 'model', - 'root_path': '/usr/src/app', + 'root_path': os.getcwd(), 'resource_type': 'model', 'path': 'model.sql', 'original_file_path': model_sql_path, @@ -126,7 +126,7 @@ def verify_manifest(self): 'seed.test.seed': { 'path': 'seed.csv', 'name': 'seed', - 'root_path': '/usr/src/app', + 'root_path': os.getcwd(), 'resource_type': 'seed', 'raw_sql': '-- csv --', 'package_name': 'test', From 461da2f0c302f4ba42303815bad39573e45e2b0d Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 13 Jul 2018 11:21:32 -0400 Subject: [PATCH 07/18] Remove misleading comment, explicity order postgres results like I did for snowflake --- dbt/include/global_project/macros/adapters/common.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/global_project/macros/adapters/common.sql b/dbt/include/global_project/macros/adapters/common.sql index db0462d134c..cc8ffbc5c63 100644 --- a/dbt/include/global_project/macros/adapters/common.sql +++ b/dbt/include/global_project/macros/adapters/common.sql @@ -119,6 +119,7 @@ where table_schema != 'information_schema' and table_schema not like 'pg_%' + order by "column_index" {%- endcall -%} {# There's no point in returning anything as the jinja macro stuff calls #} {# str() on all returns. To get the results, you'll need to use #} @@ -126,7 +127,6 @@ {%- endmacro %} -{# The only difference between snowflake and postgres is casing... #} {% macro snowflake__get_catalog() -%} {%- call statement('catalog', fetch_result=True) -%} with tables as ( From 85a4413f4f9625ade9564bde789a6f91ceb12ba4 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 16 Jul 2018 10:31:29 -0600 Subject: [PATCH 08/18] PR feedback: union all and dbt schemas only --- dbt/adapters/default/impl.py | 5 ++++- dbt/include/global_project/macros/adapters/common.sql | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/default/impl.py b/dbt/adapters/default/impl.py index c8a7eea3f64..10e3c7a2388 100644 --- a/dbt/adapters/default/impl.py +++ b/dbt/adapters/default/impl.py @@ -815,6 +815,9 @@ def get_catalog(cls, profile, project_cfg, manifest): results = cls.run_operation(profile, project_cfg, manifest, GET_CATALOG_OPERATION_NAME, GET_CATALOG_RESULT_KEY) - schemas = cls.get_existing_schemas(profile, project_cfg) + schemas = list({ + node.to_dict()['schema'] + for node in manifest.nodes.values() + }) results = results.table.where(lambda r: r['table_schema'] in schemas) return results diff --git a/dbt/include/global_project/macros/adapters/common.sql b/dbt/include/global_project/macros/adapters/common.sql index cc8ffbc5c63..88f893723a9 100644 --- a/dbt/include/global_project/macros/adapters/common.sql +++ b/dbt/include/global_project/macros/adapters/common.sql @@ -137,7 +137,7 @@ from information_schema.tables - union + union all select table_schema as "table_schema", From d4e2cfdbcf13fa767216353b2f84872e26c25503 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 16 Jul 2018 09:31:44 -0600 Subject: [PATCH 09/18] bigquery catalog/manifest support --- dbt/adapters/bigquery/impl.py | 36 +++++++++++ .../test_docs_generate.py | 61 +++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index d36da44365f..d740c5c5d5e 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -613,3 +613,39 @@ def expand_target_column_types(cls, profile, project_cfg, temp_table, to_schema, to_table, model_name=None): # This is a no-op on BigQuery pass + + @classmethod + def get_catalog(cls, profile, project_cfg, manifest): + schemas = {node.to_dict()['schema'] for node in manifest.nodes.values()} + + column_names = [ + 'table_schema', + 'table_name', + 'table_type', + 'table_comment', + 'column_name', + 'column_index', + 'column_type', + 'column_comment', + ] + columns = [] + + for schema_name in schemas: + relations = cls.list_relations(profile, project_cfg, schema_name) + for relation in relations: + cols = cls.get_columns_in_table(profile, project_cfg, schema_name, relation.name) + for col_index, col in enumerate(cols): + column_data = [ + relation.schema, + relation.name, + relation.type, + None, + col.name, + col_index, + col.data_type, + None, + ] + columns.append(dict(zip(column_names, column_data))) + + + return dbt.clients.agate_helper.table_from_data(columns, column_names) diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 0e3f78ac4e0..cb6a4842b04 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -281,3 +281,64 @@ def test__snowflake__run_and_generate(self): self.verify_catalog(expected_catalog) self.verify_manifest() + + @attr(type='bigquery') + def test__bigquery__run_and_generate(self): + self.use_profile('bigquery') + self.run_and_generate() + my_schema_name = self.unique_schema() + expected_cols = [ + { + 'name': 'id', + 'index': 0, + 'type': 'INTEGER', + 'comment': None, + }, + { + 'name': 'first_name', + 'index': 1, + 'type': 'STRING', + 'comment': None, + }, + { + 'name': 'email', + 'index': 2, + 'type': 'STRING', + 'comment': None, + }, + { + 'name': 'ip_address', + 'index': 3, + 'type': 'STRING', + 'comment': None, + }, + { + 'name': 'updated_at', + 'index': 4, + 'type': 'DATETIME', + 'comment': None, + }, + ] + expected_catalog = { + 'model': { + 'metadata': { + 'schema': my_schema_name, + 'name': 'model', + 'type': 'view', + 'comment': None, + }, + 'columns': expected_cols, + }, + 'seed': { + 'metadata': { + 'schema': my_schema_name, + 'name': 'seed', + 'type': 'table', + 'comment': None, + }, + 'columns': expected_cols, + }, + } + self.verify_catalog(expected_catalog) + model_sql_path = self.dir('models/model.sql') + self.verify_manifest() From 1454572df42c258da81a7c317d5322bd9bb80c63 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 16 Jul 2018 09:38:15 -0600 Subject: [PATCH 10/18] pep8 --- dbt/adapters/bigquery/impl.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index d740c5c5d5e..8eb3b04b143 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -616,9 +616,12 @@ def expand_target_column_types(cls, profile, project_cfg, temp_table, @classmethod def get_catalog(cls, profile, project_cfg, manifest): - schemas = {node.to_dict()['schema'] for node in manifest.nodes.values()} + schemas = { + node.to_dict()['schema'] + for node in manifest.nodes.values() + } - column_names = [ + column_names = ( 'table_schema', 'table_name', 'table_type', @@ -627,15 +630,16 @@ def get_catalog(cls, profile, project_cfg, manifest): 'column_index', 'column_type', 'column_comment', - ] + ) columns = [] for schema_name in schemas: relations = cls.list_relations(profile, project_cfg, schema_name) for relation in relations: - cols = cls.get_columns_in_table(profile, project_cfg, schema_name, relation.name) + cols = cls.get_columns_in_table(profile, project_cfg, + schema_name, relation.name) for col_index, col in enumerate(cols): - column_data = [ + column_data = ( relation.schema, relation.name, relation.type, @@ -644,8 +648,7 @@ def get_catalog(cls, profile, project_cfg, manifest): col_index, col.data_type, None, - ] + ) columns.append(dict(zip(column_names, column_data))) - return dbt.clients.agate_helper.table_from_data(columns, column_names) From d4597df1ff83792580c8e2d9b0814375762e14b3 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 16 Jul 2018 13:15:22 -0600 Subject: [PATCH 11/18] Don't need to union anything here --- dbt/include/global_project/macros/adapters/common.sql | 8 -------- .../029_docs_generate_tests/test_docs_generate.py | 3 +++ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/dbt/include/global_project/macros/adapters/common.sql b/dbt/include/global_project/macros/adapters/common.sql index 88f893723a9..c2c5c1d8597 100644 --- a/dbt/include/global_project/macros/adapters/common.sql +++ b/dbt/include/global_project/macros/adapters/common.sql @@ -136,14 +136,6 @@ table_type as "table_type" from information_schema.tables - - union all - - select - table_schema as "table_schema", - table_name as "table_name", - 'VIEW' as "table_type" - from information_schema.views ), columns as ( diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 0e3f78ac4e0..6289a790c9a 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -6,6 +6,9 @@ class TestDocsGenerate(DBTIntegrationTest): + def setUp(self): + super(TestDocsGenerate,self).setUp() + self.maxDiff = None @property def schema(self): From 26df7214605e4f6e859d147c6f3ad0057bc838e2 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 16 Jul 2018 14:05:49 -0600 Subject: [PATCH 12/18] Windows path nonsense --- .../029_docs_generate_tests/test_docs_generate.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 6289a790c9a..91e75750d15 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -16,7 +16,8 @@ def schema(self): @staticmethod def dir(path): - return "test/integration/029_docs_generate_tests/" + path.lstrip("/") + return os.path.join('test', 'integration', '029_docs_generate_tests', + path) @property def models(self): @@ -67,13 +68,16 @@ def verify_manifest_macros(self, manifest): # Don't compare the sql, just make sure it exists self.assertTrue(len(macro['raw_sql']) > 10) without_sql = {k: v for k, v in macro.items() if k != 'raw_sql'} + # Windows means we can't hard-code this. + helpers_path = os.path.join('materializations', 'helpers.sql') self.assertEqual( without_sql, { - 'path': 'materializations/helpers.sql', - 'original_file_path': 'materializations/helpers.sql', + 'path': helpers_path, + 'original_file_path': helpers_path, 'package_name': 'dbt', - 'root_path': os.path.join(os.getcwd(), 'dbt/include/global_project'), + 'root_path': os.path.join(os.getcwd(), 'dbt','include', + 'global_project'), 'name': 'column_list', 'unique_id': 'macro.dbt.column_list', 'tags': [], @@ -133,7 +137,8 @@ def verify_manifest(self): 'resource_type': 'seed', 'raw_sql': '-- csv --', 'package_name': 'test', - 'original_file_path': self.dir('seed/seed.csv'), + 'original_file_path': self.dir(os.path.join('seed', + 'seed.csv')), 'refs': [], 'depends_on': {'nodes': [], 'macros': []}, 'unique_id': 'seed.test.seed', From 7fb6e95dba545c335ac92dd44cbcf4f1dcda77a4 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 17 Jul 2018 06:57:46 -0600 Subject: [PATCH 13/18] Remove extra line --- test/integration/029_docs_generate_tests/test_docs_generate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 96a7b61d309..200f59131df 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -346,5 +346,4 @@ def test__bigquery__run_and_generate(self): }, } self.verify_catalog(expected_catalog) - model_sql_path = self.dir('models/model.sql') self.verify_manifest() From 694c5085991253e11e0e9ee97d8841c334424c26 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 17 Jul 2018 12:48:21 -0600 Subject: [PATCH 14/18] Handle nested records properly in bigquery --- dbt/adapters/bigquery/impl.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 8eb3b04b143..661753c7c84 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -614,6 +614,19 @@ def expand_target_column_types(cls, profile, project_cfg, temp_table, # This is a no-op on BigQuery pass + @classmethod + def _flat_columns_in_table(cls, profile, project_cfg, schema_name, + table_name): + """An iterator over the columns for a given schema and table. Yields + index, column pairs. + """ + cols = cls.get_columns_in_table(profile, project_cfg, + schema_name, relation.name) + for col in cols: + flattened = col.flatten() + for subcol in flattened: + yield subcol + @classmethod def get_catalog(cls, profile, project_cfg, manifest): schemas = { @@ -636,17 +649,21 @@ def get_catalog(cls, profile, project_cfg, manifest): for schema_name in schemas: relations = cls.list_relations(profile, project_cfg, schema_name) for relation in relations: - cols = cls.get_columns_in_table(profile, project_cfg, - schema_name, relation.name) - for col_index, col in enumerate(cols): + flattened = cls._flat_columns_in_table( + profile, + project_cfg, + schema_name, + relation.name + ) + for index, column in enumerate(flattened, start=1): column_data = ( relation.schema, relation.name, relation.type, None, - col.name, - col_index, - col.data_type, + column.name, + index, + column.data_type, None, ) columns.append(dict(zip(column_names, column_data))) From 75f0a221ccd6b1b4f656e7eee2def6f8895fe750 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 17 Jul 2018 12:50:54 -0600 Subject: [PATCH 15/18] Add a little decorator to do attr + use_profile --- test/integration/base.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/test/integration/base.py b/test/integration/base.py index 188aeb04802..c71e8f076b9 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -5,6 +5,9 @@ import random import time import json +from functools import wraps + +from nose.plugins.attrib import attr from dbt.adapters.factory import get_adapter from dbt.project import Project @@ -528,3 +531,28 @@ def assertTableColumnsEqual(self, table_a, table_b, table_a_schema=None, table_b def assertEquals(self, *args, **kwargs): # assertEquals is deprecated. This makes the warnings less chatty self.assertEqual(*args, **kwargs) + + +def use_profile(profile_name): + """A decorator to declare a test method as using a particular profile. + Handles both setting the nose attr and calling self.use_profile. + + Use like this: + + class TestSomething(DBIntegrationTest): + @use_profile('postgres') + def test_postgres_thing(self): + self.assertEqual(self.adapter_type, 'postgres') + + @use_profile('snowflake') + def test_snowflake_thing(self): + self.assertEqual(self.adapter_type, 'snowflake') + """ + def outer(wrapped): + @attr(type=profile_name) + @wraps(wrapped) + def func(self, *args, **kwargs): + self.use_profile(profile_name) + return wrapped(self, *args, **kwargs) + return func + return outer From e7a4641da0199561d66bd2fc1f0d0c13a3bce6a2 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 17 Jul 2018 12:51:28 -0600 Subject: [PATCH 16/18] Add new BQ test for nested records, manifest not complete --- .../bq_models/model.sql | 7 + .../bq_models/seed.sql | 9 ++ .../test_docs_generate.py | 128 +++++++++++++----- 3 files changed, 111 insertions(+), 33 deletions(-) create mode 100644 test/integration/029_docs_generate_tests/bq_models/model.sql create mode 100644 test/integration/029_docs_generate_tests/bq_models/seed.sql diff --git a/test/integration/029_docs_generate_tests/bq_models/model.sql b/test/integration/029_docs_generate_tests/bq_models/model.sql new file mode 100644 index 00000000000..e831275ddc1 --- /dev/null +++ b/test/integration/029_docs_generate_tests/bq_models/model.sql @@ -0,0 +1,7 @@ +{{ + config( + materialized='view' + ) +}} + +select * from {{ ref('seed') }} diff --git a/test/integration/029_docs_generate_tests/bq_models/seed.sql b/test/integration/029_docs_generate_tests/bq_models/seed.sql new file mode 100644 index 00000000000..012951927a6 --- /dev/null +++ b/test/integration/029_docs_generate_tests/bq_models/seed.sql @@ -0,0 +1,9 @@ +select + 1 as field_1, + 2 as field_2, + 3 as field_3, + + struct( + 4 as field_4, + 5 as field_5 + ) as nested_field diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 200f59131df..175ea0ef895 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -1,8 +1,7 @@ import json import os -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestDocsGenerate(DBTIntegrationTest): @@ -12,7 +11,7 @@ def setUp(self): @property def schema(self): - return 'simple_dependency_029' + return 'docs_generate_029' @staticmethod def dir(path): @@ -86,21 +85,7 @@ def verify_manifest_macros(self, manifest): } ) - def verify_manifest(self): - self.assertTrue(os.path.exists('./target/manifest.json')) - - with open('./target/manifest.json') as fp: - manifest = json.load(fp) - - self.assertEqual( - set(manifest), - {'nodes', 'macros', 'parent_map', 'child_map'} - ) - - self.verify_manifest_macros(manifest) - manifest_without_macros = { - k: v for k, v in manifest.items() if k != 'macros' - } + def expected_seeded_manifest(self): # the manifest should be consistent across DBs for this test model_sql_path = self.dir('models/model.sql') my_schema_name = self.unique_schema() @@ -164,12 +149,26 @@ def verify_manifest(self): 'seed.test.seed': ['model.test.model'], }, } - self.assertEqual(manifest_without_macros, expected_manifest) + def verify_manifest(self, expected_manifest): + self.assertTrue(os.path.exists('./target/manifest.json')) + + with open('./target/manifest.json') as fp: + manifest = json.load(fp) + + self.assertEqual( + set(manifest), + {'nodes', 'macros', 'parent_map', 'child_map'} + ) - @attr(type='postgres') + self.verify_manifest_macros(manifest) + manifest_without_macros = { + k: v for k, v in manifest.items() if k != 'macros' + } + self.assertEqual(manifest_without_macros, expected_manifest) + + @use_profile('postgres') def test__postgres__run_and_generate(self): - self.use_profile('postgres') self.run_and_generate() my_schema_name = self.unique_schema() expected_cols = [ @@ -225,11 +224,10 @@ def test__postgres__run_and_generate(self): }, } self.verify_catalog(expected_catalog) - self.verify_manifest() + self.verify_manifest(self.expected_seeded_manifest()) - @attr(type='snowflake') + @use_profile('snowflake') def test__snowflake__run_and_generate(self): - self.use_profile('snowflake') self.run_and_generate() my_schema_name = self.unique_schema() expected_cols = [ @@ -286,35 +284,34 @@ def test__snowflake__run_and_generate(self): } self.verify_catalog(expected_catalog) - self.verify_manifest() + self.verify_manifest(self.expected_seeded_manifest()) - @attr(type='bigquery') + @use_profile('bigquery') def test__bigquery__run_and_generate(self): - self.use_profile('bigquery') - self.run_and_generate() + self.run_and_generate({'data-paths': [self.dir("seed")]}) my_schema_name = self.unique_schema() expected_cols = [ { 'name': 'id', - 'index': 0, + 'index': 1, 'type': 'INTEGER', 'comment': None, }, { 'name': 'first_name', - 'index': 1, + 'index': 2, 'type': 'STRING', 'comment': None, }, { 'name': 'email', - 'index': 2, + 'index': 3, 'type': 'STRING', 'comment': None, }, { 'name': 'ip_address', - 'index': 3, + 'index': 4, 'type': 'STRING', 'comment': None, }, @@ -346,4 +343,69 @@ def test__bigquery__run_and_generate(self): }, } self.verify_catalog(expected_catalog) - self.verify_manifest() + self.verify_manifest(self.expected_seeded_manifest()) + + @use_profile('bigquery') + def test__bigquery__nested_models(self): + self.use_default_project({'source-paths': [self.dir('bq_models')]}) + + # actual test + self.assertEqual(len(self.run_dbt()), 2) + self.run_dbt(['docs', 'generate']) + + expected_cols = [ + { + "name": "field_1", + "index": 1, + "type": "INTEGER", + "comment": None + }, + { + "name": "field_2", + "index": 2, + "type": "INTEGER", + "comment": None + }, + { + "name": "field_3", + "index": 3, + "type": "INTEGER", + "comment": None + }, + { + "name": "nested_field.field_4", + "index": 4, + "type": "INTEGER", + "comment": None + }, + { + "name": "nested_field.field_5", + "index": 5, + "type": "INTEGER", + "comment": None + } + ] + catalog = { + my_schema_name: { + "model": { + "metadata": { + "schema": my_schema_name, + "name": "model", + "type": "view", + "comment": None + }, + "columns": expected_cols + }, + "seed": { + "metadata": { + "schema": my_schema_name, + "name": "seed", + "type": "view", + "comment": None + }, + "columns": expected_cols + } + } + } + self.verify_catalog(catalog) + self.verify_manifest({}) From ae9ee717b0ffbe02b83add0f340dd4ba83e56618 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Tue, 17 Jul 2018 15:55:52 -0600 Subject: [PATCH 17/18] ok, tests now work again --- dbt/adapters/bigquery/impl.py | 2 +- .../test_docs_generate.py | 104 +++++++++++++++--- 2 files changed, 88 insertions(+), 18 deletions(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 661753c7c84..5e36b21e775 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -621,7 +621,7 @@ def _flat_columns_in_table(cls, profile, project_cfg, schema_name, index, column pairs. """ cols = cls.get_columns_in_table(profile, project_cfg, - schema_name, relation.name) + schema_name, table_name) for col in cols: flattened = col.flatten() for subcol in flattened: diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 175ea0ef895..6d5be9f108d 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -89,7 +89,7 @@ def expected_seeded_manifest(self): # the manifest should be consistent across DBs for this test model_sql_path = self.dir('models/model.sql') my_schema_name = self.unique_schema() - expected_manifest = { + return { 'nodes': { 'model.test.model': { 'name': 'model', @@ -288,7 +288,7 @@ def test__snowflake__run_and_generate(self): @use_profile('bigquery') def test__bigquery__run_and_generate(self): - self.run_and_generate({'data-paths': [self.dir("seed")]}) + self.run_and_generate() my_schema_name = self.unique_schema() expected_cols = [ { @@ -317,7 +317,7 @@ def test__bigquery__run_and_generate(self): }, { 'name': 'updated_at', - 'index': 4, + 'index': 5, 'type': 'DATETIME', 'comment': None, }, @@ -349,10 +349,10 @@ def test__bigquery__run_and_generate(self): def test__bigquery__nested_models(self): self.use_default_project({'source-paths': [self.dir('bq_models')]}) - # actual test self.assertEqual(len(self.run_dbt()), 2) self.run_dbt(['docs', 'generate']) + my_schema_name = self.unique_schema() expected_cols = [ { "name": "field_1", @@ -386,26 +386,96 @@ def test__bigquery__nested_models(self): } ] catalog = { - my_schema_name: { "model": { - "metadata": { - "schema": my_schema_name, - "name": "model", - "type": "view", - "comment": None - }, - "columns": expected_cols + "metadata": { + "schema": my_schema_name, + "name": "model", + "type": "view", + "comment": None + }, + "columns": expected_cols }, "seed": { - "metadata": { + "metadata": { "schema": my_schema_name, "name": "seed", "type": "view", "comment": None - }, - "columns": expected_cols + }, + "columns": expected_cols } - } } self.verify_catalog(catalog) - self.verify_manifest({}) + model_sql_path = self.dir('bq_models/model.sql') + seed_sql_path = self.dir('bq_models/seed.sql') + expected_manifest = { + 'nodes': { + 'model.test.model': { + 'alias': 'model', + 'config': { + 'column_types': {}, + 'enabled': True, + 'materialized': 'view', + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, + 'vars': {} + }, + 'depends_on': { + 'macros': [], + 'nodes': ['model.test.seed'] + }, + 'empty': False, + 'fqn': ['test', 'model'], + 'name': 'model', + 'original_file_path': model_sql_path, + 'package_name': 'test', + 'path': 'model.sql', + 'raw_sql': open(model_sql_path).read().rstrip('\n'), + 'refs': [['seed']], + 'resource_type': 'model', + 'root_path': os.getcwd(), + 'schema': my_schema_name, + 'tags': [], + 'unique_id': 'model.test.model' + }, + 'model.test.seed': { + 'alias': 'seed', + 'config': { + 'column_types': {}, + 'enabled': True, + 'materialized': 'view', + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, + 'vars': {} + }, + 'depends_on': { + 'macros': [], + 'nodes': [] + }, + 'empty': False, + 'fqn': ['test', 'seed'], + 'name': 'seed', + 'original_file_path': seed_sql_path, + 'package_name': 'test', + 'path': 'seed.sql', + 'raw_sql': open(seed_sql_path).read().rstrip('\n'), + 'refs': [], + 'resource_type': 'model', + 'root_path': os.getcwd(), + 'schema': my_schema_name, + 'tags': [], + 'unique_id': 'model.test.seed' + } + }, + 'child_map': { + 'model.test.model': [], + 'model.test.seed': ['model.test.model'] + }, + 'parent_map': { + 'model.test.model': ['model.test.seed'], + 'model.test.seed': [] + }, + } + self.verify_manifest(expected_manifest) From 251cb19c723b918b80fc3573e7e294d045883c96 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 18 Jul 2018 09:58:21 -0600 Subject: [PATCH 18/18] update a docstring to be true again --- dbt/adapters/bigquery/impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 5e36b21e775..d3f2c27d7a9 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -617,8 +617,8 @@ def expand_target_column_types(cls, profile, project_cfg, temp_table, @classmethod def _flat_columns_in_table(cls, profile, project_cfg, schema_name, table_name): - """An iterator over the columns for a given schema and table. Yields - index, column pairs. + """An iterator over the flattened columns for a given schema and table. + Resolves child columns as having the name "parent.child". """ cols = cls.get_columns_in_table(profile, project_cfg, schema_name, table_name)