Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bigquery catalog generation (#830) #857

Merged
merged 25 commits into from
Jul 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
046942b
Write out some SQL
jwerderits Jul 11, 2018
4921294
Merge branch 'development' into snowflake-get-catalog
jwerderits Jul 12, 2018
22d8ad6
Add snowflake tests, rework existing postgres to use dbt seed + dbt r…
jwerderits Jul 12, 2018
5d97937
Add tests for dbt run generating a manifest
jwerderits Jul 12, 2018
7832322
Implement get_catalog for snowflake, move adapter-side logic into the…
jwerderits Jul 12, 2018
b1ab19f
I never remember to run pep8
jwerderits Jul 12, 2018
3bfa6bb
Fix up paths so they may work in CI as well as locally
jwerderits Jul 13, 2018
85605dd
Merge branch 'development' into snowflake-get-catalog
jwerderits Jul 13, 2018
461da2f
Remove misleading comment, explicitly order postgres results like I di…
jwerderits Jul 13, 2018
0e3edf1
Merge branch 'dev/isaac-asimov' into snowflake-get-catalog
jwerderits Jul 16, 2018
85a4413
PR feedback: union all and dbt schemas only
jwerderits Jul 16, 2018
d4e2cfd
bigquery catalog/manifest support
jwerderits Jul 16, 2018
1454572
pep8
jwerderits Jul 16, 2018
d4597df
Don't need to union anything here
jwerderits Jul 16, 2018
94a4102
Merge branch 'snowflake-get-catalog' into bigquery-catalog-generation
jwerderits Jul 16, 2018
26df721
Windows path nonsense
jwerderits Jul 16, 2018
5d27b2b
Merge branch 'snowflake-get-catalog' into bigquery-catalog-generation
jwerderits Jul 16, 2018
f5d5bbc
Merge branch 'dev/isaac-asimov' into bigquery-catalog-generation
jwerderits Jul 17, 2018
7fb6e95
Remove extra line
jwerderits Jul 17, 2018
ad55c4c
Merge branch 'dev/isaac-asimov' into bigquery-catalog-generation
jwerderits Jul 17, 2018
694c508
Handle nested records properly in bigquery
jwerderits Jul 17, 2018
75f0a22
Add a little decorator to do attr + use_profile
jwerderits Jul 17, 2018
e7a4641
Add new BQ test for nested records, manifest not complete
jwerderits Jul 17, 2018
ae9ee71
ok, tests now work again
jwerderits Jul 17, 2018
251cb19
update a docstring to be true again
jwerderits Jul 18, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,3 +613,59 @@ def expand_target_column_types(cls, profile, project_cfg, temp_table,
to_schema, to_table, model_name=None):
# This is a no-op on BigQuery
pass

@classmethod
def _flat_columns_in_table(cls, profile, project_cfg, schema_name,
                           table_name):
    """Yield each column of the given table with nested columns expanded.

    The columns reported by ``get_columns_in_table`` are each asked to
    ``flatten()`` themselves, and every resulting sub-column is yielded in
    order. Child columns are resolved as having the name "parent.child".
    """
    top_level = cls.get_columns_in_table(profile, project_cfg,
                                         schema_name, table_name)
    for parent in top_level:
        for leaf in parent.flatten():
            yield leaf

@classmethod
def get_catalog(cls, profile, project_cfg, manifest):
    """Build the catalog table for every schema named in the manifest.

    For each distinct schema found on the manifest's nodes, every relation
    listed in that schema contributes one row per flattened column. Table
    and column comments are always emitted as None.

    Returns the result of ``table_from_data`` over the collected rows.
    """
    column_names = (
        'table_schema',
        'table_name',
        'table_type',
        'table_comment',
        'column_name',
        'column_index',
        'column_type',
        'column_comment',
    )

    # Collect the distinct schemas that manifest nodes live in.
    schemas = set()
    for node in manifest.nodes.values():
        schemas.add(node.to_dict()['schema'])

    rows = []
    for schema_name in schemas:
        for relation in cls.list_relations(profile, project_cfg,
                                           schema_name):
            flat_columns = cls._flat_columns_in_table(
                profile, project_cfg, schema_name, relation.name
            )
            # Column indexes are 1-based positions in the flattened order.
            for position, column in enumerate(flat_columns, start=1):
                rows.append({
                    'table_schema': relation.schema,
                    'table_name': relation.name,
                    'table_type': relation.type,
                    'table_comment': None,
                    'column_name': column.name,
                    'column_index': position,
                    'column_type': column.data_type,
                    'column_comment': None,
                })

    return dbt.clients.agate_helper.table_from_data(rows, column_names)
7 changes: 7 additions & 0 deletions test/integration/029_docs_generate_tests/bq_models/model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Materialized as a view; selects every column of the `seed` model unchanged.
{{
config(
materialized='view'
)
}}

select * from {{ ref('seed') }}
9 changes: 9 additions & 0 deletions test/integration/029_docs_generate_tests/bq_models/seed.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Single-row fixture: three integer columns plus one struct column
-- (nested_field) that holds two further integer fields.
select
1 as field_1,
2 as field_2,
3 as field_3,

struct(
4 as field_4,
5 as field_5
) as nested_field
244 changes: 218 additions & 26 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import json
import os

from nose.plugins.attrib import attr
from test.integration.base import DBTIntegrationTest
from test.integration.base import DBTIntegrationTest, use_profile


class TestDocsGenerate(DBTIntegrationTest):
Expand All @@ -12,7 +11,7 @@ def setUp(self):

@property
def schema(self):
return 'simple_dependency_029'
return 'docs_generate_029'

@staticmethod
def dir(path):
Expand Down Expand Up @@ -86,25 +85,11 @@ def verify_manifest_macros(self, manifest):
}
)

def verify_manifest(self):
self.assertTrue(os.path.exists('./target/manifest.json'))

with open('./target/manifest.json') as fp:
manifest = json.load(fp)

self.assertEqual(
set(manifest),
{'nodes', 'macros', 'parent_map', 'child_map'}
)

self.verify_manifest_macros(manifest)
manifest_without_macros = {
k: v for k, v in manifest.items() if k != 'macros'
}
def expected_seeded_manifest(self):
# the manifest should be consistent across DBs for this test
model_sql_path = self.dir('models/model.sql')
my_schema_name = self.unique_schema()
expected_manifest = {
return {
'nodes': {
'model.test.model': {
'name': 'model',
Expand Down Expand Up @@ -164,12 +149,26 @@ def verify_manifest(self):
'seed.test.seed': ['model.test.model'],
},
}
self.assertEqual(manifest_without_macros, expected_manifest)

def verify_manifest(self, expected_manifest):
self.assertTrue(os.path.exists('./target/manifest.json'))

with open('./target/manifest.json') as fp:
manifest = json.load(fp)

self.assertEqual(
set(manifest),
{'nodes', 'macros', 'parent_map', 'child_map'}
)

@attr(type='postgres')
self.verify_manifest_macros(manifest)
manifest_without_macros = {
k: v for k, v in manifest.items() if k != 'macros'
}
self.assertEqual(manifest_without_macros, expected_manifest)

@use_profile('postgres')
def test__postgres__run_and_generate(self):
self.use_profile('postgres')
self.run_and_generate()
my_schema_name = self.unique_schema()
expected_cols = [
Expand Down Expand Up @@ -225,11 +224,10 @@ def test__postgres__run_and_generate(self):
},
}
self.verify_catalog(expected_catalog)
self.verify_manifest()
self.verify_manifest(self.expected_seeded_manifest())

@attr(type='snowflake')
@use_profile('snowflake')
def test__snowflake__run_and_generate(self):
self.use_profile('snowflake')
self.run_and_generate()
my_schema_name = self.unique_schema()
expected_cols = [
Expand Down Expand Up @@ -286,4 +284,198 @@ def test__snowflake__run_and_generate(self):
}

self.verify_catalog(expected_catalog)
self.verify_manifest()
self.verify_manifest(self.expected_seeded_manifest())

@use_profile('bigquery')
def test__bigquery__run_and_generate(self):
    """Run and generate docs on BigQuery, then check catalog and manifest.

    Both the ``model`` view and the ``seed`` table are expected to expose
    the same five columns; the manifest is compared against the shared
    seeded manifest.
    """
    self.run_and_generate()
    my_schema_name = self.unique_schema()

    # (column name, BigQuery type) in the expected 1-based column order.
    column_specs = (
        ('id', 'INTEGER'),
        ('first_name', 'STRING'),
        ('email', 'STRING'),
        ('ip_address', 'STRING'),
        ('updated_at', 'DATETIME'),
    )
    expected_cols = [
        {
            'name': col_name,
            'index': position,
            'type': col_type,
            'comment': None,
        }
        for position, (col_name, col_type) in enumerate(column_specs,
                                                        start=1)
    ]

    def catalog_entry(relation_name, relation_type):
        # Both relations share the same expected column list.
        return {
            'metadata': {
                'schema': my_schema_name,
                'name': relation_name,
                'type': relation_type,
                'comment': None,
            },
            'columns': expected_cols,
        }

    expected_catalog = {
        'model': catalog_entry('model', 'view'),
        'seed': catalog_entry('seed', 'table'),
    }
    self.verify_catalog(expected_catalog)
    self.verify_manifest(self.expected_seeded_manifest())

@use_profile('bigquery')
def test__bigquery__nested_models(self):
    """Check catalog and manifest output for models with nested records.

    Runs the two models under ``bq_models`` and generates docs, then
    verifies that the struct column is reported as flattened
    "parent.child" columns in the catalog and that the manifest describes
    both models as views.
    """
    self.use_default_project({'source-paths': [self.dir('bq_models')]})

    self.assertEqual(len(self.run_dbt()), 2)
    self.run_dbt(['docs', 'generate'])

    my_schema_name = self.unique_schema()
    expected_cols = [
        {
            "name": "field_1",
            "index": 1,
            "type": "INTEGER",
            "comment": None
        },
        {
            "name": "field_2",
            "index": 2,
            "type": "INTEGER",
            "comment": None
        },
        {
            "name": "field_3",
            "index": 3,
            "type": "INTEGER",
            "comment": None
        },
        {
            "name": "nested_field.field_4",
            "index": 4,
            "type": "INTEGER",
            "comment": None
        },
        {
            "name": "nested_field.field_5",
            "index": 5,
            "type": "INTEGER",
            "comment": None
        }
    ]
    catalog = {
        "model": {
            "metadata": {
                "schema": my_schema_name,
                "name": "model",
                "type": "view",
                "comment": None
            },
            "columns": expected_cols
        },
        "seed": {
            "metadata": {
                "schema": my_schema_name,
                "name": "seed",
                "type": "view",
                "comment": None
            },
            "columns": expected_cols
        }
    }
    self.verify_catalog(catalog)

    def read_raw_sql(path):
        # Read through a context manager so the file handle is closed
        # promptly instead of being left for the garbage collector.
        with open(path) as fp:
            return fp.read().rstrip('\n')

    model_sql_path = self.dir('bq_models/model.sql')
    seed_sql_path = self.dir('bq_models/seed.sql')
    expected_manifest = {
        'nodes': {
            'model.test.model': {
                'alias': 'model',
                'config': {
                    'column_types': {},
                    'enabled': True,
                    'materialized': 'view',
                    'post-hook': [],
                    'pre-hook': [],
                    'quoting': {},
                    'vars': {}
                },
                'depends_on': {
                    'macros': [],
                    'nodes': ['model.test.seed']
                },
                'empty': False,
                'fqn': ['test', 'model'],
                'name': 'model',
                'original_file_path': model_sql_path,
                'package_name': 'test',
                'path': 'model.sql',
                'raw_sql': read_raw_sql(model_sql_path),
                'refs': [['seed']],
                'resource_type': 'model',
                'root_path': os.getcwd(),
                'schema': my_schema_name,
                'tags': [],
                'unique_id': 'model.test.model'
            },
            'model.test.seed': {
                'alias': 'seed',
                'config': {
                    'column_types': {},
                    'enabled': True,
                    'materialized': 'view',
                    'post-hook': [],
                    'pre-hook': [],
                    'quoting': {},
                    'vars': {}
                },
                'depends_on': {
                    'macros': [],
                    'nodes': []
                },
                'empty': False,
                'fqn': ['test', 'seed'],
                'name': 'seed',
                'original_file_path': seed_sql_path,
                'package_name': 'test',
                'path': 'seed.sql',
                'raw_sql': read_raw_sql(seed_sql_path),
                'refs': [],
                'resource_type': 'model',
                'root_path': os.getcwd(),
                'schema': my_schema_name,
                'tags': [],
                'unique_id': 'model.test.seed'
            }
        },
        'child_map': {
            'model.test.model': [],
            'model.test.seed': ['model.test.model']
        },
        'parent_map': {
            'model.test.model': ['model.test.seed'],
            'model.test.seed': []
        },
    }
    self.verify_manifest(expected_manifest)
Loading