Skip to content

Commit

Permalink
Bigquery catalog generation (#830) (#857)
Browse files Browse the repository at this point in the history
  • Loading branch information
beckjake authored Jul 18, 2018
1 parent 568c82e commit e5bc9c0
Show file tree
Hide file tree
Showing 5 changed files with 318 additions and 26 deletions.
56 changes: 56 additions & 0 deletions dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,3 +613,59 @@ def expand_target_column_types(cls, profile, project_cfg, temp_table,
to_schema, to_table, model_name=None):
# This is a no-op on BigQuery
pass

@classmethod
def _flat_columns_in_table(cls, profile, project_cfg, schema_name,
                           table_name):
    """Lazily yield every flattened column of one table.

    The columns come from ``cls.get_columns_in_table`` for the given
    schema and table; each one is expanded with its ``flatten()`` method
    and the results are yielded one at a time, in order. Child columns
    are reported under the name "parent.child".
    """
    for top_level in cls.get_columns_in_table(profile, project_cfg,
                                              schema_name, table_name):
        for flat_column in top_level.flatten():
            yield flat_column

@classmethod
def get_catalog(cls, profile, project_cfg, manifest):
    """Build the catalog table for every schema used by the manifest.

    The distinct ``schema`` values of the manifest's nodes are collected,
    the relations in each schema are listed, and one row is emitted per
    flattened column of each relation. ``column_index`` is the 1-based
    position of the column within its relation's flattened columns.
    ``table_comment`` and ``column_comment`` are always None here.

    Returns whatever ``dbt.clients.agate_helper.table_from_data`` builds
    from the collected rows and the fixed column names.
    """
    header = (
        'table_schema',
        'table_name',
        'table_type',
        'table_comment',
        'column_name',
        'column_index',
        'column_type',
        'column_comment',
    )

    # De-duplicate: many nodes usually share the same schema.
    target_schemas = set()
    for node in manifest.nodes.values():
        target_schemas.add(node.to_dict()['schema'])

    rows = []
    for schema_name in target_schemas:
        relations = cls.list_relations(profile, project_cfg, schema_name)
        for relation in relations:
            flat_columns = cls._flat_columns_in_table(
                profile, project_cfg, schema_name, relation.name
            )
            position = 0
            for column in flat_columns:
                position += 1
                rows.append({
                    'table_schema': relation.schema,
                    'table_name': relation.name,
                    'table_type': relation.type,
                    'table_comment': None,
                    'column_name': column.name,
                    'column_index': position,
                    'column_type': column.data_type,
                    'column_comment': None,
                })

    return dbt.clients.agate_helper.table_from_data(rows, header)
7 changes: 7 additions & 0 deletions test/integration/029_docs_generate_tests/bq_models/model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{# Pass-through view over the `seed` model. #}
{{ config(materialized='view') }}

select *
from {{ ref('seed') }}
9 changes: 9 additions & 0 deletions test/integration/029_docs_generate_tests/bq_models/seed.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{# One literal row: three flat integer fields plus a two-field struct. #}
select
    1 as field_1,
    2 as field_2,
    3 as field_3,
    struct(4 as field_4, 5 as field_5) as nested_field
244 changes: 218 additions & 26 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import json
import os

from nose.plugins.attrib import attr
from test.integration.base import DBTIntegrationTest
from test.integration.base import DBTIntegrationTest, use_profile


class TestDocsGenerate(DBTIntegrationTest):
Expand All @@ -12,7 +11,7 @@ def setUp(self):

@property
def schema(self):
return 'simple_dependency_029'
return 'docs_generate_029'

@staticmethod
def dir(path):
Expand Down Expand Up @@ -86,25 +85,11 @@ def verify_manifest_macros(self, manifest):
}
)

def verify_manifest(self):
self.assertTrue(os.path.exists('./target/manifest.json'))

with open('./target/manifest.json') as fp:
manifest = json.load(fp)

self.assertEqual(
set(manifest),
{'nodes', 'macros', 'parent_map', 'child_map'}
)

self.verify_manifest_macros(manifest)
manifest_without_macros = {
k: v for k, v in manifest.items() if k != 'macros'
}
def expected_seeded_manifest(self):
# the manifest should be consistent across DBs for this test
model_sql_path = self.dir('models/model.sql')
my_schema_name = self.unique_schema()
expected_manifest = {
return {
'nodes': {
'model.test.model': {
'name': 'model',
Expand Down Expand Up @@ -164,12 +149,26 @@ def verify_manifest(self):
'seed.test.seed': ['model.test.model'],
},
}
self.assertEqual(manifest_without_macros, expected_manifest)

def verify_manifest(self, expected_manifest):
self.assertTrue(os.path.exists('./target/manifest.json'))

with open('./target/manifest.json') as fp:
manifest = json.load(fp)

self.assertEqual(
set(manifest),
{'nodes', 'macros', 'parent_map', 'child_map'}
)

@attr(type='postgres')
self.verify_manifest_macros(manifest)
manifest_without_macros = {
k: v for k, v in manifest.items() if k != 'macros'
}
self.assertEqual(manifest_without_macros, expected_manifest)

@use_profile('postgres')
def test__postgres__run_and_generate(self):
self.use_profile('postgres')
self.run_and_generate()
my_schema_name = self.unique_schema()
expected_cols = [
Expand Down Expand Up @@ -225,11 +224,10 @@ def test__postgres__run_and_generate(self):
},
}
self.verify_catalog(expected_catalog)
self.verify_manifest()
self.verify_manifest(self.expected_seeded_manifest())

@attr(type='snowflake')
@use_profile('snowflake')
def test__snowflake__run_and_generate(self):
self.use_profile('snowflake')
self.run_and_generate()
my_schema_name = self.unique_schema()
expected_cols = [
Expand Down Expand Up @@ -286,4 +284,198 @@ def test__snowflake__run_and_generate(self):
}

self.verify_catalog(expected_catalog)
self.verify_manifest()
self.verify_manifest(self.expected_seeded_manifest())

@use_profile('bigquery')
def test__bigquery__run_and_generate(self):
    """Run and generate docs on BigQuery, then check both artifacts.

    The catalog must list the same five columns for both the `model`
    view and the `seed` table; the manifest must equal the shared
    seeded manifest.
    """
    self.run_and_generate()
    schema_name = self.unique_schema()

    # (column name, BigQuery type), in the order the catalog reports them.
    column_specs = [
        ('id', 'INTEGER'),
        ('first_name', 'STRING'),
        ('email', 'STRING'),
        ('ip_address', 'STRING'),
        ('updated_at', 'DATETIME'),
    ]
    expected_cols = [
        {
            'name': col_name,
            'index': position,
            'type': col_type,
            'comment': None,
        }
        for position, (col_name, col_type)
        in enumerate(column_specs, start=1)
    ]

    def relation_entry(relation_name, relation_type):
        # Catalog entry for one relation in this test's schema.
        return {
            'metadata': {
                'schema': schema_name,
                'name': relation_name,
                'type': relation_type,
                'comment': None,
            },
            'columns': expected_cols,
        }

    expected_catalog = {
        'model': relation_entry('model', 'view'),
        'seed': relation_entry('seed', 'table'),
    }
    self.verify_catalog(expected_catalog)
    self.verify_manifest(self.expected_seeded_manifest())

@use_profile('bigquery')
def test__bigquery__nested_models(self):
    """Check docs generation for BigQuery models with a nested field.

    Runs the two models under `bq_models` (both materialized as views),
    generates docs, and then verifies that:

    * the catalog reports the struct's children as flattened columns
      named "nested_field.field_4" and "nested_field.field_5", and
    * the manifest describes both models and their dependency.
    """
    self.use_default_project({'source-paths': [self.dir('bq_models')]})

    self.assertEqual(len(self.run_dbt()), 2)
    self.run_dbt(['docs', 'generate'])

    my_schema_name = self.unique_schema()

    # Flattened column names in catalog order; every column is INTEGER.
    column_names = [
        'field_1',
        'field_2',
        'field_3',
        'nested_field.field_4',
        'nested_field.field_5',
    ]
    expected_cols = [
        {
            'name': column_name,
            'index': position,
            'type': 'INTEGER',
            'comment': None,
        }
        for position, column_name in enumerate(column_names, start=1)
    ]

    def expected_relation(relation_name):
        # Catalog entry for one relation; both relations are views here.
        return {
            'metadata': {
                'schema': my_schema_name,
                'name': relation_name,
                'type': 'view',
                'comment': None,
            },
            'columns': expected_cols,
        }

    catalog = {
        'model': expected_relation('model'),
        'seed': expected_relation('seed'),
    }
    self.verify_catalog(catalog)

    def expected_node(node_name, parent_ids, refs):
        # Manifest entry for one model under bq_models.
        sql_path = self.dir('bq_models/{}.sql'.format(node_name))
        # Close the file explicitly instead of leaking the handle.
        with open(sql_path) as fp:
            raw_sql = fp.read().rstrip('\n')
        return {
            'alias': node_name,
            'config': {
                'column_types': {},
                'enabled': True,
                'materialized': 'view',
                'post-hook': [],
                'pre-hook': [],
                'quoting': {},
                'vars': {},
            },
            'depends_on': {
                'macros': [],
                'nodes': parent_ids,
            },
            'empty': False,
            'fqn': ['test', node_name],
            'name': node_name,
            'original_file_path': sql_path,
            'package_name': 'test',
            'path': '{}.sql'.format(node_name),
            'raw_sql': raw_sql,
            'refs': refs,
            'resource_type': 'model',
            'root_path': os.getcwd(),
            'schema': my_schema_name,
            'tags': [],
            'unique_id': 'model.test.{}'.format(node_name),
        }

    expected_manifest = {
        'nodes': {
            'model.test.model': expected_node(
                'model', ['model.test.seed'], [['seed']]
            ),
            'model.test.seed': expected_node('seed', [], []),
        },
        'child_map': {
            'model.test.model': [],
            'model.test.seed': ['model.test.model'],
        },
        'parent_map': {
            'model.test.model': ['model.test.seed'],
            'model.test.seed': [],
        },
    }
    self.verify_manifest(expected_manifest)
Loading

0 comments on commit e5bc9c0

Please sign in to comment.