Skip to content

Commit

Permalink
Convert dbt to use dataclasses and hologram for representing things
Browse files Browse the repository at this point in the history
Most of the things that previously used manually created jsonschemas
Split tests into their own node type
Change tests to reflect that tables require a freshness block
add a lot more debug-logging on exceptions
Make things that get passed to Var() tell it about their vars
finally make .empty a property
documentation resource type is now a property, not serialized
added a Mergeable helper mixin to perform simple merges
Convert some oneOf checks into if-else chains to get better errors
Add more tests
Use "Any" as value in type defs
 - accept the warning from hologram for now, PR out to suppress it
set default values for enabled/materialized
Clean up the Parsed/Compiled type hierarchy
Allow generic snapshot definitions
remove the "graph" entry in the context
 - This improves performance on large projects significantly
Update changelog to reflect removing graph
  • Loading branch information
Jacob Beck committed Jul 16, 2019
1 parent 10be7ba commit 49f7cf8
Show file tree
Hide file tree
Showing 85 changed files with 5,556 additions and 4,416 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## dbt ? - Louisa May Alcott

### Breaking changes
- the undocumented "graph" variable was removed from the parsing context ([#1589](https://github.com/fishtown-analytics/dbt/pull/1589))

## dbt 0.14.0 - Wilt Chamberlain (July 10, 2019)

### Overview
Expand Down
58 changes: 29 additions & 29 deletions core/dbt/adapters/base/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,26 @@

import dbt.exceptions
import dbt.flags
from dbt.api import APIObject
from dbt.contracts.connection import Connection
from dbt.contracts.util import Replaceable
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import translate_aliases

from hologram.helpers import ExtensibleJsonSchemaMixin

class Credentials(APIObject):
"""Common base class for credentials. This is not valid to instantiate"""
SCHEMA = NotImplemented
# map credential aliases to their canonical names.
ALIASES = {}
from dataclasses import dataclass, field
from typing import Any, ClassVar, Dict, Tuple

def __init__(self, **kwargs):
renamed = self.translate_aliases(kwargs)
super().__init__(**renamed)

@property
@dataclass
class Credentials(
ExtensibleJsonSchemaMixin,
Replaceable,
metaclass=abc.ABCMeta
):
_ALIASES: ClassVar[Dict[str, str]] = field(default={}, init=False)

@abc.abstractproperty
def type(self):
raise NotImplementedError(
'type not implemented for base credentials class'
Expand All @@ -30,37 +33,34 @@ def type(self):
def connection_info(self):
"""Return an ordered iterator of key/value pairs for pretty-printing.
"""
as_dict = self.to_dict()
for key in self._connection_keys():
if key in self._contents:
yield key, self._contents[key]
if key in as_dict:
yield key, as_dict[key]

def _connection_keys(self):
"""The credential object keys that should be printed to users in
'dbt debug' output. This is specific to each adapter.
"""
@abc.abstractmethod
def _connection_keys(self) -> Tuple[str, ...]:
raise NotImplementedError

def incorporate(self, **kwargs):
# implementation note: we have to do this here, or
# incorporate(alias_name=...) will result in duplicate keys in the
# merged dict that APIObject.incorporate() creates.
renamed = self.translate_aliases(kwargs)
return super().incorporate(**renamed)
@classmethod
def from_dict(cls, data):
data = cls.translate_aliases(data)
return super().from_dict(data)

@classmethod
def translate_aliases(cls, kwargs: Dict[str, Any]) -> Dict[str, Any]:
return translate_aliases(kwargs, cls._ALIASES)

def serialize(self, with_aliases=False):
serialized = super().serialize()
def to_dict(self, omit_none=True, validate=False, with_aliases=False):
serialized = super().to_dict(omit_none=omit_none, validate=validate)
if with_aliases:
serialized.update({
new_name: serialized[canonical_name]
for new_name, canonical_name in self.ALIASES.items()
for new_name, canonical_name in self._ALIASES.items()
if canonical_name in serialized
})
return serialized

@classmethod
def translate_aliases(cls, kwargs):
return translate_aliases(kwargs, cls.ALIASES)


class BaseConnectionManager(metaclass=abc.ABCMeta):
"""Methods to implement:
Expand Down
8 changes: 4 additions & 4 deletions core/dbt/adapters/base/relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def quoted(self, identifier):
def create_from_source(cls, source, **kwargs):
quote_policy = dbt.utils.deep_merge(
cls.DEFAULTS['quote_policy'],
source.quoting,
source.quoting.to_dict(),
kwargs.get('quote_policy', {})
)
return cls.create(
Expand All @@ -240,9 +240,9 @@ def create_from_node(cls, config, node, table_name=None, quote_policy=None,
quote_policy = dbt.utils.merge(config.quoting, quote_policy)

return cls.create(
database=node.get('database'),
schema=node.get('schema'),
identifier=node.get('alias'),
database=node.database,
schema=node.schema,
identifier=node.alias,
table_name=table_name,
quote_policy=quote_policy,
**kwargs)
Expand Down
2 changes: 1 addition & 1 deletion core/dbt/adapters/sql/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def begin(self):
if connection.transaction_open is True:
raise dbt.exceptions.InternalException(
'Tried to begin a new transaction on connection "{}", but '
'it already had one open!'.format(connection.get('name')))
'it already had one open!'.format(connection.name))

self.add_begin_query()

Expand Down
18 changes: 9 additions & 9 deletions core/dbt/clients/jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ def __init__(self):
self.file_cache = {}

def get_node_template(self, node):
key = (node['package_name'], node['original_file_path'])
key = (node.package_name, node.original_file_path)

if key in self.file_cache:
return self.file_cache[key]

template = get_template(
string=node.get('raw_sql'),
string=node.raw_sql,
ctx={},
node=node
node=node,
)
self.file_cache[key] = template

Expand All @@ -92,7 +92,7 @@ def clear(self):
def macro_generator(node):
def apply_context(context):
def call(*args, **kwargs):
name = node.get('name')
name = node.name
template = template_cache.get_node_template(node)
module = template.make_module(context, False, context)

Expand Down Expand Up @@ -178,21 +178,21 @@ def __init__(self, hint=None, obj=None, name=None, exc=None):
super().__init__(hint=hint, name=name)
self.node = node
self.name = name
self.package_name = node.get('package_name')
self.package_name = node.package_name
# jinja uses these for safety, so we have to override them.
# see https://github.com/pallets/jinja/blob/master/jinja2/sandbox.py#L332-L339 # noqa
self.unsafe_callable = False
self.alters_data = False

def __deepcopy__(self, memo):
path = os.path.join(self.node.get('root_path'),
self.node.get('original_file_path'))
path = os.path.join(self.node.root_path,
self.node.original_file_path)

logger.debug(
'dbt encountered an undefined variable, "{}" in node {}.{} '
'(source path: {})'
.format(self.name, self.node.get('package_name'),
self.node.get('name'), path))
.format(self.name, self.node.package_name,
self.node.name, path))

# match jinja's message
dbt.exceptions.raise_compiler_error(
Expand Down
31 changes: 19 additions & 12 deletions core/dbt/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,21 @@
import dbt.flags
import dbt.loader
import dbt.config
from dbt.contracts.graph.compiled import CompiledNode
from dbt.contracts.graph.compiled import InjectedCTE, CompiledNode, \
CompiledTestNode

from dbt.logger import GLOBAL_LOGGER as logger

graph_file_name = 'graph.gpickle'


def _compiled_type_for(model):
    """Return the compiled node class that corresponds to *model*'s resource type."""
    # Tests compile into their own node type; every other resource shares
    # the generic CompiledNode.
    if model.resource_type == NodeType.Test:
        return CompiledTestNode
    return CompiledNode


def print_compile_stats(stats):
names = {
NodeType.Model: 'models',
Expand All @@ -44,9 +52,9 @@ def print_compile_stats(stats):


def _add_prepended_cte(prepended_ctes, new_cte):
for dct in prepended_ctes:
if dct['id'] == new_cte['id']:
dct['sql'] = new_cte['sql']
for cte in prepended_ctes:
if cte.id == new_cte.id:
cte.sql = new_cte.sql
return
prepended_ctes.append(new_cte)

Expand All @@ -67,20 +75,19 @@ def recursively_prepend_ctes(model, manifest):
return (model, model.extra_ctes, manifest)

if dbt.flags.STRICT_MODE:
# ensure that the cte we're adding to is compiled
CompiledNode(**model.serialize())
assert isinstance(model, (CompiledNode, CompiledTestNode))

prepended_ctes = []

for cte in model.extra_ctes:
cte_id = cte['id']
cte_id = cte.id
cte_to_add = manifest.nodes.get(cte_id)
cte_to_add, new_prepended_ctes, manifest = recursively_prepend_ctes(
cte_to_add, manifest)
_extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
new_cte_name = '__dbt__CTE__{}'.format(cte_to_add.get('name'))
new_cte_name = '__dbt__CTE__{}'.format(cte_to_add.name)
sql = ' {} as (\n{}\n)'.format(new_cte_name, cte_to_add.compiled_sql)
_add_prepended_cte(prepended_ctes, {'id': cte_id, 'sql': sql})
_add_prepended_cte(prepended_ctes, InjectedCTE(id=cte_id, sql=sql))

model.prepend_ctes(prepended_ctes)

Expand All @@ -101,7 +108,7 @@ def compile_node(self, node, manifest, extra_context=None):
if extra_context is None:
extra_context = {}

logger.debug("Compiling {}".format(node.get('unique_id')))
logger.debug("Compiling {}".format(node.unique_id))

data = node.to_dict()
data.update({
Expand All @@ -111,14 +118,14 @@ def compile_node(self, node, manifest, extra_context=None):
'extra_ctes': [],
'injected_sql': None,
})
compiled_node = CompiledNode(**data)
compiled_node = _compiled_type_for(node).from_dict(data)

context = dbt.context.runtime.generate(
compiled_node, self.config, manifest)
context.update(extra_context)

compiled_node.compiled_sql = dbt.clients.jinja.get_rendered(
node.get('raw_sql'),
node.raw_sql,
context,
node)

Expand Down
2 changes: 1 addition & 1 deletion core/dbt/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# all these are just exports, they need "noqa" so flake8 will not complain.
from .renderer import ConfigRenderer # noqa
from .profile import Profile, UserConfig, PROFILES_DIR # noqa
from .profile import Profile, PROFILES_DIR, read_user_config # noqa
from .project import Project # noqa
from .runtime import RuntimeConfig # noqa
Loading

0 comments on commit 49f7cf8

Please sign in to comment.