Skip to content

Commit

Permalink
Merge pull request #1410 from fishtown-analytics/feature/test-severity
Browse files Browse the repository at this point in the history
Add a "severity" for tests (#1005)
  • Loading branch information
beckjake authored Apr 30, 2019
2 parents 0fb620c + abcbaca commit 154aae5
Show file tree
Hide file tree
Showing 11 changed files with 322 additions and 181 deletions.
5 changes: 4 additions & 1 deletion core/dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,14 @@
}
]
},
'severity': {
'enum': ['ERROR', 'WARN'],
},
},
'required': [
'enabled', 'materialized', 'post-hook', 'pre-hook', 'vars',
'quoting', 'column_types', 'tags'
]
],
}


Expand Down
10 changes: 10 additions & 0 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,16 @@ def warn_or_error(msg, node=None, log_fmt=None):
logger.warning(msg)


def warn_or_raise(exc, log_fmt=None):
    """Raise ``exc`` or log it as a warning, depending on the WARN_ERROR flag.

    :param exc: The exception to raise, or whose text is logged.
    :param log_fmt: Optional format string. When given, the exception text
        is passed to ``log_fmt.format`` as its only positional argument and
        the result is what gets logged.
    :raises: ``exc`` itself when ``dbt.flags.WARN_ERROR`` is truthy.
    """
    if dbt.flags.WARN_ERROR:
        raise exc

    text = str(exc)
    warning = text if log_fmt is None else log_fmt.format(text)
    logger.warning(warning)


# Update this when a new function should be added to the
# dbt context's `exceptions` key!
CONTEXT_EXPORTS = {
Expand Down
264 changes: 140 additions & 124 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,109 +67,144 @@ def as_kwarg(key, value):
return "{key}={value}".format(key=key, value=formatted_value)


def build_test_raw_sql(test_namespace, model, test_type, test_args):
    """Build the raw SQL (a jinja macro call) for a test on a model.

    :param test_namespace: The test's namespace, or None if it has none
    :param model: The model under test; its 'name' entry is passed to ref()
    :param test_type: The type of the test (unique_id, etc)
    :param test_args: Mapping of argument name to value for the test macro
    :return: A string of raw sql for the test node.
    """
    # Walk the keys in sorted order so the rendered call is deterministic
    # (for tests).
    rendered_args = ", ".join(
        as_kwarg(arg_name, test_args[arg_name])
        for arg_name in sorted(test_args)
    )

    macro = (
        "test_{}".format(test_type)
        if test_namespace is None
        else "{}.test_{}".format(test_namespace, test_type)
    )

    template = "{{{{ {macro}(model=ref('{model}'), {kwargs}) }}}}"
    return template.format(
        model=model['name'],
        macro=macro,
        kwargs=rendered_args
    )
class TestBuilder(object):
"""An object to hold assorted test settings and perform basic parsing
Test names have the following pattern:
- the test name itself may be namespaced (package.test)
- or it may not be namespaced (test)
- the test may have arguments embedded in the name (, severity=WARN)
- or it may not have arguments.
def build_source_test_raw_sql(test_namespace, source, table, test_type,
test_args):
"""Build the raw SQL from a source test definition.
:param test_namespace: The test's namespace, if one exists
:param source: The source under test.
:param table: The table under test
:param test_type: The type of the test (unique_id, etc)
:param test_args: The arguments passed to the test as a list of `key=value`
strings
:return: A string of raw sql for the test node.
"""
# sort the dict so the keys are rendered deterministically (for tests)
kwargs = [as_kwarg(key, test_args[key]) for key in sorted(test_args)]

if test_namespace is None:
macro_name = "test_{}".format(test_type)
else:
macro_name = "{}.test_{}".format(test_namespace, test_type)

raw_sql = (
"{{{{ {macro}(model=source('{source}', '{table}'), {kwargs}) }}}}"
.format(
source=source['name'],
table=table['name'],
macro=macro_name,
kwargs=", ".join(kwargs))
TEST_NAME_PATTERN = re.compile(
r'((?P<test_namespace>([a-zA-Z_][0-9a-zA-Z_]*))\.)?'
r'(?P<test_name>([a-zA-Z_][0-9a-zA-Z_]*))'
)
return raw_sql
# map magic keys to default values
MODIFIER_ARGS = {'severity': 'ERROR'}

def __init__(self, test, target, column_name, package_name):
test_name, test_args = self.extract_test_args(test, column_name)
self.args = test_args
self.package_name = package_name
self.target = target

match = self.TEST_NAME_PATTERN.match(test_name)
if match is None:
dbt.exceptions.raise_compiler_error(
'Test name string did not match expected pattern: {}'
.format(test_name)
)

def calculate_test_namespace(test_type, package_name):
test_namespace = None
split = test_type.split('.')
if len(split) > 1:
test_type = split[1]
package_name = split[0]
test_namespace = package_name
groups = match.groupdict()
self.name = groups['test_name']
self.namespace = groups['test_namespace']
self.modifiers = {}
for key, default in self.MODIFIER_ARGS.items():
self.modifiers[key] = self.args.pop(key, default)

return test_namespace, test_type, package_name
if self.namespace is not None:
self.package_name = self.namespace

@staticmethod
def extract_test_args(test, name=None):
if not isinstance(test, dict):
dbt.exceptions.raise_compiler_error(
'test must be dict or str, got {} (value {})'.format(
type(test), test
)
)

def _build_test_args(test, name):
if isinstance(test, basestring):
test_name = test
test_args = {}
elif isinstance(test, dict):
test = list(test.items())
if len(test) != 1:
dbt.exceptions.raise_compiler_error(
'test definition dictionary must have exactly one key, got'
' {} instead ({} keys)'.format(test, len(test))
)
test_name, test_args = test[0]
else:
dbt.exceptions.raise_compiler_error(
'test must be dict or str, got {} (value {})'.format(
type(test), test

if not isinstance(test_args, dict):
dbt.exceptions.raise_compiler_error(
'test arguments must be dict, got {} (value {})'.format(
type(test_args), test_args
)
)
)
if not isinstance(test_args, dict):
dbt.exceptions.raise_compiler_error(
'test arguments must be dict, got {} (value {})'.format(
type(test_args), test_args
if not isinstance(test_name, basestring):
dbt.exceptions.raise_compiler_error(
'test name must be a str, got {} (value {})'.format(
type(test_name), test_name
)
)
if name is not None:
test_args['column_name'] = name
return test_name, test_args

def severity(self):
    """Return the stored 'severity' modifier, upper-cased.

    'ERROR' is used when no 'severity' entry is present in
    ``self.modifiers``.
    """
    configured = self.modifiers.get('severity', 'ERROR')
    return configured.upper()

def test_kwargs_str(self):
    """Render ``self.args`` as a single ', '-separated string of kwargs.

    Each entry is formatted by ``as_kwarg``.
    """
    # Sorted key order keeps the rendered string deterministic (for tests).
    rendered = [
        as_kwarg(arg_name, self.args[arg_name])
        for arg_name in sorted(self.args)
    ]
    return ', '.join(rendered)

def macro_name(self):
    """Return the name of the macro to call for this test.

    The result is ``test_<name>``, prefixed with ``<namespace>.`` when the
    test has a namespace.
    """
    base = 'test_{}'.format(self.name)
    if self.namespace is None:
        return base
    return "{}.{}".format(self.namespace, base)

def build_model_str(self):
    """Return the expression passed as ``model=`` in the generated macro call.

    The base class provides no implementation; subclasses must override.

    :raises NotImplementedError: always.
    """
    message = 'build_model_str not implemented!'
    raise NotImplementedError(message)

def get_test_name(self):
    """Return the name(s) for the generated test node.

    The base class provides no implementation; subclasses must override.

    :raises NotImplementedError: always.
    """
    message = 'get_test_name not implemented!'
    raise NotImplementedError(message)

def build_raw_sql(self):
return (
"{{{{ config(severity='{severity}') }}}}"
"{{{{ {macro}(model={model}, {kwargs}) }}}}"
).format(
model=self.build_model_str(),
macro=self.macro_name(),
kwargs=self.test_kwargs_str(),
severity=self.severity()
)
if not isinstance(test_name, basestring):
dbt.exceptions.raise_compiler_error(
'test name must be a str, got {} (value {})'.format(
type(test_name), test_name
)


class RefTestBuilder(TestBuilder):
    """Test builder for tests whose target is a model referenced via ref()."""

    def build_model_str(self):
        """Return the ``ref('<model name>')`` expression for the target."""
        return "ref('{}')".format(self.target['name'])

    def get_test_name(self):
        """Return the result of ``get_nice_schema_test_name`` for this test.

        The caller in ``build_test_node`` unpacks it as a
        (hashed_name, full_name) pair.
        """
        return get_nice_schema_test_name(self.name,
                                         self.target['name'],
                                         self.args)

    def describe_test_target(self):
        """Return a short description of the target, e.g. ``model "orders"``.

        Used when building error messages about the test.
        """
        # Use only the model's name: formatting the whole target would embed
        # the entire unparsed model definition in the message.
        return 'model "{}"'.format(self.target['name'])


class SourceTestBuilder(TestBuilder):
def build_model_str(self):
    """Return the ``source('<source>', '<table>')`` expression for the target.

    Both names are read from the 'name' entries of ``self.target['source']``
    and ``self.target['table']``.
    """
    source_name = self.target['source']['name']
    table_name = self.target['table']['name']
    return "source('{}', '{}')".format(source_name, table_name)
if name is not None:
test_args['column_name'] = name
return test_name, test_args

def get_test_name(self):
    """Return the result of ``get_nice_schema_test_name`` for a source test.

    The test name is prefixed with ``source_`` and the target name is
    ``<source name>_<table name>``.
    """
    source_name = self.target['source']['name']
    table_name = self.target['table']['name']
    qualified_target = '{}_{}'.format(source_name, table_name)
    prefixed_name = 'source_' + self.name
    return get_nice_schema_test_name(prefixed_name,
                                     qualified_target,
                                     self.args)

def describe_test_target(self):
    """Return a short description of the target, e.g. ``source "raw.events"``.

    Used when building error messages about the test.
    """
    # The 'source' and 'table' entries are themselves mappings (their 'name'
    # is what build_model_str reads), so index down to the names instead of
    # formatting the whole definitions into the message.
    return 'source "{0[source][name]}.{0[table][name]}"'.format(self.target)


def warn_invalid(filepath, key, value, explain):
Expand Down Expand Up @@ -212,6 +247,8 @@ def add(self, column_name, description):


class SchemaBaseTestParser(MacrosKnownParser):
Builder = TestBuilder

def _parse_column(self, target, column, package_name, root_dir, path,
refs):
# this should yield ParsedNodes where resource_type == NodeType.Test
Expand All @@ -237,53 +274,38 @@ def _parse_column(self, target, column, package_name, root_dir, path,
)
continue

def _build_raw_sql(self, test_namespace, target, test_type, test_args):
raise NotImplementedError

def _generate_test_name(self, target, test_type, test_args):
"""Returns a hashed_name, full_name pair."""
raise NotImplementedError

@staticmethod
def _describe_test_target(test_target):
    """Describe the test target for use in messages; subclasses must override.

    :raises NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError()

def build_test_node(self, test_target, package_name, test, root_dir, path,
column_name=None):
"""Build a test node against the given target (a model or a source).
:param test_target: An unparsed form of the target.
"""
test_type, test_args = _build_test_args(test, column_name)
if isinstance(test, basestring):
test = {test: {}}

test_namespace, test_type, package_name = calculate_test_namespace(
test_type, package_name
)
test_info = self.Builder(test, test_target, column_name, package_name)

source_package = self.all_projects.get(package_name)
source_package = self.all_projects.get(test_info.package_name)
if source_package is None:
desc = '"{}" test on {}'.format(
test_type, self._describe_test_target(test_target)
test_info.name, test_info.describe_test_target()
)
dbt.exceptions.raise_dep_not_found(None, desc, test_namespace)
dbt.exceptions.raise_dep_not_found(None, desc, test_info.namespace)

test_path = os.path.basename(path)

hashed_name, full_name = self._generate_test_name(test_target,
test_type,
test_args)
hashed_name, full_name = test_info.get_test_name()

hashed_path = get_pseudo_test_path(hashed_name, test_path,
'schema_test')

full_path = get_pseudo_test_path(full_name, test_path, 'schema_test')
raw_sql = self._build_raw_sql(test_namespace, test_target, test_type,
test_args)
raw_sql = test_info.build_raw_sql()

unparsed = UnparsedNode(
name=full_name,
resource_type=NodeType.Test,
package_name=package_name,
package_name=test_info.package_name,
root_path=root_dir,
path=hashed_path,
original_file_path=path,
Expand Down Expand Up @@ -318,15 +340,7 @@ def build_test_node(self, test_target, package_name, test, root_dir, path,


class SchemaModelParser(SchemaBaseTestParser):
def _build_raw_sql(self, test_namespace, target, test_type, test_args):
    """Build the raw SQL for a model test by delegating to
    ``build_test_raw_sql`` with the same arguments.
    """
    raw_sql = build_test_raw_sql(
        test_namespace, target, test_type, test_args
    )
    return raw_sql

def _generate_test_name(self, target, test_type, test_args):
    """Return the result of ``get_nice_schema_test_name`` for a model test,
    using the target's 'name' entry as the target name.
    """
    model_name = target['name']
    return get_nice_schema_test_name(test_type, model_name, test_args)

@staticmethod
def _describe_test_target(test_target):
    """Return a short description of the model under test, e.g.
    ``model "orders"``, for use in messages.

    :param test_target: The unparsed model definition; only its 'name'
        entry is used.
    """
    # Use only the name: formatting the whole target would embed the entire
    # unparsed model definition in the message.
    return 'model "{}"'.format(test_target['name'])
Builder = RefTestBuilder

def parse_models_entry(self, model_dict, path, package_name, root_dir):
model_name = model_dict['name']
Expand Down Expand Up @@ -381,6 +395,8 @@ def parse_all(self, models, path, package_name, root_dir):


class SchemaSourceParser(SchemaBaseTestParser):
Builder = SourceTestBuilder

def __init__(self, root_project_config, all_projects, macro_manifest):
super(SchemaSourceParser, self).__init__(
root_project_config=root_project_config,
Expand All @@ -389,16 +405,16 @@ def __init__(self, root_project_config, all_projects, macro_manifest):
)
self._renderer = ConfigRenderer(self.root_project_config.cli_vars)

def _build_raw_sql(self, test_namespace, target, test_type, test_args):
    """Build the raw SQL for a source test.

    ``target`` carries the source under its 'source' key and the table
    under its 'table' key; both are handed to
    ``build_source_test_raw_sql``.
    """
    source = target['source']
    table = target['table']
    return build_source_test_raw_sql(
        test_namespace, source, table, test_type, test_args
    )
def _build_raw_sql(self, test_info):
return test_info.build_source_test_raw_sql()

def _generate_test_name(self, target, test_type, test_args):
def _generate_test_name(self, test_info):
target_name = '{}_{}'.format(test_info.target['source']['name'],
test_info.target['table']['name'])
return get_nice_schema_test_name(
'source_' + test_type,
'{}_{}'.format(target['source']['name'], target['table']['name']),
test_args
'source_' + test_info.name,
target_name,
test_info.args
)

@staticmethod
Expand Down
Loading

0 comments on commit 154aae5

Please sign in to comment.