From 08842e5ad87ed3f502199e03bd70433cc5427054 Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Wed, 25 Jan 2017 19:49:24 -0500 Subject: [PATCH 01/11] add a simple set of graph unit tests for models only --- Dockerfile | 2 +- dbt/clients/system.py | 48 +++++++ dbt/compilation.py | 19 +-- dbt/model.py | 5 +- dbt/source.py | 129 ++++++++++-------- dev_requirements.txt | 1 - .../test_simple_dependency_with_configs.py | 3 +- test/unit/test_dependencies.py | 108 +++++++++++++++ 8 files changed, 244 insertions(+), 71 deletions(-) create mode 100644 dbt/clients/system.py create mode 100644 test/unit/test_dependencies.py diff --git a/Dockerfile b/Dockerfile index 58574511148..410823589f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.5 +FROM python:3.6 RUN apt-get update diff --git a/dbt/clients/system.py b/dbt/clients/system.py new file mode 100644 index 00000000000..9ff020aa69d --- /dev/null +++ b/dbt/clients/system.py @@ -0,0 +1,48 @@ +import fnmatch +import os +import os.path + +def find_matching(root_path, + relative_paths_to_search, + file_pattern): + """ + Given an absolute `root_path`, a list of relative paths to that + absolute root path (`relative_paths_to_search`), and a `file_pattern` + like '*.sql', returns information about the files. 
For example: + + > find_matching('/root/path', ['models'], '*.sql') + + [ { 'absolute_path': '/root/path/models/model_one.sql', + 'relative_path': 'model_one.sql', + 'searched_path': 'models' }, + { 'absolute_path': '/root/path/models/subdirectory/model_two.sql', + 'relative_path': 'subdirectory/model_two.sql', + 'searched_path': 'models' } ] + """ + matching = [] + + for relative_path_to_search in relative_paths_to_search: + absolute_path_to_search = os.path.join( + root_path, relative_path_to_search) + walk_results = os.walk(absolute_path_to_search) + + for current_path, subdirectories, local_files in walk_results: + for local_file in local_files: + absolute_path = os.path.join(current_path, local_file) + relative_path = os.path.relpath( + absolute_path, absolute_path_to_search) + + if fnmatch.fnmatch(local_file, file_pattern): + matching.append({ + 'searched_path': relative_path_to_search, + 'absolute_path': absolute_path, + 'relative_path': relative_path, + }) + + return matching + +def load_file_contents(path): + with open(path, 'r') as handle: + to_return = handle.read() + + return to_return.strip() diff --git a/dbt/compilation.py b/dbt/compilation.py index 56b5e408bdc..a0c1a8a24df 100644 --- a/dbt/compilation.py +++ b/dbt/compilation.py @@ -6,10 +6,11 @@ import sqlparse import dbt.project +import dbt.utils + from dbt.source import Source from dbt.utils import find_model_by_fqn, find_model_by_name, \ - dependency_projects, split_path, This, Var, compiler_error, \ - to_string + split_path, This, Var, compiler_error, to_string from dbt.linker import Linker from dbt.runtime import RuntimeContext @@ -229,7 +230,7 @@ def wrapped_do_ref(*args): return wrapped_do_ref - def get_context(self, linker, model, models, add_dependency=False): + def get_context(self, linker, model, models, add_dependency=False): runtime = RuntimeContext(model=model) context = self.project.context() @@ -272,10 +273,10 @@ def compile_model(self, linker, model, models, 
add_dependency=True): fs_loader = jinja2.FileSystemLoader(searchpath=model.root_dir) jinja = jinja2.Environment(loader=fs_loader) - # this is a dumb jinja2 bug -- on windows, forward slashes - # are EXPECTED - posix_filepath = '/'.join(split_path(model.rel_filepath)) - template = jinja.get_template(posix_filepath) + template_contents = dbt.clients.system.load_file_contents( + model.filepath) + + template = jinja.from_string(template_contents) context = self.get_context( linker, model, models, add_dependency=add_dependency ) @@ -521,7 +522,7 @@ def compile_archives(self): def get_models(self): all_models = self.model_sources(this_project=self.project) - for project in dependency_projects(self.project): + for project in dbt.utils.dependency_projects(self.project): all_models.extend( self.model_sources( this_project=self.project, own_project=project @@ -536,7 +537,7 @@ def compile(self, limit_to=None): all_models = self.get_models() all_macros = self.get_macros(this_project=self.project) - for project in dependency_projects(self.project): + for project in dbt.utils.dependency_projects(self.project): all_macros.extend( self.get_macros(this_project=self.project, own_project=project) ) diff --git a/dbt/model.py b/dbt/model.py index 3832faf6bc1..65ca4095b48 100644 --- a/dbt/model.py +++ b/dbt/model.py @@ -11,7 +11,6 @@ from dbt.utils import deep_merge, DBTConfigKeys, compiler_error, \ compiler_warning - class SourceConfig(object): Materializations = ['view', 'table', 'incremental', 'ephemeral'] ConfigKeys = DBTConfigKeys @@ -230,9 +229,7 @@ def serialize(self): @property def contents(self): - filepath = os.path.join(self.root_dir, self.rel_filepath) - with open(filepath) as fh: - return fh.read().strip() + return dbt.clients.system.load_file_contents(self.filepath) @property def config(self): diff --git a/dbt/source.py b/dbt/source.py index 0a6e5c40cd5..7c05719eefc 100644 --- a/dbt/source.py +++ b/dbt/source.py @@ -3,6 +3,8 @@ from dbt.model import Model, Analysis, 
TestModel, SchemaFile, Csv, Macro, \ ArchiveModel, DataTest +import dbt.clients.system + class Source(object): def __init__(self, project, own_project=None): @@ -15,72 +17,91 @@ def __init__(self, project, own_project=None): self.own_project_root = self.own_project['project-root'] self.own_project_name = self.own_project['name'] - def find(self, source_paths, file_pattern): - """returns abspath, relpath, filename of files matching file_regex in - source_paths""" - found = [] - - if type(source_paths) not in (list, tuple): - source_paths = [source_paths] - - for source_path in source_paths: - root_path = os.path.join(self.own_project_root, source_path) - for root, dirs, files in os.walk(root_path): - for filename in files: - abs_path = os.path.join(root, filename) - rel_path = os.path.relpath(abs_path, root_path) - - if fnmatch.fnmatch(filename, file_pattern): - found.append( - (self.project, - source_path, - rel_path, - self.own_project) - ) - return found + def build_models_from_file_matches( + self, + to_build, + file_matches, + extra_args=[]): + + build_args = [[self.project, + file_match.get('searched_path'), + file_match.get('relative_path'), + self.own_project] + extra_args + for file_match in file_matches] + + return [to_build(*args) for args in build_args] def get_models(self, model_dirs, create_template): - pattern = "[!.#~]*.sql" - models = [Model(*model + (create_template,)) - for model in self.find(model_dirs, pattern)] - return models + file_matches = dbt.clients.system.find_matching( + self.own_project_root, + model_dirs, + "[!.#~]*.sql") + + return self.build_models_from_file_matches( + Model, + file_matches, + [create_template]) def get_test_models(self, model_dirs, create_template): - pattern = "[!.#~]*.sql" - models = [TestModel(*model + (create_template,)) - for model in self.find(model_dirs, pattern)] - return models + file_matches = dbt.clients.system.find_matching( + self.own_project_root, + model_dirs, + "[!.#~]*.sql") + + return 
self.build_models_from_file_matches( + TestModel, + file_matches, + [create_template]) def get_analyses(self, analysis_dirs): - pattern = "[!.#~]*.sql" - models = [Analysis(*analysis) - for analysis in self.find(analysis_dirs, pattern)] - return models - - def get_schemas(self, model_dirs): - "Get schema.yml files" - pattern = "[!.#~]*.yml" - schemas = [SchemaFile(*schema) - for schema in self.find(model_dirs, pattern)] - return schemas + file_matches = dbt.clients.system.find_matching( + self.own_project_root, + analysis_dirs, + "[!.#~]*.sql") + + return self.build_models_from_file_matches( + Analysis, + file_matches) + + def get_schemas(self, schema_dirs): + file_matches = dbt.clients.system.find_matching( + self.own_project_root, + schema_dirs, + "[!.#~]*.yml") + + return self.build_models_from_file_matches( + SchemaFile, + file_matches) def get_tests(self, test_dirs): - "Get custom test files" - pattern = "[!.#~]*.sql" - tests = [DataTest(*test) for test in self.find(test_dirs, pattern)] - return tests + file_matches = dbt.clients.system.find_matching( + self.own_project_root, + test_dirs, + "[!.#~]*.sql") + + return self.build_models_from_file_matches( + DataTest, + file_matches) def get_csvs(self, csv_dirs): - "Get CSV files" - pattern = "[!.#~]*.csv" - csvs = [Csv(*csv) for csv in self.find(csv_dirs, pattern)] - return csvs + file_matches = dbt.clients.system.find_matching( + self.own_project_root, + csv_dirs, + "[!.#~]*.csv") + + return self.build_models_from_file_matches( + Csv, + file_matches) def get_macros(self, macro_dirs): - "Get Macro files" - pattern = "[!.#~]*.sql" - macros = [Macro(*macro) for macro in self.find(macro_dirs, pattern)] - return macros + file_matches = dbt.clients.system.find_matching( + self.own_project_root, + macro_dirs, + "[!.#~]*.sql") + + return self.build_models_from_file_matches( + Macro, + file_matches) def get_archives(self, create_template): "Get Archive models defined in project config" diff --git a/dev_requirements.txt 
b/dev_requirements.txt index 04af909e836..bd576fdfb5c 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,5 +1,4 @@ nose>=1.3.7 -nosy>=1.1.2 mock>=1.3.0 pep8>=1.6.2 bumpversion==0.5.3 diff --git a/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py b/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py index 2d4daaf7ef4..971c06bbefd 100644 --- a/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py +++ b/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py @@ -91,8 +91,7 @@ def project_config(self): "vars": { "config_1": "ghi", "config_2": "jkl", - "bool_config": True - + "bool_config": True, } } } diff --git a/test/unit/test_dependencies.py b/test/unit/test_dependencies.py new file mode 100644 index 00000000000..2e8a04e708b --- /dev/null +++ b/test/unit/test_dependencies.py @@ -0,0 +1,108 @@ +from mock import MagicMock, patch, PropertyMock +import unittest + +import dbt.model +import dbt.project +import dbt.templates +import dbt.utils + +import networkx as nx + +import dbt.compilation + +from dbt.logger import GLOBAL_LOGGER as logger + +class DependencyTest(unittest.TestCase): + def setUp(self): + def mock_write_yaml(graph, outfile): + self.graph_result = graph + + nx.write_yaml = mock_write_yaml + self.graph_result = None + + self.profiles = { + 'test': { + 'outputs': { + 'test': { + 'type': 'postgres', + 'threads': 4, + 'host': 'database', + 'port': 5432, + 'user': 'root', + 'pass': 'password', + 'dbname': 'dbt', + 'schema': 'dbt_test' + } + }, + 'target': 'test' + } + } + + self.project = dbt.project.Project( + cfg={ + 'name': 'test_models_compile', + 'version': '0.1', + 'profile': 'test', + 'project-root': '/fake', + }, + profiles=self.profiles, + profiles_dir=None) + + self.project.validate() + + self.compiler = dbt.compilation.Compiler( + self.project, + dbt.templates.BaseCreateTemplate, + {}) + + self.compiler.get_macros = 
MagicMock(return_value=[]) + + dbt.utils.dependency_projects = MagicMock(return_value=[]) + + + def use_models(self, models): + dbt.clients.system.find_matching = MagicMock( + return_value=[{'searched_path': 'models', + 'absolute_path': '/fake/models/{}.sql'.format(k), + 'relative_path': '{}.sql'.format(k)} + for k, v in models.items()]) + + def mock_load_file_contents(path): + k = path.split('/')[-1].split('.')[0] + return models[k] + + dbt.clients.system.load_file_contents = MagicMock( + side_effect=mock_load_file_contents) + + + def test_single_model(self): + self.use_models({ + 'model_one': 'select * from events', + }) + + self.compiler.compile(limit_to=['models']) + + self.assertEquals( + self.graph_result.nodes(), + [('test_models_compile', 'model_one')]) + + self.assertEquals( + self.graph_result.edges(), []) + + def test_two_models_simple_ref(self): + self.use_models({ + 'model_one': 'select * from events', + 'model_two': "select * from {{ref('model_one')}}", + }) + + self.compiler.compile(limit_to=['models']) + + self.assertEquals( + self.graph_result.nodes(), + [('test_models_compile', 'model_one'), + ('test_models_compile', 'model_two'),]) + + self.assertEquals( + self.graph_result.edges(), + [(('test_models_compile', 'model_one'), + ('test_models_compile', 'model_two')),]) From 3579cc9cb5e2943cae55acb5121c19ee7e34dc2d Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Wed, 25 Jan 2017 20:01:52 -0500 Subject: [PATCH 02/11] fix tests --- dbt/compilation.py | 2 +- dbt/model.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dbt/compilation.py b/dbt/compilation.py index a0c1a8a24df..459f5e64560 100644 --- a/dbt/compilation.py +++ b/dbt/compilation.py @@ -274,7 +274,7 @@ def compile_model(self, linker, model, models, add_dependency=True): jinja = jinja2.Environment(loader=fs_loader) template_contents = dbt.clients.system.load_file_contents( - model.filepath) + model.absolute_path) template = jinja.from_string(template_contents) 
context = self.get_context( diff --git a/dbt/model.py b/dbt/model.py index 65ca4095b48..5684372b182 100644 --- a/dbt/model.py +++ b/dbt/model.py @@ -201,6 +201,10 @@ def __init__(self, project, top_dir, rel_filepath, own_project): self.source_config = SourceConfig(project, own_project, self.fqn) + @property + def absolute_path(self): + return os.path.join(self.root_dir, self.rel_filepath) + @property def root_dir(self): return os.path.join(self.own_project['project-root'], self.top_dir) @@ -229,7 +233,7 @@ def serialize(self): @property def contents(self): - return dbt.clients.system.load_file_contents(self.filepath) + return dbt.clients.system.load_file_contents(self.absolute_path) @property def config(self): From efb204feb13bd356bd1466e3509ca1a90237b110 Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Wed, 25 Jan 2017 20:05:42 -0500 Subject: [PATCH 03/11] this is called the graph, not dependencies --- test/unit/{test_dependencies.py => test_graph.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename test/unit/{test_dependencies.py => test_graph.py} (98%) diff --git a/test/unit/test_dependencies.py b/test/unit/test_graph.py similarity index 98% rename from test/unit/test_dependencies.py rename to test/unit/test_graph.py index 2e8a04e708b..0b5bf7a95ab 100644 --- a/test/unit/test_dependencies.py +++ b/test/unit/test_graph.py @@ -12,7 +12,7 @@ from dbt.logger import GLOBAL_LOGGER as logger -class DependencyTest(unittest.TestCase): +class GraphTest(unittest.TestCase): def setUp(self): def mock_write_yaml(graph, outfile): self.graph_result = graph From a76b5b9a5740a88da6f33ecc2d87c92a43339698 Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Wed, 25 Jan 2017 20:06:21 -0500 Subject: [PATCH 04/11] thanks drew! 
--- dbt/clients/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 dbt/clients/__init__.py diff --git a/dbt/clients/__init__.py b/dbt/clients/__init__.py new file mode 100644 index 00000000000..e69de29bb2d From 072ae943b9a9e7876b6c7b55837c0579f1190f33 Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Thu, 26 Jan 2017 09:46:50 -0500 Subject: [PATCH 05/11] fix tests --- dbt/clients/system.py | 13 +++++++++---- dbt/model.py | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/dbt/clients/system.py b/dbt/clients/system.py index 9ff020aa69d..45142baf7ff 100644 --- a/dbt/clients/system.py +++ b/dbt/clients/system.py @@ -2,6 +2,7 @@ import os import os.path + def find_matching(root_path, relative_paths_to_search, file_pattern): @@ -41,8 +42,12 @@ def find_matching(root_path, return matching -def load_file_contents(path): - with open(path, 'r') as handle: - to_return = handle.read() - return to_return.strip() +def load_file_contents(path, strip=True): + with open(path, 'rb') as handle: + to_return = handle.read().decode('utf-8') + + if strip: + to_return = to_return.strip() + + return to_return diff --git a/dbt/model.py b/dbt/model.py index 5684372b182..daa89755d85 100644 --- a/dbt/model.py +++ b/dbt/model.py @@ -1,4 +1,3 @@ - import os.path import yaml import jinja2 @@ -11,6 +10,7 @@ from dbt.utils import deep_merge, DBTConfigKeys, compiler_error, \ compiler_warning + class SourceConfig(object): Materializations = ['view', 'table', 'incremental', 'ephemeral'] ConfigKeys = DBTConfigKeys From 233182fe2b5c76077a3197578aba49fd3fa830bc Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Thu, 26 Jan 2017 13:14:16 -0500 Subject: [PATCH 06/11] use six for py2/3 compat --- .../009_data_tests_test/test_data_tests.py | 1 - test/unit/test_graph.py | 48 +++++++++++++------ 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/test/integration/009_data_tests_test/test_data_tests.py 
b/test/integration/009_data_tests_test/test_data_tests.py index dc579f30fd3..13bbb5fa88e 100644 --- a/test/integration/009_data_tests_test/test_data_tests.py +++ b/test/integration/009_data_tests_test/test_data_tests.py @@ -43,7 +43,6 @@ def test_data_tests(self): self.run_dbt() test_results = self.run_data_validations() - for result in test_results: # assert that all deliberately failing tests actually fail if 'fail' in result.model.name: diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index 0b5bf7a95ab..917f30ccace 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -1,6 +1,9 @@ -from mock import MagicMock, patch, PropertyMock +from mock import MagicMock +import os +import six import unittest +import dbt.compilation import dbt.model import dbt.project import dbt.templates @@ -8,8 +11,6 @@ import networkx as nx -import dbt.compilation - from dbt.logger import GLOBAL_LOGGER as logger class GraphTest(unittest.TestCase): @@ -59,23 +60,40 @@ def mock_write_yaml(graph, outfile): dbt.utils.dependency_projects = MagicMock(return_value=[]) + self.mock_models = [] + self.mock_content = {} + + def mock_find_matching(root_path, relative_paths_to_search, + file_pattern): + if not 'sql' in file_pattern: + return [] + + to_return = [] + + if 'models' in relative_paths_to_search: + to_return = to_return + self.mock_models + + return to_return - def use_models(self, models): dbt.clients.system.find_matching = MagicMock( - return_value=[{'searched_path': 'models', - 'absolute_path': '/fake/models/{}.sql'.format(k), - 'relative_path': '{}.sql'.format(k)} - for k, v in models.items()]) + side_effect=mock_find_matching) def mock_load_file_contents(path): - k = path.split('/')[-1].split('.')[0] - return models[k] + return self.mock_content[path] dbt.clients.system.load_file_contents = MagicMock( side_effect=mock_load_file_contents) - - def test_single_model(self): + def use_models(self, models): + for k, v in models.items(): + path = 
'/fake/models/{}.sql'.format(k) + self.mock_models.append({ + 'searched_path': 'models', + 'absolute_path': os.path.abspath(path), + 'relative_path': '{}.sql'.format(k)}) + self.mock_content[path] = v + + def test__single_model(self): self.use_models({ 'model_one': 'select * from events', }) @@ -89,7 +107,7 @@ def test_single_model(self): self.assertEquals( self.graph_result.edges(), []) - def test_two_models_simple_ref(self): + def test__two_models_simple_ref(self): self.use_models({ 'model_one': 'select * from events', 'model_two': "select * from {{ref('model_one')}}", @@ -97,12 +115,12 @@ def test_two_models_simple_ref(self): self.compiler.compile(limit_to=['models']) - self.assertEquals( + six.assertCountEqual(self, self.graph_result.nodes(), [('test_models_compile', 'model_one'), ('test_models_compile', 'model_two'),]) - self.assertEquals( + six.assertCountEqual(self, self.graph_result.edges(), [(('test_models_compile', 'model_one'), ('test_models_compile', 'model_two')),]) From 1278434f4d6d382c256edd52b304e0b108fa3b16 Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Thu, 26 Jan 2017 13:37:44 -0500 Subject: [PATCH 07/11] windows :( --- test/unit/test_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index 917f30ccace..70a94ff180d 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -86,10 +86,10 @@ def mock_load_file_contents(path): def use_models(self, models): for k, v in models.items(): - path = '/fake/models/{}.sql'.format(k) + path = os.path.abspath('/fake/models/{}.sql'.format(k)) self.mock_models.append({ 'searched_path': 'models', - 'absolute_path': os.path.abspath(path), + 'absolute_path': path, 'relative_path': '{}.sql'.format(k)}) self.mock_content[path] = v From da82757d82ac237fd78b2dc0014a39c6fdb71b5a Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Thu, 26 Jan 2017 14:17:05 -0500 Subject: [PATCH 08/11] i think this will actually fix windows --- 
test/unit/test_graph.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index 70a94ff180d..267ee6ccfd2 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -44,7 +44,7 @@ def mock_write_yaml(graph, outfile): 'name': 'test_models_compile', 'version': '0.1', 'profile': 'test', - 'project-root': '/fake', + 'project-root': os.path.abspath('.'), }, profiles=self.profiles, profiles_dir=None) @@ -86,7 +86,7 @@ def mock_load_file_contents(path): def use_models(self, models): for k, v in models.items(): - path = os.path.abspath('/fake/models/{}.sql'.format(k)) + path = os.path.abspath('models/{}.sql'.format(k)) self.mock_models.append({ 'searched_path': 'models', 'absolute_path': path, @@ -105,7 +105,8 @@ def test__single_model(self): [('test_models_compile', 'model_one')]) self.assertEquals( - self.graph_result.edges(), []) + self.graph_result.edges(), + []) def test__two_models_simple_ref(self): self.use_models({ From ca85dc3f9a604499ee301b8f21cb826ad9be1142 Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Thu, 26 Jan 2017 14:25:10 -0500 Subject: [PATCH 09/11] restore module functions after mocking them in setUp --- test/unit/test_graph.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index 267ee6ccfd2..643107badac 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -14,11 +14,20 @@ from dbt.logger import GLOBAL_LOGGER as logger class GraphTest(unittest.TestCase): + + def tearDown(self): + nx.write_yaml = self.real_write_yaml + dbt.utils.dependency_projects = self.real_dependency_projects + dbt.clients.system.find_matching = self.real_find_matching + dbt.clients.system.load_file_contents = self.real_load_file_contents + def setUp(self): def mock_write_yaml(graph, outfile): self.graph_result = graph + self.real_write_yaml = nx.write_yaml nx.write_yaml = mock_write_yaml + self.graph_result = 
None self.profiles = { @@ -58,6 +67,7 @@ def mock_write_yaml(graph, outfile): self.compiler.get_macros = MagicMock(return_value=[]) + self.real_dependency_projects = dbt.utils.dependency_projects dbt.utils.dependency_projects = MagicMock(return_value=[]) self.mock_models = [] @@ -75,12 +85,14 @@ def mock_find_matching(root_path, relative_paths_to_search, return to_return + self.real_find_matching = dbt.clients.system.find_matching dbt.clients.system.find_matching = MagicMock( side_effect=mock_find_matching) def mock_load_file_contents(path): return self.mock_content[path] + self.real_load_file_contents = dbt.clients.system.load_file_contents dbt.clients.system.load_file_contents = MagicMock( side_effect=mock_load_file_contents) From 20701ba915b652ec7f08d10473a6d1450be33a05 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 1 Feb 2017 23:37:20 -0500 Subject: [PATCH 10/11] add materialization unit tests for the graph --- test/unit/test_graph.py | 171 ++++++++++++++++++++++++++++++++++------ 1 file changed, 149 insertions(+), 22 deletions(-) diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index 643107badac..e6d359f3eb9 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -13,6 +13,11 @@ from dbt.logger import GLOBAL_LOGGER as logger +class FakeArgs: + + def __init__(self): + self.full_refresh = False + class GraphTest(unittest.TestCase): def tearDown(self): @@ -48,25 +53,6 @@ def mock_write_yaml(graph, outfile): } } - self.project = dbt.project.Project( - cfg={ - 'name': 'test_models_compile', - 'version': '0.1', - 'profile': 'test', - 'project-root': os.path.abspath('.'), - }, - profiles=self.profiles, - profiles_dir=None) - - self.project.validate() - - self.compiler = dbt.compilation.Compiler( - self.project, - dbt.templates.BaseCreateTemplate, - {}) - - self.compiler.get_macros = MagicMock(return_value=[]) - self.real_dependency_projects = dbt.utils.dependency_projects dbt.utils.dependency_projects = MagicMock(return_value=[]) @@ 
-75,7 +61,7 @@ def mock_write_yaml(graph, outfile): def mock_find_matching(root_path, relative_paths_to_search, file_pattern): - if not 'sql' in file_pattern: + if 'sql' not in file_pattern: return [] to_return = [] @@ -96,6 +82,35 @@ def mock_load_file_contents(path): dbt.clients.system.load_file_contents = MagicMock( side_effect=mock_load_file_contents) + def get_project(self, extra_cfg=None): + if extra_cfg is None: + extra_cfg = {} + + cfg = { + 'name': 'test_models_compile', + 'version': '0.1', + 'profile': 'test', + 'project-root': os.path.abspath('.'), + } + cfg.update(extra_cfg) + + project = dbt.project.Project( + cfg=cfg, + profiles=self.profiles, + profiles_dir=None) + + project.validate() + return project + + def get_compiler(self, project): + compiler = dbt.compilation.Compiler( + project, + dbt.templates.BaseCreateTemplate, + FakeArgs()) + + compiler.get_macros = MagicMock(return_value=[]) + return compiler + def use_models(self, models): for k, v in models.items(): path = os.path.abspath('models/{}.sql'.format(k)) @@ -110,7 +125,8 @@ def test__single_model(self): 'model_one': 'select * from events', }) - self.compiler.compile(limit_to=['models']) + compiler = self.get_compiler(self.get_project()) + compiler.compile(limit_to=['models']) self.assertEquals( self.graph_result.nodes(), @@ -126,7 +142,8 @@ def test__two_models_simple_ref(self): 'model_two': "select * from {{ref('model_one')}}", }) - self.compiler.compile(limit_to=['models']) + compiler = self.get_compiler(self.get_project()) + compiler.compile(limit_to=['models']) six.assertCountEqual(self, self.graph_result.nodes(), @@ -137,3 +154,113 @@ def test__two_models_simple_ref(self): self.graph_result.edges(), [(('test_models_compile', 'model_one'), ('test_models_compile', 'model_two')),]) + + def test__model_materializations(self): + self.use_models({ + 'model_one': 'select * from events', + 'model_two': "select * from {{ref('model_one')}}", + 'model_three': "select * from events", + 
'model_four': "select * from events", + }) + + cfg = { + "models": { + "materialized": "table", + "test_models_compile": { + "model_one": { "materialized": "table" }, + "model_two": { "materialized": "view" }, + "model_three": { "materialized": "ephemeral" } + } + } + } + + compiler = self.get_compiler(self.get_project(cfg)) + compiler.compile(limit_to=['models']) + + expected_materialization = { + "model_one": "table", + "model_two": "view", + "model_three": "ephemeral", + "model_four": "table" + } + + nodes = self.graph_result.node + + for model, expected in expected_materialization.items(): + actual = nodes[("test_models_compile", model)]["materialized"] + self.assertEquals(actual, expected) + + + def test__model_enabled(self): + self.use_models({ + 'model_one': 'select * from events', + 'model_two': "select * from {{ref('model_one')}}", + }) + + cfg = { + "models": { + "materialized": "table", + "test_models_compile": { + "model_one": { "enabled": True }, + "model_two": { "enabled": False }, + } + } + } + + compiler = self.get_compiler(self.get_project(cfg)) + compiler.compile(limit_to=['models']) + + + six.assertCountEqual(self, + self.graph_result.nodes(), [('test_models_compile', 'model_one')]) + + six.assertCountEqual(self, self.graph_result.edges(), []) + + def test__model_incremental_without_sql_where_fails(self): + self.use_models({ + 'model_one': 'select * from events' + }) + + cfg = { + "models": { + "materialized": "table", + "test_models_compile": { + "model_one": { "materialized": "incremental" }, + } + } + } + + compiler = self.get_compiler(self.get_project(cfg)) + + with self.assertRaises(RuntimeError) as context: + compiler.compile(limit_to=['models']) + + def test__model_incremental(self): + self.use_models({ + 'model_one': 'select * from events' + }) + + cfg = { + "models": { + "test_models_compile": { + "model_one": { + "materialized": "incremental", + "sql_where": "TRUE", + "unique_key": "TRUE" + }, + } + } + } + + compiler = 
self.get_compiler(self.get_project(cfg)) + compiler.compile(limit_to=['models']) + + node = ('test_models_compile', 'model_one') + + self.assertEqual(self.graph_result.nodes(), [node]) + self.assertEqual(self.graph_result.edges(), []) + + self.assertEqual( + self.graph_result.node[node]['materialized'], + 'incremental') + From 9997afb5e2c12ee20fde5193d82fd808b1d9b9be Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sat, 4 Feb 2017 22:15:10 -0500 Subject: [PATCH 11/11] test topological sort + dependency list --- Makefile | 4 ++ test/unit/test_graph.py | 141 ++++++++++++++++++++++++++++++++++------ 2 files changed, 125 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index ca3201ece20..738768bebdb 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,10 @@ changed_tests := `git status --porcelain | grep '^\(M\| M\|A\| A\)' | awk '{ print $$2 }' | grep '\/test_[a-zA-Z_\-\.]\+.py'` +it: + @echo "Unit test run starting..." + @time docker-compose run test tox -e unit-py27,pep8 + test: @echo "Full test run starting..." 
@time docker-compose run test tox diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index e6d359f3eb9..be41bd37e1b 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -8,16 +8,19 @@ import dbt.project import dbt.templates import dbt.utils +import dbt.linker import networkx as nx -from dbt.logger import GLOBAL_LOGGER as logger +# from dbt.logger import GLOBAL_LOGGER as logger + class FakeArgs: - + def __init__(self): self.full_refresh = False + class GraphTest(unittest.TestCase): def tearDown(self): @@ -146,14 +149,20 @@ def test__two_models_simple_ref(self): compiler.compile(limit_to=['models']) six.assertCountEqual(self, - self.graph_result.nodes(), - [('test_models_compile', 'model_one'), - ('test_models_compile', 'model_two'),]) + self.graph_result.nodes(), + [ + ('test_models_compile', 'model_one'), + ('test_models_compile', 'model_two') + ]) six.assertCountEqual(self, - self.graph_result.edges(), - [(('test_models_compile', 'model_one'), - ('test_models_compile', 'model_two')),]) + self.graph_result.edges(), + [ + ( + ('test_models_compile', 'model_one'), + ('test_models_compile', 'model_two') + ) + ]) def test__model_materializations(self): self.use_models({ @@ -167,9 +176,9 @@ def test__model_materializations(self): "models": { "materialized": "table", "test_models_compile": { - "model_one": { "materialized": "table" }, - "model_two": { "materialized": "view" }, - "model_three": { "materialized": "ephemeral" } + "model_one": {"materialized": "table"}, + "model_two": {"materialized": "view"}, + "model_three": {"materialized": "ephemeral"} } } } @@ -190,7 +199,6 @@ def test__model_materializations(self): actual = nodes[("test_models_compile", model)]["materialized"] self.assertEquals(actual, expected) - def test__model_enabled(self): self.use_models({ 'model_one': 'select * from events', @@ -201,8 +209,8 @@ def test__model_enabled(self): "models": { "materialized": "table", "test_models_compile": { - "model_one": { "enabled": 
True }, - "model_two": { "enabled": False }, + "model_one": {"enabled": True}, + "model_two": {"enabled": False}, } } } @@ -210,9 +218,9 @@ def test__model_enabled(self): compiler = self.get_compiler(self.get_project(cfg)) compiler.compile(limit_to=['models']) - six.assertCountEqual(self, - self.graph_result.nodes(), [('test_models_compile', 'model_one')]) + self.graph_result.nodes(), + [('test_models_compile', 'model_one')]) six.assertCountEqual(self, self.graph_result.edges(), []) @@ -225,14 +233,14 @@ def test__model_incremental_without_sql_where_fails(self): "models": { "materialized": "table", "test_models_compile": { - "model_one": { "materialized": "incremental" }, + "model_one": {"materialized": "incremental"}, } } } compiler = self.get_compiler(self.get_project(cfg)) - with self.assertRaises(RuntimeError) as context: + with self.assertRaises(RuntimeError): compiler.compile(limit_to=['models']) def test__model_incremental(self): @@ -245,8 +253,8 @@ def test__model_incremental(self): "test_models_compile": { "model_one": { "materialized": "incremental", - "sql_where": "TRUE", - "unique_key": "TRUE" + "sql_where": "created_at", + "unique_key": "id" }, } } @@ -264,3 +272,96 @@ def test__model_incremental(self): self.graph_result.node[node]['materialized'], 'incremental') + def test__topological_ordering(self): + self.use_models({ + 'model_1': 'select * from events', + 'model_2': 'select * from {{ ref("model_1") }}', + 'model_3': ''' + select * from {{ ref("model_1") }} + union all + select * from {{ ref("model_2") }} + ''', + 'model_4': 'select * from {{ ref("model_3") }}' + }) + + compiler = self.get_compiler(self.get_project({})) + compiler.compile(limit_to=['models']) + + six.assertCountEqual(self, + self.graph_result.nodes(), + [ + ('test_models_compile', 'model_1'), + ('test_models_compile', 'model_2'), + ('test_models_compile', 'model_3'), + ('test_models_compile', 'model_4') + ]) + + six.assertCountEqual(self, + self.graph_result.edges(), + [ + ( + 
('test_models_compile', 'model_1'), + ('test_models_compile', 'model_2') + ), + ( + ('test_models_compile', 'model_1'), + ('test_models_compile', 'model_3') + ), + ( + ('test_models_compile', 'model_2'), + ('test_models_compile', 'model_3') + ), + ( + ('test_models_compile', 'model_3'), + ('test_models_compile', 'model_4') + ) + ]) + + linker = dbt.linker.Linker() + linker.graph = self.graph_result + + actual_ordering = linker.as_topological_ordering() + expected_ordering = [ + ('test_models_compile', 'model_1'), + ('test_models_compile', 'model_2'), + ('test_models_compile', 'model_3'), + ('test_models_compile', 'model_4') + ] + + self.assertEqual(actual_ordering, expected_ordering) + + def test__dependency_list(self): + self.use_models({ + 'model_1': 'select * from events', + 'model_2': 'select * from {{ ref("model_1") }}', + 'model_3': ''' + select * from {{ ref("model_1") }} + union all + select * from {{ ref("model_2") }} + ''', + 'model_4': 'select * from {{ ref("model_3") }}' + }) + + compiler = self.get_compiler(self.get_project({})) + compiler.compile(limit_to=['models']) + + linker = dbt.linker.Linker() + linker.graph = self.graph_result + + actual_dep_list = linker.as_dependency_list() + expected_dep_list = [ + [ + ('test_models_compile', 'model_1') + ], + [ + ('test_models_compile', 'model_2') + ], + [ + ('test_models_compile', 'model_3') + ], + [ + ('test_models_compile', 'model_4'), + ] + ] + + self.assertEqual(actual_dep_list, expected_dep_list)