-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Compile on-run-(start|end) hooks to file #412
Changes from 3 commits
6306909
821256b
d5470f8
0572e9d
6296e8e
061678c
01bb4d7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -331,6 +331,52 @@ def load_and_parse_sql(package_name, root_project, all_projects, root_dir, | |
return parse_sql_nodes(result, root_project, all_projects, tags) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. looks like this file was |
||
|
||
|
||
def get_hooks_from_project(project_cfg, hook_type):
    """Return the hooks configured under ``hook_type`` in a project config.

    :param project_cfg: mapping-like project configuration; only its
        ``.get`` method is used.
    :param hook_type: configuration key to look up
        (e.g. ``'on-run-start'``).
    :returns: the configured value unchanged when it is a list or tuple
        (subclasses included), a one-element list wrapping any other
        value, or an empty list when the key is absent or set to ``None``.
    """
    hooks = project_cfg.get(hook_type)

    # A key that is missing, or present without a value, means "no hooks".
    # Wrapping None would hand callers a non-string entry to join.
    if hooks is None:
        return []

    # isinstance (not an exact type() match) so that list/tuple subclasses
    # are passed through instead of being wrapped as a single hook.
    if not isinstance(hooks, (list, tuple)):
        hooks = [hooks]

    return hooks
|
||
|
||
def get_hooks(all_projects, hook_type):
    """Gather each project's ``hook_type`` hooks as a single SQL string.

    :param all_projects: dict mapping project name to project config.
    :param hook_type: configuration key whose hooks are collected.
    :returns: dict mapping project name to that project's hooks joined
        with a semicolon followed by a newline. Projects that define no
        hooks are left out.
    """
    joined_by_project = {}

    for name in all_projects:
        statements = get_hooks_from_project(all_projects[name], hook_type)

        # Nothing configured for this project: leave it out of the result.
        if not statements:
            continue

        joined_by_project[name] = ";\n".join(statements)

    return joined_by_project
|
||
|
||
def load_and_parse_run_hooks(root_project, all_projects, hook_type):
    """Turn every project's ``hook_type`` hooks into parsed operation nodes.

    One raw node is built per project that defines hooks of this type;
    the node's ``raw_sql`` is the joined hook SQL returned by
    ``get_hooks``. The raw nodes are then handed to ``parse_sql_nodes``,
    tagged with ``hook_type``.

    :param root_project: forwarded unchanged to ``parse_sql_nodes``.
    :param all_projects: dict mapping project name to project config.
    :param hook_type: hook configuration key; also used as the node name
        and as the single tag on the parsed nodes.
    :returns: whatever ``parse_sql_nodes`` returns for the built nodes.
    """
    if dbt.flags.STRICT_MODE:
        dbt.contracts.project.validate_list(all_projects)

    project_hooks = get_hooks(all_projects, hook_type)

    # The per-project config itself is not needed here: only the project
    # name and its already-joined hook SQL go into the raw node.
    result = [
        {
            'name': hook_type,
            'root_path': "{}/dbt_project.yml".format(project_name),
            'resource_type': NodeType.Operation,
            'path': dbt.utils.get_pseudo_hook_path(hook_type),
            'package_name': project_name,
            'raw_sql': hooks
        }
        for project_name, hooks in project_hooks.items()
    ]

    return parse_sql_nodes(result, root_project, all_projects,
                           tags={hook_type})
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. looks great |
||
|
||
|
||
def load_and_parse_macros(package_name, root_project, all_projects, root_dir, | ||
relative_dirs, resource_type, tags=None): | ||
extension = "[!.#~]*.sql" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,6 @@ | |
import os | ||
import time | ||
import itertools | ||
from datetime import datetime | ||
|
||
from dbt.adapters.factory import get_adapter | ||
from dbt.logger import GLOBAL_LOGGER as logger | ||
|
@@ -370,24 +369,13 @@ def execute_archive(profile, node, context): | |
return result | ||
|
||
|
||
def run_hooks(profile, hooks, context, source): | ||
if type(hooks) not in (list, tuple): | ||
hooks = [hooks] | ||
|
||
ctx = { | ||
"target": profile, | ||
"state": "start", | ||
"invocation_id": context['invocation_id'], | ||
"run_started_at": context['run_started_at'] | ||
} | ||
|
||
compiled_hooks = [ | ||
dbt.clients.jinja.get_rendered(hook, ctx) for hook in hooks | ||
] | ||
|
||
def run_hooks(profile, hooks): | ||
adapter = get_adapter(profile) | ||
|
||
return adapter.execute_all(profile=profile, sqls=compiled_hooks) | ||
master_connection = adapter.begin(profile) | ||
compiled_hooks = [hook['wrapped_sql'] for hook in hooks] | ||
adapter.execute_all(profile=profile, sqls=compiled_hooks) | ||
master_connection = adapter.commit(master_connection) | ||
|
||
|
||
def track_model_run(index, num_nodes, run_model_result): | ||
|
@@ -461,10 +449,8 @@ def call_table_exists(schema, table): | |
return adapter.table_exists( | ||
profile, schema, table, node.get('name')) | ||
|
||
self.run_started_at = datetime.now() | ||
|
||
return { | ||
"run_started_at": datetime.now(), | ||
"run_started_at": dbt.tracking.active_user.run_started_at, | ||
"invocation_id": dbt.tracking.active_user.invocation_id, | ||
"get_columns_in_table": call_get_columns_in_table, | ||
"get_missing_columns": call_get_missing_columns, | ||
|
@@ -513,7 +499,6 @@ def execute_node(self, node, flat_graph, existing, profile, adapter): | |
return node, result | ||
|
||
def compile_node(self, node, flat_graph):
    """Compile ``node`` against ``flat_graph``.

    A new ``dbt.compilation.Compiler`` is created from ``self.project``
    for the call, and its ``compile_node`` result is returned as-is.
    """
    return dbt.compilation.Compiler(self.project).compile_node(
        node, flat_graph)
|
@@ -687,12 +672,9 @@ def execute_nodes(self, flat_graph, node_dependency_list, on_failure, | |
start_time = time.time() | ||
|
||
if should_run_hooks: | ||
master_connection = adapter.begin(profile) | ||
run_hooks(self.project.get_target(), | ||
self.project.cfg.get('on-run-start', []), | ||
self.node_context({}), | ||
'on-run-start hooks') | ||
master_connection = adapter.commit(master_connection) | ||
start_hooks = dbt.utils.get_nodes_by_tags(flat_graph, {'on-run-start'}, "operations") | ||
hooks = [self.compile_node(hook, flat_graph) for hook in start_hooks] | ||
run_hooks(profile, hooks) | ||
|
||
def get_idx(node): | ||
return node_id_to_index_map.get(node.get('unique_id')) | ||
|
@@ -739,12 +721,9 @@ def get_idx(node): | |
pool.join() | ||
|
||
if should_run_hooks: | ||
adapter.begin(profile) | ||
run_hooks(self.project.get_target(), | ||
self.project.cfg.get('on-run-end', []), | ||
self.node_context({}), | ||
'on-run-end hooks') | ||
adapter.commit(master_connection) | ||
end_hooks = dbt.utils.get_nodes_by_tags(flat_graph, {'on-run-end'}, "operations") | ||
hooks = [self.compile_node(hook, flat_graph) for hook in end_hooks] | ||
run_hooks(profile, hooks) | ||
|
||
execution_time = time.time() - start_time | ||
|
||
|
@@ -755,18 +734,35 @@ def get_idx(node): | |
|
||
def get_ancestor_ephemeral_nodes(self, flat_graph, linked_graph, | ||
selected_nodes): | ||
node_names = { | ||
node: flat_graph['nodes'].get(node).get('name') | ||
for node in selected_nodes | ||
if node in flat_graph['nodes'] | ||
} | ||
|
||
include_spec = [ | ||
'+{}'.format(node_names[node]) | ||
for node in selected_nodes if node in node_names | ||
] | ||
|
||
all_ancestors = dbt.graph.selector.select_nodes( | ||
self.project, | ||
linked_graph, | ||
['+{}'.format(flat_graph.get('nodes').get(node).get('name')) | ||
for node in selected_nodes], | ||
include_spec, | ||
[]) | ||
|
||
return set([ancestor for ancestor in all_ancestors | ||
if(flat_graph['nodes'][ancestor].get( | ||
'resource_type') == NodeType.Model and | ||
get_materialization( | ||
flat_graph['nodes'][ancestor]) == 'ephemeral')]) | ||
res = [] | ||
|
||
for ancestor in all_ancestors: | ||
if ancestor not in flat_graph['nodes']: | ||
continue | ||
ancestor_node = flat_graph['nodes'][ancestor] | ||
is_model = ancestor_node.get('resource_type') == NodeType.Model | ||
is_ephemeral = get_materialization(ancestor_node) == 'ephemeral' | ||
if is_model and is_ephemeral: | ||
res.append(ancestor) | ||
|
||
return set(res) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 |
||
|
||
def get_nodes_to_run(self, graph, include_spec, exclude_spec, | ||
resource_types, tags): | ||
|
@@ -874,15 +870,17 @@ def compile_models(self, include_spec, exclude_spec): | |
NodeType.Model, | ||
NodeType.Test, | ||
NodeType.Archive, | ||
NodeType.Analysis | ||
NodeType.Analysis, | ||
NodeType.Operation | ||
] | ||
|
||
return self.run_types_from_graph(include_spec, | ||
exclude_spec, | ||
resource_types=resource_types, | ||
tags=set(), | ||
should_run_hooks=False, | ||
should_execute=False) | ||
should_execute=False, | ||
flatten_graph=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. why flatten the graph here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. debugging! good catch |
||
|
||
def run_models(self, include_spec, exclude_spec): | ||
return self.run_types_from_graph(include_spec, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
curious why you put operations in a separate subgraph
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah, i think you're right -- these can go in the `nodes` graph... let me investigate