Add a new ListTask, and 'dbt list'/'dbt ls'
All tasks now have a 'pre_init_hook' classmethod, called by main
 - runs after args are parsed, before anything else (see the sketch after the changed-file summary below)
Jacob Beck committed May 2, 2019
1 parent 4c02b4a commit e043643
Showing 14 changed files with 524 additions and 7 deletions.
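For context, a minimal sketch of the hook flow this commit sets up. The classes here are placeholders, not dbt's real task hierarchy; run_from_args in core/dbt/main.py below is the authoritative version, and the real parser sets cls via set_defaults(cls=...):

    from argparse import Namespace

    class BaseTask:
        @classmethod
        def pre_init_hook(cls):
            """Called before the task is initialized; a no-op by default."""

        @classmethod
        def from_args(cls, args):
            return cls()

    def run_from_args(parsed):
        parsed.cls.pre_init_hook()           # after args are parsed, before anything else
        return parsed.cls.from_args(parsed)  # only then is the task built

    run_from_args(Namespace(cls=BaseTask))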
16 changes: 12 additions & 4 deletions core/dbt/logger.py
@@ -84,11 +84,19 @@ def notice(self, msg, *args, **kwargs):
initialized = False


def _swap_handler(logger, old, new):
if old in logger.handlers:
logger.handlers.remove(old)
if new not in logger.handlers:
logger.addHandler(new)


def log_to_stderr(logger):
if stdout_handler in logger.handlers:
logger.handlers.remove(stdout_handler)
if stderr_handler not in logger.handlers:
logger.addHandler(stderr_handler)
_swap_handler(logger, stdout_handler, stderr_handler)


def log_to_stdout(logger):
_swap_handler(logger, stderr_handler, stdout_handler)


def make_log_dir_if_missing(log_dir):
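The refactor above just makes the stdout/stderr swap reusable and idempotent. A standalone sketch of the same idea with plain stdlib logging (handler and logger names here are placeholders, not dbt's):

    import logging
    import sys

    stdout_handler = logging.StreamHandler(sys.stdout)
    stderr_handler = logging.StreamHandler(sys.stderr)

    def _swap_handler(logger, old, new):
        if old in logger.handlers:
            logger.handlers.remove(old)
        if new not in logger.handlers:
            logger.addHandler(new)

    log = logging.getLogger("demo")
    log.addHandler(stdout_handler)
    _swap_handler(log, stdout_handler, stderr_handler)  # messages now go to stderr
    _swap_handler(log, stdout_handler, stderr_handler)  # safe to call again: no duplicates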
43 changes: 41 additions & 2 deletions core/dbt/main.py
@@ -22,6 +22,7 @@
import dbt.task.serve as serve_task
import dbt.task.freshness as freshness_task
import dbt.task.run_operation as run_operation_task
from dbt.task.list import ListTask
from dbt.task.rpc_server import RPCServerTask
from dbt.adapters.factory import reset_adapters

@@ -183,6 +184,7 @@ def run_from_args(parsed):
log_cache_events(getattr(parsed, 'log_cache_events', False))
update_flags(parsed)

parsed.cls.pre_init_hook()
logger.info("Running with dbt{}".format(dbt.version.installed))

# this will convert DbtConfigErrors into RuntimeExceptions
@@ -569,6 +571,42 @@ def _build_rpc_subparser(subparsers, base_subparser):
return sub


def _build_list_subparser(subparsers, base_subparser):
sub = subparsers.add_parser(
'list',
parents=[base_subparser],
help='list models'
)
sub.set_defaults(cls=ListTask, which='list')
resource_values = list(ListTask.ALL_RESOURCE_VALUES) + ['default', 'all']
sub.add_argument('--resource-type',
choices=resource_values,
action='append',
default=[],
dest='resource_types')
sub.add_argument('--output',
choices=['json', 'name', 'path', 'selector'],
default='selector')
sub.add_argument(
'-s',
'--select',
required=False,
nargs='+',
help="Specify the nodes to select.",
dest='models'
)
sub.add_argument(
'--exclude',
required=False,
nargs='+',
help="Specify the models to exclude."
)
# in python 3.x you can use the 'aliases' kwarg, but in python 2.7 you get
# to do this
subparsers._name_parser_map['ls'] = sub
return sub


def parse_args(args):
p = DBTArgumentParser(
prog='dbt: data build tool',
@@ -645,14 +683,15 @@ def parse_args(args):

# make the subcommands that have their own subcommands
docs_sub = _build_docs_subparser(subs, base_subparser)
docs_subs = docs_sub.add_subparsers()
docs_subs = docs_sub.add_subparsers(title="Available sub-commands")
source_sub = _build_source_subparser(subs, base_subparser)
source_subs = source_sub.add_subparsers()
source_subs = source_sub.add_subparsers(title="Available sub-commands")

_build_init_subparser(subs, base_subparser)
_build_clean_subparser(subs, base_subparser)
_build_debug_subparser(subs, base_subparser)
_build_deps_subparser(subs, base_subparser)
_build_list_subparser(subs, base_subparser)

archive_sub = _build_archive_subparser(subs, base_subparser)
rpc_sub = _build_rpc_subparser(subs, base_subparser)
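The `_name_parser_map['ls']` assignment in _build_list_subparser is the Python-2-compatible way to alias a subcommand. On Python 3 the same effect can be spelled with argparse's `aliases` keyword; a small sketch (not the dbt code):

    import argparse

    p = argparse.ArgumentParser(prog="dbt")
    subs = p.add_subparsers(title="Available sub-commands")
    sub = subs.add_parser("list", aliases=["ls"], help="list models")  # Python 3 only
    sub.set_defaults(which="list")

    print(p.parse_args(["ls"]))  # Namespace(which='list')

In dbt itself, either spelling leaves `parse_args(['ls'])` resolving to the same parser as `parse_args(['list'])`, with cls=ListTask set by the defaults above.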
7 changes: 6 additions & 1 deletion core/dbt/parser/base_sql.py
@@ -47,9 +47,14 @@ def load_and_parse(self, package_name, root_dir, relative_dirs,
path = self.get_compiled_path(name,
file_match.get('relative_path'))

# TODO(jeb): Why would the original file path rely on the compiled
# path?
# original_file_path = os.path.join(
# file_match.get('searched_path'),
# path)
original_file_path = os.path.join(
file_match.get('searched_path'),
path)
file_match.get('relative_path'))

result.append({
'name': name,
4 changes: 4 additions & 0 deletions core/dbt/task/base.py
@@ -46,6 +46,10 @@ def __init__(self, args, config):
self.args = args
self.config = config

@classmethod
def pre_init_hook(cls):
"""A hook called before the task is initialized."""

@classmethod
def from_args(cls, args):
try:
120 changes: 120 additions & 0 deletions core/dbt/task/list.py
@@ -0,0 +1,120 @@
from __future__ import print_function

import json

from dbt.task.runnable import GraphRunnableTask, ManifestTask
from dbt.node_types import NodeType
import dbt.exceptions
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.logger import log_to_stderr


class ListTask(GraphRunnableTask):
DEFAULT_RESOURCE_VALUES = frozenset((
NodeType.Model,
NodeType.Archive,
NodeType.Seed,
NodeType.Test,
NodeType.Source,
))
ALL_RESOURCE_VALUES = DEFAULT_RESOURCE_VALUES | frozenset((
NodeType.Analysis,
))
ALLOWED_KEYS = frozenset((
'alias',
'name',
'package_name',
'depends_on',
'tags',
'config',
'resource_type',
'source_name',
))

def __init__(self, args, config):
super(ListTask, self).__init__(args, config)
self.config.args.single_threaded = True

@classmethod
def pre_init_hook(cls):
"""A hook called before the task is initialized."""
log_to_stderr(logger)

def _iterate_selected_nodes(self):
nodes = sorted(self.select_nodes())
if not nodes:
logger.warning('No nodes selected!')
return
for node in nodes:
yield self.manifest.nodes[node]

def generate_selectors(self):
for node in self._iterate_selected_nodes():
if node.resource_type == NodeType.Source:
yield 'source:{}'.format(node.unique_id)
else:
yield node.unique_id

def generate_names(self):
for node in self._iterate_selected_nodes():
if node.resource_type == NodeType.Source:
yield '{0.source_name}.{0.name}'.format(node)
else:
yield node.name

def generate_json(self):
for node in self._iterate_selected_nodes():
yield json.dumps({
k: v
for k, v in node.serialize().items()
if k in self.ALLOWED_KEYS
})

def generate_paths(self):
for node in self._iterate_selected_nodes():
yield node.get('original_file_path')

def run(self):
ManifestTask._runtime_initialize(self)
output = self.config.args.output
if output == 'selector':
generator = self.generate_selectors
elif output == 'name':
generator = self.generate_names
elif output == 'json':
generator = self.generate_json
elif output == 'path':
generator = self.generate_paths
else:
raise dbt.exceptions.InternalException(
'Invalid output {}'.format(output)
)
for result in generator():
self.node_results.append(result)
print(result)
return self.node_results

@property
def resource_types(self):
values = set(self.config.args.resource_types)
if not values:
return list(self.DEFAULT_RESOURCE_VALUES)

if 'default' in values:
values.remove('default')
values.update(self.DEFAULT_RESOURCE_VALUES)
if 'all' in values:
values.remove('all')
values.update(self.ALL_RESOURCE_VALUES)
return list(values)

def build_query(self):
return {
"include": self.args.models,
"exclude": self.args.exclude,
"resource_types": self.resource_types,
"tags": [],
}

def interpret_results(self, results):
return bool(results)
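To make the `--resource-type` behaviour above concrete, here is the same 'default'/'all' expansion as a standalone sketch over plain strings (the real property works with NodeType members; these values are illustrative):

    DEFAULT_RESOURCE_VALUES = {"model", "archive", "seed", "test", "source"}
    ALL_RESOURCE_VALUES = DEFAULT_RESOURCE_VALUES | {"analysis"}

    def resolve(requested):
        values = set(requested)
        if not values:
            return DEFAULT_RESOURCE_VALUES
        if "default" in values:
            values.remove("default")
            values.update(DEFAULT_RESOURCE_VALUES)
        if "all" in values:
            values.remove("all")
            values.update(ALL_RESOURCE_VALUES)
        return values

    print(sorted(resolve([])))                       # the five default resource types
    print(sorted(resolve(["default", "analysis"])))  # defaults plus analyses

The selected nodes are then printed one per line in whichever format `--output` requests: unique IDs for 'selector', plain names for 'name', file paths for 'path', or one JSON object per node (filtered to ALLOWED_KEYS) for 'json'.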
1 change: 1 addition & 0 deletions test/integration/047_dbt_ls_test/analyses/analysis.sql
@@ -0,0 +1 @@
select 4 as id
12 changes: 12 additions & 0 deletions test/integration/047_dbt_ls_test/archives/archive.sql
@@ -0,0 +1,12 @@
{% archive my_archive %}
{{
config(
target_database=var('target_database', database),
target_schema=schema,
unique_key='id',
strategy='timestamp',
updated_at='updated_at',
)
}}
select * from {{database}}.{{schema}}.seed
{% endarchive %}
2 changes: 2 additions & 0 deletions test/integration/047_dbt_ls_test/data/seed.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
a,b
1,2
7 changes: 7 additions & 0 deletions test/integration/047_dbt_ls_test/macros/macro_stuff.sql
@@ -0,0 +1,7 @@
{% macro cool_macro() %}
wow!
{% endmacro %}

{% macro other_cool_macro(a, b) %}
cool!
{% endmacro %}
3 changes: 3 additions & 0 deletions test/integration/047_dbt_ls_test/models/docs.md
@@ -0,0 +1,3 @@
{% docs my_docs %}
some docs
{% enddocs %}
1 change: 1 addition & 0 deletions test/integration/047_dbt_ls_test/models/outer.sql
@@ -0,0 +1 @@
select 1 as id
15 changes: 15 additions & 0 deletions test/integration/047_dbt_ls_test/models/schema.yml
@@ -0,0 +1,15 @@
version: 2
models:
- name: outer
description: The outer table
columns:
- name: id
description: The id value
tests:
- unique
- not_null

sources:
- name: my_source
tables:
- name: my_table
1 change: 1 addition & 0 deletions test/integration/047_dbt_ls_test/models/sub/inner.sql
@@ -0,0 +1 @@
select * from {{ ref('outer') }}