Feature/archive blocks #1361

Merged: 9 commits, merged on Apr 26, 2019. Showing changes from 8 commits.

2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -19,7 +19,7 @@ jobs:
PGUSER: root
PGPASSWORD: password
PGDATABASE: postgres
- run: tox -e pep8,unit-py27,unit-py36
- run: tox -e flake8,unit-py27,unit-py36
integration-postgres-py36:
docker: *test_and_postgres
steps:
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
@@ -76,8 +76,8 @@ A short list of tools used in dbt testing that will be helpful to your understanding
- [tox](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions
- [nosetests](http://nose.readthedocs.io/en/latest) to discover/run tests
- [make](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) - but don't worry too much, nobody _really_ understands how make works and our Makefile is super simple
- [pep8](https://pep8.readthedocs.io/en/release-1.7.x/) for code linting
- [CircleCI](https://circleci.com/product/) and [Appveyor](https://www.appveyor.com/docs/)
- [flake8](https://gitlab.com/pycqa/flake8) for code linting
- [CircleCI](https://circleci.com/product/) and [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/)

If you're unfamiliar with any or all of these, that's fine! You really do not have to have a deep understanding of any of these to get by.

2 changes: 1 addition & 1 deletion Makefile
@@ -11,7 +11,7 @@ test:

test-unit:
@echo "Unit test run starting..."
@time docker-compose run test tox -e unit-py27,unit-py36,pep8
@time docker-compose run test tox -e unit-py27,unit-py36,flake8

test-integration:
@echo "Integration test run starting..."
13 changes: 8 additions & 5 deletions core/dbt/adapters/base/__init__.py
@@ -1,5 +1,8 @@
from dbt.adapters.base.meta import available
from dbt.adapters.base.relation import BaseRelation
from dbt.adapters.base.connections import BaseConnectionManager, Credentials
from dbt.adapters.base.impl import BaseAdapter
from dbt.adapters.base.plugin import AdapterPlugin
# these are all just exports, #noqa them so flake8 will be happy
from dbt.adapters.base.meta import available # noqa
from dbt.adapters.base.relation import BaseRelation # noqa
from dbt.adapters.base.relation import Column # noqa
from dbt.adapters.base.connections import BaseConnectionManager # noqa
from dbt.adapters.base.connections import Credentials # noqa
from dbt.adapters.base.impl import BaseAdapter # noqa
from dbt.adapters.base.plugin import AdapterPlugin # noqa
45 changes: 43 additions & 2 deletions core/dbt/adapters/base/impl.py
@@ -7,18 +7,17 @@

import dbt.exceptions
import dbt.flags
import dbt.schema
import dbt.clients.agate_helper

from dbt.compat import abstractclassmethod, classmethod
from dbt.node_types import NodeType
from dbt.loader import GraphLoader
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.schema import Column
from dbt.utils import filter_null_values

from dbt.adapters.base.meta import AdapterMeta, available, available_deprecated
from dbt.adapters.base import BaseRelation
from dbt.adapters.base import Column
from dbt.adapters.cache import RelationsCache


@@ -534,6 +533,48 @@ def get_missing_columns(self, from_relation, to_relation):
if col_name in missing_columns
]

    @available
    def valid_archive_target(self, relation):
        """Ensure that the target relation is valid, by making sure it has the
        expected columns.

        :param Relation relation: The relation to check
        :raises dbt.exceptions.CompilationException: If the columns are
            incorrect.
        """
        if not isinstance(relation, self.Relation):
            dbt.exceptions.invalid_type_error(
                method_name='is_existing_old_style_archive',
                arg_name='relation',
                got_value=relation,
                expected_type=self.Relation)

        columns = self.get_columns_in_relation(relation)
        names = set(c.name.lower() for c in columns)
        expanded_keys = ('scd_id', 'valid_from', 'valid_to')
        extra = []
        missing = []
        for legacy in expanded_keys:
            desired = 'dbt_' + legacy
            if desired not in names:
                missing.append(desired)
                if legacy in names:
                    extra.append(legacy)

        if missing:
            if extra:
                msg = (
                    'Archive target has ("{}") but not ("{}") - is it an '
                    'unmigrated previous version archive?'
                    .format('", "'.join(extra), '", "'.join(missing))
                )
            else:
                msg = (
                    'Archive target is not an archive table (missing "{}")'
                    .format('", "'.join(missing))
                )
            dbt.exceptions.raise_compiler_error(msg)

    @available
    def expand_target_column_types(self, temp_table, to_relation):
        if not isinstance(to_relation, self.Relation):
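For illustration, here is a standalone sketch of the check valid_archive_target performs, using plain lists of column names instead of real adapter Column objects (the example tables are invented): the target must have the dbt_-prefixed archive columns, and if only the legacy unprefixed names are present, the error hints that the table is an unmigrated old-style archive.

```python
# Standalone sketch of the column check in valid_archive_target above.
# Plain column-name lists stand in for adapter Column objects; the
# example tables are hypothetical.
def archive_column_problems(column_names):
    names = set(name.lower() for name in column_names)
    missing, extra = [], []
    for legacy in ('scd_id', 'valid_from', 'valid_to'):
        desired = 'dbt_' + legacy
        if desired not in names:
            missing.append(desired)
            if legacy in names:
                extra.append(legacy)
    return missing, extra

# A migrated archive target passes the check (nothing missing):
print(archive_column_problems(
    ['id', 'dbt_scd_id', 'dbt_valid_from', 'dbt_valid_to']))
# ([], [])

# An old-style archive still uses the unprefixed names, which triggers the
# "unmigrated previous version archive?" compiler error in the real method:
print(archive_column_problems(['id', 'scd_id', 'valid_from', 'valid_to']))
# (['dbt_scd_id', 'dbt_valid_from', 'dbt_valid_to'],
#  ['scd_id', 'valid_from', 'valid_to'])
```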
2 changes: 0 additions & 2 deletions core/dbt/adapters/base/plugin.py
@@ -1,5 +1,3 @@
import os

from dbt.config.project import Project


88 changes: 88 additions & 0 deletions core/dbt/adapters/base/relation.py
@@ -303,3 +303,91 @@ def is_cte(self):
@property
def is_view(self):
return self.type == self.View


class Column(object):

Contributor: Do you think there's any merit to putting this in its own file?

Contributor Author: I don't think it really matters either way.

    TYPE_LABELS = {
        'STRING': 'TEXT',
        'TIMESTAMP': 'TIMESTAMP',
        'FLOAT': 'FLOAT',
        'INTEGER': 'INT'
    }

    def __init__(self, column, dtype, char_size=None, numeric_precision=None,
                 numeric_scale=None):
        self.column = column
        self.dtype = dtype
        self.char_size = char_size
        self.numeric_precision = numeric_precision
        self.numeric_scale = numeric_scale

    @classmethod
    def translate_type(cls, dtype):
        return cls.TYPE_LABELS.get(dtype.upper(), dtype)

    @classmethod
    def create(cls, name, label_or_dtype):
        column_type = cls.translate_type(label_or_dtype)
        return cls(name, column_type)

    @property
    def name(self):
        return self.column

    @property
    def quoted(self):
        return '"{}"'.format(self.column)

    @property
    def data_type(self):
        if self.is_string():
            return Column.string_type(self.string_size())
        elif self.is_numeric():
            return Column.numeric_type(self.dtype, self.numeric_precision,
                                       self.numeric_scale)
        else:
            return self.dtype

    def is_string(self):
        return self.dtype.lower() in ['text', 'character varying', 'character',
                                      'varchar']

    def is_numeric(self):
        return self.dtype.lower() in ['numeric', 'number']

    def string_size(self):
        if not self.is_string():
            raise RuntimeError("Called string_size() on non-string field!")

        if self.dtype == 'text' or self.char_size is None:
            # char_size should never be None. Handle it reasonably just in case
            return 255
        else:
            return int(self.char_size)

    def can_expand_to(self, other_column):
        """returns True if this column can be expanded to the size of the
        other column"""
        if not self.is_string() or not other_column.is_string():
            return False

        return other_column.string_size() > self.string_size()

    def literal(self, value):
        return "{}::{}".format(value, self.data_type)

    @classmethod
    def string_type(cls, size):
        return "character varying({})".format(size)

    @classmethod
    def numeric_type(cls, dtype, precision, scale):
        # This could be decimal(...), numeric(...), number(...)
        # Just use whatever was fed in here -- don't try to get too clever
        if precision is None or scale is None:
            return dtype
        else:
            return "{}({},{})".format(dtype, precision, scale)

    def __repr__(self):
        return "<Column {} ({})>".format(self.name, self.data_type)
5 changes: 3 additions & 2 deletions core/dbt/adapters/sql/__init__.py
@@ -1,2 +1,3 @@
from dbt.adapters.sql.connections import SQLConnectionManager
from dbt.adapters.sql.impl import SQLAdapter
# these are all just exports, #noqa them so flake8 will be happy
from dbt.adapters.sql.connections import SQLConnectionManager # noqa
from dbt.adapters.sql.impl import SQLAdapter # noqa
4 changes: 2 additions & 2 deletions core/dbt/api/object.py
@@ -1,6 +1,6 @@
import copy
from collections import Mapping
from jsonschema import Draft4Validator
from jsonschema import Draft7Validator
Contributor: What's this all about?

Contributor Author: I changed this because I wanted to use some features that changed between draft 4 and draft 7 (jsonschema has its own ref mechanism involving schema IDs), but then I reverted those because it got out of hand on the contracts side. I can change it back but I figured it wouldn't hurt to leave it.

Contributor Author: Also: It would be cool to annotate our contracts the "proper" jsonschema way. It will be a big PR, but then we could use refs properly. The reason I reverted it was because our current way of combining contracts (deep_merge, etc) doesn't play well with the schema id/ref model jsonschema provides, and changing all that would massively increase the size and risk of this PR, which is already kind of out of hand for my taste.


from dbt.exceptions import JSONValidationException
from dbt.utils import deep_merge
@@ -79,7 +79,7 @@ def validate(self):
of this instance. If any attributes are missing or
invalid, raise a ValidationException.
"""
validator = Draft4Validator(self.SCHEMA)
validator = Draft7Validator(self.SCHEMA)

errors = set() # make errors a set to avoid duplicates

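To make the review discussion above about Draft 7 refs and schema IDs concrete, here is a hedged sketch of jsonschema's "$id"/"$ref" mechanism; the schemas are invented for illustration and are not dbt's actual contracts.

```python
from jsonschema import Draft7Validator, RefResolver

QUOTING_SCHEMA = {
    '$id': 'https://example.com/quoting.json',  # hypothetical schema id
    'type': 'object',
    'properties': {
        'database': {'type': 'boolean'},
        'identifier': {'type': 'boolean'},
    },
    'additionalProperties': False,
}

PROJECT_SCHEMA = {
    '$id': 'https://example.com/project.json',
    'type': 'object',
    'properties': {
        'name': {'type': 'string'},
        # reuse the quoting contract by reference instead of deep_merge
        'quoting': {'$ref': 'https://example.com/quoting.json'},
    },
    'required': ['name'],
}

# register the referenced schema so "$ref" resolves without any network fetch
resolver = RefResolver.from_schema(
    PROJECT_SCHEMA, store={QUOTING_SCHEMA['$id']: QUOTING_SCHEMA})
validator = Draft7Validator(PROJECT_SCHEMA, resolver=resolver)

instance = {'name': 'my_project', 'quoting': {'database': 'yes'}}
for error in validator.iter_errors(instance):
    print(error.message)  # 'yes' is not of type 'boolean'
```

This reference-based composition is the alternative to deep_merge-style contract merging that the author describes reverting.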
4 changes: 2 additions & 2 deletions core/dbt/clients/_jinja_blocks.py
@@ -174,8 +174,8 @@ def _expect_match(self, expected_name, *patterns, **kwargs):
match = self._first_match(*patterns, **kwargs)
if match is None:
msg = 'unexpected EOF, expected {}, got "{}"'.format(
expected_name, self.data[self.pos:]
)
expected_name, self.data[self.pos:]
)
dbt.exceptions.raise_compiler_error(msg)
return match

3 changes: 2 additions & 1 deletion core/dbt/clients/jinja.py
@@ -11,6 +11,7 @@

import dbt.compat
import dbt.exceptions
import dbt.utils

from dbt.clients._jinja_blocks import BlockIterator

@@ -55,7 +56,7 @@ def _compile(self, source, filename):
linecache.cache[filename] = (
len(source),
None,
[line+'\n' for line in source.splitlines()],
[line + '\n' for line in source.splitlines()],
filename
)

8 changes: 2 additions & 6 deletions core/dbt/compilation.py
@@ -1,14 +1,11 @@
import itertools
import os
import json
from collections import OrderedDict, defaultdict
import sqlparse
from collections import defaultdict

import dbt.utils
import dbt.include
import dbt.tracking

from dbt import deprecations
from dbt.utils import get_materialization, NodeType, is_type
from dbt.linker import Linker

@@ -19,9 +16,8 @@
import dbt.flags
import dbt.loader
import dbt.config
from dbt.contracts.graph.compiled import CompiledNode, CompiledGraph
from dbt.contracts.graph.compiled import CompiledNode

from dbt.clients.system import write_json
from dbt.logger import GLOBAL_LOGGER as logger

graph_file_name = 'graph.gpickle'
10 changes: 5 additions & 5 deletions core/dbt/config/__init__.py
@@ -1,5 +1,5 @@

from .renderer import ConfigRenderer
from .profile import Profile, UserConfig, PROFILES_DIR
from .project import Project
from .runtime import RuntimeConfig
# all these are just exports, they need "noqa" so flake8 will not complain.
from .renderer import ConfigRenderer # noqa
from .profile import Profile, UserConfig, PROFILES_DIR # noqa
from .project import Project # noqa
from .runtime import RuntimeConfig # noqa
13 changes: 8 additions & 5 deletions core/dbt/config/project.py
@@ -1,4 +1,3 @@

from copy import deepcopy
import hashlib
import os
@@ -14,7 +13,6 @@
from dbt.exceptions import SemverException
from dbt.exceptions import ValidationException
from dbt.exceptions import warn_or_error
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.semver import VersionSpecifier
from dbt.semver import versions_compatible
from dbt.version import get_installed_version
@@ -145,9 +143,10 @@ def _parse_versions(versions):
class Project(object):
def __init__(self, project_name, version, project_root, profile_name,
source_paths, macro_paths, data_paths, test_paths,
analysis_paths, docs_paths, target_path, clean_targets,
log_path, modules_path, quoting, models, on_run_start,
on_run_end, archive, seeds, dbt_version, packages):
analysis_paths, docs_paths, target_path, archive_paths,
clean_targets, log_path, modules_path, quoting, models,
on_run_start, on_run_end, archive, seeds, dbt_version,
packages):
self.project_name = project_name
self.version = version
self.project_root = project_root
@@ -159,6 +158,7 @@ def __init__(self, project_name, version, project_root, profile_name,
self.analysis_paths = analysis_paths
self.docs_paths = docs_paths
self.target_path = target_path
self.archive_paths = archive_paths
self.clean_targets = clean_targets
self.log_path = log_path
self.modules_path = modules_path
@@ -241,6 +241,7 @@ def from_project_config(cls, project_dict, packages_dict=None):
analysis_paths = project_dict.get('analysis-paths', [])
docs_paths = project_dict.get('docs-paths', source_paths[:])
target_path = project_dict.get('target-path', 'target')
archive_paths = project_dict.get('archive-paths', ['archives'])
# should this also include the modules path by default?
clean_targets = project_dict.get('clean-targets', [target_path])
log_path = project_dict.get('log-path', 'logs')
@@ -274,6 +275,7 @@ def from_project_config(cls, project_dict, packages_dict=None):
analysis_paths=analysis_paths,
docs_paths=docs_paths,
target_path=target_path,
archive_paths=archive_paths,
clean_targets=clean_targets,
log_path=log_path,
modules_path=modules_path,
@@ -321,6 +323,7 @@ def to_project_config(self, with_packages=False):
'analysis-paths': self.analysis_paths,
'docs-paths': self.docs_paths,
'target-path': self.target_path,
'archive-paths': self.archive_paths,
'clean-targets': self.clean_targets,
'log-path': self.log_path,
'quoting': self.quoting,
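A minimal sketch (plain Python, no dbt import; the project values are invented) of the archive-paths default that from_project_config applies above:

```python
# project_dict stands in for a parsed dbt_project.yml
project_dict = {
    'name': 'my_project',
    'version': '1.0.0',
    # no 'archive-paths' key here
}

# mirrors: archive_paths = project_dict.get('archive-paths', ['archives'])
print(project_dict.get('archive-paths', ['archives']))
# ['archives']

# an explicit setting takes precedence over the default
project_dict['archive-paths'] = ['archives', 'legacy-archives']
print(project_dict.get('archive-paths', ['archives']))
# ['archives', 'legacy-archives']
```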
10 changes: 6 additions & 4 deletions core/dbt/config/runtime.py
@@ -18,10 +18,10 @@ class RuntimeConfig(Project, Profile):
"""
def __init__(self, project_name, version, project_root, source_paths,
macro_paths, data_paths, test_paths, analysis_paths,
docs_paths, target_path, clean_targets, log_path,
modules_path, quoting, models, on_run_start, on_run_end,
archive, seeds, dbt_version, profile_name, target_name,
config, threads, credentials, packages, args):
docs_paths, target_path, archive_paths, clean_targets,
log_path, modules_path, quoting, models, on_run_start,
on_run_end, archive, seeds, dbt_version, profile_name,
target_name, config, threads, credentials, packages, args):
# 'vars'
self.args = args
self.cli_vars = parse_cli_vars(getattr(args, 'vars', '{}'))
@@ -39,6 +39,7 @@ def __init__(self, project_name, version, project_root, source_paths,
analysis_paths=analysis_paths,
docs_paths=docs_paths,
target_path=target_path,
archive_paths=archive_paths,
clean_targets=clean_targets,
log_path=log_path,
modules_path=modules_path,
@@ -87,6 +88,7 @@ def from_parts(cls, project, profile, args):
analysis_paths=project.analysis_paths,
docs_paths=project.docs_paths,
target_path=project.target_path,
archive_paths=project.archive_paths,
clean_targets=project.clean_targets,
log_path=project.log_path,
modules_path=project.modules_path,