Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 275: Support dbt package dependencies in Git subdirectories #3267

Merged
Merged
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: 2.1
jobs:
unit:
docker: &test_only
- image: fishtownanalytics/test-container:11
- image: fishtownanalytics/test-container:12
environment:
DBT_INVOCATION_ENV: circle
DOCKER_TEST_DATABASE_HOST: "database"
Expand Down Expand Up @@ -37,7 +37,7 @@ jobs:
destination: dist
integration-postgres:
docker:
- image: fishtownanalytics/test-container:11
- image: fishtownanalytics/test-container:12
environment:
DBT_INVOCATION_ENV: circle
DOCKER_TEST_DATABASE_HOST: "database"
Expand Down
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

### Features
- Support commit hashes in dbt deps package revision ([#3268](https:/fishtown-analytics/dbt/issues/3268), [#3270](https:/fishtown-analytics/dbt/pull/3270))
- Add optional `subdirectory` key to install dbt packages that are not hosted at the root of a Git repository ([#275](https:/fishtown-analytics/dbt/issues/275), [#3267](https:/fishtown-analytics/dbt/pull/3267))
- Add optional configs for `require_partition_filter` and `partition_expiration_days` in BigQuery ([#1843](https:/fishtown-analytics/dbt/issues/1843), [#2928](https:/fishtown-analytics/dbt/pull/2928))
- Fix for EOL SQL comments prevent entire line execution ([#2731](https:/fishtown-analytics/dbt/issues/2731), [#2974](https:/fishtown-analytics/dbt/pull/2974))
- Add optional `merge_update_columns` config to specify columns to update for `merge` statements in BigQuery and Snowflake ([#1862](https:/fishtown-analytics/dbt/issues/1862), [#3100](https:/fishtown-analytics/dbt/pull/3100))
Expand Down Expand Up @@ -52,9 +53,9 @@ Contributors:
- [@techytushar](https:/techytushar) ([#3158](https:/fishtown-analytics/dbt/pull/3158))
- [@cgopalan](https:/cgopalan) ([#3165](https:/fishtown-analytics/dbt/pull/3165), [#3182](https:/fishtown-analytics/dbt/pull/3182))
- [@fux](https:/fuchsst) ([#3241](https:/fishtown-analytics/dbt/issues/3241))
- [@dmateusp](https:/dmateusp) ([#3270](https:/fishtown-analytics/dbt/pull/3270))
- [@arzavj](https:/arzavj) ([3106](https:/fishtown-analytics/dbt/pull/3106))
- [@JCZuurmond](https:/JCZuurmond) ([#3133](https:/fishtown-analytics/dbt/pull/3133))
- [@dmateusp](https:/dmateusp) ([#3270](https:/fishtown-analytics/dbt/pull/3270), [#3267](https:/fishtown-analytics/dbt/pull/3267))

## dbt 0.19.1 (March 31, 2021)

Expand Down
3 changes: 3 additions & 0 deletions Dockerfile.test
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ FROM ubuntu:18.04
ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
software-properties-common \
&& add-apt-repository ppa:git-core/ppa -y \
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to update the git version in the test container, it was 2.17, needs to be >= 2.25

&& apt-get dist-upgrade -y \
&& apt-get install -y --no-install-recommends \
netcat \
Expand Down
31 changes: 25 additions & 6 deletions core/dbt/clients/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,30 @@
from dbt.clients.system import run_cmd, rmdir
from dbt.logger import GLOBAL_LOGGER as logger
import dbt.exceptions
from packaging import version


def _is_commit(revision: str) -> bool:
# match SHA-1 git commit
return bool(re.match(r"\b[0-9a-f]{40}\b", revision))


def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None):
def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None, subdirectory=None):
has_revision = revision is not None
is_commit = _is_commit(revision or "")

clone_cmd = ['git', 'clone', '--depth', '1']
if subdirectory:
logger.debug(' Subdirectory specified: {}, using sparse checkout.'.format(subdirectory))
out, _ = run_cmd(cwd, ['git', '--version'], env={'LC_ALL': 'C'})
git_version = version.parse(re.search(r"\d+\.\d+\.\d+", out.decode("utf-8")).group(0))
if not git_version >= version.parse("2.25.0"):
# 2.25.0 introduces --sparse
raise RuntimeError(
"Please update your git version to pull a dbt package "
"from a subdirectory: your version is {}, >= 2.25.0 needed".format(git_version)
)
clone_cmd.extend(['--filter=blob:none', '--sparse'])

if has_revision and not is_commit:
clone_cmd.extend(['--branch', revision])
Expand All @@ -24,9 +36,11 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None):

if dirname is not None:
clone_cmd.append(dirname)

result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})

if subdirectory:
run_cmd(os.path.join(cwd, dirname or ''), ['git', 'sparse-checkout', 'set', subdirectory])

if remove_git_dir:
rmdir(os.path.join(dirname, '.git'))

Expand Down Expand Up @@ -84,11 +98,16 @@ def remove_remote(cwd):


def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
revision=None):
revision=None, subdirectory=None):
exists = None
try:
_, err = clone(repo, cwd, dirname=dirname,
remove_git_dir=remove_git_dir)
_, err = clone(
repo,
cwd,
dirname=dirname,
remove_git_dir=remove_git_dir,
subdirectory=subdirectory,
)
except dbt.exceptions.CommandResultError as exc:
err = exc.stderr.decode('utf-8')
exists = re.match("fatal: destination path '(.+)' already exists", err)
Expand Down Expand Up @@ -120,4 +139,4 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
start_sha[:7], end_sha[:7])
else:
logger.debug(' Checked out at {}.', end_sha[:7])
return directory
return os.path.join(directory, subdirectory or '')
1 change: 1 addition & 0 deletions core/dbt/contracts/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class GitPackage(Package):
git: str
revision: Optional[RawVersion] = None
warn_unpinned: Optional[bool] = None
subdirectory: Optional[str] = None

def get_revisions(self) -> List[str]:
if self.revision is None:
Expand Down
3 changes: 3 additions & 0 deletions core/dbt/deps/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ def get_installation_path(self, project, renderer):
dest_dirname = self.get_project_name(project, renderer)
return os.path.join(project.modules_path, dest_dirname)

def get_subdirectory(self):
return None


SomePinned = TypeVar('SomePinned', bound=PinnedPackage)
SomeUnpinned = TypeVar('SomeUnpinned', bound='UnpinnedPackage')
Expand Down
26 changes: 20 additions & 6 deletions core/dbt/deps/git.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import hashlib
from typing import List
from typing import List, Optional

from dbt.clients import git, system
from dbt.config import Project
Expand Down Expand Up @@ -37,16 +37,24 @@ def source_type(self) -> str:

class GitPinnedPackage(GitPackageMixin, PinnedPackage):
def __init__(
self, git: str, revision: str, warn_unpinned: bool = True
self,
git: str,
revision: str,
warn_unpinned: bool = True,
subdirectory: Optional[str] = None,
) -> None:
super().__init__(git)
self.revision = revision
self.warn_unpinned = warn_unpinned
self.subdirectory = subdirectory
self._checkout_name = md5sum(self.git)

def get_version(self):
return self.revision

def get_subdirectory(self):
return self.subdirectory

def nice_version_name(self):
if self.revision == 'HEAD':
return 'HEAD (default revision)'
Expand All @@ -69,7 +77,7 @@ def _checkout(self):
try:
dir_ = git.clone_and_checkout(
self.git, get_downloads_path(), revision=self.revision,
dirname=self._checkout_name
dirname=self._checkout_name, subdirectory=self.subdirectory
)
except ExecutableError as exc:
if exc.cmd and exc.cmd[0] == 'git':
Expand Down Expand Up @@ -107,11 +115,16 @@ def install(self, project, renderer):

class GitUnpinnedPackage(GitPackageMixin, UnpinnedPackage[GitPinnedPackage]):
def __init__(
self, git: str, revisions: List[str], warn_unpinned: bool = True
self,
git: str,
revisions: List[str],
warn_unpinned: bool = True,
subdirectory: Optional[str] = None,
) -> None:
super().__init__(git)
self.revisions = revisions
self.warn_unpinned = warn_unpinned
self.subdirectory = subdirectory

@classmethod
def from_contract(
Expand All @@ -122,7 +135,7 @@ def from_contract(
# we want to map None -> True
warn_unpinned = contract.warn_unpinned is not False
return cls(git=contract.git, revisions=revisions,
warn_unpinned=warn_unpinned)
warn_unpinned=warn_unpinned, subdirectory=contract.subdirectory)

def all_names(self) -> List[str]:
if self.git.endswith('.git'):
Expand All @@ -140,6 +153,7 @@ def incorporate(
git=self.git,
revisions=self.revisions + other.revisions,
warn_unpinned=warn_unpinned,
subdirectory=self.subdirectory,
)

def resolved(self) -> GitPinnedPackage:
Expand All @@ -153,5 +167,5 @@ def resolved(self) -> GitPinnedPackage:

return GitPinnedPackage(
git=self.git, revision=requested.pop(),
warn_unpinned=self.warn_unpinned
warn_unpinned=self.warn_unpinned, subdirectory=self.subdirectory
)
5 changes: 4 additions & 1 deletion core/dbt/task/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,11 @@ def run(self):
for package in final_deps:
logger.info('Installing {}', package)
package.install(self.config, renderer)
logger.info(' Installed from {}\n',
logger.info(' Installed from {}',
package.nice_version_name())
if package.get_subdirectory():
logger.info(' and subdirectory {}\n',
package.get_subdirectory())

self.track_package_install(
package_name=package.name,
Expand Down
110 changes: 73 additions & 37 deletions test/rpc/test_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,42 +72,78 @@ def deps_with_packages(packages, bad_packages, project_dir, profiles_dir, schema
querier.is_result(querier.async_wait(tok1))


@pytest.mark.parametrize(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jtcohen6 I parametrized the test instead of the copy paste fyi

(I did this when I merged the changes from the PR that adds support for commits here)

"packages, bad_packages",
# from dbt hub
[(
[{
'package': 'fishtown-analytics/dbt_utils',
'version': '0.5.0',
}],
# wrong package name
[{
'package': 'fishtown-analytics/dbt_util',
'version': '0.5.0',
}],
),
# from git release/tag/branch
(
[{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': '0.5.0',
}],
# if you use a bad URL, git thinks it's a private repo and prompts for auth
[{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': 'not-a-real-revision',
}],
),
# from git commit
(
[{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': 'b736cf6acdbf80d2de69b511a51c8d7fe214ee79',
}],
# don't use short commits
[{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': 'b736cf6',
}],
),
# from git release and subdirectory
(
[{
'git': 'https:/fishtown-analytics/dbt-labs-experimental-features.git',
'revision': '0.0.1',
'subdirectory': 'materialized-views',
}],
[{
'git': 'https:/fishtown-analytics/dbt-labs-experimental-features.git',
'revision': '0.0.1',
'subdirectory': 'path/to/nonexistent/dir',
}],
),
# from git commit and subdirectory
(
[{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': 'f4f84e9110db26aba22f756abbae9f1f8dbb15da',
'subdirectory': 'dbt_projects/dbt_utils',
}],
[{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': 'f4f84e9110db26aba22f756abbae9f1f8dbb15da',
'subdirectory': 'path/to/nonexistent/dir',
}],
)],
ids=[
"from dbt hub",
"from git release/tag/branch",
"from git commit",
"from git release and subdirectory",
"from git commit and subdirectory",
],
)
@pytest.mark.supported('postgres')
def test_rpc_deps_packages(project_root, profiles_root, dbt_profile, unique_schema):
packages = [{
'package': 'fishtown-analytics/dbt_utils',
'version': '0.5.0',
}]
bad_packages = [{
'package': 'fishtown-analytics/dbt_util',
'version': '0.5.0',
}]
deps_with_packages(packages, bad_packages, project_root, profiles_root, unique_schema)


@pytest.mark.supported('postgres')
def test_rpc_deps_git(project_root, profiles_root, dbt_profile, unique_schema):
packages = [{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': '0.5.0'
}]
# if you use a bad URL, git thinks it's a private repo and prompts for auth
bad_packages = [{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': 'not-a-real-revision'
}]
deps_with_packages(packages, bad_packages, project_root, profiles_root, unique_schema)


@pytest.mark.supported('postgres')
def test_rpc_deps_git_commit(project_root, profiles_root, dbt_profile, unique_schema):
packages = [{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': 'b736cf6acdbf80d2de69b511a51c8d7fe214ee79'
}]
# don't use short commits
bad_packages = [{
'git': 'https:/fishtown-analytics/dbt-utils.git',
'revision': 'b736cf6'
}]
def test_rpc_deps_packages(project_root, profiles_root, dbt_profile, unique_schema, packages, bad_packages):
deps_with_packages(packages, bad_packages, project_root, profiles_root, unique_schema)