Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use profile specified in --profile with dbt init #7450

Merged
merged 5 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20230424-161642.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: If --profile specified with dbt-init, create the project with the specified
profile
time: 2023-04-24T16:16:42.994547-04:00
custom:
Author: ezraerb
Issue: "6154"
37 changes: 31 additions & 6 deletions core/dbt/task/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
from pathlib import Path
import re
import shutil
import sys
from typing import Optional

import yaml
import click

import dbt.config
import dbt.clients.system
from dbt.config.profile import read_profile
from dbt.flags import get_flags
from dbt.version import _get_adapter_plugin_names
from dbt.adapters.factory import load_plugin, get_include_paths
Expand Down Expand Up @@ -247,11 +249,11 @@ def get_valid_project_name(self) -> str:

return name

def create_new_project(self, project_name: str):
def create_new_project(self, project_name: str, profile_name: str):
self.copy_starter_repo(project_name)
os.chdir(project_name)
with open("dbt_project.yml", "r") as f:
content = f"{f.read()}".format(project_name=project_name, profile_name=project_name)
content = f"{f.read()}".format(project_name=project_name, profile_name=profile_name)
with open("dbt_project.yml", "w") as f:
f.write(content)
fire_event(
Expand All @@ -274,9 +276,18 @@ def run(self):
in_project = False

if in_project:
# If --profile was specified, it means use an existing profile, which is not
# applicable to this case
if getattr(get_flags(), "PROFILE", None):
ezraerb marked this conversation as resolved.
Show resolved Hide resolved
print(
ezraerb marked this conversation as resolved.
Show resolved Hide resolved
"Can not init existing project with specified profile, edit dbt_project.yml instead"
)
sys.exit(1)

# When dbt init is run inside an existing project,
# just setup the user's profile.
profile_name = self.get_profile_name_from_current_project()
profile_specified = False
ezraerb marked this conversation as resolved.
Show resolved Hide resolved
else:
# When dbt init is run outside of an existing project,
# create a new project and set up the user's profile.
Expand All @@ -285,11 +296,25 @@ def run(self):
if project_path.exists():
fire_event(ProjectNameAlreadyExists(name=project_name))
return
self.create_new_project(project_name)
profile_name = project_name

# Ask for adapter only if skip_profile_setup flag is not provided.
if not self.args.skip_profile_setup:
# If the user specified an existing profile to use, use it instead of generating a new one
user_profile_name = getattr(get_flags(), "PROFILE", None)
if user_profile_name:
# Verify it exists. Can't use the regular profile validation routine because it assumes
# the project file exists
raw_profiles = read_profile(profiles_dir)
if user_profile_name not in raw_profiles:
ezraerb marked this conversation as resolved.
Show resolved Hide resolved
print("Could not find profile named '{}'".format(user_profile_name))
sys.exit(1)
profile_name = user_profile_name
profile_specified = True
else:
profile_name = project_name
profile_specified = False
self.create_new_project(project_name, profile_name)

# Ask for adapter only if skip_profile_setup flag is not provided and no profile to use was specified.
if not self.args.skip_profile_setup and not profile_specified:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

User experience

From a user experience perspective, I don't think we need to raise an error if the specified profile is not found. I'd rather just create it anytime it doesn't exist.

Implementation

I defer to whoever ends up being the code reviewer for this, but see below for some suggestions for refactoring.

The run method is long and has a lot of conditionals, which makes it harder to read. Refactoring this would make it easier to maintain in the future.

So I'd suggest refactoring this logic into its own method (similar to how check_if_can_write_profile is its own method). Maybe something similar to this (completely untested!):

def check_if_profile_exists(self, profile_name: Optional[str] = None) -> bool:
    """Report whether the profile named by the ``PROFILE`` flag is already defined.

    The name is taken from the ``PROFILE`` attribute of ``get_flags()``; the
    ``profile_name`` argument is accepted but not consulted. When no profile
    name is set on the flags, the profiles are not read and ``False`` is
    returned.

    Returns:
        ``True`` if the flagged profile name is a key of the data returned by
        ``read_profile`` for the flags' ``PROFILES_DIR``, otherwise ``False``.
    """
    requested_profile = getattr(get_flags(), "PROFILE", None)
    # Looked up unconditionally, before the guard below.
    profiles_location = get_flags().PROFILES_DIR
    if not requested_profile:
        # Nothing was requested, so nothing can be said to exist.
        return False
    return requested_profile in read_profile(profiles_location)

Then this method can be applied in one or more places to use the specified profile if it exists (and create it otherwise).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For user experience, I think more input is needed. The risk with creating the profile if it does not exist is the classic typo problem, where a misspelling creates a new profile instead of using the one the user actually wanted. The requirements said "existing profile" so I put in the check.

For implementation, the test is only done once currently, but shrinking a big method is usually a good idea. Refactored.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you push the refactored code @ezraerb?

This is the most recent that I'm seeing:
image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still working on a few other comments, so have not pushed yet.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes have been pushed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My first instinct was to agree with @dbeatty10 on this point:

From a user experience perspective, I don't think we need to raise an error if the specified profile is not found. I'd rather just create it anytime it doesn't exist.

I definitely appreciate the typo annoyance: dbt init --profile defaut would lead to the writing of a whole new defaut profile, when you intended to use the existing default profile. In that case, it would be better to get an explicit error.

But it means that, when initializing a new project from scratch, you have exactly two options:

  1. Do not pass --profile flag: Initialize a new project and a new profile, with the same name as your project.
  2. Pass the --profile flag. It must match an existing profile.

If we took Doug's recommendation, there would be three options:

  1. Do not pass --profile flag to initialize a new project and a new profile. The profile name will match your supplied project name (reasonable default behavior).
  2. Pass the --profile flag, and its value matches an existing profile: Initialize a new project, do not write a new profile.
  3. Pass the --profile flag, and its value doesn't match an existing profile: Initialize a new project and a new profile. The profile name will match what you passed into the --profile flag.

We could even take this one step further, and provide the same flexibility that we offer when running dbt init within an existing project:

The profile <profile-name> already exists in /Users/jerco/.dbt/profiles.yml. Do you wish to overwrite it? [y/N]: N

I don't have strong feelings either way. As a heuristic to make this determination, I'm thinking about how there are two "modes" of dbt init:

  1. Interactive. Likely first time using dbt. Need to set up everything.
  2. Programmatic. Someone who has used dbt before, likely on this machine. Wants to skip the click interactivity and jump straight to dbt init <project_name> --skip-profile-setup, or dbt init <project_name> --profile <existing_profile_name>.

This --profile flag feels designed for persona / use case (2). The first user is less likely to want fine-grained control over exactly how the profile is being named — we should provide the easiest path from start to finish, with some sensible defaults along the way. And the second user (slightly more experienced) is less likely to want the interactive walkthrough for setting up their profile.

Which is to say: I'm convinced enough that the behavior implemented in this PR is an acceptable user experience. We'll need to document the behavior in a new "Existing profile" section here: https://docs.getdbt.com/reference/commands/init

fire_event(SettingUpProfile())
if not self.check_if_can_write_profile(profile_name=profile_name):
return
Expand Down
149 changes: 149 additions & 0 deletions tests/functional/init/test_init.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import click
import os
import yaml
import pytest
from pathlib import Path
from unittest import mock
Expand Down Expand Up @@ -84,6 +85,12 @@ def test_init_task_in_project_with_existing_profiles_yml(
"""
)

def test_init_task_in_project_specifying_profile_errors(self):
# This triggers a call to sys.exit(), requiring the following to test it
with pytest.raises(SystemExit) as error:
ezraerb marked this conversation as resolved.
Show resolved Hide resolved
run_dbt(["init", "--profile", "test"])
assert error.value.code == 1


class TestInitProjectWithoutExistingProfilesYml:
@mock.patch("dbt.task.init._get_adapter_plugin_names")
Expand Down Expand Up @@ -159,6 +166,21 @@ def exists_side_effect(path):
"""
)

@mock.patch.object(Path, "exists", autospec=True)
def test_init_task_in_project_without_profile_yml_specifying_profile_errors(self, exists):
def exists_side_effect(path):
# Override responses on specific files, default to 'real world' if not overridden
return {"profiles.yml": False}.get(path.name, os.path.exists(path))

exists.side_effect = exists_side_effect

# Even though no profiles.yml file exists, the init will not modify project.yml,
# so this errors
# This triggers a call to sys.exit(), requiring the following to test it
with pytest.raises(SystemExit) as error:
ezraerb marked this conversation as resolved.
Show resolved Hide resolved
run_dbt(["init", "--profile", "test"])
assert error.value.code == 1


class TestInitProjectWithoutExistingProfilesYmlOrTemplate:
@mock.patch("dbt.task.init._get_adapter_plugin_names")
Expand Down Expand Up @@ -708,3 +730,130 @@ def test_init_inside_project_and_skip_profile_setup(
# skip interactive profile setup
run_dbt(["init", "--skip-profile-setup"])
assert len(manager.mock_calls) == 0


class TestInitOutsideOfProjectWithSpecifiedProfile(TestInitOutsideOfProjectBase):
@mock.patch("dbt.task.init._get_adapter_plugin_names")
@mock.patch("click.prompt")
def test_init_task_outside_of_project_with_specified_profile(
self, mock_prompt, mock_get_adapter, project, project_name, unique_schema, dbt_profile_data
):
manager = Mock()
manager.attach_mock(mock_prompt, "prompt")
manager.prompt.side_effect = [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious why Black didn't reformat this or the line below it to be the same.

project_name,
]
mock_get_adapter.return_value = [project.adapter.type()]
run_dbt(["init", "--profile", "test"])

manager.assert_has_calls(
[
call.prompt("Enter a name for your project (letters, digits, underscore)"),
]
)

# profiles.yml is NOT overwritten, so assert that the text matches that of the
# original fixture
with open(os.path.join(project.profiles_dir, "profiles.yml"), "r") as f:
assert f.read() == yaml.safe_dump(dbt_profile_data)

with open(os.path.join(project.project_root, project_name, "dbt_project.yml"), "r") as f:
assert (
f.read()
== f"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move this into a constant at the top of the file or into a fixture.py?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking the other init tests shows that all of them use this pattern. It's probably worth getting more opinions before changing it.

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: '{project_name}'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'test'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"


# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# In this example config, we tell dbt to build all models in the example/
# directory as views. These settings can be overridden in the individual model
# files using the `{{{{ config(...) }}}}` macro.
models:
{project_name}:
# Config indicated by + and applies to all files under models/example/
example:
+materialized: view
"""
)


class TestInitOutsideOfProjectSpecifyingInvalidProfile(TestInitOutsideOfProjectBase):
@mock.patch("dbt.task.init._get_adapter_plugin_names")
@mock.patch("click.prompt")
def test_init_task_outside_project_specifying_invalid_profile_errors(
self, mock_prompt, mock_get_adapter, project, project_name
):
manager = Mock()
manager.attach_mock(mock_prompt, "prompt")
manager.prompt.side_effect = [
project_name,
]
mock_get_adapter.return_value = [project.adapter.type()]

# This triggers a call to sys.exit(), requiring the following to test it
with pytest.raises(SystemExit) as error:
ezraerb marked this conversation as resolved.
Show resolved Hide resolved
run_dbt(["init", "--profile", "invalid"])
assert error.value.code == 1

manager.assert_has_calls(
[
call.prompt("Enter a name for your project (letters, digits, underscore)"),
]
)


class TestInitOutsideOfProjectSpecifyingProfileNoProfilesYml(TestInitOutsideOfProjectBase):
@mock.patch("dbt.task.init._get_adapter_plugin_names")
@mock.patch("click.prompt")
def test_init_task_outside_project_specifying_profile_no_profiles_yml_errors(
self, mock_prompt, mock_get_adapter, project, project_name
):
manager = Mock()
manager.attach_mock(mock_prompt, "prompt")
manager.prompt.side_effect = [
project_name,
]
mock_get_adapter.return_value = [project.adapter.type()]

# Override responses on specific files, default to 'real world' if not overridden
original_isfile = os.path.isfile
with mock.patch(
"os.path.isfile",
new=lambda path: {"profiles.yml": False}.get(
os.path.basename(path), original_isfile(path)
),
):
# This triggers a call to sys.exit(), requiring the following to test it
with pytest.raises(SystemExit) as error:
ezraerb marked this conversation as resolved.
Show resolved Hide resolved
run_dbt(["init", "--profile", "test"])
assert error.value.code == 1

manager.assert_has_calls(
[
call.prompt("Enter a name for your project (letters, digits, underscore)"),
]
)