Skip to content

Commit

Permalink
53 add database schema and datalake options to profile creation (#60)
Browse files Browse the repository at this point in the history
### Add new Profile Options

Add profile options that reflect Dremio concepts.

### Description

Existing profile options reflected dbt concepts that did not map intuitively
to Dremio concepts, such as `database`, `schema`, etc. Hence, added
profile options that reflect Dremio concepts, and mapped these to the
dbt equivalents.

Added:
- object_storage_source --> datalake
- object_storage_path --> root_path
- dremio_space --> database
- dremio_space_folder --> schema

### Related Issue

#53

### Additional Reviewers

@ArgusLi 
@ravjotbrar 

### Testing

- /tests/component/test_profile_template.py [new]
- /.github/scripts/smoke_test.sh (all tests)

Co-authored-by: ArgusLi <[email protected]>
  • Loading branch information
jlarue26 and ArgusLi authored Nov 9, 2022
1 parent d1ace8f commit 5515c85
Show file tree
Hide file tree
Showing 5 changed files with 198 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/smoke_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ EOF
# Main
test_type=$1
test_ssl="${6:-false}"
profiles_path=~/.dbt/profiles.yml
profiles_path=~/.dbt/profiles.yml

if [ $test_type == softwareUP ]; then
test_user=$2
Expand Down
2 changes: 1 addition & 1 deletion dbt/adapters/dremio/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def create_catalog(self, database, schema):
credentials = connection.credentials
api_parameters = connection.handle.get_parameters()

if database == "@" + credentials.UID:
if database == ("@" + credentials.UID):
logger.debug("Database is default: creating folders only")
else:
self._create_space(database, api_parameters)
Expand Down
78 changes: 44 additions & 34 deletions dbt/adapters/dremio/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,60 +20,65 @@

@dataclass
class DremioCredentials(Credentials):
environment: Optional[str]
database: Optional[str]
schema: Optional[str]
datalake: Optional[str]
root_path: Optional[str]
environment: Optional[str] = None
UID: Optional[str] = None
PWD: Optional[str] = None
pat: Optional[str] = None
datalake: Optional[str] = None
root_path: Optional[str] = None
database: Optional[str] = None
schema: Optional[str] = None
cloud_project_id: Optional[str] = None
cloud_host: Optional[str] = None
software_host: Optional[str] = None
UID: Optional[str] = None
PWD: Optional[str] = None
port: Optional[int] = 9047 # for rest endpoint
use_ssl: Optional[bool] = True
pat: Optional[str] = None
additional_parameters: Optional[str] = None

_ALIASES = {
# Only terms on right-side will be used going forward.
"username": "UID", # backwards compatibility with existing profiles
"user": "UID",
"username": "UID",
"pass": "PWD",
"password": "PWD",
"server": "host",
"track": "environment",
"space": "database",
"folder": "schema",
"materialization_database": "datalake",
"materialization_schema": "root_path",
"object_storage_source": "datalake",
"object_storage_path": "root_path",
"dremio_space": "database",
"dremio_space_folder": "schema",
}

_DEFAULT_OBJECT_STORAGE_SOURCE = "$scratch"
_SPACE_NAME_PLACEHOLDER = "@user"

@property
def type(self):
return "dremio"

@property
def unique_field(self):
return self.host
def aliases(self):
return self._ALIASES

def _connection_keys(self):
# return an iterator of keys to pretty-print in 'dbt debug'
# raise NotImplementedError

return (
"driver",
"cloud_host",
"cloud_project_id",
"software_host",
"port",
"use_ssl",
"environment",
# These are aliased...
"UID",
"root_path",
"datalake",
"database",
"schema",
"additional_parameters",
"datalake",
"root_path",
"environment",
"use_ssl",
# ...by these. Output these to ensure they match
# what they alias.
"user", # -> UID
"username", # -> UID
"object_storage_source", # -> datalake
"object_storage_path", # -> root_path
"dremio_space", # -> database
"dremio_space_folder", # -> schema
)

@classmethod
Expand All @@ -83,26 +88,31 @@ def __pre_deserialize__(cls, data):
data["cloud_host"] = None
if "software_host" not in data:
data["software_host"] = None

if "database" not in data:
data["database"] = None
if "schema" not in data:
data["schema"] = None

if "datalake" not in data:
data["datalake"] = None
if "root_path" not in data:
data["root_path"] = None
if "environment" not in data:
data["environment"] = None

if "pat" not in data:
data["pat"] = None

if "environment" not in data:
data["environment"] = None

return data

def __post_init__(self):
if self.database is None:
self.database = "@" + self.UID
if self.schema is None:
self.schema = DremioRelation.no_schema
if self.datalake is None:
self.datalake = "$scratch"
self.datalake = self._DEFAULT_OBJECT_STORAGE_SOURCE
if self.root_path is None:
self.root_path = DremioRelation.no_schema
if self.database is None or self.database == self._SPACE_NAME_PLACEHOLDER:
self.database = f"@{self.UID}"
if self.schema is None:
self.schema = DremioRelation.no_schema
14 changes: 13 additions & 1 deletion dbt/include/dremio/profile_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ fixed:
type: dremio
use_ssl: true
prompts:
_choose_cloud_or_software_with_passowrd_or_software_with_pat:
_choose_cloud_or_software_with_password_or_software_with_pat:
dremio_cloud:
cloud_host:
default: api.dremio.cloud
Expand Down Expand Up @@ -54,6 +54,18 @@ prompts:
use_ssl:
default: false
hint: 'use encrypted connection'
object_storage_source:
default: '$scratch'
hint: 'object storage source for seeds, tables, etc. [dbt alias: datalake]'
object_storage_path:
default: 'no_schema'
hint: 'object storage path [dbt alias: root_path]'
dremio_space:
default: '@user'
hint: 'space for creating views [dbt alias: database]'
dremio_space_folder:
default: 'no_schema'
hint: 'dremio space folder [dbt alias: schema]'
threads:
hint: '1 or more'
type: 'int'
Expand Down
139 changes: 139 additions & 0 deletions tests/component/test_profile_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Copyright (C) 2022 Dremio Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Any, Dict

import pytest
from dbt.adapters.dremio.credentials import DremioCredentials
from dbt.config.profile import read_profile

# Tests require manual setup before executing.
#
# Prior to running these tests, create three dbt projects:
#
# 1. `dbt init test_cloud_options`
# - accept all default options
# - provide any value for mandatory options
#
# 2. `dbt init test_sw_up_options`
# - accept all default options
# - provide any value for mandatory options
#
# 3. `dbt init test_sw_pat_options`
# - accept all default options
# - provide any value for mandatory options
#
# These tests assume there exists a $HOME/.dbt/profiles.yml
# file containing these three dbt projects.
class TestProfileTemplate:
    """Verify the profile options and option aliases of the Dremio adapter.

    The ``test_*_options`` tests read ``profiles.yml`` from
    ``PROFILE_DIRECTORY`` and compare the ``dev`` output of each manually
    created dbt project against the defaults expected for it.
    ``test_aliases`` checks the alias mapping exposed by
    ``DremioCredentials.aliases``.
    """

    # Built with os.path.join so the separator fits the host OS; the trailing
    # empty component keeps the trailing separator on the directory.
    PROFILE_DIRECTORY = os.path.join(os.path.expanduser("~"), ".dbt", "")

    # These projects must exist in the profiles.yml file. All defaults must be selected.
    _TEST_CLOUD_PROFILE_PROJECT = "test_cloud_options"
    _TEST_SOFTWARE_USER_PASSWORD_PROFILE_PROJECT = "test_sw_up_options"
    _TEST_SOFTWARE_PAT_PROFILE_PROJECT = "test_sw_pat_options"

    # Expected option values. A value of None means "the option must be
    # present and set, but its value is user-specific and is not compared".
    _PASSWORD_AUTH_PROFILE_OPTIONS_WITH_DEFAULTS = {"password": None}
    _PAT_AUTH_PROFILE_OPTIONS_WITH_DEFAULTS = {"pat": None}
    _COMMON_PROFILE_OPTIONS_WITH_DEFAULTS = {
        "user": None,
        "object_storage_source": "$scratch",
        "object_storage_path": "no_schema",
        "dremio_space": "@user",
        "dremio_space_folder": "no_schema",
        "threads": 1,
    }
    _DREMIO_CLOUD_PROFILE_SPECIFIC_OPTIONS_WITH_DEFAULTS = {
        "cloud_host": "api.dremio.cloud",
        "cloud_project_id": None,
        "use_ssl": True,
    }
    _DREMIO_SW_PROFILE_SPECIFIC_OPTIONS_WITH_DEFAULTS = {
        "software_host": None,
        "port": 9047,
        "use_ssl": False,
    }

    _DREMIO_CLOUD_PROFILE_OPTIONS_WITH_DEFAULTS = (
        _COMMON_PROFILE_OPTIONS_WITH_DEFAULTS
        | _DREMIO_CLOUD_PROFILE_SPECIFIC_OPTIONS_WITH_DEFAULTS
        | _PAT_AUTH_PROFILE_OPTIONS_WITH_DEFAULTS
    )
    _DREMIO_SOFTWARE_USERNAME_PASSWORD_PROFILE_OPTIONS = (
        _COMMON_PROFILE_OPTIONS_WITH_DEFAULTS
        | _DREMIO_SW_PROFILE_SPECIFIC_OPTIONS_WITH_DEFAULTS
        | _PASSWORD_AUTH_PROFILE_OPTIONS_WITH_DEFAULTS
    )
    _DREMIO_SOFTWARE_PAT_PROFILE_OPTIONS = (
        _COMMON_PROFILE_OPTIONS_WITH_DEFAULTS
        | _DREMIO_SW_PROFILE_SPECIFIC_OPTIONS_WITH_DEFAULTS
        | _PAT_AUTH_PROFILE_OPTIONS_WITH_DEFAULTS
    )

    # Profile option name -> credentials field it is expected to alias.
    _PROFILE_OPTIONS_ALIASES = {
        "username": "UID",
        "user": "UID",
        "password": "PWD",
        "object_storage_source": "datalake",
        "object_storage_path": "root_path",
        "dremio_space": "database",
        "dremio_space_folder": "schema",
    }

    def test_cloud_options(self) -> None:
        """Check the defaults written for the Dremio Cloud project."""
        self._test_project_profile_options(
            self._get_dbt_test_project_dict(self._TEST_CLOUD_PROFILE_PROJECT),
            self._DREMIO_CLOUD_PROFILE_OPTIONS_WITH_DEFAULTS,
        )

    def test_software_username_password_options(self) -> None:
        """Check the defaults written for the software user/password project."""
        self._test_project_profile_options(
            self._get_dbt_test_project_dict(
                self._TEST_SOFTWARE_USER_PASSWORD_PROFILE_PROJECT
            ),
            self._DREMIO_SOFTWARE_USERNAME_PASSWORD_PROFILE_OPTIONS,
        )

    def test_software_pat_options(self) -> None:
        """Check the defaults written for the software PAT project."""
        self._test_project_profile_options(
            self._get_dbt_test_project_dict(self._TEST_SOFTWARE_PAT_PROFILE_PROJECT),
            self._DREMIO_SOFTWARE_PAT_PROFILE_OPTIONS,
        )

    def test_aliases(self) -> None:
        """Check that every expected option alias maps to the expected field."""
        credential_option_aliases = DremioCredentials().aliases
        for option, alias in self._PROFILE_OPTIONS_ALIASES.items():
            assert (
                option in credential_option_aliases
            ), f"'{option}' is not a registered credentials alias"
            assert credential_option_aliases[option] == alias

    # Helper, not a test: the leading underscore keeps pytest from collecting it.
    def _get_dbt_test_project_dict(self, dbt_test_project_name: str) -> Dict[str, Any]:
        """Return the ``dev`` output options of the named project.

        Fails the calling test with an explicit message when the project, its
        ``outputs`` section or its ``dev`` target cannot be found.
        """
        # read_profile returns a dictionary with the following layout:
        # { <project name>: { 'outputs': { 'dev': { <all the profile options and values that we want to test> } } } }
        profile_dictionary = read_profile(self.PROFILE_DIRECTORY)
        try:
            return profile_dictionary[dbt_test_project_name]["outputs"]["dev"]
        except (KeyError, TypeError):
            # KeyError: a level is absent; TypeError: a level is present but empty (None).
            pytest.fail(
                f"No 'dev' output found for project '{dbt_test_project_name}' in "
                f"{self.PROFILE_DIRECTORY}; create the project with `dbt init` first."
            )

    # Helper, not a test: the leading underscore keeps pytest from collecting it.
    def _test_project_profile_options(
        self, test_project: Dict[str, Any], test_options: Dict[str, Any]
    ) -> None:
        """Assert that ``test_project`` sets every option in ``test_options``.

        Each option must be present with a non-None value. When the expected
        value is not None, the actual value must also equal it.
        """
        assert test_project is not None, "profile target is empty"

        for option, expected in test_options.items():
            assert option in test_project, f"profile is missing option '{option}'"
            actual = test_project[option]
            assert actual is not None, f"profile option '{option}' has no value"
            if expected is not None:
                assert (
                    actual == expected
                ), f"profile option '{option}': expected {expected!r}, got {actual!r}"

0 comments on commit 5515c85

Please sign in to comment.