Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow duplicate refable node names across packages #7374

Merged
merged 11 commits into from
May 8, 2023
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20230429-155057.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Allow duplicate manifest node (models, seeds, analyses, snapshots) names across
packages
time: 2023-04-29T15:50:57.757832-04:00
custom:
Author: MichelleArk
Issue: "7446"
1 change: 1 addition & 0 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ def resolve(
target_version: Optional[NodeVersion] = None,
) -> RelationProxy:
target_model = self.manifest.resolve_ref(
self.model,
target_name,
target_package,
target_version,
Expand Down
45 changes: 37 additions & 8 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
DuplicateResourceNameError,
DuplicateMacroInPackageError,
DuplicateMaterializationNameError,
AmbiguousResourceNameRefError,
)
from dbt.helper_types import PathSet
from dbt.events.functions import fire_event
Expand Down Expand Up @@ -149,26 +150,35 @@ class RefableLookup(dbtClassMixin):
_lookup_types: ClassVar[set] = set(NodeType.refable())
_versioned_types: ClassVar[set] = set(NodeType.versioned())

# refables are actually unique, so the Dict[PackageName, UniqueID] will
# only ever have exactly one value, but doing 3 dict lookups instead of 1
# is not a big deal at all and retains consistency
def __init__(self, manifest: "Manifest"):
self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
self.populate(manifest)

def get_unique_id(self, key, package: Optional[PackageName], version: Optional[NodeVersion]):
def get_unique_id(
self,
key: str,
package: Optional[PackageName],
version: Optional[NodeVersion],
node: Optional[GraphMemberNode] = None,
):
if version:
key = f"{key}.v{version}"
return find_unique_id_for_package(self.storage, key, package)

unique_ids = self._find_unique_ids_for_package(key, package)
if len(unique_ids) > 1:
raise AmbiguousResourceNameRefError(key, unique_ids, node)
else:
return unique_ids[0] if unique_ids else None

def find(
self,
key,
key: str,
package: Optional[PackageName],
version: Optional[NodeVersion],
manifest: "Manifest",
source_node: Optional[GraphMemberNode] = None,
):
unique_id = self.get_unique_id(key, package, version)
unique_id = self.get_unique_id(key, package, version, source_node)
if unique_id is not None:
node = self.perform_lookup(unique_id, manifest)
# If this is an unpinned ref (no 'version' arg was passed),
Expand Down Expand Up @@ -223,6 +233,22 @@ def perform_lookup(self, unique_id: UniqueID, manifest) -> ManifestNode:
)
return manifest.nodes[unique_id]

def _find_unique_ids_for_package(self, key, package: Optional[PackageName]) -> List[str]:
if key not in self.storage:
return []

pkg_dct: Mapping[PackageName, UniqueID] = self.storage[key]

if package is None:
if not pkg_dct:
return []
else:
return list(pkg_dct.values())
elif package in pkg_dct:
return [pkg_dct[package]]
else:
return []


class MetricLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
Expand Down Expand Up @@ -916,6 +942,7 @@ def analysis_lookup(self) -> AnalysisLookup:
# and dbt.parser.manifest._process_refs_for_node
def resolve_ref(
self,
source_node: GraphMemberNode,
target_model_name: str,
target_model_package: Optional[str],
target_model_version: Optional[NodeVersion],
Expand All @@ -928,7 +955,9 @@ def resolve_ref(

candidates = _search_packages(current_project, node_package, target_model_package)
for pkg in candidates:
node = self.ref_lookup.find(target_model_name, pkg, target_model_version, self)
node = self.ref_lookup.find(
target_model_name, pkg, target_model_version, self, source_node
)

if node is not None and node.config.enabled:
return node
Expand Down
15 changes: 15 additions & 0 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1961,6 +1961,21 @@ def get_message(self) -> str:
return msg


class AmbiguousResourceNameRefError(CompilationError):
def __init__(self, duped_name, unique_ids, node=None):
self.duped_name = duped_name
self.unique_ids = unique_ids
self.packages = [unique_id.split(".")[1] for unique_id in unique_ids]
super().__init__(msg=self.get_message(), node=node)

def get_message(self) -> str:
msg = (
f'dbt found multiple potential nodes when referencing "{self.duped_name}" - {self.unique_ids}.'
f"\nTo fix this, specify which package to use as the first argument to 'ref' (one of: {self.packages})."
MichelleArk marked this conversation as resolved.
Show resolved Hide resolved
)
return msg


class AmbiguousCatalogMatchError(CompilationError):
def __init__(self, unique_id: str, match_1, match_2):
self.unique_id = unique_id
Expand Down
10 changes: 3 additions & 7 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1092,30 +1092,23 @@ def _check_resource_uniqueness(
manifest: Manifest,
config: RuntimeConfig,
) -> None:
names_resources: Dict[str, ManifestNode] = {}
alias_resources: Dict[str, ManifestNode] = {}

for resource, node in manifest.nodes.items():
if not node.is_relational:
continue

name = node.name
# the full node name is really defined by the adapter's relation
relation_cls = get_relation_class_by_name(config.credentials.type)
relation = relation_cls.create_from(config=config, node=node)
full_node_name = str(relation)

existing_node = names_resources.get(name)
if existing_node is not None and not existing_node.is_versioned:
raise dbt.exceptions.DuplicateResourceNameError(existing_node, node)

existing_alias = alias_resources.get(full_node_name)
if existing_alias is not None:
raise AmbiguousAliasError(
node_1=existing_alias, node_2=node, duped_name=full_node_name
)

names_resources[name] = node
alias_resources[full_node_name] = node


Expand Down Expand Up @@ -1193,6 +1186,7 @@ def _process_refs_for_exposure(manifest: Manifest, current_project: str, exposur
)

target_model = manifest.resolve_ref(
exposure,
target_model_name,
target_model_package,
target_model_version,
Expand Down Expand Up @@ -1245,6 +1239,7 @@ def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: M
)

target_model = manifest.resolve_ref(
metric,
target_model_name,
target_model_package,
target_model_version,
Expand Down Expand Up @@ -1349,6 +1344,7 @@ def _process_refs_for_node(manifest: Manifest, current_project: str, node: Manif
)

target_model = manifest.resolve_ref(
node,
target_model_name,
target_model_package,
target_model_version,
Expand Down
89 changes: 89 additions & 0 deletions test/unit/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
)

from dbt.events.functions import reset_metadata_vars
from dbt.exceptions import AmbiguousResourceNameRefError
from dbt.flags import set_from_args

from dbt.node_types import NodeType
Expand Down Expand Up @@ -1443,11 +1444,75 @@ def _refable_parameter_sets():
version=None,
expected=None,
),
# duplicate node name across package
FindNodeSpec(
nodes=[MockNode("project_a", "my_model"), MockNode("project_b", "my_model")],
sources=[],
package="project_a",
version=None,
expected=("project_a", "my_model"),
),
# duplicate node name across package: root node preferred to package node
FindNodeSpec(
nodes=[MockNode("root", "my_model"), MockNode("project_a", "my_model")],
sources=[],
package=None,
version=None,
expected=("root", "my_model"),
),
FindNodeSpec(
nodes=[MockNode("root", "my_model"), MockNode("project_a", "my_model")],
sources=[],
package="root",
version=None,
expected=("root", "my_model"),
),
FindNodeSpec(
nodes=[MockNode("root", "my_model"), MockNode("project_a", "my_model")],
sources=[],
package="project_a",
version=None,
expected=("project_a", "my_model"),
),
# duplicate node name across package: resolved by version
FindNodeSpec(
nodes=[
MockNode("project_a", "my_model", version="1"),
MockNode("project_b", "my_model", version="2"),
],
sources=[],
package=None,
version="1",
expected=("project_a", "my_model", "1"),
),
]
)
return sets


def _ambiguous_ref_parameter_sets():
sets = [
FindNodeSpec(
nodes=[MockNode("project_a", "my_model"), MockNode("project_b", "my_model")],
sources=[],
package=None,
version=None,
expected=None,
),
FindNodeSpec(
nodes=[
MockNode("project_a", "my_model", version="2", is_latest_version=True),
MockNode("project_b", "my_model", version="1", is_latest_version=True),
],
sources=[],
package=None,
version=None,
expected=None,
),
]
return sets


def id_nodes(arg):
if isinstance(arg, list):
node_names = "__".join(f"{n.package_name}_{n.search_name}" for n in arg)
Expand All @@ -1464,6 +1529,7 @@ def id_nodes(arg):
def test_resolve_ref(nodes, sources, package, version, expected):
manifest = make_manifest(nodes=nodes, sources=sources)
result = manifest.resolve_ref(
source_node=None,
target_model_name="my_model",
target_model_package=package,
target_model_version=version,
Expand All @@ -1485,6 +1551,29 @@ def test_resolve_ref(nodes, sources, package, version, expected):
assert result.package_name == expected_package


@pytest.mark.parametrize(
"nodes,sources,package,version,expected",
_ambiguous_ref_parameter_sets(),
ids=id_nodes,
)
def test_resolve_ref_ambiguous_resource_name_across_packages(
nodes, sources, package, version, expected
):
manifest = make_manifest(
nodes=nodes,
sources=sources,
)
with pytest.raises(AmbiguousResourceNameRefError):
manifest.resolve_ref(
source_node=None,
target_model_name="my_model",
target_model_package=None,
target_model_version=version,
current_project="root",
node_package="root",
)


def _source_parameter_sets():
sets = [
# empties
Expand Down
10 changes: 5 additions & 5 deletions tests/functional/duplicates/test_duplicate_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from dbt.exceptions import CompilationError, DuplicateResourceNameError
from dbt.exceptions import CompilationError, AmbiguousAliasError
from dbt.tests.fixtures.project import write_project_files
from dbt.tests.util import run_dbt, get_manifest

Expand Down Expand Up @@ -88,7 +88,7 @@ def test_duplicate_model_disabled_partial_parsing(self, project):
assert len(results) == 1


class TestDuplicateModelEnabledAcrossPackages:
class TestDuplicateModelAliasEnabledAcrossPackages:
@pytest.fixture(scope="class")
def models(self):
return {"table_model.sql": enabled_model_sql}
Expand All @@ -105,10 +105,10 @@ def setUp(self, project_root):
def packages(self):
return {"packages": [{"local": "local_dependency"}]}

def test_duplicate_model_enabled_across_packages(self, project):
def test_duplicate_model_alias_enabled_across_packages(self, project):
run_dbt(["deps"])
message = "dbt found two models with the name"
with pytest.raises(DuplicateResourceNameError) as exc:
message = "dbt found two resources with the database representation"
with pytest.raises(AmbiguousAliasError) as exc:
run_dbt(["run"])
assert message in str(exc.value)

Expand Down