From 9863fb901ceb6643acf9d505c75c8581ad41d6f7 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 25 Jan 2023 17:47:45 -0500 Subject: [PATCH] CT 1894 log partial parsing var changes and sort cli vars before hashing (#6713) * Log information about vars_hash, normalize cli_vars before hashing * Changie * Add to test_events.py * Update core/dbt/events/types.py Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> (cherry picked from commit a34521ec0708f55817be7f18eda059794e213814) --- .../unreleased/Fixes-20230124-141943.yaml | 6 +++++ core/dbt/events/proto_types.py | 17 ++++++++++++++ core/dbt/events/types.proto | 16 ++++++++++++- core/dbt/events/types.py | 9 ++++++++ core/dbt/parser/manifest.py | 23 ++++++++++++++++++- tests/unit/test_events.py | 1 + 6 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 .changes/unreleased/Fixes-20230124-141943.yaml diff --git a/.changes/unreleased/Fixes-20230124-141943.yaml b/.changes/unreleased/Fixes-20230124-141943.yaml new file mode 100644 index 00000000000..4b85413de58 --- /dev/null +++ b/.changes/unreleased/Fixes-20230124-141943.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Sort cli vars before hashing for partial parsing +time: 2023-01-24T14:19:43.333628-05:00 +custom: + Author: gshank + Issue: "6710" diff --git a/core/dbt/events/proto_types.py b/core/dbt/events/proto_types.py index da8721d55b9..2cf780439c3 100644 --- a/core/dbt/events/proto_types.py +++ b/core/dbt/events/proto_types.py @@ -1054,6 +1054,23 @@ class UnableToPartialParseMsg(betterproto.Message): data: "UnableToPartialParse" = betterproto.message_field(2) +@dataclass +class StateCheckVarsHash(betterproto.Message): + """I025""" + + checksum: str = betterproto.string_field(1) + vars: str = betterproto.string_field(2) + profile: str = betterproto.string_field(3) + target: str = betterproto.string_field(4) + version: str = betterproto.string_field(5) + + +@dataclass +class StateCheckVarsHashMsg(betterproto.Message): + info: "EventInfo" = betterproto.message_field(1) + data: "StateCheckVarsHash" = betterproto.message_field(2) + + @dataclass class PartialParsingNotEnabled(betterproto.Message): """I028""" diff --git a/core/dbt/events/types.proto b/core/dbt/events/types.proto index 71e7fc3176c..ece648c6ad0 100644 --- a/core/dbt/events/types.proto +++ b/core/dbt/events/types.proto @@ -838,7 +838,21 @@ message UnableToPartialParseMsg { UnableToPartialParse data = 2; } -// Skipped I025, I026, I027 +// I025 +message StateCheckVarsHash { + string checksum = 1; + string vars = 2; + string profile = 3; + string target = 4; + string version = 5; +} + +message StateCheckVarsHashMsg { + EventInfo info = 1; + StateCheckVarsHash data = 2; +} + +// Skipped I026, I027 // I028 diff --git a/core/dbt/events/types.py b/core/dbt/events/types.py index 4a2a0fb99ee..eddcd4364dd 100644 --- a/core/dbt/events/types.py +++ b/core/dbt/events/types.py @@ -843,6 +843,15 @@ def message(self) -> str: return f"Unable to do partial parsing because {self.reason}" +@dataclass +class StateCheckVarsHash(DebugLevel, pt.StateCheckVarsHash): + def code(self): + return "I025" + + def message(self) -> str: + return f"checksum: {self.checksum}, vars: {self.vars}, profile: {self.profile}, target: {self.target}, version: {self.version}" + + # Skipped I025, I026, I026, I027 diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index fbfada4fc2a..903852f6ed7 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -8,6 +8,7 @@ from itertools import chain import time from dbt.events.base_types import EventLevel +import pprint import dbt.exceptions import dbt.tracking @@ -29,6 +30,8 @@ ParsedFileLoadFailed, InvalidDisabledTargetInTestNode, NodeNotFoundOrDisabled, + StateCheckVarsHash, + Note, ) from dbt.logger import DbtProcessState from dbt.node_types import NodeType @@ -569,6 +572,12 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]: reason="config vars, config profile, or config target have changed" ) ) + fire_event( + Note( + msg=f"previous checksum: {self.manifest.state_check.vars_hash.checksum}, current checksum: {manifest.state_check.vars_hash.checksum}" + ), + level=EventLevel.DEBUG, + ) valid = False reparse_reason = ReparseReason.vars_changed if self.manifest.state_check.profile_hash != manifest.state_check.profile_hash: @@ -702,16 +711,28 @@ def build_manifest_state_check(self): # arg vars, but since any changes to that file will cause state_check # to not pass, it doesn't matter. If we move to more granular checking # of env_vars, that would need to change. + # We are using the parsed cli_vars instead of config.args.vars, in order + # to sort them and avoid reparsing because of ordering issues. + stringified_cli_vars = pprint.pformat(config.cli_vars) vars_hash = FileHash.from_contents( "\x00".join( [ - getattr(config.args, "vars", "{}") or "{}", + stringified_cli_vars, getattr(config.args, "profile", "") or "", getattr(config.args, "target", "") or "", __version__, ] ) ) + fire_event( + StateCheckVarsHash( + checksum=vars_hash.checksum, + vars=stringified_cli_vars, + profile=config.args.profile, + target=config.args.target, + version=__version__, + ) + ) # Create a FileHash of the env_vars in the project key_list = list(config.project_env_vars.keys()) diff --git a/tests/unit/test_events.py b/tests/unit/test_events.py index 2afee427c4d..4f2fd63a4bf 100644 --- a/tests/unit/test_events.py +++ b/tests/unit/test_events.py @@ -188,6 +188,7 @@ def test_event_codes(self): PartialParsingError(exc_info={}), PartialParsingSkipParsing(), UnableToPartialParse(reason="something went wrong"), + StateCheckVarsHash(vars="testing", target="testing", profile="testing"), PartialParsingNotEnabled(), ParsedFileLoadFailed(path="", exc="", exc_info=""), StaticParserCausedJinjaRendering(path=""),