From 067b861d30bb03618ea163b811ab007127bfe2af Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 16 Dec 2021 13:30:52 -0500 Subject: [PATCH] Improve checking of schema version for pre-1.0.0 manifests (#4497) * [#4470] Improve checking of schema version for pre-1.0.0 manifests * Check exception code instead of message in test --- CHANGELOG.md | 1 + core/dbt/contracts/state.py | 6 ++- core/dbt/contracts/util.py | 41 +++++++++++++++++++ .../previous_state/manifest.json | 6 +++ .../test_modified_state.py | 11 ++++- 5 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 test/integration/062_defer_state_test/previous_state/manifest.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 182a161316b..3e9ff13a6ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Fix redefined status param of SQLQueryStatus to typecheck the string which passes on `._message` value of `AdapterResponse` or the `str` value sent by adapter plugin. ([#4463](https://github.com/dbt-labs/dbt-core/pull/4463#issuecomment-990174166)) - Fix `DepsStartPackageInstall` event to use package name instead of version number. ([#4482](https://github.com/dbt-labs/dbt-core/pull/4482)) - Reimplement log message to use adapter name instead of the object method. ([#4501](https://github.com/dbt-labs/dbt-core/pull/4501)) +- Issue better error message for incompatible schemas ([#4470](https://github.com/dbt-labs/dbt-core/pull/4442), [#4497](https://github.com/dbt-labs/dbt-core/pull/4497)) ### Docs - Fix missing data on exposures in docs ([#4467](https://github.com/dbt-labs/dbt-core/issues/4467)) diff --git a/core/dbt/contracts/state.py b/core/dbt/contracts/state.py index 1f269287ffb..04ffd577c77 100644 --- a/core/dbt/contracts/state.py +++ b/core/dbt/contracts/state.py @@ -14,7 +14,8 @@ def __init__(self, path: Path): manifest_path = self.path / 'manifest.json' if manifest_path.exists() and manifest_path.is_file(): try: - self.manifest = WritableManifest.read(str(manifest_path)) + # we want to bail with an error if schema versions don't match + self.manifest = WritableManifest.read_and_check_versions(str(manifest_path)) except IncompatibleSchemaException as exc: exc.add_filename(str(manifest_path)) raise @@ -22,7 +23,8 @@ def __init__(self, path: Path): results_path = self.path / 'run_results.json' if results_path.exists() and results_path.is_file(): try: - self.results = RunResultsArtifact.read(str(results_path)) + # we want to bail with an error if schema versions don't match + self.results = RunResultsArtifact.read_and_check_versions(str(results_path)) except IncompatibleSchemaException as exc: exc.add_filename(str(results_path)) raise diff --git a/core/dbt/contracts/util.py b/core/dbt/contracts/util.py index 6b2be0674ed..8f86a6503f4 100644 --- a/core/dbt/contracts/util.py +++ b/core/dbt/contracts/util.py @@ -9,6 +9,7 @@ from dbt.exceptions import ( InternalException, RuntimeException, + IncompatibleSchemaException ) from dbt.version import __version__ from dbt.events.functions import get_invocation_id @@ -158,6 +159,8 @@ def get_metadata_env() -> Dict[str, str]: } +# This is used in the ManifestMetadata, RunResultsMetadata, RunOperationResultMetadata, +# FreshnessMetadata, and CatalogMetadata classes @dataclasses.dataclass class BaseArtifactMetadata(dbtClassMixin): dbt_schema_version: str @@ -177,6 +180,17 @@ def __post_serialize__(self, dct): return dct +# This is used as a class decorator to set the schema_version in the +# 'dbt_schema_version' class attribute. (It's copied into the metadata objects.) +# Name attributes of SchemaVersion in classes with the 'schema_version' decorator: +# manifest +# run-results +# run-operation-result +# sources +# catalog +# remote-compile-result +# remote-execution-result +# remote-run-result def schema_version(name: str, version: int): def inner(cls: Type[VersionedSchema]): cls.dbt_schema_version = SchemaVersion( @@ -187,6 +201,7 @@ def inner(cls: Type[VersionedSchema]): return inner +# This is used in the ArtifactMixin and RemoteResult classes @dataclasses.dataclass class VersionedSchema(dbtClassMixin): dbt_schema_version: ClassVar[SchemaVersion] @@ -198,6 +213,30 @@ def json_schema(cls, embeddable: bool = False) -> Dict[str, Any]: result['$id'] = str(cls.dbt_schema_version) return result + @classmethod + def read_and_check_versions(cls, path: str): + try: + data = read_json(path) + except (EnvironmentError, ValueError) as exc: + raise RuntimeException( + f'Could not read {cls.__name__} at "{path}" as JSON: {exc}' + ) from exc + + # Check metadata version. There is a class variable 'dbt_schema_version', but + # that doesn't show up in artifacts, where it only exists in the 'metadata' + # dictionary. + if hasattr(cls, 'dbt_schema_version'): + if 'metadata' in data and 'dbt_schema_version' in data['metadata']: + previous_schema_version = data['metadata']['dbt_schema_version'] + # cls.dbt_schema_version is a SchemaVersion object + if str(cls.dbt_schema_version) != previous_schema_version: + raise IncompatibleSchemaException( + expected=str(cls.dbt_schema_version), + found=previous_schema_version + ) + + return cls.from_dict(data) # type: ignore + T = TypeVar('T', bound='ArtifactMixin') @@ -205,6 +244,8 @@ def json_schema(cls, embeddable: bool = False) -> Dict[str, Any]: # metadata should really be a Generic[T_M] where T_M is a TypeVar bound to # BaseArtifactMetadata. Unfortunately this isn't possible due to a mypy issue: # https://github.com/python/mypy/issues/7520 +# This is used in the WritableManifest, RunResultsArtifact, RunOperationResultsArtifact, +# and CatalogArtifact @dataclasses.dataclass(init=False) class ArtifactMixin(VersionedSchema, Writable, Readable): metadata: BaseArtifactMetadata diff --git a/test/integration/062_defer_state_test/previous_state/manifest.json b/test/integration/062_defer_state_test/previous_state/manifest.json new file mode 100644 index 00000000000..6ab63f3f563 --- /dev/null +++ b/test/integration/062_defer_state_test/previous_state/manifest.json @@ -0,0 +1,6 @@ +{ + "metadata": { + "dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v3.json", + "dbt_version": "0.21.1" + } +} diff --git a/test/integration/062_defer_state_test/test_modified_state.py b/test/integration/062_defer_state_test/test_modified_state.py index 1fd41ccb6ad..5f64cd66ae1 100644 --- a/test/integration/062_defer_state_test/test_modified_state.py +++ b/test/integration/062_defer_state_test/test_modified_state.py @@ -6,7 +6,7 @@ import pytest -from dbt.exceptions import CompilationException +from dbt.exceptions import CompilationException, IncompatibleSchemaException class TestModifiedState(DBTIntegrationTest): @@ -36,7 +36,7 @@ def _symlink_test_folders(self): for entry in os.listdir(self.test_original_source_path): src = os.path.join(self.test_original_source_path, entry) tst = os.path.join(self.test_root_dir, entry) - if entry in {'models', 'seeds', 'macros'}: + if entry in {'models', 'seeds', 'macros', 'previous_state'}: shutil.copytree(src, tst) elif os.path.isdir(entry) or entry.endswith('.sql'): os.symlink(src, tst) @@ -202,3 +202,10 @@ def test_postgres_changed_exposure(self): results, stdout = self.run_dbt_and_capture(['run', '--models', '+state:modified', '--state', './state']) assert len(results) == 1 assert results[0].node.name == 'view_model' + + @use_profile('postgres') + def test_postgres_previous_version_manifest(self): + # This tests that a different schema version in the file throws an error + with self.assertRaises(IncompatibleSchemaException) as exc: + results = self.run_dbt(['ls', '-s', 'state:modified', '--state', './previous_state']) + self.assertEqual(exc.CODE, 10014)