From c33c2531542e683506a8093e891d82aaaf97060c Mon Sep 17 00:00:00 2001 From: PJGaetan Date: Thu, 20 May 2021 07:53:58 +0200 Subject: [PATCH 1/2] Allow to use a custom field as check-cols updated_at --- CHANGELOG.md | 1 + .../materializations/snapshot/strategies.sql | 2 +- .../test_simple_snapshot.py | 47 +++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76076a9ac92..6bcd1e0d1d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ Contributors: - Add native support for Postgres index creation ([#804](https://github.com/fishtown-analytics/dbt/issues/804), [3106](https://github.com/fishtown-analytics/dbt/pull/3106)) - Less greedy test selection: expand to select unselected tests if and only if all parents are selected ([#2891](https://github.com/fishtown-analytics/dbt/issues/2891), [#3235](https://github.com/fishtown-analytics/dbt/pull/3235)) - Prevent locks in Redshift during full refresh in incremental materialization. ([#2426](https://github.com/fishtown-analytics/dbt/issues/2426), [#2998](https://github.com/fishtown-analytics/dbt/pull/2998)) +Add optional `updated_at` config parameter to invalidate rows with the inputed column timestamp in snapshot using `check` strategy. ([#1844](https://github.com/fishtown-analytics/dbt/issues/1844), [#3376](https://github.com/fishtown-analytics/dbt/pull/3376)) ### Under the hood - Add dependabot configuration for alerting maintainers about keeping dependencies up to date and secure. 
([#3061](https://github.com/fishtown-analytics/dbt/issues/3061), [#3062](https://github.com/fishtown-analytics/dbt/pull/3062)) diff --git a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql index 569e213e227..1d35c71290c 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql @@ -144,7 +144,7 @@ {% if now is none or now is undefined -%} {%- do exceptions.raise_compiler_error('Could not get a snapshot start time from the database') -%} {%- endif %} - {% set updated_at = snapshot_string_as_time(now) %} + {% set updated_at = config.get('updated_at', snapshot_string_as_time(now)) %} {% set column_added = false %} diff --git a/test/integration/004_simple_snapshot_test/test_simple_snapshot.py b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py index f522fee583e..8daf8550c40 100644 --- a/test/integration/004_simple_snapshot_test/test_simple_snapshot.py +++ b/test/integration/004_simple_snapshot_test/test_simple_snapshot.py @@ -577,6 +577,53 @@ def project_config(self): } +class TestUpdatedAtCheckCols(TestCheckCols): + def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): + revived_records = self.run_sql( + ''' + select + id, + updated_at, + dbt_valid_from + from {} + '''.format(relation_b), + fetch='all' + ) + + for result in revived_records: + # result is a tuple, the updated_at is second and dbt_valid_from is latest + self.assertIsInstance(result[1], datetime) + self.assertIsInstance(result[2], datetime) + self.assertEqual(result[1].replace(tzinfo=pytz.UTC), result[2].replace(tzinfo=pytz.UTC)) + + if columns is None: + columns = [c for c in self.get_relation_columns(relation_a) if not c[0].lower().startswith('dbt_')] + return super()._assertTablesEqualSql(relation_a, relation_b, columns=columns) + + def 
assert_expected(self): + super().assert_expected() + self.assertTablesEqual('snapshot_checkall', 'snapshot_expected') + + + @property + def project_config(self): + return { + 'config-version': 2, + "data-paths": ['data'], + "snapshot-paths": ['test-check-col-snapshots-noconfig'], + "snapshots": { + "test": { + "target_schema": self.unique_schema(), + "unique_key": "id || '-' || first_name", + "strategy": "check", + "check_cols" : "all", + "updated_at": "updated_at", + }, + }, + 'macro-paths': ['macros'], + } + + class TestCheckColsBigquery(TestSimpleSnapshotFilesBigquery): def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): # When building the equality tests, only test columns that don't start From 991e11fcfe33f986eb44934db07ba321025efd88 Mon Sep 17 00:00:00 2001 From: PJGaetan Date: Fri, 21 May 2021 20:30:33 +0200 Subject: [PATCH 2/2] Clarify changelog w/ jtcohen6 --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bcd1e0d1d2..f0debe9c8d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,12 +4,16 @@ - Fix compiled sql for ephemeral models ([#3317](https://github.com/fishtown-analytics/dbt/issues/3317), [#3318](https://github.com/fishtown-analytics/dbt/pull/3318)) - Now generating `run_results.json` even when no nodes are selected ([#3313](https://github.com/fishtown-analytics/dbt/issues/3313), [#3315](https://github.com/fishtown-analytics/dbt/pull/3315)) +### Features +- Support optional `updated_at` config parameter with `check` strategy snapshots. If not supplied, will use current timestamp (default). 
([#1844](https://github.com/fishtown-analytics/dbt/issues/1844), [#3376](https://github.com/fishtown-analytics/dbt/pull/3376)) + ### Under the hood - Added logic for registry requests to raise a timeout error after a response hangs out for 30 seconds and 5 attempts have been made to reach the endpoint ([#3177](https://github.com/fishtown-analytics/dbt/issues/3177), [#3275](https://github.com/fishtown-analytics/dbt/pull/3275)) Contributors: - [@TeddyCr](https://github.com/TeddyCr) ([#3275](https://github.com/fishtown-analytics/dbt/pull/3275)) - [@panasenco](https://github.com/panasenco) ([#3315](https://github.com/fishtown-analytics/dbt/pull/3315)) +- [@PJGaetan](https://github.com/PJGaetan) ([#3376](https://github.com/fishtown-analytics/dbt/pull/3376)) ## dbt 0.20.0b1 (May 03, 2021) @@ -41,7 +45,6 @@ Contributors: - Add native support for Postgres index creation ([#804](https://github.com/fishtown-analytics/dbt/issues/804), [3106](https://github.com/fishtown-analytics/dbt/pull/3106)) - Less greedy test selection: expand to select unselected tests if and only if all parents are selected ([#2891](https://github.com/fishtown-analytics/dbt/issues/2891), [#3235](https://github.com/fishtown-analytics/dbt/pull/3235)) - Prevent locks in Redshift during full refresh in incremental materialization. ([#2426](https://github.com/fishtown-analytics/dbt/issues/2426), [#2998](https://github.com/fishtown-analytics/dbt/pull/2998)) -Add optional `updated_at` config parameter to invalidate rows with the inputed column timestamp in snapshot using `check` strategy. ([#1844](https://github.com/fishtown-analytics/dbt/issues/1844), [#3376](https://github.com/fishtown-analytics/dbt/pull/3376)) ### Under the hood - Add dependabot configuration for alerting maintainers about keeping dependencies up to date and secure. ([#3061](https://github.com/fishtown-analytics/dbt/issues/3061), [#3062](https://github.com/fishtown-analytics/dbt/pull/3062))