Skip to content

Commit

Permalink
(#2350) Fix for changing snapshot strategy types
Browse files Browse the repository at this point in the history
  • Loading branch information
drewbanin committed May 2, 2020
1 parent 8681dd8 commit 2cd6a2d
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,18 @@
{% set primary_key = config['unique_key'] %}
{% set updated_at = config['updated_at'] %}

(snapshotted_data.MetaDataLastUpdatedTime < source_data.MetaDataLastUpdatedTime)

{#/*
The snapshot relation might not have an {{ updated_at }} value if the snapshot
strategy is changed (ie. from `check_cols` to `timestamp`). We should use a
dbt-created column for the comparison in the snapshot table instead of assuming
that the user-supplied {{ updated_at }} will be present in the historical data

See https://github.com/fishtown-analytics/dbt/issues/2350
*/ #}
{% set row_changed_expr -%}
({{ snapshotted_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }})
({{ snapshotted_rel }}.dbt_valid_from < {{ current_rel }}.{{ updated_at }})
{%- endset %}

{% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}
Expand Down Expand Up @@ -126,7 +136,7 @@
select {{ snapshot_get_time() }} as snapshot_start
{%- endset %}

{# don't access the column by name, to avoid dealing with casing issues on snowflake #}
{#-- don't access the column by name, to avoid dealing with casing issues on snowflake #}
{%- set now = run_query(select_current_time)[0][0] -%}
{% if now is none or now is undefined -%}
{%- do exceptions.raise_compiler_error('Could not get a snapshot start time from the database') -%}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

{# /*
Given the repro case for the snapshot build, we'd
expect to see both records have color='pink'
in their most recent rows.
*/ #}
with expected as (
select 1 as id, 'pink' as color union all
select 2 as id, 'pink' as color
),
actual as (
select id, color
from {{ ref('my_snapshot') }}
where color = 'pink'
and dbt_valid_to is null
)
select * from expected
except
select * from actual
union all
select * from actual
except
select * from expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

{#
REPRO:
1. Run with check strategy
2. Add a new ts column and run with check strategy
3. Run with timestamp strategy on new ts column

Expect: new entry is added for changed rows in (3)
#}


{% snapshot my_snapshot %}

{#--------------- Configuration ------------ #}

{{ config(
target_schema=schema,
unique_key='id'
) }}

{% if var('strategy') == 'timestamp' %}
{{ config(strategy='timestamp', updated_at='updated_at') }}
{% else %}
{{ config(strategy='check', check_cols=['color']) }}
{% endif %}

{#--------------- Test setup ------------ #}

{% if var('step') == 1 %}

select 1 as id, 'blue' as color
union all
select 2 as id, 'red' as color

{% elif var('step') == 2 %}

-- change id=1 color from blue to green
-- id=2 is unchanged when using the check strategy
select 1 as id, 'green' as color, '2020-01-01'::date as updated_at
union all
select 2 as id, 'red' as color, '2020-01-01'::date as updated_at

{% elif var('step') == 3 %}

-- bump timestamp for both records. Expect that after this runs
-- using the timestamp strategy, both ids should have the color
-- 'pink' in the database. This should be in the future b/c we're
-- going to compare to the check timestamp, which will be _now_
select 1 as id, 'pink' as color, (now() + interval '1 day')::date as updated_at
union all
select 2 as id, 'pink' as color, (now() + interval '1 day')::date as updated_at

{% endif %}

{% endsnapshot %}
36 changes: 36 additions & 0 deletions test/integration/004_simple_snapshot_test/test_simple_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,3 +710,39 @@ def test__postgres__slow(self):

results = self.run_dbt(['test'])
self.assertEqual(len(results), 1)


class TestChangingStrategy(DBTIntegrationTest):

@property
def schema(self):
return "simple_snapshot_004"

@property
def models(self):
return "models-slow"

def run_snapshot(self):
return self.run_dbt(['snapshot'])

@property
def project_config(self):
return {
"config-version": 2,
"snapshot-paths": ['test-snapshots-changing-strategy'],
"test-paths": ["test-snapshots-changing-strategy-tests"],
}

@use_profile('postgres')
def test__postgres__changing_strategy(self):
results = self.run_dbt(['snapshot', '--vars', '{strategy: check, step: 1}'])
self.assertEqual(len(results), 1)

results = self.run_dbt(['snapshot', '--vars', '{strategy: check, step: 2}'])
self.assertEqual(len(results), 1)

results = self.run_dbt(['snapshot', '--vars', '{strategy: timestamp, step: 3}'])
self.assertEqual(len(results), 1)

results = self.run_dbt(['test'])
self.assertEqual(len(results), 1)

0 comments on commit 2cd6a2d

Please sign in to comment.