From 6d1a899fa7ea46b7449913285613e6ad81f0134b Mon Sep 17 00:00:00 2001 From: Jamie Rodriguez <65564846+fivetran-jamie@users.noreply.github.com> Date: Mon, 13 Nov 2023 16:59:24 -0800 Subject: [PATCH 1/2] working? --- macros/fill_staging_columns.sql | 14 ++++++++++-- macros/get_json_columns_in_relation.sql | 29 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 macros/get_json_columns_in_relation.sql diff --git a/macros/fill_staging_columns.sql b/macros/fill_staging_columns.sql index ee21b4a..186b275 100644 --- a/macros/fill_staging_columns.sql +++ b/macros/fill_staging_columns.sql @@ -2,10 +2,20 @@ {%- set source_column_names = source_columns|map(attribute='name')|map('lower')|list -%} +{%- set json_columns = [] -%} +{% if target.type == 'bigquery' %} + {%- set json_columns = fivetran_utils.get_json_columns_in_relation(source_columns) -%} + {{ log(json_columns|lower, info=true)}} +{% endif %} + {%- for column in staging_columns %} {% if column.name|lower in source_column_names -%} - {{ fivetran_utils.quote_column(column) }} as - {%- if 'alias' in column %} {{ column.alias }} {% else %} {{ fivetran_utils.quote_column(column) }} {%- endif -%} + {%- if column.name|lower in json_columns|map('lower')|list -%} {#- exact, case-insensitive match on JSON column names (not a substring test) -#} + TO_JSON_STRING( {{ fivetran_utils.quote_column(column) }} ) + {%- else -%} + {{ fivetran_utils.quote_column(column) }} + {%- endif %} + as {%- if 'alias' in column %} {{ column.alias }} {% else %} {{ fivetran_utils.quote_column(column) }} {%- endif -%} {%- else -%} cast(null as {{ column.datatype }}) {%- if 'alias' in column %} as {{ column.alias }} {% else %} as {{ fivetran_utils.quote_column(column) }} {% endif -%} diff --git a/macros/get_json_columns_in_relation.sql b/macros/get_json_columns_in_relation.sql new file mode 100644 index 0000000..e9a91a6 --- /dev/null +++ b/macros/get_json_columns_in_relation.sql @@ -0,0 +1,29 @@ +{% macro get_json_columns_in_relation(source_columns) %} + +{{ 
return(adapter.dispatch('get_json_columns_in_relation', 'fivetran_utils') (source_columns)) }} {#- hand back the dispatched list itself rather than its rendered text -#} + +{%- endmacro %} + +-- currently only need this for bigquery, so for everything else do nothing and just return an empty list +{% macro default__get_json_columns_in_relation(source_columns) %} +{{ return([]) }} +{% endmacro %} + +-- we will return the columns that are of JSON type +{% macro bigquery__get_json_columns_in_relation(source_columns) %} + +{% set json_columns = [] %} + +{% set sc = source_columns|list %} + +{% for col_index in range(sc|length) %} + + {% if sc[col_index].dtype|lower == 'json' %} + {% do json_columns.append(sc[col_index].name) %} + + {% endif %} +{% endfor %} + +{{ return(json_columns) }} + +{% endmacro %} \ No newline at end of file From 56a53cf4167eab57562f9e9a9df000e7b157ee7b Mon Sep 17 00:00:00 2001 From: Jamie Rodriguez <65564846+fivetran-jamie@users.noreply.github.com> Date: Tue, 14 Nov 2023 13:14:27 -0800 Subject: [PATCH 2/2] document --- README.md | 17 +++++++++++++++++ dbt_project.yml | 2 +- integration_tests/dbt_project.yml | 2 +- macros/fill_staging_columns.sql | 1 - macros/get_json_columns_in_relation.sql | 2 ++ 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 923ce1d..3b33b82 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ dispatch: - [Cross-database compatibility](#cross-database-compatibility) - [array\_agg (source)](#array_agg-source) - [ceiling (source)](#ceiling-source) + - [get\_json\_columns\_in\_relation (source)](#get_json_columns_in_relation-source) - [first\_value (source)](#first_value-source) - [json\_extract (source)](#json_extract-source) - [json\_parse (source)](#json_parse-source) @@ -81,6 +82,7 @@ dispatch: - [remove\_prefix\_from\_columns (source)](#remove_prefix_from_columns-source) - [source\_relation (source)](#source_relation-source) - [union\_data (source)](#union_data-source) + - [Union Data Defined Sources Configuration](#union-data-defined-sources-configuration) 
- [union\_relations (source)](#union_relations-source) - [Variable Checks](#variable-checks) - [empty\_variable\_warning (source)](#empty_variable_warning-source) @@ -171,6 +173,19 @@ than, or equal to, the specified numeric expression. The ceiling macro is compat **Args:** * `num` (required): The integer field you wish to apply the ceiling function. +---- +### get_json_columns_in_relation ([source](macros/get_json_columns_in_relation.sql)) +In BigQuery warehouses, this macro returns the names of columns that are of type JSON (as opposed to a string), given a model or source's columns. For non-BigQuery destinations, it will always return an empty list, as JSON support has not yet been rolled out to other Fivetran destinations. + +**Usage:** +```sql +{{ fivetran_utils.get_json_columns_in_relation(source_columns=adapter.get_columns_in_relation(ref('stg_fivetran_platform__connector_tmp'))) }} +``` +**Args:** +* `source_columns` (required): The columns of the relation. This will likely be a call to `adapter.get_columns_in_relation`. + +> In Fivetran modeling packages, the `get_json_columns_in_relation` macro is called within the [fivetran_utils.fill_staging_columns](macros/fill_staging_columns.sql) macro. + ---- ### first_value ([source](macros/first_value.sql)) This macro returns the value_expression for the first row in the current window frame with cross db functionality. This macro ignores null values. The default first_value calculation within the macro is the `first_value` function. The Redshift first_value calculation is the `first_value` function, with the inclusion of a frame_clause `{{ partition_field }} rows unbounded preceding`. @@ -430,6 +445,8 @@ from source * `source_columns` (required): Will call the [get_columns_in_relation](https://docs.getdbt.com/reference/dbt-jinja-functions/adapter/#get_columns_in_relation) macro as well requires a `ref()` or `source()` argument for the staging models within the `_tmp` directory. 
* `staging_columns` (required): Created as a result of running the [generate_columns_macro](https://github.com/fivetran/dbt_fivetran_utils#generate_columns_macro-source) for the respective table. +> This macro makes a call to `fivetran_utils.get_json_columns_in_relation()`, which returns source columns that are JSONs (BigQuery only). It will wrap each JSON field in [TO_JSON_STRING](https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#to_json_string) and convert each to a string. + ---- ### persist_pass_through_columns ([source](macros/persist_pass_through_columns.sql)) This macro is used to persist pass through columns from the staging model to the **transform** package. This is particularly helpful when a `select *` is not feasible. diff --git a/dbt_project.yml b/dbt_project.yml index 4da9213..7d90d59 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,4 +1,4 @@ name: 'fivetran_utils' -version: '0.4.8' +version: '0.4.9' config-version: 2 require-dbt-version: [">=1.3.0", "<2.0.0"] diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 7fc9655..5649454 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -1,5 +1,5 @@ name: 'fivetran_utils_integration_tests' -version: '0.4.8' +version: '0.4.9' config-version: 2 profile: 'integration_tests' diff --git a/macros/fill_staging_columns.sql b/macros/fill_staging_columns.sql index 186b275..512040b 100644 --- a/macros/fill_staging_columns.sql +++ b/macros/fill_staging_columns.sql @@ -5,7 +5,6 @@ {%- set json_columns = [] -%} {% if target.type == 'bigquery' %} {%- set json_columns = fivetran_utils.get_json_columns_in_relation(source_columns) -%} - {{ log(json_columns|lower, info=true)}} {% endif %} {%- for column in staging_columns %} diff --git a/macros/get_json_columns_in_relation.sql b/macros/get_json_columns_in_relation.sql index e9a91a6..f9b89c0 100644 --- a/macros/get_json_columns_in_relation.sql +++ 
b/macros/get_json_columns_in_relation.sql @@ -6,7 +6,9 @@ -- currently only need this for bigquery, so for everything else do nothing and just return an empty list {% macro default__get_json_columns_in_relation(source_columns) %} + {{ return([]) }} + {% endmacro %} -- we will return the columns that are of JSON type