From 54b64f89226da556ff252356071496b5c1ce3a2e Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 13 Jan 2020 14:23:41 -0700 Subject: [PATCH 1/2] add "is_number" and "is_float" Column methods Split out snowflake column type added column type test integration tests --- core/dbt/adapters/base/column.py | 29 ++++++-- core/dbt/adapters/sql/impl.py | 2 +- .../bigquery/dbt/adapters/bigquery/column.py | 9 ++- .../dbt/adapters/snowflake/__init__.py | 1 + .../dbt/adapters/snowflake/column.py | 21 ++++++ .../snowflake/dbt/adapters/snowflake/impl.py | 2 + .../056_column_type_tests/bq_models/model.sql | 5 ++ .../bq_models/schema.yml | 10 +++ .../macros/test_is_type.sql | 69 +++++++++++++++++++ .../056_column_type_tests/pg_models/model.sql | 9 +++ .../pg_models/schema.yml | 14 ++++ .../056_column_type_tests/rs_models/model.sql | 17 +++++ .../rs_models/schema.yml | 22 ++++++ .../056_column_type_tests/sf_models/model.sql | 18 +++++ .../sf_models/schema.yml | 23 +++++++ .../test_column_types.py | 51 ++++++++++++++ 16 files changed, 294 insertions(+), 8 deletions(-) create mode 100644 plugins/snowflake/dbt/adapters/snowflake/column.py create mode 100644 test/integration/056_column_type_tests/bq_models/model.sql create mode 100644 test/integration/056_column_type_tests/bq_models/schema.yml create mode 100644 test/integration/056_column_type_tests/macros/test_is_type.sql create mode 100644 test/integration/056_column_type_tests/pg_models/model.sql create mode 100644 test/integration/056_column_type_tests/pg_models/schema.yml create mode 100644 test/integration/056_column_type_tests/rs_models/model.sql create mode 100644 test/integration/056_column_type_tests/rs_models/schema.yml create mode 100644 test/integration/056_column_type_tests/sf_models/model.sql create mode 100644 test/integration/056_column_type_tests/sf_models/schema.yml create mode 100644 test/integration/056_column_type_tests/test_column_types.py diff --git a/core/dbt/adapters/base/column.py b/core/dbt/adapters/base/column.py index c6e6fcb3288..75b41e0b3f6 100644 --- a/core/dbt/adapters/base/column.py +++ b/core/dbt/adapters/base/column.py @@ -2,9 +2,7 @@ from hologram import JsonSchemaMixin -from typing import TypeVar, Dict, ClassVar, Any, Optional, Type - -Self = TypeVar('Self', bound='Column') +from typing import Dict, ClassVar, Any, Optional @dataclass @@ -26,7 +24,7 @@ def translate_type(cls, dtype: str) -> str: return cls.TYPE_LABELS.get(dtype.upper(), dtype) @classmethod - def create(cls: Type[Self], name, label_or_dtype: str) -> Self: + def create(cls, name, label_or_dtype: str) -> 'Column': column_type = cls.translate_type(label_or_dtype) return cls(name, column_type) @@ -52,8 +50,27 @@ def is_string(self) -> bool: return self.dtype.lower() in ['text', 'character varying', 'character', 'varchar'] + def is_number(self): + return any([self.is_integer(), self.is_numeric(), self.is_float()]) + + def is_float(self): + return self.dtype.lower() in [ + # floats + 'real', 'float4', 'float', 'double precision', 'float8' + ] + + def is_integer(self) -> bool: + return self.dtype.lower() in [ + # real types + 'smallint', 'integer', 'bigint', + 'smallserial', 'serial', 'bigserial', + # aliases + 'int2', 'int4', 'int8', + 'serial2', 'serial4', 'serial8', + ] + def is_numeric(self) -> bool: - return self.dtype.lower() in ['numeric', 'number'] + return self.dtype.lower() in ['numeric', 'decimal'] def string_size(self) -> int: if not self.is_string(): @@ -65,7 +82,7 @@ def string_size(self) -> int: else: return int(self.char_size) - def can_expand_to(self: Self, other_column: Self) -> bool: + def can_expand_to(self, other_column: 'Column') -> bool: """returns True if this column can be expanded to the size of the other column""" if not self.is_string() or not other_column.is_string(): diff --git a/core/dbt/adapters/sql/impl.py b/core/dbt/adapters/sql/impl.py index 5b5f97a85cb..b0542edf60a 100644 --- a/core/dbt/adapters/sql/impl.py +++ b/core/dbt/adapters/sql/impl.py @@ -168,7 +168,7 @@ def rename_relation(self, from_relation, to_relation): kwargs=kwargs ) - def get_columns_in_relation(self, relation: str): + def get_columns_in_relation(self, relation): return self.execute_macro( GET_COLUMNS_IN_RELATION_MACRO_NAME, kwargs={'relation': relation} diff --git a/plugins/bigquery/dbt/adapters/bigquery/column.py b/plugins/bigquery/dbt/adapters/bigquery/column.py index 8c8a442b412..305bae77a34 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/column.py +++ b/plugins/bigquery/dbt/adapters/bigquery/column.py @@ -99,8 +99,15 @@ def data_type(self) -> str: def is_string(self) -> bool: return self.dtype.lower() == 'string' + def is_integer(self) -> bool: + # snowflake technicality: These are all synonyms with NUMBER(38, 0) + return self.dtype.lower() == 'int64' + def is_numeric(self) -> bool: - return False + return self.dtype.lower() == 'numeric' + + def is_float(self): + return self.dtype.lower() == 'float64' def can_expand_to(self: Self, other_column: Self) -> bool: """returns True if both columns are strings""" diff --git a/plugins/snowflake/dbt/adapters/snowflake/__init__.py b/plugins/snowflake/dbt/adapters/snowflake/__init__.py index 40f8aee62d4..bb4e17c9bb3 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/__init__.py +++ b/plugins/snowflake/dbt/adapters/snowflake/__init__.py @@ -1,3 +1,4 @@ +from dbt.adapters.snowflake.column import SnowflakeColumn # noqa from dbt.adapters.snowflake.connections import SnowflakeConnectionManager # noqa from dbt.adapters.snowflake.connections import SnowflakeCredentials from dbt.adapters.snowflake.relation import SnowflakeRelation # noqa diff --git a/plugins/snowflake/dbt/adapters/snowflake/column.py b/plugins/snowflake/dbt/adapters/snowflake/column.py new file mode 100644 index 00000000000..caa7cbdc3d5 --- /dev/null +++ b/plugins/snowflake/dbt/adapters/snowflake/column.py @@ -0,0 +1,21 @@ +from dataclasses import dataclass + +from dbt.adapters.base.column import Column + + +@dataclass +class SnowflakeColumn(Column): + def is_integer(self) -> bool: + # everything that smells like an int is actually a NUMBER(38, 0) + return False + + def is_numeric(self) -> bool: + return self.dtype.lower() in [ + 'int', 'integer', 'bigint', 'smallint', 'tinyint', 'byteint', + 'numeric', 'decimal', 'number' + ] + + def is_float(self): + return self.dtype.lower() in [ + 'float', 'float4', 'float8', 'double', 'double precision', 'real', + ] diff --git a/plugins/snowflake/dbt/adapters/snowflake/impl.py b/plugins/snowflake/dbt/adapters/snowflake/impl.py index a69a7d147b0..13af6dc73f2 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/impl.py +++ b/plugins/snowflake/dbt/adapters/snowflake/impl.py @@ -3,12 +3,14 @@ from dbt.adapters.sql import SQLAdapter from dbt.adapters.snowflake import SnowflakeConnectionManager from dbt.adapters.snowflake import SnowflakeRelation +from dbt.adapters.snowflake import SnowflakeColumn from dbt.utils import filter_null_values from dbt.exceptions import RuntimeException class SnowflakeAdapter(SQLAdapter): Relation = SnowflakeRelation + Column = SnowflakeColumn ConnectionManager = SnowflakeConnectionManager AdapterSpecificConfigs = frozenset( diff --git a/test/integration/056_column_type_tests/bq_models/model.sql b/test/integration/056_column_type_tests/bq_models/model.sql new file mode 100644 index 00000000000..94e4fba18ca --- /dev/null +++ b/test/integration/056_column_type_tests/bq_models/model.sql @@ -0,0 +1,5 @@ +select + CAST(1 as int64) as int64_col, + CAST(2.0 as float64) as float64_col, + CAST(3.0 as numeric) as numeric_col, + CAST('3' as string) as string_col, diff --git a/test/integration/056_column_type_tests/bq_models/schema.yml b/test/integration/056_column_type_tests/bq_models/schema.yml new file mode 100644 index 00000000000..8eb8a9ae286 --- /dev/null +++ b/test/integration/056_column_type_tests/bq_models/schema.yml @@ -0,0 +1,10 @@ +version: 2 +models: + - name: model + tests: + - is_type: + column_map: + int64_col: ['integer', 'number'] + float64_col: ['float', 'number'] + numeric_col: ['numeric', 'number'] + string_col: ['string', 'not number'] diff --git a/test/integration/056_column_type_tests/macros/test_is_type.sql b/test/integration/056_column_type_tests/macros/test_is_type.sql new file mode 100644 index 00000000000..6ee430f1ea0 --- /dev/null +++ b/test/integration/056_column_type_tests/macros/test_is_type.sql @@ -0,0 +1,69 @@ + +{% macro simple_type_check_column(column, check) %} + {% if check == 'string' %} + {{ return(column.is_string()) }} + {% elif check == 'float' %} + {{ return(column.is_float()) }} + {% elif check == 'number' %} + {{ return(column.is_number()) }} + {% elif check == 'numeric' %} + {{ return(column.is_numeric()) }} + {% elif check == 'integer' %} + {{ return(column.is_integer()) }} + {% else %} + {% do exceptions.raise_compiler_error('invalid type check value: ' ~ check) %} + {% endif %} +{% endmacro %} + +{% macro type_check_column(column, type_checks) %} + {% set failures = [] %} + {% for type_check in type_checks %} + {% if type_check.startswith('not ') %} + {% if simple_type_check_column(column, type_check[4:]) %} + {% do log('simple_type_check_column got ', True) %} + {% do failures.append(type_check) %} + {% endif %} + {% else %} + {% if not simple_type_check_column(column, type_check) %} + {% do failures.append(type_check) %} + {% endif %} + {% endif %} + {% endfor %} + {% if (failures | length) > 0 %} + {% do log('column ' ~ column.name ~ ' had failures: ' ~ failures, info=True) %} + {% endif %} + {% do return((failures | length) == 0) %} +{% endmacro %} + + +{% macro test_is_type(model, column_map) %} + {% if not execute %} + {{ return(None) }} + {% endif %} + {% if not column_map %} + {% do exceptions.raise_compiler_error('test_is_type must have a column name') %} + {% endif %} + {% set columns = adapter.get_columns_in_relation(model) %} + {% if (column_map | length) != (columns | length) %} + {% set column_map_keys = (column_map | list | string) %} + {% set column_names = (columns | map(attribute='name') | list | string) %} + {% do exceptions.raise_compiler_error('did not get all the columns/all columns not specified:\n' ~ column_map_keys ~ '\nvs\n' ~ column_names) %} + {% endif %} + {% set bad_columns = [] %} + {% for column in columns %} + {% set column_key = (column.name | lower) %} + {% if column_key in column_map %} + {% set type_checks = column_map[column_key] %} + {% if not type_checks %} + {% do exceptions.raise_compiler_error('no type checks?') %} + {% endif %} + {% if not type_check_column(column, type_checks) %} + {% do bad_columns.append(column.name) %} + {% endif %} + {% else %} + {% do exceptions.raise_compiler_error('column key ' ~ column_key ~ ' not found in ' ~ (column_map | list | string)) %} + {% endif %} + {% endfor %} + {% do log('bad columns: ' ~ bad_columns, info=True) %} + select {{ bad_columns | length }} as pass_fail +{% endmacro %} diff --git a/test/integration/056_column_type_tests/pg_models/model.sql b/test/integration/056_column_type_tests/pg_models/model.sql new file mode 100644 index 00000000000..f1b877225a9 --- /dev/null +++ b/test/integration/056_column_type_tests/pg_models/model.sql @@ -0,0 +1,9 @@ +select + 1::smallint as smallint_col, + 2::integer as int_col, + 3::bigint as bigint_col, + 4.0::real as real_col, + 5.0::double precision as double_col, + 6.0::numeric as numeric_col, + '7'::text as text_col, + '8'::varchar(20) as varchar_col diff --git a/test/integration/056_column_type_tests/pg_models/schema.yml b/test/integration/056_column_type_tests/pg_models/schema.yml new file mode 100644 index 00000000000..93e309d1b0b --- /dev/null +++ b/test/integration/056_column_type_tests/pg_models/schema.yml @@ -0,0 +1,14 @@ +version: 2 +models: + - name: model + tests: + - is_type: + column_map: + smallint_col: ['integer', 'number'] + int_col: ['integer', 'number'] + bigint_col: ['integer', 'number'] + real_col: ['float', 'number'] + double_col: ['float', 'number'] + numeric_col: ['numeric', 'number'] + text_col: ['string', 'not number'] + varchar_col: ['string', 'not number'] diff --git a/test/integration/056_column_type_tests/rs_models/model.sql b/test/integration/056_column_type_tests/rs_models/model.sql new file mode 100644 index 00000000000..f8e9721030a --- /dev/null +++ b/test/integration/056_column_type_tests/rs_models/model.sql @@ -0,0 +1,17 @@ +select + 1::smallint as smallint_col, + 2::int as int_col, + 3::bigint as bigint_col, + 4::int2 as int2_col, + 5::int4 as int4_col, + 6::int8 as int8_col, + 7::integer as integer_col, + 8.0::real as real_col, + 9.0::float4 as float4_col, + 10.0::float8 as float8_col, + 11.0::float as float_col, + 12.0::double precision as double_col, + 13.0::numeric as numeric_col, + 14.0::decimal as decimal_col, + '15'::varchar(20) as varchar_col, + '16'::text as text_col diff --git a/test/integration/056_column_type_tests/rs_models/schema.yml b/test/integration/056_column_type_tests/rs_models/schema.yml new file mode 100644 index 00000000000..5b35ce02526 --- /dev/null +++ b/test/integration/056_column_type_tests/rs_models/schema.yml @@ -0,0 +1,22 @@ +version: 2 +models: + - name: model + tests: + - is_type: + column_map: + smallint_col: ['integer', 'number'] + int_col: ['integer', 'number'] + bigint_col: ['integer', 'number'] + int2_col: ['integer', 'number'] + int4_col: ['integer', 'number'] + int8_col: ['integer', 'number'] + integer_col: ['integer', 'number'] + real_col: ['float', 'number'] + double_col: ['float', 'number'] + float4_col: ['float', 'number'] + float8_col: ['float', 'number'] + float_col: ['float', 'number'] + numeric_col: ['numeric', 'number'] + decimal_col: ['numeric', 'number'] + varchar_col: ['string', 'not number'] + text_col: ['string', 'not number'] diff --git a/test/integration/056_column_type_tests/sf_models/model.sql b/test/integration/056_column_type_tests/sf_models/model.sql new file mode 100644 index 00000000000..b58c7c43cb0 --- /dev/null +++ b/test/integration/056_column_type_tests/sf_models/model.sql @@ -0,0 +1,18 @@ +select + 1::smallint as smallint_col, + 2::int as int_col, + 3::bigint as bigint_col, + 4::integer as integer_col, + 5::tinyint as tinyint_col, + 6::byteint as byteint_col, + 7.0::float as float_col, + 8.0::float4 as float4_col, + 9.0::float8 as float8_col, + 10.0::double as double_col, + 11.0::double precision as double_p_col, + 12.0::real as real_col, + 13.0::numeric as numeric_col, + 14.0::decimal as decimal_col, + 15.0::number as number_col, + '16'::text as text_col, + '17'::varchar(20) as varchar_col diff --git a/test/integration/056_column_type_tests/sf_models/schema.yml b/test/integration/056_column_type_tests/sf_models/schema.yml new file mode 100644 index 00000000000..5be21188c5e --- /dev/null +++ b/test/integration/056_column_type_tests/sf_models/schema.yml @@ -0,0 +1,23 @@ +version: 2 +models: + - name: model + tests: + - is_type: + column_map: + smallint_col: ['numeric', 'number', 'not string', 'not float', 'not integer'] + int_col: ['numeric', 'number', 'not string', 'not float', 'not integer'] + bigint_col: ['numeric', 'number', 'not string', 'not float', 'not integer'] + integer_col: ['numeric', 'number', 'not string', 'not float', 'not integer'] + tinyint_col: ['numeric', 'number', 'not string', 'not float', 'not integer'] + byteint_col: ['numeric', 'number', 'not string', 'not float', 'not integer'] + float_col: ['float', 'number', 'not string', 'not integer', 'not numeric'] + float4_col: ['float', 'number', 'not string', 'not integer', 'not numeric'] + float8_col: ['float', 'number', 'not string', 'not integer', 'not numeric'] + double_col: ['float', 'number', 'not string', 'not integer', 'not numeric'] + double_p_col: ['float', 'number', 'not string', 'not integer', 'not numeric'] + real_col: ['float', 'number', 'not string', 'not integer', 'not numeric'] + numeric_col: ['numeric', 'number', 'not string', 'not float', 'not integer'] + decimal_col: ['numeric', 'number', 'not string', 'not float', 'not integer'] + number_col: ['numeric', 'number', 'not string', 'not float', 'not integer'] + text_col: ['string', 'not number'] + varchar_col: ['string', 'not number'] diff --git a/test/integration/056_column_type_tests/test_column_types.py b/test/integration/056_column_type_tests/test_column_types.py new file mode 100644 index 00000000000..835b9f775fc --- /dev/null +++ b/test/integration/056_column_type_tests/test_column_types.py @@ -0,0 +1,51 @@ +from test.integration.base import DBTIntegrationTest, use_profile + + +class TestColumnTypes(DBTIntegrationTest): + @property + def schema(self): + return '056_column_types' + + def run_and_test(self): + self.assertEqual(len(self.run_dbt(['run'])), 1) + self.assertEqual(len(self.run_dbt(['test'])), 1) + + +class TestPostgresColumnTypes(TestColumnTypes): + @property + def models(self): + return 'pg_models' + + @use_profile('postgres') + def test_postgres_column_types(self): + self.run_and_test() + + +class TestRedshiftColumnTypes(TestColumnTypes): + @property + def models(self): + return 'rs_models' + + @use_profile('redshift') + def test_redshift_column_types(self): + self.run_and_test() + + +class TestSnowflakeColumnTypes(TestColumnTypes): + @property + def models(self): + return 'sf_models' + + @use_profile('snowflake') + def test_snowflake_column_types(self): + self.run_and_test() + + +class TestBigQueryColumnTypes(TestColumnTypes): + @property + def models(self): + return 'bq_models' + + @use_profile('bigquery') + def test_bigquery_column_types(self): + self.run_and_test() From affdbe719aa43d2e32c53e0da60b200908e3c013 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 24 Jan 2020 08:27:19 -0700 Subject: [PATCH 2/2] PR feedback --- plugins/bigquery/dbt/adapters/bigquery/column.py | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/bigquery/dbt/adapters/bigquery/column.py b/plugins/bigquery/dbt/adapters/bigquery/column.py index 305bae77a34..6c8a70df1c1 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/column.py +++ b/plugins/bigquery/dbt/adapters/bigquery/column.py @@ -100,7 +100,6 @@ def is_string(self) -> bool: return self.dtype.lower() == 'string' def is_integer(self) -> bool: - # snowflake technicality: These are all synonyms with NUMBER(38, 0) return self.dtype.lower() == 'int64' def is_numeric(self) -> bool: