Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Column.is_number/is_float (#1969) #2046

Merged
merged 2 commits into from
Jan 24, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions core/dbt/adapters/base/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

from hologram import JsonSchemaMixin

from typing import TypeVar, Dict, ClassVar, Any, Optional, Type

Self = TypeVar('Self', bound='Column')
from typing import Dict, ClassVar, Any, Optional


@dataclass
Expand All @@ -26,7 +24,7 @@ def translate_type(cls, dtype: str) -> str:
return cls.TYPE_LABELS.get(dtype.upper(), dtype)

@classmethod
def create(cls: Type[Self], name, label_or_dtype: str) -> Self:
def create(cls, name, label_or_dtype: str) -> 'Column':
column_type = cls.translate_type(label_or_dtype)
return cls(name, column_type)

Expand All @@ -52,8 +50,27 @@ def is_string(self) -> bool:
return self.dtype.lower() in ['text', 'character varying', 'character',
'varchar']

def is_number(self) -> bool:
    """Return True when the column is any kind of number.

    A column counts as a number when at least one of ``is_integer()``,
    ``is_numeric()`` or ``is_float()`` reports True. The predicates are
    called in that order and evaluation stops at the first True result.
    """
    predicates = (self.is_integer, self.is_numeric, self.is_float)
    # A generator keeps the checks lazy; a list would call all three up front.
    return any(predicate() for predicate in predicates)

def is_float(self) -> bool:
    """Return True when the column's dtype names a floating-point type.

    The dtype is lower-cased before comparison, so the check is
    case-insensitive.
    """
    return self.dtype.lower() in [
        # floats
        'real', 'float4', 'float', 'double precision', 'float8'
    ]

def is_integer(self) -> bool:
    """Return True when the column's dtype names an integer type.

    The dtype is lower-cased first, then matched against the full type
    names and their short aliases.
    """
    full_names = (
        'smallint', 'integer', 'bigint',
        'smallserial', 'serial', 'bigserial',
    )
    short_aliases = (
        'int2', 'int4', 'int8',
        'serial2', 'serial4', 'serial8',
    )
    lowered = self.dtype.lower()
    return lowered in full_names or lowered in short_aliases

def is_numeric(self) -> bool:
return self.dtype.lower() in ['numeric', 'number']
return self.dtype.lower() in ['numeric', 'decimal']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is the only thing in this PR that I'm not sure about -- do you know why `number` was in here before? I tried looking through the blame but couldn't find the origin. AFAICT, `number` is not a valid type on pg/redshift/snowflake/bq?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's only valid on Snowflake, and previously this method applied to Snowflake as well as Postgres/Redshift. That's why I removed it when I split Snowflake out.


def string_size(self) -> int:
if not self.is_string():
Expand All @@ -65,7 +82,7 @@ def string_size(self) -> int:
else:
return int(self.char_size)

def can_expand_to(self: Self, other_column: Self) -> bool:
def can_expand_to(self, other_column: 'Column') -> bool:
"""returns True if this column can be expanded to the size of the
other column"""
if not self.is_string() or not other_column.is_string():
Expand Down
2 changes: 1 addition & 1 deletion core/dbt/adapters/sql/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def rename_relation(self, from_relation, to_relation):
kwargs=kwargs
)

def get_columns_in_relation(self, relation: str):
def get_columns_in_relation(self, relation):
return self.execute_macro(
GET_COLUMNS_IN_RELATION_MACRO_NAME,
kwargs={'relation': relation}
Expand Down
9 changes: 8 additions & 1 deletion plugins/bigquery/dbt/adapters/bigquery/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,15 @@ def data_type(self) -> str:
def is_string(self) -> bool:
    """Return True when the column's dtype is ``string`` (case-insensitive)."""
    normalized = self.dtype.lower()
    return normalized == 'string'

def is_integer(self) -> bool:
    """Return True when the column's dtype is ``int64`` (case-insensitive)."""
    return self.dtype.lower() == 'int64'

def is_numeric(self) -> bool:
return False
return self.dtype.lower() == 'numeric'

def is_float(self) -> bool:
    """Return True when the column's dtype is ``float64`` (case-insensitive)."""
    return self.dtype.lower() == 'float64'

def can_expand_to(self: Self, other_column: Self) -> bool:
"""returns True if both columns are strings"""
Expand Down
1 change: 1 addition & 0 deletions plugins/snowflake/dbt/adapters/snowflake/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dbt.adapters.snowflake.column import SnowflakeColumn # noqa
from dbt.adapters.snowflake.connections import SnowflakeConnectionManager # noqa
from dbt.adapters.snowflake.connections import SnowflakeCredentials
from dbt.adapters.snowflake.relation import SnowflakeRelation # noqa
Expand Down
21 changes: 21 additions & 0 deletions plugins/snowflake/dbt/adapters/snowflake/column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from dataclasses import dataclass

from dbt.adapters.base.column import Column


@dataclass
class SnowflakeColumn(Column):
    """Column subclass that overrides the numeric type predicates for Snowflake.

    Every predicate lower-cases ``self.dtype`` before comparing, so the
    checks are case-insensitive.
    """

    def is_integer(self) -> bool:
        """Always return False: integer-looking types are reported as numeric."""
        # everything that smells like an int is actually a NUMBER(38, 0)
        return False

    def is_numeric(self) -> bool:
        """Return True for fixed-point type names, including the int aliases."""
        return self.dtype.lower() in [
            'int', 'integer', 'bigint', 'smallint', 'tinyint', 'byteint',
            'numeric', 'decimal', 'number'
        ]

    def is_float(self) -> bool:
        """Return True when the dtype names a floating-point type."""
        return self.dtype.lower() in [
            'float', 'float4', 'float8', 'double', 'double precision', 'real',
        ]
2 changes: 2 additions & 0 deletions plugins/snowflake/dbt/adapters/snowflake/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
from dbt.adapters.sql import SQLAdapter
from dbt.adapters.snowflake import SnowflakeConnectionManager
from dbt.adapters.snowflake import SnowflakeRelation
from dbt.adapters.snowflake import SnowflakeColumn
from dbt.utils import filter_null_values
from dbt.exceptions import RuntimeException


class SnowflakeAdapter(SQLAdapter):
Relation = SnowflakeRelation
Column = SnowflakeColumn
ConnectionManager = SnowflakeConnectionManager

AdapterSpecificConfigs = frozenset(
Expand Down
5 changes: 5 additions & 0 deletions test/integration/056_column_type_tests/bq_models/model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
select
CAST(1 as int64) as int64_col,
CAST(2.0 as float64) as float64_col,
CAST(3.0 as numeric) as numeric_col,
CAST('3' as string) as string_col,
10 changes: 10 additions & 0 deletions test/integration/056_column_type_tests/bq_models/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version: 2
models:
- name: model
tests:
- is_type:
column_map:
int64_col: ['integer', 'number']
float64_col: ['float', 'number']
numeric_col: ['numeric', 'number']
string_col: ['string', 'not number']
69 changes: 69 additions & 0 deletions test/integration/056_column_type_tests/macros/test_is_type.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@

{% macro simple_type_check_column(column, check) %}
    {#-
      Run the single type predicate named by `check` against `column` and
      return its result. Recognised names: 'string', 'float', 'number',
      'numeric', 'integer'. Any other name raises a compiler error.
    -#}
    {% if check == 'string' %}
        {% set outcome = column.is_string() %}
    {% elif check == 'float' %}
        {% set outcome = column.is_float() %}
    {% elif check == 'number' %}
        {% set outcome = column.is_number() %}
    {% elif check == 'numeric' %}
        {% set outcome = column.is_numeric() %}
    {% elif check == 'integer' %}
        {% set outcome = column.is_integer() %}
    {% else %}
        {% do exceptions.raise_compiler_error('invalid type check value: ' ~ check) %}
    {% endif %}
    {{ return(outcome) }}
{% endmacro %}

{% macro type_check_column(column, type_checks) %}
    {#-
      Apply every entry of `type_checks` to `column` and return True only when
      all of them hold. An entry prefixed with 'not ' is a negated check: the
      remainder of the string is evaluated and must be false. Failing entries
      are collected and logged once at info level.
    -#}
    {% set failures = [] %}
    {% for type_check in type_checks %}
        {% if type_check.startswith('not ') %}
            {# Drop the 4-character 'not ' prefix and require the bare check to fail. #}
            {% set passed = not simple_type_check_column(column, type_check[4:]) %}
        {% else %}
            {% set passed = simple_type_check_column(column, type_check) %}
        {% endif %}
        {% if not passed %}
            {% do failures.append(type_check) %}
        {% endif %}
    {% endfor %}
    {% if (failures | length) > 0 %}
        {% do log('column ' ~ column.name ~ ' had failures: ' ~ failures, info=True) %}
    {% endif %}
    {% do return((failures | length) == 0) %}
{% endmacro %}


{% macro test_is_type(model, column_map) %}
    {#-
      Schema test: check each column of `model` against the type checks listed
      for it in `column_map` (lower-cased column name -> list of checks).
      Renders a query whose single `pass_fail` value is the number of columns
      with at least one failing check. Raises a compiler error when
      `column_map` is empty, when its size differs from the relation's column
      count, when a column has no entry, or when an entry has no checks.
    -#}
    {% if not execute %}
        {{ return(None) }}
    {% endif %}
    {% if not column_map %}
        {% do exceptions.raise_compiler_error('test_is_type must have a column name') %}
    {% endif %}
    {% set relation_columns = adapter.get_columns_in_relation(model) %}
    {% if (column_map | length) != (relation_columns | length) %}
        {% set expected_names = (column_map | list | string) %}
        {% set actual_names = (relation_columns | map(attribute='name') | list | string) %}
        {% do exceptions.raise_compiler_error('did not get all the columns/all columns not specified:\n' ~ expected_names ~ '\nvs\n' ~ actual_names) %}
    {% endif %}
    {% set failing_columns = [] %}
    {% for column in relation_columns %}
        {# Keys in column_map are matched against the lower-cased column name. #}
        {% set column_key = (column.name | lower) %}
        {% if column_key not in column_map %}
            {% do exceptions.raise_compiler_error('column key ' ~ column_key ~ ' not found in ' ~ (column_map | list | string)) %}
        {% endif %}
        {% set type_checks = column_map[column_key] %}
        {% if not type_checks %}
            {% do exceptions.raise_compiler_error('no type checks?') %}
        {% endif %}
        {% if not type_check_column(column, type_checks) %}
            {% do failing_columns.append(column.name) %}
        {% endif %}
    {% endfor %}
    {% do log('bad columns: ' ~ failing_columns, info=True) %}
    select {{ failing_columns | length }} as pass_fail
{% endmacro %}
9 changes: 9 additions & 0 deletions test/integration/056_column_type_tests/pg_models/model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
select
1::smallint as smallint_col,
2::integer as int_col,
3::bigint as bigint_col,
4.0::real as real_col,
5.0::double precision as double_col,
6.0::numeric as numeric_col,
'7'::text as text_col,
'8'::varchar(20) as varchar_col
14 changes: 14 additions & 0 deletions test/integration/056_column_type_tests/pg_models/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version: 2
models:
- name: model
tests:
- is_type:
column_map:
smallint_col: ['integer', 'number']
int_col: ['integer', 'number']
bigint_col: ['integer', 'number']
real_col: ['float', 'number']
double_col: ['float', 'number']
numeric_col: ['numeric', 'number']
text_col: ['string', 'not number']
varchar_col: ['string', 'not number']
17 changes: 17 additions & 0 deletions test/integration/056_column_type_tests/rs_models/model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
select
1::smallint as smallint_col,
2::int as int_col,
3::bigint as bigint_col,
4::int2 as int2_col,
5::int4 as int4_col,
6::int8 as int8_col,
7::integer as integer_col,
8.0::real as real_col,
9.0::float4 as float4_col,
10.0::float8 as float8_col,
11.0::float as float_col,
12.0::double precision as double_col,
13.0::numeric as numeric_col,
14.0::decimal as decimal_col,
'15'::varchar(20) as varchar_col,
'16'::text as text_col
22 changes: 22 additions & 0 deletions test/integration/056_column_type_tests/rs_models/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
version: 2
models:
- name: model
tests:
- is_type:
column_map:
smallint_col: ['integer', 'number']
int_col: ['integer', 'number']
bigint_col: ['integer', 'number']
int2_col: ['integer', 'number']
int4_col: ['integer', 'number']
int8_col: ['integer', 'number']
integer_col: ['integer', 'number']
real_col: ['float', 'number']
double_col: ['float', 'number']
float4_col: ['float', 'number']
float8_col: ['float', 'number']
float_col: ['float', 'number']
numeric_col: ['numeric', 'number']
decimal_col: ['numeric', 'number']
varchar_col: ['string', 'not number']
text_col: ['string', 'not number']
18 changes: 18 additions & 0 deletions test/integration/056_column_type_tests/sf_models/model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
select
1::smallint as smallint_col,
2::int as int_col,
3::bigint as bigint_col,
4::integer as integer_col,
5::tinyint as tinyint_col,
6::byteint as byteint_col,
7.0::float as float_col,
8.0::float4 as float4_col,
9.0::float8 as float8_col,
10.0::double as double_col,
11.0::double precision as double_p_col,
12.0::real as real_col,
13.0::numeric as numeric_col,
14.0::decimal as decimal_col,
15.0::number as number_col,
'16'::text as text_col,
'17'::varchar(20) as varchar_col
23 changes: 23 additions & 0 deletions test/integration/056_column_type_tests/sf_models/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
version: 2
models:
- name: model
tests:
- is_type:
column_map:
smallint_col: ['numeric', 'number', 'not string', 'not float', 'not integer']
int_col: ['numeric', 'number', 'not string', 'not float', 'not integer']
bigint_col: ['numeric', 'number', 'not string', 'not float', 'not integer']
integer_col: ['numeric', 'number', 'not string', 'not float', 'not integer']
tinyint_col: ['numeric', 'number', 'not string', 'not float', 'not integer']
byteint_col: ['numeric', 'number', 'not string', 'not float', 'not integer']
float_col: ['float', 'number', 'not string', 'not integer', 'not numeric']
float4_col: ['float', 'number', 'not string', 'not integer', 'not numeric']
float8_col: ['float', 'number', 'not string', 'not integer', 'not numeric']
double_col: ['float', 'number', 'not string', 'not integer', 'not numeric']
double_p_col: ['float', 'number', 'not string', 'not integer', 'not numeric']
real_col: ['float', 'number', 'not string', 'not integer', 'not numeric']
numeric_col: ['numeric', 'number', 'not string', 'not float', 'not integer']
decimal_col: ['numeric', 'number', 'not string', 'not float', 'not integer']
number_col: ['numeric', 'number', 'not string', 'not float', 'not integer']
text_col: ['string', 'not number']
varchar_col: ['string', 'not number']
51 changes: 51 additions & 0 deletions test/integration/056_column_type_tests/test_column_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from test.integration.base import DBTIntegrationTest, use_profile


class TestColumnTypes(DBTIntegrationTest):
    """Shared base for the per-adapter column type integration tests.

    The adapter-specific subclasses in this file provide ``models`` and a
    profile-decorated test method that calls ``run_and_test``.
    """

    @property
    def schema(self):
        # Schema name shared by every adapter variant of this test.
        return '056_column_types'

    def run_and_test(self):
        """Invoke ``run`` then ``test`` via run_dbt, expecting one result each."""
        for command in ('run', 'test'):
            results = self.run_dbt([command])
            self.assertEqual(len(results), 1)


class TestPostgresColumnTypes(TestColumnTypes):
    """Column type checks for the ``pg_models`` project under the postgres profile."""

    @property
    def models(self):
        # Models directory (model.sql / schema.yml) used for this adapter.
        return 'pg_models'

    @use_profile('postgres')
    def test_postgres_column_types(self):
        # Delegates to the shared run-then-test routine on the base class.
        self.run_and_test()


class TestRedshiftColumnTypes(TestColumnTypes):
    """Column type checks for the ``rs_models`` project under the redshift profile."""

    @property
    def models(self):
        # Models directory (model.sql / schema.yml) used for this adapter.
        return 'rs_models'

    @use_profile('redshift')
    def test_redshift_column_types(self):
        # Delegates to the shared run-then-test routine on the base class.
        self.run_and_test()


class TestSnowflakeColumnTypes(TestColumnTypes):
    """Column type checks for the ``sf_models`` project under the snowflake profile."""

    @property
    def models(self):
        # Models directory (model.sql / schema.yml) used for this adapter.
        return 'sf_models'

    @use_profile('snowflake')
    def test_snowflake_column_types(self):
        # Delegates to the shared run-then-test routine on the base class.
        self.run_and_test()


class TestBigQueryColumnTypes(TestColumnTypes):
    """Column type checks for the ``bq_models`` project under the bigquery profile."""

    @property
    def models(self):
        # Models directory (model.sql / schema.yml) used for this adapter.
        return 'bq_models'

    @use_profile('bigquery')
    def test_bigquery_column_types(self):
        # Delegates to the shared run-then-test routine on the base class.
        self.run_and_test()