diff --git a/CHANGELOG.md b/CHANGELOG.md index d06501ed0bb..679e377d2a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,10 +12,12 @@ - Added support for renaming BigQuery relations ([#2520](https://github.com/fishtown-analytics/dbt/issues/2520), [#2521](https://github.com/fishtown-analytics/dbt/pull/2521)) - Added support for BigQuery authorized views ([#1718](https://github.com/fishtown-analytics/dbt/issues/1718), [#2517](https://github.com/fishtown-analytics/dbt/pull/2517)) - Added support for altering BigQuery column types ([#2546](https://github.com/fishtown-analytics/dbt/issues/2546), [#2547](https://github.com/fishtown-analytics/dbt/pull/2547)) +- Include row counts and bytes processed in log output for all BigQuery statement types ([#2526](https://github.com/fishtown-analytics/dbt/issues/2526)) ### Fixes - Fixed an error in create_adapter_plugins.py script when -dependency arg not passed ([#2507](https://github.com/fishtown-analytics/dbt/issues/2507), [#2508](https://github.com/fishtown-analytics/dbt/pull/2508)) - Remove misleading "Opening a new connection" log message in set_connection_name. ([#2511](https://github.com/fishtown-analytics/dbt/issues/2511)) +- All BigQuery statement types now return the number of bytes processed ([#2526](https://github.com/fishtown-analytics/dbt/issues/2526))
def _format_scaled(value, step, units, separator):
    """Scale ``value`` down by ``step`` until it fits a unit, then render it.

    :param value: the number to render (int or float, may be negative).
    :param step: the ratio between two consecutive units.
    :param units: unit suffixes ordered from smallest to largest.
    :param separator: text placed between the number and the unit.
    :return: ``value`` with one decimal place followed by the chosen unit.

    The largest unit absorbs everything that does not fit a smaller one, so
    with that unit the printed figure may be greater than or equal to
    ``step``.
    """
    *smaller_units, largest_unit = units
    for unit in smaller_units:
        # Pick the unit from the figure as it will be printed (one decimal
        # place). Comparing the raw value would render e.g. 1023.999 KB as
        # "1024.0 KB" instead of rolling over to "1.0 MB".
        if round(abs(value), 1) < step:
            return f"{value:.1f}{separator}{unit}"
        value /= step
    return f"{value:.1f}{separator}{largest_unit}"


def format_bytes(num_bytes):
    """Render a byte count as a short human-readable string.

    Uses 1024-based steps through Bytes, KB, MB, GB, TB and PB, with one
    decimal place and a space before the unit, e.g. ``'1.5 MB'``. Amounts
    beyond the PB range are still expressed in PB (``'1024.0 PB'``).

    :param num_bytes: number of bytes (int or float).
    :return: the formatted string.
    """
    units = ('Bytes', 'KB', 'MB', 'GB', 'TB', 'PB')
    return _format_scaled(num_bytes, 1024.0, units, ' ')


def format_rows_number(rows_number):
    """Render a row count as a short human-readable string.

    Uses 1000-based steps through no suffix, k, m, b and t, with one decimal
    place and no space before the suffix, e.g. ``'20.0'`` or ``'1.5m'``.
    Counts beyond the t range are still expressed in t (``'31400.0t'``).

    :param rows_number: number of rows (int or float).
    :return: the formatted string.
    """
    units = ('', 'k', 'm', 'b', 't')
    return _format_scaled(rows_number, 1000.0, units, '')


# a little concurrent.futures.Executor for single-threaded mode
service_account -from dbt.utils import format_bytes +from dbt.utils import format_bytes, format_rows_number from dbt.clients import agate_helper, gcloud from dbt.exceptions import ( FailedToConnectException, RuntimeException, DatabaseException @@ -245,16 +245,21 @@ def execute(self, sql, auto_begin=False, fetch=None): conn = self.get_thread_connection() client = conn.handle table = client.get_table(query_job.destination) - status = 'CREATE TABLE ({})'.format(table.num_rows) + processed = format_bytes(query_job.total_bytes_processed) + status = 'CREATE TABLE ({} rows, {} processed)'.format( + format_rows_number(table.num_rows), + format_bytes(query_job.total_bytes_processed), + ) elif query_job.statement_type == 'SCRIPT': processed = format_bytes(query_job.total_bytes_processed) status = f'SCRIPT ({processed} processed)' elif query_job.statement_type in ['INSERT', 'DELETE', 'MERGE']: - status = '{} ({})'.format( + status = '{} ({} rows, {} processed)'.format( query_job.statement_type, - query_job.num_dml_affected_rows + format_rows_number(query_job.num_dml_affected_rows), + format_bytes(query_job.total_bytes_processed), ) else: diff --git a/test/unit/test_utils.py b/test/unit/test_utils.py index e624d5da4e0..e4b2f005fc5 100644 --- a/test/unit/test_utils.py +++ b/test/unit/test_utils.py @@ -93,7 +93,6 @@ def test__simple_cases(self): actual = dbt.utils.deep_map(self.intify_all, expected) self.assertEqual(actual, expected) - @staticmethod def special_keypath(value, keypath): @@ -150,7 +149,25 @@ def test__simple_cases(self): self.assertEqual(dbt.utils.format_bytes(1024**2*1.5), '1.5 MB') self.assertEqual(dbt.utils.format_bytes(1024**3*52.6), '52.6 GB') self.assertEqual(dbt.utils.format_bytes(1024**4*128), '128.0 TB') - self.assertEqual(dbt.utils.format_bytes(1024**5+1), '> 1024 TB') + self.assertEqual(dbt.utils.format_bytes(1024**5), '1.0 PB') + self.assertEqual(dbt.utils.format_bytes(1024**5*31.4), '31.4 PB') + self.assertEqual(dbt.utils.format_bytes(1024**6), 
class TestRowsNumberFormatting(unittest.TestCase):
    """Checks dbt.utils.format_rows_number against known renderings."""

    def test__simple_cases(self):
        # (input row count, expected rendering), checked in this order.
        expectations = (
            (-1, '-1.0'),
            (0, '0.0'),
            (20, '20.0'),
            (1030, '1.0k'),
            (1000**2*1.5, '1.5m'),
            (1000**3*52.6, '52.6b'),
            (1000**3*128, '128.0b'),
            (1000**4, '1.0t'),
            (1000**4*31.4, '31.4t'),
            (1000**5*31.4, '31400.0t'),
        )
        for rows, rendered in expectations:
            self.assertEqual(dbt.utils.format_rows_number(rows), rendered)