From cfd63942437095a016f0805119a3ecd75b97c5c0 Mon Sep 17 00:00:00 2001 From: Shige Takeda Date: Mon, 11 May 2020 15:10:35 +0000 Subject: [PATCH 01/11] SNOW-156287 bumped the minimum cryptography version required by the connector --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 82bb46a71..1f728acaa 100644 --- a/setup.py +++ b/setup.py @@ -186,7 +186,7 @@ def _get_arrow_lib_as_linker_input(self): 'pycryptodomex>=3.2,!=3.5.0,<4.0.0', 'pyOpenSSL>=16.2.0,<21.0.0', 'cffi>=1.9,<1.14', - 'cryptography>=1.8.2,<3.0.0', + 'cryptography>=2.5.0,<3.0.0', 'ijson<3.0.0', 'pyjwt<2.0.0', 'idna<2.10', From d3b0075180d61c8fec668582c8526566cd863e1c Mon Sep 17 00:00:00 2001 From: Shige Takeda Date: Mon, 11 May 2020 15:11:07 +0000 Subject: [PATCH 02/11] SNOW-144275 Upgrade pyarrow from 0.16.0 to 0.17.0 --- setup.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/setup.py b/setup.py index 1f728acaa..471f0631d 100644 --- a/setup.py +++ b/setup.py @@ -56,27 +56,21 @@ class MyBuildExt(build_ext): # this list should be carefully examined when pyarrow lib is # upgraded arrow_libs_to_copy = { - 'linux': ['libarrow.so.16', - 'libarrow_python.so.16', - 'libarrow_flight.so.16', - 'libarrow_boost_filesystem.so.1.68.0', - 'libarrow_boost_system.so.1.68.0', - 'libarrow_boost_regex.so.1.68.0'], - 'darwin': ['libarrow.16.dylib', - 'libarrow_python.16.dylib', - 'libarrow_boost_filesystem.dylib', - 'libarrow_boost_regex.dylib', - 'libarrow_boost_system.dylib'], + 'linux': ['libarrow.so.17', + 'libarrow_python.so.17', + 'libarrow_flight.so.17'], + 'darwin': ['libarrow.17.dylib', + 'libarrow_python.17.dylib'], 'win32': ['arrow.dll', 'arrow_python.dll', 'zlib.dll'] } arrow_libs_to_link = { - 'linux': ['libarrow.so.16', - 'libarrow_python.so.16'], - 'darwin': ['libarrow.16.dylib', - 'libarrow_python.16.dylib'], + 'linux': ['libarrow.so.17', + 'libarrow_python.so.17'], + 'darwin': ['libarrow.17.dylib', + 'libarrow_python.17.dylib'], 'win32': ['arrow.lib', 'arrow_python.lib'] } @@ -224,8 +218,7 @@ def _get_arrow_lib_as_linker_input(self): 'keyring<22.0.0,!=16.1.0', ], "pandas": [ - 'pyarrow>=0.15.1,<0.16.0;python_version=="3.5" and platform_system=="Windows"', - 'pyarrow>=0.16.0,<0.17.0;python_version!="3.5" or platform_system!="Windows"', + 'pyarrow>=0.17.0,<0.18.0', 'pandas==0.24.2;python_version=="3.5"', 'pandas>=1.0.0,<1.1.0;python_version>"3.5"', ], From 2df619b22c770120832911e2a3f3b36c677b4ebf Mon Sep 17 00:00:00 2001 From: Shige Takeda Date: Mon, 11 May 2020 15:11:34 +0000 Subject: [PATCH 03/11] updated Python Connector reqs to branch master --- tested_requirements/requirements_35.txt | 1 + tested_requirements/requirements_36.txt | 1 + tested_requirements/requirements_37.txt | 1 + tested_requirements/requirements_38.txt | 1 + 4 files changed, 4 insertions(+) diff --git a/tested_requirements/requirements_35.txt b/tested_requirements/requirements_35.txt index 1f2a9f5b5..db4eb9c83 100644 --- a/tested_requirements/requirements_35.txt +++ b/tested_requirements/requirements_35.txt @@ -22,4 +22,5 @@ pytz==2020.1 requests==2.23.0 s3transfer==0.3.3 six==1.14.0 +snowflake-connector-python @ file:///home/user/trunk/Python/dist/docker/3.5/snowflake_connector_python-2.2.5-cp35-cp35m-linux_x86_64.whl urllib3==1.25.9 diff --git a/tested_requirements/requirements_36.txt b/tested_requirements/requirements_36.txt index 1f2a9f5b5..c8d08ab96 100644 --- a/tested_requirements/requirements_36.txt +++ b/tested_requirements/requirements_36.txt @@ 
-22,4 +22,5 @@ pytz==2020.1 requests==2.23.0 s3transfer==0.3.3 six==1.14.0 +snowflake-connector-python @ file:///home/user/trunk/Python/dist/docker/3.6/snowflake_connector_python-2.2.5-cp36-cp36m-linux_x86_64.whl urllib3==1.25.9 diff --git a/tested_requirements/requirements_37.txt b/tested_requirements/requirements_37.txt index 1f2a9f5b5..92e4445ee 100644 --- a/tested_requirements/requirements_37.txt +++ b/tested_requirements/requirements_37.txt @@ -22,4 +22,5 @@ pytz==2020.1 requests==2.23.0 s3transfer==0.3.3 six==1.14.0 +snowflake-connector-python @ file:///home/user/trunk/Python/dist/docker/3.7/snowflake_connector_python-2.2.5-cp37-cp37m-linux_x86_64.whl urllib3==1.25.9 diff --git a/tested_requirements/requirements_38.txt b/tested_requirements/requirements_38.txt index 1f2a9f5b5..01df7d0fe 100644 --- a/tested_requirements/requirements_38.txt +++ b/tested_requirements/requirements_38.txt @@ -22,4 +22,5 @@ pytz==2020.1 requests==2.23.0 s3transfer==0.3.3 six==1.14.0 +snowflake-connector-python @ file:///home/user/trunk/Python/dist/docker/3.8/snowflake_connector_python-2.2.5-cp38-cp38-linux_x86_64.whl urllib3==1.25.9 From b157907c6790111a52d4bfb0ded6f909a541f92b Mon Sep 17 00:00:00 2001 From: Shige Takeda Date: Mon, 11 May 2020 15:11:54 +0000 Subject: [PATCH 04/11] SNOW-135902 fixing typos in pandas_utils --- pandas_tools.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas_tools.py b/pandas_tools.py index e64dd0659..db5bd3879 100644 --- a/pandas_tools.py +++ b/pandas_tools.py @@ -48,10 +48,10 @@ def write_pandas(conn: 'SnowflakeConnection', :Example: import pandas - from snowflake.connector.pandas_tools import write_pandas_all + from snowflake.connector.pandas_tools import write_pandas df = pandas.DataFrame([('Mark', 10), ('Luke', 20)], columns=['name', 'balance']) - success, nchunks, nrows, _ = write_pandas_all(cnx, df, 'customers') + success, nchunks, nrows, _ = write_pandas(cnx, df, 'customers') @param conn: connection to be used to communicate with Snowflake @param df: Dataframe we'd like to write back @@ -68,7 +68,7 @@ def write_pandas(conn: 'SnowflakeConnection', @return: tuple of whether all chunks were ingested correctly, # of chunks, # of ingested rows, and ingest's output """ if database is not None and schema is None: - raise ProgrammingError("Schema has to be provided to write_pandas_all when a database is provided") + raise ProgrammingError("Schema has to be provided to write_pandas when a database is provided") # This dictionary maps the compression algorithm to Snowflake put copy into command type # https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#type-parquet compression_map = { @@ -90,7 +90,7 @@ def write_pandas(conn: 'SnowflakeConnection', while True: try: stage_name = ''.join(random.choice(string.ascii_lowercase) for _ in range(5)) - cursor.execute('create temporary stage /* Python:snowflake.connector.pandas_tools.write.pandas_all() */ ' + cursor.execute('create temporary stage /* Python:snowflake.connector.pandas_tools.write_pandas() */ ' '"{stage_name}"'.format(stage_name=stage_name), _is_internal=True).fetchall() break except ProgrammingError as pe: @@ -104,7 +104,7 @@ def write_pandas(conn: 'SnowflakeConnection', # Dump chunk into parquet file chunk.to_parquet(chunk_path, compression=compression) # Upload parquet file - cursor.execute('PUT /* Python:snowflake.connector.pandas_tools.write.pandas_all() */ ' + cursor.execute('PUT /* Python:snowflake.connector.pandas_tools.write_pandas() */ ' 
'file://{path} @"{stage_name}" PARALLEL={parallel}'.format( path=chunk_path, stage_name=stage_name, @@ -113,7 +113,7 @@ def write_pandas(conn: 'SnowflakeConnection', # Remove chunk file os.remove(chunk_path) copy_results = cursor.execute(( - 'COPY INTO {location} /* Python:snowflake.connector.pandas_tools.write.pandas_all() */ ' + 'COPY INTO {location} /* Python:snowflake.connector.pandas_tools.write_pandas() */ ' 'FROM @"{stage_name}" FILE_FORMAT=(TYPE=PARQUET COMPRESSION={compression}) ' 'MATCH_BY_COLUMN_NAME=CASE_SENSITIVE PURGE=TRUE ON_ERROR={on_error}' ).format( @@ -134,13 +134,13 @@ def pd_writer(table: pandas.io.sql.SQLTable, keys: Iterable, data_iter: Iterable) -> None: """ - This is a wrapper on top of write_pandas_all to make it compatible with to_sql method in pandas. + This is a wrapper on top of write_pandas to make it compatible with to_sql method in pandas. :Example: import pandas as pd - from snowflake.connector.pandas_utils import pf_writer + from snowflake.connector.pandas_tools import pd_writer - sf_connector_version_df = pd.DataFrame([('snowflake-connector-python',)], columns=['NAME', 'NEWEST_VERSION']) + sf_connector_version_df = pd.DataFrame([('snowflake-connector-python', '1.0')], columns=['NAME', 'NEWEST_VERSION']) sf_connector_version_df.to_sql('driver_versions', engine, index=False, method=pd_writer) @param table: Pandas package's table object @param conn: SQLAlchemy engine object to talk to Snowflake @@ -152,6 +152,6 @@ def pd_writer(table: pandas.io.sql.SQLTable, df = pandas.DataFrame(data_iter, columns=keys) write_pandas(conn=sf_connection, df=df, - # Note: Our sqlalchemy connector creates table in the case insensitive way + # Note: Our sqlalchemy connector creates tables case insensitively table_name=table.name.upper(), schema=table.schema) From a2ae7de86fc2ad2b199079fc2956908c7bc43646 Mon Sep 17 00:00:00 2001 From: Shige Takeda Date: Mon, 11 May 2020 15:12:41 +0000 Subject: [PATCH 05/11] Revert SNOW-148520 more restrictive application name enforcement and standardizing it with other drivers --- connection.py | 39 +++++++++++++-------------------------- test/test_connection.py | 14 ++++++-------- 2 files changed, 19 insertions(+), 34 deletions(-) diff --git a/connection.py b/connection.py index aabedb441..2c37390e9 100644 --- a/connection.py +++ b/connection.py @@ -142,12 +142,12 @@ def DefaultConverterClass(): u'support_negative_year': True, # snowflake u'log_max_query_length': LOG_MAX_QUERY_LENGTH, # snowflake u'disable_request_pooling': False, # snowflake - # Enable temporary credential file for Linux, default false. Mac/Win will overlook this - u'client_store_temporary_credential': False, + u'client_store_temporary_credential': False, # enable temporary credential file for Linux, default false. 
Mac/Win will overlook this 'use_openssl_only': False, # only use openssl instead of python only crypto modules } -APPLICATION_RE = re.compile(r'^[\w.-]+$') +APPLICATION_RE = re.compile(r'[\w\d_]+') + # adding the exception class to Connection class for m in [method for method in dir(errors) if callable(getattr(errors, method))]: @@ -159,21 +159,6 @@ def DefaultConverterClass(): logger = getLogger(__name__) -def verify_application(val: str) -> None: - """Raise ProgrammingError if invalid application name""" - if len(val) > 50: - raise ProgrammingError( - msg="Application name is too long: {}".format(val), - errno=0 - ) - - if not APPLICATION_RE.match(val): - raise ProgrammingError( - msg='Invalid application name: {}'.format(val), - errno=0 - ) - - class SnowflakeConnection(object): u""" Implementation of the connection object for the Snowflake Database. Use @@ -417,10 +402,6 @@ def application(self): """ return self._application - @application.setter - def application(self, val: str) -> None: - verify_application(val) - @property def errorhandler(self): u""" @@ -720,7 +701,7 @@ def __open_connection(self): # enable storing temporary credential in a file self._session_parameters[ PARAMETER_CLIENT_STORE_TEMPORARY_CREDENTIAL] = \ - self._client_store_temporary_credential if IS_LINUX else True + self._client_store_temporary_credential if IS_LINUX else True auth = Auth(self.rest) if not auth.read_temporary_credential( @@ -746,10 +727,16 @@ def __config(self, **kwargs): if name == u'sequence_counter': self.sequence_counter = value elif name == u'application': - verify_application(value) - setattr(self, '_' + name, value) + if not APPLICATION_RE.match(value): + msg = u'Invalid application name: {}'.format(value) + raise ProgrammingError( + msg=msg, + errno=0 + ) + else: + setattr(self, u'_' + name, value) else: - setattr(self, '_' + name, value) + setattr(self, u'_' + name, value) if self._numpy: try: diff --git a/test/test_connection.py b/test/test_connection.py index 6465ce11b..f6c535c9f 100644 --- a/test/test_connection.py +++ b/test/test_connection.py @@ -253,13 +253,11 @@ def test_bogus(db_parameters): ) -@pytest.mark.parametrize('app_name', ['%%%', 'multiple words', 'PythonConnector -p my-password', '', None, - 'VerySuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuperMegaLongName']) -def test_invalid_application(db_parameters, app_name): +def test_invalid_application(db_parameters): """ Invalid application """ - with pytest.raises(snowflake.connector.ProgrammingError): + with pytest.raises(snowflake.connector.Error): snowflake.connector.connect( protocol=db_parameters['protocol'], user=db_parameters['user'], @@ -267,21 +265,21 @@ def test_invalid_application(db_parameters, app_name): application='%%%') -@pytest.mark.parametrize('app_name', ['Special_Client', 'Client-0.0.1_dev_yasdahcsbj']) -def test_valid_application(db_parameters, app_name): +def test_valid_application(db_parameters): """ Valid app name """ + application = 'Special_Client' cnx = snowflake.connector.connect( user=db_parameters['user'], password=db_parameters['password'], host=db_parameters['host'], port=db_parameters['port'], account=db_parameters['account'], - application=app_name, + application=application, protocol=db_parameters['protocol'], ) - assert cnx.application == app_name, "Must be valid application" + assert cnx.application == application, "Must be valid application" cnx.close() From 3147c0a4790e99c0224eb01153c4a6949b7ea249 Mon Sep 17 00:00:00 2001 From: Shige Takeda Date: Mon, 11 May 2020 15:13:37 +0000 Subject: [PATCH 
06/11] SNOW-157642 remove ijson from snowsql cursor class and from a test --- arrow_result.pyx | 3 +-- chunk_downloader.py | 23 ++++++----------------- cursor.py | 10 +++++----- json_result.py | 13 ++++++------- test/test_large_result_set.py | 18 ++---------------- util_text.py | 27 --------------------------- 6 files changed, 20 insertions(+), 74 deletions(-) diff --git a/arrow_result.pyx b/arrow_result.pyx index 062c9c907..366a5670b 100644 --- a/arrow_result.pyx +++ b/arrow_result.pyx @@ -96,8 +96,7 @@ cdef class ArrowResult: else self._connection._chunk_downloader_class( chunks, self._connection, self._cursor, qrmk, chunk_headers, query_result_format='arrow', - prefetch_threads=self._connection.client_prefetch_threads, - use_ijson=False) + prefetch_threads=self._connection.client_prefetch_threads) def __iter__(self): return self diff --git a/chunk_downloader.py b/chunk_downloader.py index 4c14104d1..221762e71 100644 --- a/chunk_downloader.py +++ b/chunk_downloader.py @@ -8,13 +8,11 @@ import time from collections import namedtuple from gzip import GzipFile -from io import BytesIO from logging import getLogger from multiprocessing.pool import ThreadPool from threading import Condition, Lock from snowflake.connector.gzip_decoder import decompress_raw_data -from snowflake.connector.util_text import split_rows_from_stream from .arrow_context import ArrowConverterContext from .errorcode import ER_CHUNK_DOWNLOAD_FAILED @@ -51,9 +49,7 @@ class SnowflakeChunkDownloader(object): def _pre_init(self, chunks, connection, cursor, qrmk, chunk_headers, query_result_format='JSON', - prefetch_threads=DEFAULT_CLIENT_PREFETCH_THREADS, - use_ijson=False): - self._use_ijson = use_ijson + prefetch_threads=DEFAULT_CLIENT_PREFETCH_THREADS): self._query_result_format = query_result_format self._downloader_error = None @@ -97,12 +93,10 @@ def _pre_init(self, chunks, connection, cursor, qrmk, chunk_headers, def __init__(self, chunks, connection, cursor, qrmk, chunk_headers, query_result_format='JSON', - prefetch_threads=DEFAULT_CLIENT_PREFETCH_THREADS, - use_ijson=False): + prefetch_threads=DEFAULT_CLIENT_PREFETCH_THREADS): self._pre_init(chunks, connection, cursor, qrmk, chunk_headers, query_result_format=query_result_format, - prefetch_threads=prefetch_threads, - use_ijson=use_ijson) + prefetch_threads=prefetch_threads) logger.debug('Chunk Downloader in memory') for idx in range(self._effective_threads): self._pool.apply_async(self._download_chunk, [idx]) @@ -257,8 +251,7 @@ def _fetch_chunk(self, url, headers): """ Fetch the chunk from S3. 
""" - handler = JsonBinaryHandler(is_raw_binary_iterator=True, - use_ijson=self._use_ijson) \ + handler = JsonBinaryHandler(is_raw_binary_iterator=True) \ if self._query_result_format == 'json' else \ ArrowBinaryHandler(self._cursor, self._connection) @@ -299,9 +292,8 @@ class JsonBinaryHandler(RawBinaryDataHandler): """ Convert result chunk in json format into interator """ - def __init__(self, is_raw_binary_iterator, use_ijson): + def __init__(self, is_raw_binary_iterator): self._is_raw_binary_iterator = is_raw_binary_iterator - self._use_ijson = use_ijson def to_iterator(self, raw_data_fd, download_time): parse_start_time = get_time_millis() @@ -310,10 +302,7 @@ def to_iterator(self, raw_data_fd, download_time): ).decode('utf-8', 'replace') if not self._is_raw_binary_iterator: ret = json.loads(raw_data) - elif not self._use_ijson: - ret = iter(json.loads(raw_data)) - else: - ret = split_rows_from_stream(BytesIO(raw_data.encode('utf-8'))) + ret = iter(json.loads(raw_data)) parse_end_time = get_time_millis() diff --git a/cursor.py b/cursor.py index 7f394625f..60d3c8136 100644 --- a/cursor.py +++ b/cursor.py @@ -583,7 +583,7 @@ def execute(self, command, params=None, timeout=None, u'total'] if u'data' in ret and u'total' in ret[ u'data'] else -1 return data - self._init_result_and_meta(data, _use_ijson) + self._init_result_and_meta(data) else: self._total_rowcount = ret[u'data'][ u'total'] if u'data' in ret and u'total' in ret[u'data'] else -1 @@ -611,7 +611,7 @@ def _is_dml(self, data): and int(data[u'statementTypeId']) in \ STATEMENT_TYPE_ID_DML_SET - def _init_result_and_meta(self, data, use_ijson=False): + def _init_result_and_meta(self, data): is_dml = self._is_dml(data) self._query_result_format = data.get(u'queryResultFormat', u'json') logger.debug(u"Query result format: %s", self._query_result_format) @@ -636,7 +636,7 @@ def _init_result_and_meta(self, data, use_ijson=False): self.check_can_use_arrow_resultset() self._result = ArrowResult(data, self, use_dict_result=self._use_dict_result) else: - self._result = self._json_result_class(data, self, use_ijson) + self._result = self._json_result_class(data, self) if is_dml: updated_rows = 0 @@ -694,7 +694,7 @@ def check_can_use_pandas(self): } ) - def query_result(self, qid, _use_ijson=False): + def query_result(self, qid): url = '/queries/{qid}/result'.format(qid=qid) ret = self._connection.rest.request(url=url, method='get') self._sfqid = ret[u'data'][ @@ -707,7 +707,7 @@ def query_result(self, qid, _use_ijson=False): if ret.get(u'success'): data = ret.get(u'data') - self._init_result_and_meta(data, _use_ijson) + self._init_result_and_meta(data) else: logger.info(u'failed') logger.debug(ret) diff --git a/json_result.py b/json_result.py index 6861713b6..76c9fc9e6 100644 --- a/json_result.py +++ b/json_result.py @@ -16,13 +16,13 @@ class JsonResult: - def __init__(self, raw_response, cursor, use_ijson=False): + def __init__(self, raw_response, cursor): self._reset() self._cursor = cursor self._connection = cursor.connection - self._init_from_meta(raw_response, use_ijson) + self._init_from_meta(raw_response) - def _init_from_meta(self, data, use_ijson): + def _init_from_meta(self, data): self._total_row_index = -1 # last fetched number of rows self._chunk_index = 0 self._chunk_count = 0 @@ -59,8 +59,7 @@ def _init_from_meta(self, data, use_ijson): self._chunk_downloader = self._connection._chunk_downloader_class( chunks, self._connection, self._cursor, qrmk, chunk_headers, query_result_format='json', - 
prefetch_threads=self._connection.client_prefetch_threads,
-            use_ijson=use_ijson)
+            prefetch_threads=self._connection.client_prefetch_threads)

     def __iter__(self):
         return self
@@ -166,8 +165,8 @@ def _reset(self):


 class DictJsonResult(JsonResult):
-    def __init__(self, raw_response, cursor, use_ijson):
-        JsonResult.__init__(self, raw_response, cursor, use_ijson)
+    def __init__(self, raw_response, cursor):
+        JsonResult.__init__(self, raw_response, cursor)

     def _row_to_python(self, row):
         # see the base class
diff --git a/test/test_large_result_set.py b/test/test_large_result_set.py
index d4d1333c2..d84481400 100644
--- a/test/test_large_result_set.py
+++ b/test/test_large_result_set.py
@@ -103,18 +103,9 @@ def test_query_large_result_set(conn_cnx, db_parameters, ingest_data):
             datum)
     cnx._telemetry.add_log_to_batch = add_log_mock

-    # large result set fetch in the default mode
-    result1 = []
-    for rec in cnx.cursor().execute(sql):
-        result1.append(rec)
-
-    num_rows = len(result1)
-    assert result1[0][0] == ingest_data[0]
-    assert result1[num_rows - 1][8] == ingest_data[1]
-
-    # large result set fetch in ijson mode
     result2 = []
-    for rec in cnx.cursor().execute(sql, _use_ijson=True):
+    for rec in cnx.cursor().execute(sql):
         result2.append(rec)

     num_rows = len(result2)
@@ -129,11 +120,6 @@ def test_query_large_result_set(conn_cnx, db_parameters, ingest_data):
     assert result999[0][0] == ingest_data[0]
     assert result999[num_rows - 1][8] == ingest_data[1]

-    assert len(result1) == len(result999), (
-        "result length is different: result1, and result999")
-    for i, (x, y) in enumerate(zip(result1, result999)):
-        assert x == y, "element {}".format(i)
-
     assert len(result2) == len(result999), (
         "result length is different: result2, and result999")
     for i, (x, y) in enumerate(zip(result2, result999)):
@@ -146,6 +132,6 @@
         TelemetryField.TIME_DOWNLOADING_CHUNKS]
     for field in expected:
         assert sum([1 if x.message['type'] == field else 0 for x in
-                    telemetry_data]) == 3, \
-            "Expected three telemetry logs (one per query) " \
+                    telemetry_data]) == 2, \
+            "Expected two telemetry logs (one per query) " \
             "for log type {}".format(field)
diff --git a/util_text.py b/util_text.py
index 51efd0dca..9d93252bd 100644
--- a/util_text.py
+++ b/util_text.py
@@ -6,11 +6,6 @@
 import logging
 import re

-try:
-    import ijson
-except Exception:
-    ijson = None
-
 COMMENT_PATTERN_RE = re.compile(r'^\s*\-\-')

 EMPTY_LINE_RE = re.compile(r'^\s*$')
@@ -170,28 +165,6 @@ def _concatenate_statements(statement_list):
     return u''.join(valid_statement_list).strip(), is_put_or_get


-def split_rows_from_stream(stream):
-    """
-    Splits into rows from a stream object. Generator.
- """ - if not ijson: - raise Exception("install ijson") - row = [] - in_row = False - for prefix, event, value in ijson.parse(stream): - if prefix == '': - continue - if in_row: - if event == 'end_array': - yield row - row = [] - in_row = False - else: - row.append(value) - elif event == 'start_array': - in_row = True - - def construct_hostname(region, account): """ Constructs hostname from region and account From d4cfdad860169d06402076da1a1fbc40a8ffdc86 Mon Sep 17 00:00:00 2001 From: Shige Takeda Date: Mon, 11 May 2020 15:14:48 +0000 Subject: [PATCH 07/11] SNOW-156373 Add log intercepting and masking functionality to Python Logging --- secret_detector.py | 106 ++++++++-- telemetry_oob.py | 3 +- test/test_connection.py.rej | 40 ---- test/test_unit_log_secret_detector.py | 199 ++++++++++++++++++ ...or.py => test_unit_oob_secret_detector.py} | 22 +- 5 files changed, 300 insertions(+), 70 deletions(-) delete mode 100644 test/test_connection.py.rej create mode 100644 test/test_unit_log_secret_detector.py rename test/{test_unit_secret_detector.py => test_unit_oob_secret_detector.py} (88%) diff --git a/secret_detector.py b/secret_detector.py index b315123d2..c6e70656d 100644 --- a/secret_detector.py +++ b/secret_detector.py @@ -1,22 +1,50 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (c) 2012-2019 Snowflake Computing Inc. All right reserved. +# Copyright (c) 2012-2020 Snowflake Computing Inc. All right reserved. # """ -Detects and Masks Secrets. Based on SecretDetector.java in the JDBC Driver +Detects and Masks Secrets that might be leaked from two potential avenues +1. Out of Band Telemetry +2. Logging """ - +import logging import re +import os + +MIN_TOKEN_LEN = os.getenv("MIN_TOKEN_LEN", 32) +MIN_PWD_LEN = os.getenv("MIN_PWD_LEN", 8) + +class SecretDetector(logging.Formatter): + AWS_KEY_PATTERN = re.compile(r"(aws_key_id|aws_secret_key|access_key_id|secret_access_key)\s*=\s*'([^']+)'", + flags=re.IGNORECASE) + AWS_TOKEN_PATTERN = re.compile(r'(accessToken|tempToken|keySecret)"\s*:\s*"([a-z0-9/+]{32,}={0,2})"', + flags=re.IGNORECASE) + SAS_TOKEN_PATTERN = re.compile(r'(sig|signature|AWSAccessKeyId|password|passcode)=(?P[a-z0-9%/+]{16,})', + flags=re.IGNORECASE) + PRIVATE_KEY_PATTERN = re.compile(r'-----BEGIN PRIVATE KEY-----\\n([a-z0-9/+=\\n]{32,})\\n-----END PRIVATE KEY-----', + flags=re.MULTILINE | re.IGNORECASE) + PRIVATE_KEY_DATA_PATTERN = re.compile(r'"privateKeyData": "([a-z0-9/+=\\n]{10,})"', + flags=re.MULTILINE | re.IGNORECASE) + CONNECTION_TOKEN_PATTERN = re.compile(r'(token|assertion content)' + r'([\'\"\s:=]+)' + r'([a-z0-9=/_\-\+]{8,})', + flags=re.IGNORECASE) -class SecretDetector(object): + PASSWORD_PATTERN = re.compile(r'(password' + r'|pwd)' + r'([\'\"\s:=]+)' + r'([a-z0-9!\"#\$%&\\\'\(\)\*\+\,-\./:;<=>\?\@\[\]\^_`\{\|\}~]{8,})', + flags=re.IGNORECASE) + + @staticmethod + def mask_connection_token(text): + return SecretDetector.CONNECTION_TOKEN_PATTERN.sub(r'\1\2****', text) - AWS_KEY_PATTERN = re.compile(r"(aws_key_id|aws_secret_key|access_key_id|secret_access_key)\s*=\s*'([^']+)'", flags=re.IGNORECASE) - AWS_TOKEN_PATTERN = re.compile(r'(accessToken|tempToken|keySecret)"\s*:\s*"([a-z0-9/+]{32,}={0,2})"', flags=re.IGNORECASE) - SAS_TOKEN_PATTERN = re.compile(r'(sig|signature|AWSAccessKeyId|password|passcode)=(?P[a-z0-9%/+]{16,})', flags=re.IGNORECASE) - PRIVATE_KEY_PATTERN = re.compile(r'-----BEGIN PRIVATE KEY-----\\n([a-z0-9/+=\\n]{32,})\\n-----END PRIVATE KEY-----', flags=re.MULTILINE | re.IGNORECASE) - PRIVATE_KEY_DATA_PATTERN = 
re.compile(r'"privateKeyData": "([a-z0-9/+=\\n]{10,})"', flags=re.MULTILINE | re.IGNORECASE) + @staticmethod + def mask_password(text): + return SecretDetector.PASSWORD_PATTERN.sub(r'\1\2****', text) @staticmethod def mask_aws_keys(text): @@ -32,7 +60,8 @@ def mask_aws_tokens(text): @staticmethod def mask_private_key(text): - return SecretDetector.PRIVATE_KEY_PATTERN.sub("-----BEGIN PRIVATE KEY-----\\\\nXXXX\\\\n-----END PRIVATE KEY-----", text) + return SecretDetector.PRIVATE_KEY_PATTERN.sub( + "-----BEGIN PRIVATE KEY-----\\\\nXXXX\\\\n-----END PRIVATE KEY-----", text) @staticmethod def mask_private_key_data(text): @@ -49,15 +78,56 @@ def mask_secrets(text): if text is None: return None - masked_text = SecretDetector.mask_private_key_data( - SecretDetector.mask_private_key( - SecretDetector.mask_aws_tokens( - SecretDetector.mask_sas_tokens( - SecretDetector.mask_aws_keys( - text + masked = False + err_str = None + try: + masked_text = SecretDetector.mask_connection_token( + SecretDetector.mask_password( + SecretDetector.mask_private_key_data( + SecretDetector.mask_private_key( + SecretDetector.mask_aws_tokens( + SecretDetector.mask_sas_tokens( + SecretDetector.mask_aws_keys( + text + ) + ) + ) ) ) ) ) - ) - return masked_text + if masked_text != text: + masked = True + except Exception as ex: + # We'll assume that the exception was raised during masking + # to be safe consider that the log has sensitive information + # and do not raise an exception. + masked = True + masked_text = str(ex) + err_str = str(ex) + + return masked, masked_text, err_str + + def format(self, record: logging.LogRecord) -> str: + """ + Wrapper around logging module's formatter. + This will ensure that the formatted message is + free from sensitive credentials. + :param record: the logging record + :return: formatted desensitized log string + """ + try: + unsanitized_log = super().format(record) + masked, sanitized_log, err_str = \ + SecretDetector.mask_secrets(unsanitized_log) + if masked and err_str is not None: + sanitized_log = "{} - {} {} - {} - {} - {}".format( + record.asctime, record.threadName, + "sf_secret_detector.py", "sanitize_log_str", + record.levelname, err_str) + except Exception as ex: + sanitized_log = "{} - {} {} - {} - {} - {}".format( + record.asctime, record.threadName, + "secret_detector.py", "sanitize_log_str", + record.levelname, "EXCEPTION - " + str(ex)) + return sanitized_log diff --git a/telemetry_oob.py b/telemetry_oob.py index e855289d1..1fa049556 100644 --- a/telemetry_oob.py +++ b/telemetry_oob.py @@ -491,7 +491,8 @@ def export_queue_to_string(self): except Exception: logger.debug("Failed to generate a JSON dump from the passed in telemetry OOB events. 
String representation of logs: %s" % str(logs), exc_info=True) payload = None - return SecretDetector.mask_secrets(payload) + _, masked_text, _ = SecretDetector.mask_secrets(payload) + return masked_text def close(self): """ diff --git a/test/test_connection.py.rej b/test/test_connection.py.rej deleted file mode 100644 index fd6a832d9..000000000 --- a/test/test_connection.py.rej +++ /dev/null @@ -1,40 +0,0 @@ ---- test/test_connection.py -+++ test/test_connection.py -@@ -10,6 +10,7 @@ - - import mock - import pytest -+ - import snowflake.connector - from snowflake.connector import DatabaseError, OperationalError, ProgrammingError - from snowflake.connector.auth_okta import AuthByOkta -@@ -30,7 +31,6 @@ - """ - assert conn_testaccount, 'invalid cnx' - # Test default values -- assert not conn_testaccount.use_openssl_only - conn_testaccount._set_current_objects() - - -@@ -667,10 +667,10 @@ - use_openssl_only=True, - ) - assert cnx -- assert 'USE_OPENSSL_ONLY' in os.environ -+ assert 'SF_USE_OPENSSL_ONLY' in os.environ - # Note during testing conftest will default this value to False, so if testing this we need to manually clear it - # Let's test it again, after clearing it -- del os.environ['USE_OPENSSL_ONLY'] -+ del os.environ['SF_USE_OPENSSL_ONLY'] - cnx = snowflake.connector.connect( - user=db_parameters['user'], - password=db_parameters['password'], -@@ -681,7 +681,7 @@ - use_openssl_only=True, - ) - assert cnx -- assert os.environ['USE_OPENSSL_ONLY'] == 'True' -+ assert os.environ['SF_USE_OPENSSL_ONLY'] == 'True' - - - def test_dashed_url(db_parameters): diff --git a/test/test_unit_log_secret_detector.py b/test/test_unit_log_secret_detector.py new file mode 100644 index 000000000..40dd000fe --- /dev/null +++ b/test/test_unit_log_secret_detector.py @@ -0,0 +1,199 @@ +# encoding=utf-8 +# !/usr/bin/env python +# +# Copyright (c) 2020 Snowflake Computing Inc. All right reserved. +# + +from snowflake.connector.secret_detector import SecretDetector + + +def test_no_masking(): + test_str = "This string is innocuous" + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(test_str) + assert not masked + assert err_str is None + assert masked_str == test_str + + +def test_mask_token(): + long_token = '_Y1ZNETTn5/qfUWj3Jedby7gipDzQs=U' \ + 'KyJH9DS=nFzzWnfZKGV+C7GopWCGD4Lj' \ + 'OLLFZKOE26LXHDt3pTi4iI1qwKuSpf/F' \ + 'mClCMBSissVsU3Ei590FP0lPQQhcSGcD' \ + 'u69ZL_1X6e9h5z62t/iY7ZkII28n2qU=' \ + 'nrBJUgPRCIbtJQkVJXIuOHjX4G5yUEKj' \ + 'ZBAx4w6=_lqtt67bIA=o7D=oUSjfywsR' \ + 'FoloNIkBPXCwFTv+1RVUHgVA2g8A9Lw5' \ + 'XdJYuI8vhg=f0bKSq7AhQ2Bh' + + token_str_w_prefix = 'Token =' + long_token + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(token_str_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'Token =****' + + id_token_str_w_prefix = 'idToken : ' + long_token + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(id_token_str_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'idToken : ****' + + session_token_w_prefix = 'sessionToken : ' + long_token + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(session_token_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'sessionToken : ****' + + master_token_w_prefix = 'masterToken : ' + long_token + masked, masked_str, err_str = SecretDetector. 
\ + mask_secrets(master_token_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'masterToken : ****' + + assertion_w_prefix = 'assertion content:' + long_token + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(assertion_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'assertion content:****' + +def test_token_false_positives(): + false_positive_token_str = "2020-04-30 23:06:04,069 - MainThread auth.py:397" \ + " - write_temporary_credential() - DEBUG - no ID " \ + "token is given when try to store temporary credential" + + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(false_positive_token_str) + assert not masked + assert err_str is None + assert masked_str == false_positive_token_str + + +def test_password(): + random_password = 'Fh[+2J~AcqeqW%?' + random_password_w_prefix = 'password:' + random_password + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(random_password_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'password:****' + + random_password_caps = 'PASSWORD:' + random_password + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(random_password_caps) + assert masked + assert err_str is None + assert masked_str == 'PASSWORD:****' + + random_password_mix_case = 'PassWorD:' + random_password + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(random_password_mix_case) + assert masked + assert err_str is None + assert masked_str == 'PassWorD:****' + + random_password_equal_sign = 'password = ' + random_password + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(random_password_equal_sign) + assert masked + assert err_str is None + assert masked_str == 'password = ****' + + random_password = 'Fh[+2J~AcqeqW%?' + random_password_w_prefix = 'pwd:' + random_password + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(random_password_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'pwd:****' + + +def test_token_password(): + long_token = '_Y1ZNETTn5/qfUWj3Jedby7gipDzQs=U' \ + 'KyJH9DS=nFzzWnfZKGV+C7GopWCGD4Lj' \ + 'OLLFZKOE26LXHDt3pTi4iI1qwKuSpf/F' \ + 'mClCMBSissVsU3Ei590FP0lPQQhcSGcD' \ + 'u69ZL_1X6e9h5z62t/iY7ZkII28n2qU=' \ + 'nrBJUgPRCIbtJQkVJXIuOHjX4G5yUEKj' \ + 'ZBAx4w6=_lqtt67bIA=o7D=oUSjfywsR' \ + 'FoloNIkBPXCwFTv+1RVUHgVA2g8A9Lw5' \ + 'XdJYuI8vhg=f0bKSq7AhQ2Bh' + + long_token2 = 'ktL57KJemuq4-M+Q0pdRjCIMcf1mzcr' \ + 'MwKteDS5DRE/Pb+5MzvWjDH7LFPV5b_' \ + '/tX/yoLG3b4TuC6Q5qNzsARPPn_zs/j' \ + 'BbDOEg1-IfPpdsbwX6ETeEnhxkHIL4H' \ + 'sP-V' + + random_pwd = 'Fh[+2J~AcqeqW%?' + random_pwd2 = random_pwd + 'vdkav13' + + test_string_w_prefix = "token=" + long_token + \ + " random giberish " + \ + "password:" + random_pwd + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(test_string_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'token=****' + \ + " random giberish " + \ + "password:****" + + # order reversed + test_string_w_prefix = "password:" + random_pwd + \ + " random giberish " + \ + "token=" + long_token + + masked, masked_str, err_str = SecretDetector. 
\ + mask_secrets(test_string_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'password:****' + \ + " random giberish " + \ + "token=****" + + # multiple tokens and password + test_string_w_prefix = "token=" + long_token + \ + " random giberish " + \ + "password:" + random_pwd + \ + " random giberish " + \ + "idToken:" + long_token2 + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(test_string_w_prefix) + assert masked + assert err_str is None + assert masked_str == 'token=****' + \ + " random giberish " + \ + "password:****" + \ + " random giberish " + \ + "idToken:****" + + # multiple passwords + test_string_w_prefix = "password=" + random_pwd + \ + " random giberish " + "pwd:" \ + + random_pwd2 + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(test_string_w_prefix) + assert masked + assert err_str is None + assert masked_str == "password=" + \ + "****" + " random giberish " \ + + "pwd:" + "****" + + test_string_w_prefix = "password=" + random_pwd + \ + " random giberish " + "password=" \ + + random_pwd2 + " random giberish " + \ + "password=" + random_pwd + masked, masked_str, err_str = SecretDetector. \ + mask_secrets(test_string_w_prefix) + assert masked + assert err_str is None + assert masked_str == "password=" + "****" + \ + " random giberish " + "password=" \ + + "****" + " random giberish " + \ + "password=" + "****" diff --git a/test/test_unit_secret_detector.py b/test/test_unit_oob_secret_detector.py similarity index 88% rename from test/test_unit_secret_detector.py rename to test/test_unit_oob_secret_detector.py index 416851c41..b0e55115c 100644 --- a/test/test_unit_secret_detector.py +++ b/test/test_unit_oob_secret_detector.py @@ -44,7 +44,7 @@ def test_mask_aws_secret(): ";" # Mask an aws key id and secret key - masked_sql = SecretDetector.mask_secrets(sql) + _, masked_sql, _ = SecretDetector.mask_secrets(sql) assert masked_sql == correct @@ -74,28 +74,28 @@ def test_mask_sas_token(): "&Expires=1555481960&Signature=**********" # Mask azure token - masked_text = SecretDetector.mask_secrets(azure_sas_token) + _, masked_text, _ = SecretDetector.mask_secrets(azure_sas_token) assert masked_text == masked_azure_sas_token # Mask s3 token - masked_text = SecretDetector.mask_secrets(s3_sas_token) + _, masked_text, _ = SecretDetector.mask_secrets(s3_sas_token) assert masked_text == masked_s3_sas_token text = ''.join([random.choice(string.ascii_lowercase) for i in range(200)]) - masked_text = SecretDetector.mask_secrets(text) + _, masked_text, _ = SecretDetector.mask_secrets(text) # Randomly generated string should cause no substitutions assert masked_text == text # Mask multiple azure tokens - masked_text = SecretDetector.mask_secrets(azure_sas_token + '\n' + azure_sas_token) + _, masked_text, _ = SecretDetector.mask_secrets(azure_sas_token + '\n' + azure_sas_token) assert masked_text == masked_azure_sas_token + '\n' + masked_azure_sas_token # Mask multiple s3 tokens - masked_text = SecretDetector.mask_secrets(s3_sas_token + '\n' + s3_sas_token) + _, masked_text, _ = SecretDetector.mask_secrets(s3_sas_token + '\n' + s3_sas_token) assert masked_text == masked_s3_sas_token + '\n' + masked_s3_sas_token # Mask azure and s3 token - masked_text = SecretDetector.mask_secrets(azure_sas_token + '\n' + s3_sas_token) + _, masked_text, _ = SecretDetector.mask_secrets(azure_sas_token + '\n' + s3_sas_token) assert masked_text == masked_azure_sas_token + '\n' + masked_s3_sas_token @@ -123,11 +123,11 @@ def test_mask_secrets(): "sig=**********')" 
# Test masking all kinds of secrets - masked = SecretDetector.mask_secrets(sql) - assert masked == masked_sql + _, masked_text, _ = SecretDetector.mask_secrets(sql) + assert masked_text == masked_sql text = ''.join([random.choice(string.ascii_lowercase) for i in range(500)]) - masked_text = SecretDetector.mask_secrets(text) + _, masked_text, _ = SecretDetector.mask_secrets(text) # Randomly generated string should cause no substitutions assert masked_text == text @@ -137,5 +137,5 @@ def test_mask_private_keys(): filtered_text = "\"privateKeyData\": \"XXXX\"" - result = SecretDetector.mask_secrets(text) + _, result, _ = SecretDetector.mask_secrets(text) assert result == filtered_text From d1f1afcb9947f2b0a135ffd9352bfbfd7ca1ba0e Mon Sep 17 00:00:00 2001 From: Shige Takeda Date: Mon, 11 May 2020 15:15:24 +0000 Subject: [PATCH 08/11] SNOW-159494 Flake 8 fix for Python connector --- test/test_unit_log_secret_detector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_unit_log_secret_detector.py b/test/test_unit_log_secret_detector.py index 40dd000fe..8014eaffd 100644 --- a/test/test_unit_log_secret_detector.py +++ b/test/test_unit_log_secret_detector.py @@ -62,6 +62,7 @@ def test_mask_token(): assert err_str is None assert masked_str == 'assertion content:****' + def test_token_false_positives(): false_positive_token_str = "2020-04-30 23:06:04,069 - MainThread auth.py:397" \ " - write_temporary_credential() - DEBUG - no ID " \ From 607f804cf170b35cd8d0ab2a2e242202e835a6e1 Mon Sep 17 00:00:00 2001 From: Mark Keller Date: Mon, 11 May 2020 17:07:48 +0000 Subject: [PATCH 09/11] update to pyarrow version --- ci/build_pyarrow.bat | 2 +- ci/build_pyarrow_darwin.sh | 2 +- ci/install.bat | 2 +- ci/install.sh | 2 +- docker/manylinux2010/scripts/build_virtualenvs.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/build_pyarrow.bat b/ci/build_pyarrow.bat index 3b03d972e..a4a61c8b2 100644 --- a/ci/build_pyarrow.bat +++ b/ci/build_pyarrow.bat @@ -31,7 +31,7 @@ if %errorlevel% neq 0 goto :error python -m pip install --upgrade pip if %errorlevel% neq 0 goto :error -pip install --upgrade setuptools wheel Cython pyarrow==0.16.0 numpy +pip install --upgrade setuptools wheel Cython pyarrow==0.17.0 numpy if %errorlevel% neq 0 goto :error cd %CONNECTOR_DIR% diff --git a/ci/build_pyarrow_darwin.sh b/ci/build_pyarrow_darwin.sh index 27d575c74..fb05c261b 100755 --- a/ci/build_pyarrow_darwin.sh +++ b/ci/build_pyarrow_darwin.sh @@ -19,7 +19,7 @@ function build_connector_with_python() { rm -f generated_version.py || true fi # This needs to be kept in sync with setup.py - pip install -U pyarrow==0.16.0 Cython flake8 + pip install -U pyarrow==0.17.0 Cython flake8 flake8 MACOSX_DEPLOYMENT_TARGET=10.12 python setup.py bdist_wheel -d $CONNECTOR_DIR/dist/ unset ENABLE_EXT_MODULES diff --git a/ci/install.bat b/ci/install.bat index fb58e709c..2f0e274af 100644 --- a/ci/install.bat +++ b/ci/install.bat @@ -9,7 +9,7 @@ call env\Scripts\activate # https://github.com/pypa/pip/issues/6566 python -m pip install --upgrade pip :: These versions have to be kept in sync with what is pinned in setup.py manually -pip install "pyarrow==0.16.0" +pip install "pyarrow==0.17.0" pip install wheel pip install Cython set ENABLE_EXT_MODULES=true diff --git a/ci/install.sh b/ci/install.sh index 321a1a59c..8d0aac476 100755 --- a/ci/install.sh +++ b/ci/install.sh @@ -30,7 +30,7 @@ source ./venv/bin/activate if [ "$TRAVIS_OS_NAME" == "osx" ]; then export ENABLE_EXT_MODULES=true cd $THIS_DIR/.. 
-    pip install Cython pyarrow==0.16.0 wheel
+    pip install Cython pyarrow==0.17.0 wheel
     python setup.py bdist_wheel
     unset ENABLE_EXT_MODULES
     CONNECTOR_WHL=$(ls $THIS_DIR/../dist/snowflake_connector_python*.whl | sort -r | head -n 1)
diff --git a/docker/manylinux2010/scripts/build_virtualenvs.sh b/docker/manylinux2010/scripts/build_virtualenvs.sh
index c3d653da3..9e90ac48a 100755
--- a/docker/manylinux2010/scripts/build_virtualenvs.sh
+++ b/docker/manylinux2010/scripts/build_virtualenvs.sh
@@ -39,7 +39,7 @@ for PYTHON in ${PYTHON_VERSIONS}; do
     "$(cpython_path $PYTHON ${U_WIDTH})/bin/virtualenv" -p ${PYTHON_INTERPRETER} --no-download /home/user/venv-build-${PYTHON}
     source /home/user/venv-build-${PYTHON}/bin/activate
     pip install -U pip
-    pip install "cython==0.29.15" "setuptools" "flake8" "wheel" "pyarrow==0.16.0"
+    pip install "cython==0.29.15" "setuptools" "flake8" "wheel" "pyarrow==0.17.0"
     deactivate
 done

From 0ae121cdeac070e70c02cf6fac20bd020a85d067 Mon Sep 17 00:00:00 2001
From: Mark Keller
Date: Mon, 11 May 2020 18:48:59 +0000
Subject: [PATCH 10/11] bumping boto3

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 471f0631d..48f6dfd33 100644
--- a/setup.py
+++ b/setup.py
@@ -171,8 +171,7 @@ def _get_arrow_lib_as_linker_input(self):
     install_requires=[
         'azure-common<2.0.0',
         'azure-storage-blob<12.0.0',
-        'boto3>=1.4.4,<1.12',
-        'botocore>=1.5.0,<1.15',
+        'boto3>=1.4.4,<1.14',
         'requests<2.24.0',
         'urllib3>=1.20,<1.26.0',
         'certifi<2021.0.0',

From de5bd6bf5ac0bdf2131a169f67bc7c7eca028bb2 Mon Sep 17 00:00:00 2001
From: Mark Keller
Date: Mon, 11 May 2020 22:02:02 +0000
Subject: [PATCH 11/11] SNOW-159814 fix new flake8 3.8.0 issues

Description
The new flake8 version is able to find coding issues that the old one
missed; this change fixes them.

Testing
Jenkins tests have passed and Travis should run these as well.
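
One fix below is behavioral rather than stylistic: in test_put_get_medium.py,
literal braces in a str.format() template were left unescaped, so .format()
parsed them as replacement fields and raised at runtime. A minimal sketch of
the failure mode (the variable names here are illustrative only):

    broken = '{"k":"{0}"}'    # '"k"' is parsed as a format field name
    fixed = '{{"k":"{}"}}'    # doubled braces render as literal { and }

    try:
        broken.format('abc')
    except KeyError:          # raises KeyError: '"k"'
        pass

    assert fixed.format('abc') == '{"k":"abc"}'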
--- pandas_tools.py | 2 +- test/sso/test_unit_sso_connection.py | 8 +++----- test/test_boolean.py | 2 +- test/test_load_unload.py | 1 - test/test_put_get_medium.py | 2 +- test/test_put_get_user_stage.py | 2 +- test/test_transaction.py | 2 +- 7 files changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas_tools.py b/pandas_tools.py index db5bd3879..8c488170b 100644 --- a/pandas_tools.py +++ b/pandas_tools.py @@ -94,7 +94,7 @@ def write_pandas(conn: 'SnowflakeConnection', '"{stage_name}"'.format(stage_name=stage_name), _is_internal=True).fetchall() break except ProgrammingError as pe: - if pe.msg.endswith('already exists.'.format(stage_name)): + if pe.msg.endswith('already exists.'): continue raise diff --git a/test/sso/test_unit_sso_connection.py b/test/sso/test_unit_sso_connection.py index 919cd06df..106a6141f 100644 --- a/test/sso/test_unit_sso_connection.py +++ b/test/sso/test_unit_sso_connection.py @@ -9,11 +9,9 @@ from snowflake.connector.auth import delete_temporary_credential from snowflake.connector.compat import IS_MACOS -@patch( - 'snowflake.connector.auth_webbrowser.AuthByWebBrowser.authenticate') -@patch( - 'snowflake.connector.network.SnowflakeRestful._post_request' -) + +@patch('snowflake.connector.auth_webbrowser.AuthByWebBrowser.authenticate') +@patch('snowflake.connector.network.SnowflakeRestful._post_request') def test_connect_externalbrowser( mockSnowflakeRestfulPostRequest, mockAuthByBrowserAuthenticate): diff --git a/test/test_boolean.py b/test/test_boolean.py index 099bab082..7c28a96d5 100644 --- a/test/test_boolean.py +++ b/test/test_boolean.py @@ -30,7 +30,7 @@ def test_binding_fetching_boolean(conn_cnx, db_parameters): # SNOW-15905: boolean support results = cnx.cursor().execute(""" SELECT CASE WHEN (null LIKE trim(null)) THEN null ELSE null END -""".format(name=db_parameters['name'])).fetchall() +""").fetchall() assert not results[0][0] finally: diff --git a/test/test_load_unload.py b/test/test_load_unload.py index 562d79082..14e559c11 100644 --- a/test/test_load_unload.py +++ b/test/test_load_unload.py @@ -164,7 +164,6 @@ def test_put_local_file(conn_cnx, test_data): AWS_SECRET_KEY={aws_secret_access_key})) """.format(aws_access_key_id=test_data.AWS_ACCESS_KEY_ID, aws_secret_access_key=test_data.AWS_SECRET_ACCESS_KEY, - database=test_data.database_name, stage_name=test_data.stage_name, )) cur.execute(""" put file://{}/ExecPlatform/Database/data/orders_10*.csv @%pytest_putget_t1 diff --git a/test/test_put_get_medium.py b/test/test_put_get_medium.py index 0bd519a10..fb01b9b6c 100644 --- a/test/test_put_get_medium.py +++ b/test/test_put_get_medium.py @@ -579,7 +579,7 @@ def _generate_huge_value_json(tmpdir, n=1, value_size=1): f = gzip.open(fname, 'wb') for i in range(n): logger.debug("adding a value in {}".format(i)) - f.write('{"k":"{0}"}'.format( + f.write('{{"k":"{}"}}'.format( ''.join( random.choice(string.ascii_uppercase + string.digits) for _ in range(value_size)))) diff --git a/test/test_put_get_user_stage.py b/test/test_put_get_user_stage.py index 7759da375..be93534c4 100644 --- a/test/test_put_get_user_stage.py +++ b/test/test_put_get_user_stage.py @@ -371,7 +371,7 @@ def _put_list_rm_files_in_stage(tmpdir, conn_cnx, db_parameters, elem): assert rec[6] == 'UPLOADED' rec = cnx.cursor().execute(""" LIST @{stage_name} - """.format(stage_name=stage_name, output_dir=output_dir)).fetchone() + """.format(stage_name=stage_name)).fetchone() assert rec, 'LIST should return something' assert rec[0].startswith('s3://'), "The file location in S3" rec = 
cnx.cursor().execute(""" diff --git a/test/test_transaction.py b/test/test_transaction.py index a281cc708..8f3bc13d3 100644 --- a/test/test_transaction.py +++ b/test/test_transaction.py @@ -108,7 +108,7 @@ def fin(): assert ret[0] == 21 cnx.cursor().execute(""" SELECT WRONG SYNTAX QUERY -""".format(name=db_parameters['name'])) +""") raise Exception("Failed to cause the syntax error") except snowflake.connector.Error: # syntax error should be caught here