Skip to content

Commit

Permalink
Always specify encodings to open()
Browse files Browse the repository at this point in the history
Fixes Windows-specific behavior where open(), when called without an explicit encoding, falls back to the default code page (which can be cp1252) instead of UTF-8.
Adds a BOM unit test.
  • Loading branch information
Jacob Beck committed May 10, 2019
1 parent 191ae61 commit d74e37d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
15 changes: 8 additions & 7 deletions core/dbt/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,20 @@ def to_native_string(s):

def write_file(path, s):
if WHICH_PYTHON == 2:
with codecs.open(path, 'w', encoding='utf-8') as f:
return f.write(to_string(s))
open = codecs.open
else:
with open(path, 'w') as f:
return f.write(to_string(s))
open = builtins.open
with open(path, 'w', encoding='utf-8') as f:
return f.write(to_string(s))


def open_file(path):
"""Open the path for reading"""
"""Open the path for reading. It must be utf-8 encoded."""
if WHICH_PYTHON == 2:
return codecs.open(path, encoding='utf-8')
open = codecs.open
else:
return open(path)
open = builtins.open
return open(path, encoding='utf-8')


if WHICH_PYTHON == 2:
Expand Down
11 changes: 11 additions & 0 deletions test/unit/test_agate_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
1,n,test,3.2,20180806T11:33:29.320Z,True,NULL
2,y,asdf,900,20180806T11:35:29.320Z,False,a string"""

SAMPLE_CSV_BOM_DATA = u'\ufeff' + SAMPLE_CSV_DATA


EXPECTED = [
[1, 'n', 'test', Decimal('3.2'),
Expand Down Expand Up @@ -41,6 +43,15 @@ def test_from_csv(self):
for idx, row in enumerate(tbl):
self.assertEqual(list(row), EXPECTED[idx])

def test_bom_from_csv(self):
path = os.path.join(self.tempdir, 'input.csv')
with open(path, 'wb') as fp:
fp.write(SAMPLE_CSV_BOM_DATA.encode('utf-8'))
tbl = agate_helper.from_csv(path)
self.assertEqual(len(tbl), len(EXPECTED))
for idx, row in enumerate(tbl):
self.assertEqual(list(row), EXPECTED[idx])

def test_from_data(self):
column_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
data = [
Expand Down

0 comments on commit d74e37d

Please sign in to comment.