Updated version to 0.4

staale · May 28, 2014 · ce072f0 · ce072f0
2 parents 05c147b + 5fa8fa8
commit ce072f0
Show file tree

Hide file tree

Showing 9 changed files with 235 additions and 157 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,4 @@ ignored-files/
 *.sublime-*
 .pypirc
 dist/
+.tox
diff --git a/README.rst b/README.rst
@@ -1,17 +1,26 @@
 python-xlsx
 ===========
 
-A small footprint xslx reader that understands shared strings and can process
+A small footprint xlsx reader that understands shared strings and can process
 excel dates.
 
+
+Requirements
+------------
+
+No external runtime requirements (running the tests needs ``six`` and ``pytest``). Supports Python versions 2.6+ and 3.2+.
+
+
 Usage
-+++++++
+-----
 
 ::
 
  book = Workbook('filename or filedescriptor') #Open xlsx file
  for sheet in book:
  print sheet.name
+ # for larger workbooks, use sheet.rowsIter(), which yields one row at a
+ # time, instead of sheet.rows().iteritems()
  for row, cells in sheet.rows().iteritems(): # or sheet.cols()
  print row # prints row number
  for cell in cells:
@@ -22,6 +31,7 @@ Usage
  some_sheet = book['some sheet name']
  ...
 
+
 Alternatives
 ------------
 

diff --git a/setup.py b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup
 
 setup(
- version="0.2",
+ version="0.4",
  name='py-xlsx',
  description="""Tiny python code for parsing data from Microsoft's Office
  Open XML Spreadsheet format""",
@@ -13,10 +13,18 @@
  'License :: OSI Approved :: MIT License',
  'Operating System :: OS Independent',
  'Programming Language :: Python',
+ 'Programming Language :: Python :: 2',
+ 'Programming Language :: Python :: 2.6',
+ 'Programming Language :: Python :: 2.7',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.2',
+ 'Programming Language :: Python :: 3.3',
+ 'Programming Language :: Python :: 3.4',
  ],
  author='Staale Undheim',
  author_email='[email protected]',
  url='http:/staale/python-xlsx',
+ tests_require = ['six'],
  packages=[
  "xlsx"
  ],

diff --git a/tox.ini b/tox.ini
@@ -0,0 +1,18 @@
+[tox]
+envlist = py26, py27, py32, py33, py34
+
+
+[testenv]
+usedevelop = True
+deps=
+ six
+ pytest
+
+commands =
+ py.test xlsx/tests
+
+
+# py34 env not available in tox <= 1.7, so create it ourselves.
+[testenv:py34]
+basepython = python3.4
+deps = {[testenv]deps}
diff --git a/xlsx/__init__.py b/xlsx/__init__.py
@@ -1,11 +1,15 @@
 # -*- coding: utf-8 -*-
 """ Small footprint xlsx reader """
+
+from __future__ import unicode_literals
+
 __author__="Ståle Undheim <[email protected]>"
 
 import re
 import zipfile
-from xldate import xldate_as_tuple
-from formatting import is_date_format_string
+from xlsx.xldate import xldate_as_tuple
+from xlsx.formatting import is_date_format_string
+from xlsx.timemachine import UnicodeMixin
 
 try:
  from xml.etree import cElementTree as ET
@@ -26,6 +30,7 @@ def __init__(self, filename):
 
  """
 
+ self.ziphandle = None
  self.ziphandle = zipfile.ZipFile(filename, 'r')
 
  def __getitem__(self, key):
@@ -41,7 +46,8 @@ def __getitem__(self, key):
  def __del__(self):
  """Close the zip file when finished"""
 
- self.ziphandle.close()
+ if self.ziphandle:
+ self.ziphandle.close()
 
 class Workbook(object):
  """Main class that contains sheets organized by name or by id.
@@ -127,17 +133,16 @@ def __init__(self, workbook, id, name):
  self.loaded = False
  self.addrPattern = re.compile("([a-zA-Z]*)(\d*)")
  self.__cells = {}
- self.__cols = {}
- self.__rows = {}
+ self.__cols = None
+ self.__rows = None
 
- def __load(self):
+ def rowsIter(self):
  sheetDoc = self.workbook.domzip["xl/worksheets/sheet%d.xml" % self.id]
  sheetData = sheetDoc.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}sheetData")
  # @type sheetData Element
- rows = {}
- columns = {}
  for rowNode in sheetData:
  rowNum = int(rowNode.get("r"))
+ rowCells = []
  for columnNode in rowNode:
  colType = columnNode.get("t")
  cellId = columnNode.get("r")
@@ -167,14 +172,22 @@ def __load(self):
  formula = columnNode.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}f").text
  except Exception:
  raise #pass
- if not rowNum in rows:
- rows[rowNum] = []
+ cell = Cell(rowNum, colNum, data, formula=formula)
+ rowCells.append(cell)
+ yield rowNum, rowCells
+
+ def __load(self):
+ rows = {}
+ columns = {}
+ for rowNum, row in self.rowsIter():
+ rows[rowNum] = row
+
+ for cell in row:
+ colNum = cell.column
  if not colNum in columns:
  columns[colNum] = []
- cell = Cell(rowNum, colNum, data, formula=formula)
- rows[rowNum].append(cell)
+ self.__cells[cell.id] = cell
  columns[colNum].append(cell)
- self.__cells[cellId] = cell
  self.__rows = rows
  self.__cols = columns
  self.loaded=True
@@ -207,10 +220,16 @@ def __iter__(self):
  self.__load()
  return self.__cells.__iter__()
 
+<<<<<<< HEAD
  def __repr__(self):
  return "%r[%r]"%(self.workbook, self.name)
 
 class Cell(object):
+=======
+
+class Cell(UnicodeMixin):
+
+>>>>>>> 5fa8fa8761d3bfcb3ce1b1b730913f6d4d0ab0c9
  def __init__(self, row, column, value, formula=None):
  self.row = int(row)
  self.column = column
@@ -248,5 +267,5 @@ def __ge__(self, other):
  return self.__cmp__(other) != -1
 
  def __unicode__(self):
- return u"<Cell [%s] : \"%s\" (%s)>" % (self.id, self.value,
-  self.formula, )
+ return "<Cell [%s] : \"%s\" (%s)>" % (self.id, self.value,
+ self.formula, )
diff --git a/xlsx/formatting.py b/xlsx/formatting.py
@@ -26,32 +26,33 @@
 # 2007-09-08 SJM Work around corrupt STYLE record
 # 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file
 
+from __future__ import unicode_literals
 import re
 
 
-date_chars = u'ymdhs' # year, month/minute, day, hour, second
+date_chars = 'ymdhs' # year, month/minute, day, hour, second
 date_char_dict = {}
 for _c in date_chars + date_chars.upper():
  date_char_dict[_c] = 5
 del _c, date_chars
 
 skip_char_dict = {}
-for _c in u'$-+/(): ':
+for _c in '$-+/(): ':
  skip_char_dict[_c] = 1
 
 num_char_dict = {
- u'0': 5,
- u'#': 5,
- u'?': 5,
+ '0': 5,
+ '#': 5,
+ '?': 5,
  }
 
 non_date_formats = {
- u'0.00E+00':1,
- u'##0.0E+0':1,
- u'General' :1,
- u'GENERAL' :1, # OOo Calc 1.1.4 does this.
- u'general' :1, # pyExcelerator 0.6.3 does this.
- u'@' :1,
+ '0.00E+00':1,
+ '##0.0E+0':1,
+ 'General' :1,
+ 'GENERAL' :1, # OOo Calc 1.1.4 does this.
+ 'general' :1, # pyExcelerator 0.6.3 does this.
+ '@' :1,
  }
 
 fmt_bracketed_sub = re.compile(r'\[[^]]*\]').sub
@@ -69,35 +70,35 @@ def is_date_format_string(fmt):
  # TODO: u'[h]\\ \\h\\o\\u\\r\\s' ([h] means don't care about hours > 23)
  state = 0
  s = ''
- ignorable = skip_char_dict.has_key
+ ignorable = lambda key: key in skip_char_dict
  for c in fmt:
  if state == 0:
- if c == u'"':
+ if c == '"':
  state = 1
- elif c in ur"\_*":
+ elif c in r"\_*":
  state = 2
  elif ignorable(c):
  pass
  else:
  s += c
  elif state == 1:
- if c == u'"':
+ if c == '"':
  state = 0
  elif state == 2:
  # Ignore char after backslash, underscore or asterisk
  state = 0
  assert 0 <= state <= 2
  s = fmt_bracketed_sub('', s)
- if non_date_formats.has_key(s):
+ if s in non_date_formats:
  return False
  state = 0
  separator = ";"
  got_sep = 0
  date_count = num_count = 0
  for c in s:
- if date_char_dict.has_key(c):
+ if c in date_char_dict:
  date_count += date_char_dict[c]
- elif num_char_dict.has_key(c):
+ elif c in num_char_dict:
  num_count += num_char_dict[c]
  elif c == separator:
  got_sep = 1