Skip to content

Commit

Permalink
Added support for larger workbooks by providing a generator for iterating through rows rather than having to load everything into memory.
Browse files Browse the repository at this point in the history
  • Loading branch information
Joshua Fialkoff committed May 24, 2014
1 parent 2fd4760 commit 36d9e39
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
4 changes: 4 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Usage
book = Workbook('filename or filedescriptor') #Open xlsx file
for sheet in book:
print sheet.name
# for larger workbooks, use sheet.rowsIter() instead of
# sheet.rows().iteritems()
for row, cells in sheet.rows().iteritems(): # or sheet.cols()
print row # prints row number
for cell in cells:
Expand All @@ -30,6 +32,8 @@ Usage
...




Alternatives
------------

Expand Down
27 changes: 17 additions & 10 deletions xlsx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,17 +130,16 @@ def __init__(self, workbook, id, name):
self.loaded = False
self.addrPattern = re.compile("([a-zA-Z]*)(\d*)")
self.__cells = {}
self.__cols = {}
self.__rows = {}
self.__cols = None
self.__rows = None

def __load(self):
def rowsIter(self):
sheetDoc = self.workbook.domzip["xl/worksheets/sheet%d.xml" % self.id]
sheetData = sheetDoc.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}sheetData")
# @type sheetData Element
rows = {}
columns = {}
for rowNode in sheetData:
rowNum = int(rowNode.get("r"))
rowCells = []
for columnNode in rowNode:
colType = columnNode.get("t")
cellId = columnNode.get("r")
Expand Down Expand Up @@ -170,14 +169,22 @@ def __load(self):
formula = columnNode.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}f").text
except Exception:
raise #pass
if not rowNum in rows:
rows[rowNum] = []
cell = Cell(rowNum, colNum, data, formula=formula)
rowCells.append(cell)
yield rowNum, rowCells

def __load(self):
rows = {}
columns = {}
for rowNum, row in self.rowsIter():
rows[rowNum] = row

for cell in row:
colNum = cell.column
if not colNum in columns:
columns[colNum] = []
cell = Cell(rowNum, colNum, data, formula=formula)
rows[rowNum].append(cell)
self.__cells[cell.id] = cell
columns[colNum].append(cell)
self.__cells[cellId] = cell
self.__rows = rows
self.__cols = columns
self.loaded=True
Expand Down

0 comments on commit 36d9e39

Please sign in to comment.