From 36d9e397686033e4f27e8b7162cf57c4abd0e4e6 Mon Sep 17 00:00:00 2001 From: Joshua Fialkoff Date: Fri, 23 May 2014 20:39:58 -0400 Subject: [PATCH] Added support for larger workbooks by providing a generator for iterating through rows rather than having to load everything into memory. --- README.rst | 4 ++++ xlsx/__init__.py | 27 +++++++++++++++++---------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index a844ecb..7ce1665 100644 --- a/README.rst +++ b/README.rst @@ -19,6 +19,8 @@ Usage book = Workbook('filename or filedescriptor') #Open xlsx file for sheet in book: print sheet.name + # for larger workbooks, use sheet.rowsIter() instead of + # sheet.rows().iteritems() for row, cells in sheet.rows().iteritems(): # or sheet.cols() print row # prints row number for cell in cells: @@ -30,6 +32,8 @@ Usage ... + + Alternatives ------------ diff --git a/xlsx/__init__.py b/xlsx/__init__.py index 53b4b30..f6476dd 100644 --- a/xlsx/__init__.py +++ b/xlsx/__init__.py @@ -130,17 +130,16 @@ def __init__(self, workbook, id, name): self.loaded = False self.addrPattern = re.compile("([a-zA-Z]*)(\d*)") self.__cells = {} - self.__cols = {} - self.__rows = {} + self.__cols = None + self.__rows = None - def __load(self): + def rowsIter(self): sheetDoc = self.workbook.domzip["xl/worksheets/sheet%d.xml" % self.id] sheetData = sheetDoc.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}sheetData") # @type sheetData Element - rows = {} - columns = {} for rowNode in sheetData: rowNum = int(rowNode.get("r")) + rowCells = [] for columnNode in rowNode: colType = columnNode.get("t") cellId = columnNode.get("r") @@ -170,14 +169,22 @@ def __load(self): formula = columnNode.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}f").text except Exception: raise #pass - if not rowNum in rows: - rows[rowNum] = [] + cell = Cell(rowNum, colNum, data, formula=formula) + rowCells.append(cell) + yield rowNum, rowCells + + def __load(self): + rows = {} + columns = {} + for rowNum, row in self.rowsIter(): + rows[rowNum] = row + + for cell in row: + colNum = cell.column if not colNum in columns: columns[colNum] = [] - cell = Cell(rowNum, colNum, data, formula=formula) - rows[rowNum].append(cell) + self.__cells[cell.id] = cell columns[colNum].append(cell) - self.__cells[cellId] = cell self.__rows = rows self.__cols = columns self.loaded=True