Skip to content

Commit

Permalink
Updated version to 0.4
Browse files Browse the repository at this point in the history
  • Loading branch information
Ståle Undheim committed May 28, 2014
2 parents 05c147b + 5fa8fa8 commit ce072f0
Show file tree
Hide file tree
Showing 9 changed files with 235 additions and 157 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ ignored-files/
*.sublime-*
.pypirc
dist/
.tox
14 changes: 12 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
python-xlsx
===========

A small footprint xslx reader that understands shared strings and can process
A small footprint xlsx reader that understands shared strings and can process
excel dates.


Requirements
------------

No external requirements. Supports Python versions 2.6+ and 3.2+.


Usage
+++++++
-----

::

book = Workbook('filename or filedescriptor') #Open xlsx file
for sheet in book:
print(sheet.name)
# for larger workbooks, use sheet.rowsIter() instead of
# sheet.rows().items()
for row, cells in sheet.rows().items(): # or sheet.cols()
print(row) # prints row number
for cell in cells:
Expand All @@ -22,6 +31,7 @@ Usage
some_sheet = book['some sheet name']
...


Alternatives
------------

Expand Down
10 changes: 9 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup

setup(
version="0.2",
version="0.4",
name='py-xlsx',
description="""Tiny python code for parsing data from Microsoft's Office
Open XML Spreadsheet format""",
Expand All @@ -13,10 +13,18 @@
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
],
author='Staale Undheim',
author_email='[email protected]',
url='http:/staale/python-xlsx',
tests_require = ['six'],
packages=[
"xlsx"
],
Expand Down
18 changes: 18 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# tox configuration: runs the test suite once per interpreter listed in envlist.
[tox]
envlist = py26, py27, py32, py33, py34


# Base settings for the test environments.
[testenv]
usedevelop = True
# Packages installed into the environment for the test run.
deps=
six
pytest

# Test command: run py.test on the tests under xlsx/tests.
commands =
py.test xlsx/tests


# py34 env not available in tox <= 1.7, so create it ourselves.
# It names the interpreter explicitly and reuses the dependency list from
# [testenv] via substitution.
[testenv:py34]
basepython = python3.4
deps = {[testenv]deps}
49 changes: 34 additions & 15 deletions xlsx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# -*- coding: utf-8 -*-
""" Small footprint xlsx reader """

from __future__ import unicode_literals

__author__="Ståle Undheim <[email protected]>"

import re
import zipfile
from xldate import xldate_as_tuple
from formatting import is_date_format_string
from xlsx.xldate import xldate_as_tuple
from xlsx.formatting import is_date_format_string
from xlsx.timemachine import UnicodeMixin

try:
from xml.etree import cElementTree as ET
Expand All @@ -26,6 +30,7 @@ def __init__(self, filename):
"""

self.ziphandle = None
self.ziphandle = zipfile.ZipFile(filename, 'r')

def __getitem__(self, key):
Expand All @@ -41,7 +46,8 @@ def __getitem__(self, key):
def __del__(self):
"""Close the zip file when finished"""

self.ziphandle.close()
if self.ziphandle:
self.ziphandle.close()

class Workbook(object):
"""Main class that contains sheets organized by name or by id.
Expand Down Expand Up @@ -127,17 +133,16 @@ def __init__(self, workbook, id, name):
self.loaded = False
self.addrPattern = re.compile("([a-zA-Z]*)(\d*)")
self.__cells = {}
self.__cols = {}
self.__rows = {}
self.__cols = None
self.__rows = None

def __load(self):
def rowsIter(self):
sheetDoc = self.workbook.domzip["xl/worksheets/sheet%d.xml" % self.id]
sheetData = sheetDoc.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}sheetData")
# @type sheetData Element
rows = {}
columns = {}
for rowNode in sheetData:
rowNum = int(rowNode.get("r"))
rowCells = []
for columnNode in rowNode:
colType = columnNode.get("t")
cellId = columnNode.get("r")
Expand Down Expand Up @@ -167,14 +172,22 @@ def __load(self):
formula = columnNode.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}f").text
except Exception:
raise #pass
if not rowNum in rows:
rows[rowNum] = []
cell = Cell(rowNum, colNum, data, formula=formula)
rowCells.append(cell)
yield rowNum, rowCells

def __load(self):
rows = {}
columns = {}
for rowNum, row in self.rowsIter():
rows[rowNum] = row

for cell in row:
colNum = cell.column
if not colNum in columns:
columns[colNum] = []
cell = Cell(rowNum, colNum, data, formula=formula)
rows[rowNum].append(cell)
self.__cells[cell.id] = cell
columns[colNum].append(cell)
self.__cells[cellId] = cell
self.__rows = rows
self.__cols = columns
self.loaded=True
Expand Down Expand Up @@ -207,10 +220,16 @@ def __iter__(self):
self.__load()
return self.__cells.__iter__()

<<<<<<< HEAD
def __repr__(self):
return "%r[%r]"%(self.workbook, self.name)

class Cell(object):
=======

class Cell(UnicodeMixin):

>>>>>>> 5fa8fa8761d3bfcb3ce1b1b730913f6d4d0ab0c9
def __init__(self, row, column, value, formula=None):
self.row = int(row)
self.column = column
Expand Down Expand Up @@ -248,5 +267,5 @@ def __ge__(self, other):
return self.__cmp__(other) != -1

def __unicode__(self):
return u"<Cell [%s] : \"%s\" (%s)>" % (self.id, self.value,
self.formula, )
return "<Cell [%s] : \"%s\" (%s)>" % (self.id, self.value,
self.formula, )
37 changes: 19 additions & 18 deletions xlsx/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,33 @@
# 2007-09-08 SJM Work around corrupt STYLE record
# 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file

from __future__ import unicode_literals
import re


date_chars = u'ymdhs' # year, month/minute, day, hour, second
date_chars = 'ymdhs' # year, month/minute, day, hour, second
date_char_dict = {}
for _c in date_chars + date_chars.upper():
date_char_dict[_c] = 5
del _c, date_chars

skip_char_dict = {}
for _c in u'$-+/(): ':
for _c in '$-+/(): ':
skip_char_dict[_c] = 1

num_char_dict = {
u'0': 5,
u'#': 5,
u'?': 5,
'0': 5,
'#': 5,
'?': 5,
}

non_date_formats = {
u'0.00E+00':1,
u'##0.0E+0':1,
u'General' :1,
u'GENERAL' :1, # OOo Calc 1.1.4 does this.
u'general' :1, # pyExcelerator 0.6.3 does this.
u'@' :1,
'0.00E+00':1,
'##0.0E+0':1,
'General' :1,
'GENERAL' :1, # OOo Calc 1.1.4 does this.
'general' :1, # pyExcelerator 0.6.3 does this.
'@' :1,
}

fmt_bracketed_sub = re.compile(r'\[[^]]*\]').sub
Expand All @@ -69,35 +70,35 @@ def is_date_format_string(fmt):
# TODO: u'[h]\\ \\h\\o\\u\\r\\s' ([h] means don't care about hours > 23)
state = 0
s = ''
ignorable = skip_char_dict.has_key
ignorable = lambda key: key in skip_char_dict
for c in fmt:
if state == 0:
if c == u'"':
if c == '"':
state = 1
elif c in ur"\_*":
elif c in r"\_*":
state = 2
elif ignorable(c):
pass
else:
s += c
elif state == 1:
if c == u'"':
if c == '"':
state = 0
elif state == 2:
# Ignore char after backslash, underscore or asterisk
state = 0
assert 0 <= state <= 2
s = fmt_bracketed_sub('', s)
if non_date_formats.has_key(s):
if s in non_date_formats:
return False
state = 0
separator = ";"
got_sep = 0
date_count = num_count = 0
for c in s:
if date_char_dict.has_key(c):
if c in date_char_dict:
date_count += date_char_dict[c]
elif num_char_dict.has_key(c):
elif c in num_char_dict:
num_count += num_char_dict[c]
elif c == separator:
got_sep = 1
Expand Down
Loading

0 comments on commit ce072f0

Please sign in to comment.