Skip to content

Commit

Permalink
Test and fix Issue #411: IndexError when .sents property is used on e…
Browse files Browse the repository at this point in the history
…mpty string.
  • Loading branch information
honnibal committed Sep 27, 2016
1 parent 3d370b7 commit fc4a7ad
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
7 changes: 6 additions & 1 deletion spacy/tests/tokens/test_tokens_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,12 @@ def test_merge_hang():
doc.merge(8, 32, '', '', 'ORG')


def test_sents_empty_string(EN):
    """Regression test for Issue #411: `.sents` on an empty document.

    Runs the pipeline on the empty string, exhausts `doc.sents`, and
    checks that no sentence spans are produced (the issue reported an
    IndexError in this situation).
    """
    empty_doc = EN(u'')
    sentence_spans = [sent for sent in empty_doc.sents]
    assert not sentence_spans


@pytest.mark.models
def test_runtime_error(EN):
# Example that caused run-time error while parsing Reddit
Expand Down Expand Up @@ -199,4 +205,3 @@ def test_right_edge(EN):
def test_has_vector(EN):
    """Check that a parsed three-word document reports `has_vector` as truthy."""
    assert EN(u'''apple orange pear''').has_vector

3 changes: 2 additions & 1 deletion spacy/tokens/doc.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,8 @@ cdef class Doc:
if self.c[i].sent_start:
yield Span(self, start, i)
start = i
yield Span(self, start, self.length)
if start != self.length:
yield Span(self, start, self.length)

cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1:
if self.length == self.max_length:
Expand Down

0 comments on commit fc4a7ad

Please sign in to comment.