Skip to content

Commit

Permalink
Fix bug in sentence starts for non-projective parses
Browse files (browse the repository at this point in the history)
The set_children_from_heads function assumed parse trees were
projective. However, non-projective parses may be passed in during
deserialization, or after deprojectivising. This caused incorrect
sentence boundaries to be set for non-projective parses. The edge-setting
passes are now run twice, and each pass updates both the left and the
right edge of the head. Closes #2772.
  • Loading branch information
honnibal committed Sep 19, 2018
1 parent 48fd36b commit 1759abf
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 20 deletions.
1 change: 0 additions & 1 deletion spacy/tests/regression/test_issue2772.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest
from ..util import get_doc

@pytest.mark.xfail
def test_issue2772(en_vocab):
words = 'When we write or communicate virtually , we can hide our true feelings .'.split()
# A tree with a non-projective (i.e. crossing) arc
Expand Down
41 changes: 22 additions & 19 deletions spacy/tokens/doc.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -993,25 +993,28 @@ cdef int set_children_from_heads(TokenC* tokens, int length) except -1:
tokens[i].r_kids = 0
tokens[i].l_edge = i
tokens[i].r_edge = i
# Set left edges
for i in range(length):
child = &tokens[i]
head = &tokens[i + child.head]
if child < head:
head.l_kids += 1
if child.l_edge < head.l_edge:
head.l_edge = child.l_edge

# Set right edges --- same as above, but iterate in reverse
for i in range(length-1, -1, -1):
child = &tokens[i]
head = &tokens[i + child.head]
if child > head:
head.r_kids += 1
if child.r_edge > head.r_edge:
head.r_edge = child.r_edge


# Twice, for non-projectivity
for _ in range(2):
# Set left edges
for i in range(length):
child = &tokens[i]
head = &tokens[i + child.head]
if child < head:
head.l_kids += 1
if child.l_edge < head.l_edge:
head.l_edge = child.l_edge
if child.r_edge > head.r_edge:
head.r_edge = child.r_edge
# Set right edges --- same as above, but iterate in reverse
for i in range(length-1, -1, -1):
child = &tokens[i]
head = &tokens[i + child.head]
if child > head:
head.r_kids += 1
if child.r_edge > head.r_edge:
head.r_edge = child.r_edge
if child.l_edge < head.l_edge:
head.l_edge = child.l_edge
# Set sentence starts
for i in range(length):
if tokens[i].head == 0 and tokens[i].dep != 0:
Expand Down

0 comments on commit 1759abf

Please sign in to comment.