Skip to content

Commit

Permalink
Allow Span to take text label. Fixes #3027
Browse files Browse the repository at this point in the history
  • Loading branch information
honnibal committed Dec 8, 2018
1 parent ffdd5e9 commit 9add03b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
13 changes: 12 additions & 1 deletion spacy/tests/doc/test_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest
from spacy.attrs import ORTH, LENGTH
from spacy.tokens import Doc
from spacy.tokens import Doc, Span
from spacy.vocab import Vocab

from ..util import get_doc
Expand Down Expand Up @@ -154,6 +154,17 @@ def test_span_as_doc(doc):
assert span.text == span_doc.text.strip()


def test_span_string_label(doc):
    """A string passed as `label` to the Span constructor is accepted.

    The span must report the same text through `label_`, and its `label`
    must equal the value `doc.vocab.strings` returns for that text.
    """
    label_text = 'hello'
    labelled = Span(doc, 0, 1, label=label_text)
    assert labelled.label_ == label_text
    assert labelled.label == doc.vocab.strings[label_text]

def test_span_string_set_label(doc):
    """Assigning a string to `label_` after construction updates the span.

    The span is created without a label; after setting `label_`, reading
    `label_` must give the same text back and `label` must equal the value
    `doc.vocab.strings` returns for that text.
    """
    label_text = 'hello'
    unlabelled = Span(doc, 0, 1)
    unlabelled.label_ = label_text
    assert unlabelled.label_ == label_text
    assert unlabelled.label == doc.vocab.strings[label_text]

def test_span_ents_property(doc):
"""Test span.ents for the """
doc.ents = [
Expand Down
8 changes: 6 additions & 2 deletions spacy/tokens/span.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ from ..parts_of_speech cimport univ_pos_t
from ..util import normalize_slice
from ..attrs cimport IS_PUNCT, IS_SPACE
from ..lexeme cimport Lexeme
from ..compat import is_config
from ..compat import is_config, basestring_
from ..errors import Errors, TempErrors, Warnings, user_warning, models_warning
from .underscore import Underscore, get_ext_args

Expand All @@ -42,7 +42,7 @@ cdef class Span:
raise ValueError(Errors.E046.format(name=name))
return Underscore.span_extensions.pop(name)

def __cinit__(self, Doc doc, int start, int end, attr_t label=0,
def __cinit__(self, Doc doc, int start, int end, label=0,
vector=None, vector_norm=None):
"""Create a `Span` object from the slice `doc[start : end]`.
Expand All @@ -64,6 +64,8 @@ cdef class Span:
self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1])
else:
self.end_char = 0
if isinstance(label, basestring_):
label = doc.vocab.strings.add(label)
if label not in doc.vocab.strings:
raise ValueError(Errors.E084.format(label=label))
self.label = label
Expand Down Expand Up @@ -601,6 +603,8 @@ cdef class Span:
"""RETURNS (unicode): The span's label."""
def __get__(self):
return self.doc.vocab.strings[self.label]
def __set__(self, unicode label_):
self.label = self.doc.vocab.strings.add(label_)


cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1:
Expand Down

0 comments on commit 9add03b

Please sign in to comment.