Skip to content

Commit

Permalink
remove (working) unit tests with pre-trained models
Browse files Browse the repository at this point in the history
  • Loading branch information
svlandeg committed Sep 18, 2019
1 parent 08b06e4 commit fc779b3
Showing 1 changed file with 0 additions and 110 deletions.
110 changes: 0 additions & 110 deletions spacy/tests/regression/test_issue4267.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,56 +42,6 @@ def test_issue4267():
assert token.ent_iob == 2


def test_multiple_ner():
    """ Check that two NER components can run in sequence, with the second one overwriting O annotations left by the first """
    nlp = English()

    # Step 1: an untrained NER that only has the "SMURFS" label registered.
    # The test expects it to mark every token as O.
    blank_ner = nlp.create_pipe("ner")
    blank_ner.add_label("SMURFS")
    nlp.add_pipe(blank_ner, name="uner")
    nlp.begin_training()

    # Step 2: the NER from a pre-trained pipeline, appended after the blank
    # one. It is expected to tag "Antti L Korhonen" as PERSON and "Finland"
    # as GPE.
    # TODO: relies on a statistical model, so not really suitable as a unit test
    pretrained_pipeline = spacy.load("en_core_web_lg")
    nlp.add_pipe(pretrained_pipeline.get_pipe("ner"))

    doc = nlp("This is Antti L Korhonen speaking in Finland")

    actual_iobs = [tok.ent_iob_ for tok in doc]
    assert actual_iobs == ["O", "O", "B", "I", "I", "O", "O", "B"]

    actual_types = [tok.ent_type_ for tok in doc]
    assert actual_types == ["", "", "PERSON", "PERSON", "PERSON", "", "", "GPE"]


def test_multiple_ner_2():
    """ Check that two NER components can run in sequence, with the second one respecting the annotations set by the first """
    nlp = English()

    # The untrained NER is created and added first, but ends up running
    # second (see the `before="uner"` insertion below). Only "SMURFS" is
    # registered on it; PERSON and GPE are not added to this component.
    blank_ner = nlp.create_pipe("ner")
    blank_ner.add_label("SMURFS")
    nlp.add_pipe(blank_ner, name="uner")
    nlp.begin_training()

    # The pre-trained NER is inserted in front of the untrained one, so it
    # runs first. It is expected to tag "Antti Korhonen" as PERSON and
    # "Finland" as GPE.
    # TODO: relies on a statistical model, so not really suitable as a unit test
    pretrained_pipeline = spacy.load("en_core_web_lg")
    nlp.add_pipe(pretrained_pipeline.get_pipe("ner"), before="uner")

    doc = nlp("This is Antti Korhonen speaking in Finland")

    # The untrained NER may do anything with the remaining tokens, so only
    # the tokens annotated by the trained NER are checked.
    checked_tokens = [
        (2, "B", "PERSON"),
        (3, "I", "PERSON"),
        (6, "B", "GPE"),
    ]
    for index, iob, label in checked_tokens:
        assert doc[index].ent_iob_ == iob
        assert doc[index].ent_type_ == label


def test_ruler_before_ner():
""" Test that an NER works after an entity_ruler: the second can add annotations """
nlp = English()
Expand Down Expand Up @@ -181,66 +131,6 @@ def test_block_ner():
assert [token.ent_type_ for token in doc] == expected_types


def test_preset_ner_1():
    """ Check that an NER leaves a pre-set single-token entity untouched """
    nlp = English()

    # Step 1: a component that pre-sets token 2 ("Antti") as PEEPZ.
    preset = PresetComponent(2, 3)
    nlp.add_pipe(preset)

    # Step 2: the NER from a pre-trained pipeline. It should leave "Antti"
    # alone and tag "Finland" as GPE.
    # TODO: relies on a statistical model, so not really suitable as a unit test
    pretrained_pipeline = spacy.load("en_core_web_lg")
    nlp.add_pipe(pretrained_pipeline.get_pipe("ner"))

    doc = nlp("This is Antti speaking in Finland")

    actual_iobs = [tok.ent_iob_ for tok in doc]
    assert actual_iobs == ["O", "O", "B", "O", "O", "B"]

    actual_types = [tok.ent_type_ for tok in doc]
    assert actual_types == ["", "", "PEEPZ", "", "", "GPE"]


def test_preset_ner_2():
    """ Check that an NER leaves a pre-set two-token entity untouched """
    nlp = English()

    # Step 1: a component that pre-sets tokens 2-3 ("Antti Korhonen") as PEEPZ.
    preset = PresetComponent(2, 4)
    nlp.add_pipe(preset)

    # Step 2: the NER from a pre-trained pipeline. It should leave
    # "Antti Korhonen" alone and tag "Finland" as GPE.
    # TODO: relies on a statistical model, so not really suitable as a unit test
    pretrained_pipeline = spacy.load("en_core_web_lg")
    nlp.add_pipe(pretrained_pipeline.get_pipe("ner"))

    doc = nlp("This is Antti Korhonen speaking in Finland")

    actual_iobs = [tok.ent_iob_ for tok in doc]
    assert actual_iobs == ["O", "O", "B", "I", "O", "O", "B"]

    actual_types = [tok.ent_type_ for tok in doc]
    assert actual_types == ["", "", "PEEPZ", "PEEPZ", "", "", "GPE"]


def test_preset_ner_3():
    """ Check that an NER leaves a pre-set three-token entity untouched """
    nlp = English()

    # Step 1: a component that pre-sets tokens 2-4 ("Antti L Korhonen") as PEEPZ.
    preset = PresetComponent(2, 5)
    nlp.add_pipe(preset)

    # Step 2: the NER from a pre-trained pipeline. It should leave
    # "Antti L Korhonen" alone and tag "Finland" as GPE.
    # TODO: relies on a statistical model, so not really suitable as a unit test
    pretrained_pipeline = spacy.load("en_core_web_lg")
    nlp.add_pipe(pretrained_pipeline.get_pipe("ner"))

    doc = nlp("This is Antti L Korhonen speaking in Finland")

    actual_iobs = [tok.ent_iob_ for tok in doc]
    assert actual_iobs == ["O", "O", "B", "I", "I", "O", "O", "B"]

    actual_types = [tok.ent_type_ for tok in doc]
    assert actual_types == ["", "", "PEEPZ", "PEEPZ", "PEEPZ", "", "", "GPE"]


class BlockerComponent1(object):
name = "my_blocker"

Expand Down

0 comments on commit fc779b3

Please sign in to comment.