Fix small issues with en/fr lemmatizers
The en lemmatizer was still importing the removed _nouns.py file, so that
import is removed.

The fr language module is unusual in that it has a lemmatizer directory with
both __init__.py and lemmatizer.py. lemmatizer.py had not been converted
to load the JSON language data, so that is fixed.
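The pattern the fix moves to can be sketched as follows. This is a minimal, hypothetical stand-in for spacy.util.load_language_data, whose real implementation may differ (e.g. handling gzipped files); it only illustrates reading a JSON lookup table next to the module:

```python
import json
from pathlib import Path


def load_language_data(path):
    # Read a JSON lookup table (e.g. lookup.json mapping word forms
    # to lemmas) and return it as a plain dict.
    path = Path(path)
    with path.open("r", encoding="utf8") as f:
        return json.load(f)


# Usage mirroring the fixed fr lemmatizer module:
# LOOKUP = load_language_data(Path(__file__).parent / "lookup.json")
```

This keeps the large lookup table out of the Python source, so it is loaded from data files instead of being baked into an importable module.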
polm committed Aug 18, 2019
1 parent ee9609a commit f7204a9
Showing 2 changed files with 5 additions and 2 deletions.
1 change: 0 additions & 1 deletion spacy/lang/en/lemmatizer/__init__.py
@@ -7,7 +7,6 @@
 from ._adjectives_irreg import ADJECTIVES_IRREG
 from ._adverbs import ADVERBS
 from ._adverbs_irreg import ADVERBS_IRREG
-from ._nouns import NOUNS
 from ._nouns_irreg import NOUNS_IRREG
 from ._verbs import VERBS
 from ._verbs_irreg import VERBS_IRREG
6 changes: 5 additions & 1 deletion spacy/lang/fr/lemmatizer/lemmatizer.py
@@ -1,9 +1,13 @@
 # coding: utf8
 from __future__ import unicode_literals
 
+from pathlib import Path
+
 from ....symbols import POS, NOUN, VERB, ADJ, ADV, PRON, DET, AUX, PUNCT, ADP, SCONJ, CCONJ
 from ....symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
-from .lookup import LOOKUP
+from ....util import load_language_data
 
+LOOKUP = load_language_data(Path(__file__).parent / 'lookup.json')
+
 '''
 French language lemmatizer applies the default rule based lemmatization
