From d8d50a0334740babf1891cae09dcfb19e27d324d Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 17 Dec 2016 11:59:28 +0100 Subject: [PATCH] Add tokenizer exception for "gonna" (fixes #691) --- spacy/en/language_data.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/spacy/en/language_data.py b/spacy/en/language_data.py index 851a12c296c..aec221de896 100644 --- a/spacy/en/language_data.py +++ b/spacy/en/language_data.py @@ -1493,6 +1493,16 @@ def get_time_exc(hours): {ORTH: "ma"} ], + "gonna": [ + {ORTH: "gon", LEMMA: "go"}, + {ORTH: "na", LEMMA: "to"} + ], + + "Gonna": [ + {ORTH: "Gon", LEMMA: "go"}, + {ORTH: "na", LEMMA: "to"} + ], + "whats": [ {ORTH: "what"}, {ORTH: "s"}