From 680d615bb3f3662ce2a9c9cbe0bcbe1561077746 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Sun, 5 Apr 2020 17:47:04 +0200
Subject: [PATCH 1/3] Use inline flags in token_match patterns

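Use inline flags in the `token_match` patterns instead of passing them as
a separate `flags` argument to `re.compile`, so that the flag information
is part of the pattern string itself and is not lost when the tokenizer
is serialized.

Also add `token_match` to the keys that are passed through
`unescape_unicode` in the `spacy/tokenizer.pyx` deserialization code.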
---
 spacy/lang/fr/tokenizer_exceptions.py | 2 +-
 spacy/lang/tokenizer_exceptions.py    | 2 +-
 spacy/tokenizer.pyx                   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/spacy/lang/fr/tokenizer_exceptions.py b/spacy/lang/fr/tokenizer_exceptions.py
index dfcb2756e4f..7899cfc9bdb 100644
--- a/spacy/lang/fr/tokenizer_exceptions.py
+++ b/spacy/lang/fr/tokenizer_exceptions.py
@@ -461,5 +461,5 @@ def lower_first_letter(text):
 
 TOKENIZER_EXCEPTIONS = _exc
 TOKEN_MATCH = re.compile(
-    "|".join("(?:{})".format(m) for m in _regular_exp), re.IGNORECASE | re.UNICODE
+        "(?iu:" + "|".join("(?:{})".format(m) for m in _regular_exp) + ")"
 ).match
diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index 385afb8bd3d..0de554f9a7a 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -58,7 +58,7 @@
     # fmt: on
 ).strip()
 
-TOKEN_MATCH = re.compile(URL_PATTERN, re.UNICODE).match
+TOKEN_MATCH = re.compile("(?u:" + URL_PATTERN + ")").match
 
 
 BASE_EXCEPTIONS = {}
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 4da08125976..62b8bbf4a8e 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -567,7 +567,7 @@ cdef class Tokenizer:
         ))
         exclude = util.get_serialization_exclude(deserializers, exclude, kwargs)
         msg = util.from_bytes(bytes_data, deserializers, exclude)
-        for key in ["prefix_search", "suffix_search", "infix_finditer"]:
+        for key in ["prefix_search", "suffix_search", "infix_finditer", "token_match"]:
             if key in data:
                 data[key] = unescape_unicode(data[key])
         if "prefix_search" in data and isinstance(data["prefix_search"], basestring_):

From 8eb23dd4e4de63ae2d0b81e76a55cd911750bcb8 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 6 Apr 2020 09:59:08 +0200
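Subject: [PATCH 2/3] Modify inline flag

In the base `TOKEN_MATCH`, replace the scoped flag group
`"(?u:" + URL_PATTERN + ")"` with a global inline flag at the start of
the pattern, `"(?u)" + URL_PATTERN`. The scoped `(?flags:...)` group
syntax is only available from Python 3.6 on, whereas a leading `(?u)` is
also accepted by older versions.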

---
 spacy/lang/tokenizer_exceptions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index 0de554f9a7a..29ce754429c 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -58,7 +58,7 @@
     # fmt: on
 ).strip()
 
-TOKEN_MATCH = re.compile("(?u:" + URL_PATTERN + ")").match
+TOKEN_MATCH = re.compile("(?u)" + URL_PATTERN).match
 
 
 BASE_EXCEPTIONS = {}

From c1cf5da232756179a4e26f161b3ee75e4010f37a Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 6 Apr 2020 10:03:52 +0200
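Subject: [PATCH 3/3] Modify inline flag

In the French `TOKEN_MATCH`, replace the scoped flag group `(?iu:...)`
wrapped around the joined `_regular_exp` alternatives with a global
inline flag `(?iu)` at the start of the pattern. The scoped
`(?flags:...)` group syntax is only available from Python 3.6 on, whereas
a leading `(?iu)` is also accepted by older versions.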

---
 spacy/lang/fr/tokenizer_exceptions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/lang/fr/tokenizer_exceptions.py b/spacy/lang/fr/tokenizer_exceptions.py
index 7899cfc9bdb..cb17023009e 100644
--- a/spacy/lang/fr/tokenizer_exceptions.py
+++ b/spacy/lang/fr/tokenizer_exceptions.py
@@ -461,5 +461,5 @@ def lower_first_letter(text):
 
 TOKENIZER_EXCEPTIONS = _exc
 TOKEN_MATCH = re.compile(
-        "(?iu:" + "|".join("(?:{})".format(m) for m in _regular_exp) + ")"
+        "(?iu)" + "|".join("(?:{})".format(m) for m in _regular_exp)
 ).match