diff --git a/server/lib/cleanup/languages/th.js b/server/lib/cleanup/languages/th.js
index 5b26906f..4e26e670 100644
--- a/server/lib/cleanup/languages/th.js
+++ b/server/lib/cleanup/languages/th.js
@@ -15,25 +15,36 @@ function sortSentences(sentences) {
 // question mark http://www.royin.go.th/?page_id=10418
 // exclamation mark http://www.royin.go.th/?page_id=10433
 // Maiyamok http://www.royin.go.th/?page_id=10427
+//
+// Emoji range from
+// https://www.regextester.com/106421
+// https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript
 function clean(sentences) {
   return sentences.map((sentence) => {
     return sentence
-      .replace(/[\u200b\u200c]/g, '')  // removes zero-width chars (occurs in some Thai texts)
-      .replace(/:/g, ' : ') // add a space before and after colon
-      .replace(/\?/g, ' ? ') // adds a space before and after question mark
-      .replace(/!/g, ' ! ') // adds a space before and after exclamation mark
-      .replace(/,/g, ' ') // replaces comma with space
-      .replace(/\.(\.\s*)+/g, ' ') // replaces ellipsis (.., ...) with space
-      .replace(/\s\./g, ' ') // replaces orphan period with space
-      .replace(/(\u0E46\s*)+/g, '\u0E46') // condenses multiple Maiyamok to one Maiyamok
-      .replace(/\u0E46/g, ' \u0E46 ') // adds a space before and after Maiyamok
-      .replace(/\s+/g, ' ') // condenses multiple spaces to one space
-      .replace(/^\./, '') // removes periods at the beginning of the sentence
-      .replace(/^\s+/, '') // removes spaces at the beginning of the sentence
-      .replace(/\s+$/, '') // removes spaces at the end of the sentence
-      .replace(/\u0E40\u0E40/g, '\u0E41') // normalizes Sara E + Sara E -> Sara Ae
-      .replace(/\u0E4d([\u0E48\u0E49\u0E4A\u0E4B]*)\u0E32/g, '$1\u0E33') // normalizes Nikhahit + Sara Aa -> Sara Am
-      .replace(/([\u0E24\u0E26])\u0E32/g, '$1\u0E45') // normalizes Ru/Lu + Sara Aa -> Ru/Lu + Lakkhangyao
+      .replace(/[\u200b\u200c]/g, '')  // remove zero-width chars (occurs in some Thai texts)
+      .replace(/\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff]/g, '')  // remove emoji; note this also strips (c), (r), every char in U+2000-U+3300, and private-use chars
+      .replace(/:/g, ' : ')  // add a space before and after colon
+      .replace(/\?/g, ' ? ')  // add a space before and after question mark
+      .replace(/!/g, ' ! ')  // add a space before and after exclamation mark
+      .replace(/,/g, ' ')  // replace comma with space
+      .replace(/\.(\.\s*)+/g, ' ')  // replace ellipsis (.., ...) with space
+      .replace(/\s\./g, ' ')  // replace orphan period with space
+      .replace(/(\u0E46\s*)+/g, '\u0E46')  // condense multiple Maiyamok to one Maiyamok
+      .replace(/\u0E46/g, ' \u0E46 ')  // add a space before and after Maiyamok
+      .replace(/\s+/g, ' ')  // condense multiple spaces to one space
+      .replace(/^\.+/, '')  // remove periods at the beginning of the sentence
+      .replace(/^,+/, '')  // remove commas at the beginning of the sentence
+      .replace(/,+$/, '')  // remove commas at the end of the sentence
+      .replace(/^:+/, '')  // remove colons at the beginning of the sentence
+      .replace(/:+$/, '')  // remove colons at the end of the sentence
+      .replace(/^;+/, '')  // remove semicolons at the beginning of the sentence
+      .replace(/;+$/, '')  // remove semicolons at the end of the sentence
+      .replace(/^\s+/, '')  // remove spaces at the beginning of the sentence
+      .replace(/\s+$/, '')  // remove spaces at the end of the sentence
+      .replace(/\u0E40\u0E40/g, '\u0E41')  // normalize Sara E + Sara E -> Sara Ae
+      .replace(/\u0E4d([\u0E48\u0E49\u0E4A\u0E4B]*)\u0E32/g, '$1\u0E33')  // normalize Nikhahit + Sara Aa -> Sara Am
+      .replace(/([\u0E24\u0E26])\u0E32/g, '$1\u0E45')  // normalize Ru/Lu + Sara Aa -> Ru/Lu + Lakkhangyao
       ;
   });
 }
diff --git a/server/lib/validation/languages/th.js b/server/lib/validation/languages/th.js
index b708320d..141eda19 100644
--- a/server/lib/validation/languages/th.js
+++ b/server/lib/validation/languages/th.js
@@ -9,8 +9,8 @@
 const MIN_LENGTH = 2;
 const MAX_LENGTH = 80;
 
-// Numbers that are not allowed in a sentence depending on the language. For
-// English this is 0-9 once or multiple times after each other.
+// Numbers that are not allowed in a sentence depending on the language.
+// For English this is 0-9 once or multiple times after each other.
 // Thai digits: \u0E50-\u0E59 (๐-๙)
 const NUMBERS_REGEX = /[0-9๐-๙]+/;
 
@@ -44,7 +44,8 @@ const BEGIN_REGEX = /(^|\s+)[\u0E30\u0E32\u0E33\u0E45\u0E31\u0E34\u0E35\u0E36\u0
 /* eslint-disable-next-line no-misleading-character-class */
 const END_REGEX = /[\u0E40\u0E41\u0E42\u0E43\u0E44](\s+|$)/;
 
-// The following symbols are disallowed, please update here as well and not just the regex
+// The following symbols are disallowed;
+// please update this list as well, not just the regex,
 // to make it easier to read:
 // < > + * \ # @ ^ [ ] ( ) /
 // Paiyannoi: \u0E2F ฯ (ellipsis, abbreviation)
@@ -52,10 +53,10 @@ const END_REGEX = /[\u0E40\u0E41\u0E42\u0E43\u0E44](\s+|$)/;
 // Fongman: \u0E4F ๏ (used as bullet)
 // Angkhankhu: \u0E5A ๚ (used to mark end of section/verse)
 // Khomut: \u0E5B ๛ (used to mark end of chapter/document)
-//
-// Latin characters are disallowed as well,
-// as they can introduce difficulty for pronunciation.
-const SYMBOL_REGEX = /[<>+*\\#@^[\]()/\u0E2F\u0E46\u0E4F\u0E5A\u0E5B]|[A-Za-z]+/;
+// Latin characters (difficult to pronounce)
+// Emoji range from https://www.regextester.com/106421 and
+// https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript
+const SYMBOL_REGEX = /[<>+*\\#@^[\]()/\u0E2F\u0E46\u0E4F\u0E5A\u0E5B]|[A-Za-z]+|(\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff])/;
 
 // Any words consisting of uppercase letters or uppercase letters with a period
 // inbetween are considered abbreviations or acronyms.