Skip to content

Commit

Permalink
Merge pull request #21 from GabrieleNunez/master
Browse files Browse the repository at this point in the history
- Moved Regular Expression generation into function generateCensorChec…
  • Loading branch information
snipe committed Aug 5, 2015
2 parents f75d765 + 1065ea5 commit 18b5dd8
Showing 1 changed file with 60 additions and 42 deletions.
102 changes: 60 additions & 42 deletions src/CensorWords.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

class CensorWords
{
/*
* Cache of the regular expressions used to perform the profanity checks.
* A large number of pattern strings is generated from the loaded dictionary, which is expensive,
* so they are stored here and censorString only builds them while this property is still empty
* instead of regenerating them on every call.
*/
private $censorChecks = null;

public function __construct() {
$this->replacer = '*';
Expand Down Expand Up @@ -45,7 +51,6 @@ public function setDictionary($dictionary) {
}
}
$this->badwords = $badwords;

}


Expand All @@ -72,7 +77,55 @@ public function randCensor($chars, $len) {
substr($chars, 0, ($len%strlen($chars))));

}

/**
 * Generates the regular expressions that are used to check for profanity and
 * stores them in $this->censorChecks, one pattern per entry of $this->badwords.
 *
 * Every ASCII letter of a dictionary word is expanded into an alternation group
 * that accepts the letter itself, the letter followed by "." or "-", and a set of
 * look-alike ("leet") substitutes, so "a" also matches e.g. "4" and "@".
 *
 * Implementation notes:
 *  - Words are run through preg_quote() before expansion, so characters such as
 *    "/", "$" or "." inside a dictionary word are matched literally instead of
 *    breaking the pattern delimiter or acting as regex metacharacters.
 *  - Letters are expanded in a single pass with strtr(), so text that belongs to
 *    an already inserted group (e.g. the "&part;" alternative of "d") is never
 *    expanded a second time.
 *  - The "l" group accepts "l." like every other letter; the historical "1."
 *    alternative is kept for backward compatibility.
 *
 * NOTE(review): censorString only calls this method while $this->censorChecks is
 * empty, so the cached patterns reflect the $fullWords value of the call that
 * generated them.
 *
 * @param boolean $fullWords When true, every pattern is wrapped in \b anchors so only whole words match. Default is false
 * @return void
 */
private function generateCensorChecks($fullWords = false) {

    // leet equivalents for every letter of the alphabet
    $leet_replace = array();
    $leet_replace['a']= '(a|a\.|a\-|4|@|Á|á|À|Â|à|Â|â|Ä|ä|Ã|ã|Å|å|α|Δ|Λ|λ)';
    $leet_replace['b']= '(b|b\.|b\-|8|\|3|ß|Β|β)';
    $leet_replace['c']= '(c|c\.|c\-|Ç|ç|¢|€|<|\(|{|©)';
    $leet_replace['d']= '(d|d\.|d\-|&part;|\|\)|Þ|þ|Ð|ð)';
    $leet_replace['e']= '(e|e\.|e\-|3|€|È|è|É|é|Ê|ê|∑)';
    $leet_replace['f']= '(f|f\.|f\-|ƒ)';
    $leet_replace['g']= '(g|g\.|g\-|6|9)';
    $leet_replace['h']= '(h|h\.|h\-|Η)';
    $leet_replace['i']= '(i|i\.|i\-|!|\||\]\[|]|1|∫|Ì|Í|Î|Ï|ì|í|î|ï)';
    $leet_replace['j']= '(j|j\.|j\-)';
    $leet_replace['k']= '(k|k\.|k\-|Κ|κ)';
    // "l\." added for consistency with the other letters; "1\." kept for backward compatibility
    $leet_replace['l']= '(l|l\.|1\.|l\-|!|\||\]\[|]|£|∫|Ì|Í|Î|Ï)';
    $leet_replace['m']= '(m|m\.|m\-)';
    $leet_replace['n']= '(n|n\.|n\-|η|Ν|Π)';
    $leet_replace['o']= '(o|o\.|o\-|0|Ο|ο|Φ|¤|°|ø)';
    $leet_replace['p']= '(p|p\.|p\-|ρ|Ρ|¶|þ)';
    $leet_replace['q']= '(q|q\.|q\-)';
    $leet_replace['r']= '(r|r\.|r\-|®)';
    $leet_replace['s']= '(s|s\.|s\-|5|\$|§)';
    $leet_replace['t']= '(t|t\.|t\-|Τ|τ)';
    $leet_replace['u']= '(u|u\.|u\-|υ|µ)';
    $leet_replace['v']= '(v|v\.|v\-|υ|ν)';
    $leet_replace['w']= '(w|w\.|w\-|ω|ψ|Ψ)';
    $leet_replace['x']= '(x|x\.|x\-|Χ|χ)';
    $leet_replace['y']= '(y|y\.|y\-|¥|γ|ÿ|ý|Ÿ|Ý)';
    $leet_replace['z']= '(z|z\.|z\-|Ζ)';

    // strtr() is case-sensitive, so register both cases of every letter to keep
    // the case-insensitive expansion the dictionary relied on.
    $expansion = array();
    foreach ($leet_replace as $letter => $group) {
        $expansion[$letter] = $group;
        $expansion[strtoupper($letter)] = $group;
    }

    // The delimiters/anchors do not depend on the word, so build them once.
    $prefix = $fullWords ? '/\b' : '/';
    $suffix = $fullWords ? '\b/i' : '/i';

    // array_values() keeps the 0..n-1 indexing of the result even when the
    // dictionary has gaps in its keys; the cast turns a null dictionary into
    // an empty list instead of an error.
    $censorChecks = array();
    foreach (array_values((array) $this->badwords) as $x => $word) {
        $censorChecks[$x] = $prefix . strtr(preg_quote($word, '/'), $expansion) . $suffix;
    }

    $this->censorChecks = $censorChecks;

}

/**
* Apply censorship to $string, replacing $badwords with $censorChar.
Expand All @@ -81,53 +134,18 @@ public function randCensor($chars, $len) {
* string[string]
*/
public function censorString($string, $fullWords = false) {
$badwords = $this->badwords;
$anThis = &$this;

$leet_replace = array();
$leet_replace['a']= '(a|a\.|a\-|4|@|Á|á|À|Â|à|Â|â|Ä|ä|Ã|ã|Å|å|α|Δ|Λ|λ)';
$leet_replace['b']= '(b|b\.|b\-|8|\|3|ß|Β|β)';
$leet_replace['c']= '(c|c\.|c\-|Ç|ç|¢|€|<|\(|{|©)';
$leet_replace['d']= '(d|d\.|d\-|&part;|\|\)|Þ|þ|Ð|ð)';
$leet_replace['e']= '(e|e\.|e\-|3|€|È|è|É|é|Ê|ê|∑)';
$leet_replace['f']= '(f|f\.|f\-|ƒ)';
$leet_replace['g']= '(g|g\.|g\-|6|9)';
$leet_replace['h']= '(h|h\.|h\-|Η)';
$leet_replace['i']= '(i|i\.|i\-|!|\||\]\[|]|1|∫|Ì|Í|Î|Ï|ì|í|î|ï)';
$leet_replace['j']= '(j|j\.|j\-)';
$leet_replace['k']= '(k|k\.|k\-|Κ|κ)';
$leet_replace['l']= '(l|1\.|l\-|!|\||\]\[|]|£|∫|Ì|Í|Î|Ï)';
$leet_replace['m']= '(m|m\.|m\-)';
$leet_replace['n']= '(n|n\.|n\-|η|Ν|Π)';
$leet_replace['o']= '(o|o\.|o\-|0|Ο|ο|Φ|¤|°|ø)';
$leet_replace['p']= '(p|p\.|p\-|ρ|Ρ|¶|þ)';
$leet_replace['q']= '(q|q\.|q\-)';
$leet_replace['r']= '(r|r\.|r\-|®)';
$leet_replace['s']= '(s|s\.|s\-|5|\$|§)';
$leet_replace['t']= '(t|t\.|t\-|Τ|τ)';
$leet_replace['u']= '(u|u\.|u\-|υ|µ)';
$leet_replace['v']= '(v|v\.|v\-|υ|ν)';
$leet_replace['w']= '(w|w\.|w\-|ω|ψ|Ψ)';
$leet_replace['x']= '(x|x\.|x\-|Χ|χ)';
$leet_replace['y']= '(y|y\.|y\-|¥|γ|ÿ|ý|Ÿ|Ý)';
$leet_replace['z']= '(z|z\.|z\-|Ζ)';

$words = explode(" ", $string);

for ($x=0; $x<count($badwords); $x++) {
if($fullWords) {
$badwords[$x] = '/\b'.str_ireplace(array_keys($leet_replace),array_values($leet_replace), $badwords[$x]).'\b/i';
} else {
$badwords[$x] = '/'.str_ireplace(array_keys($leet_replace),array_values($leet_replace), $badwords[$x]).'/i';
}
}

// generate our censor checks if they are not defined yet
if(!$this->censorChecks)
$this->generateCensorChecks($fullWords);

$anThis = &$this;
$counter=0;
$match = array();
$newstring = array();
$newstring['orig'] = html_entity_decode($string);
// $anThis for <= PHP5.3
$newstring['clean'] = preg_replace_callback($badwords, function($matches) use (&$anThis,&$counter,&$match) {
$newstring['clean'] = preg_replace_callback($this->censorChecks, function($matches) use (&$anThis,&$counter,&$match) {
$match[$counter++] = $matches[0];

// is $anThis->replacer a single char?
Expand Down

0 comments on commit 18b5dd8

Please sign in to comment.