View file engine/functions/censure.php

File size: 6.05Kb
<?php

/**
 * Searches a UTF-8 text for obscene words (Russian and a few English stems)
 * and returns the first hit together with a few words of surrounding context.
 *
 * Processing steps:
 *   1. A handful of obfuscation tricks are undone (soft hyphens, combining
 *      acute accents, "/\" and "/|" used instead of the Cyrillic "л").
 *   2. The text is reduced to space-separated tokens made only of lowercase
 *      Cyrillic letters (а-я, ё), lowercase Latin letters and digits.
 *      No case folding is performed here, so uppercase letters are dropped
 *      like any other separator -- NOTE(review): callers presumably lowercase
 *      the text beforehand; confirm against call sites.
 *   3. Runs of the same character are collapsed into one character.
 *   4. The normalized text is matched against the bad-word pattern.
 *
 * @param string $s        Text to check (UTF-8).
 * @param int    $delta    Maximum number of context words kept on each side of
 *                         the hit; values outside 1..10 fall back to 3.
 * @param string $continue Marker added on a side where the context was cut
 *                         (defaults to the UTF-8 horizontal ellipsis).
 *
 * @return string|false    Normalized fragment with the hit wrapped in square
 *                         brackets, or false when nothing was found.
 */
function censure($s, $delta = 3, $continue = "\xe2\x80\xa6")
{
    // Prefixes that may precede a bad-word stem. In every pattern below "_"
    // is a placeholder for a single space (see $re_trans).
    static $pretext = array(
        '[уyоo]_?        (?=[еёeхx])',
        '[вvbсc]_?       (?=[хпбмгжxpmgj])',
        '[вvbсc]_?[ъь]_? (?=[еёe])',
        'ё_?             (?=[бb])',
        '[вvb]_?[ыi]_?',
        '[зz3]_?[аa]_?',
        '[нnh]_?[аaеeиi]_?',
        '[вvb]_?[сc]_?          (?=[хпбмгжxpmgj])',
        '[оo]_?[тtбb]_?         (?=[хпбмгжxpmgj])',
        '[оo]_?[тtбb]_?[ъь]_?   (?=[еёe])',
        '[иiвvb]_?[зz3]_?       (?=[хпбмгжxpmgj])',
        '[иiвvb]_?[зz3]_?[ъь]_? (?=[еёe])',
        '[иi]_?[сc]_?           (?=[хпбмгжxpmgj])',
        '[пpдdg]_?[оo]_? (?> [бb]_?         (?=[хпбмгжxpmgj])
                           | [бb]_?  [ъь]_? (?=[еёe])
                           | [зz3]_? [аa] _?
                         )?',
        '[пp]_?[рr]_?[оoиi]_?',
        '[зz3]_?[лl]_?[оo]_?',
        '[нnh]_?[аa]_?[дdg]_?         (?=[хпбмгжxpmgj])',
        '[нnh]_?[аa]_?[дdg]_?[ъь]_?   (?=[еёe])',
        '[пp]_?[оo]_?[дdg]_?          (?=[хпбмгжxpmgj])',
        '[пp]_?[оo]_?[дdg]_?[ъь]_?    (?=[еёe])',
        '[рr]_?[аa]_?[зz3сc]_?        (?=[хпбмгжxpmgj])',
        '[рr]_?[аa]_?[зz3сc]_?[ъь]_?  (?=[еёe])',
        '[вvb]_?[оo]_?[зz3сc]_?       (?=[хпбмгжxpmgj])',
        '[вvb]_?[оo]_?[зz3сc]_?[ъь]_? (?=[еёe])',
        '[нnh]_?[еe]_?[дdg]_?[оo]_?',
        '[пp]_?[еe]_?[рr]_?[еe]_?',
        '[oо]_?[дdg]_?[нnh]_?[оo]_?',
        '[кk]_?[oо]_?[нnh]_?[оo]_?',
        '[мm]_?[уy]_?[дdg]_?[оoаa]_?',
        '[oо]_?[сc]_?[тt]_?[оo]_?',
        '[дdg]_?[уy]_?[рpr]_?[оoаa]_?',
        '[хx]_?[уy]_?[дdg]_?[оoаa]_?',
        '[мm]_?[нnh]_?[оo]_?[гg]_?[оo]_?',
        '[мm]_?[оo]_?[рpr]_?[дdg]_?[оoаa]_?',
        '[мm]_?[оo]_?[зz3]_?[гg]_?[оoаa]_?',
        '[дdg]_?[оo]_?[лl]_?[бb6]_?[оoаa]_?',
    );

    // Bad-word stems. Every alternative starts with a look-behind for a space
    // or a digit, so a hit can only begin at a token start or right after a
    // digit. {RE_PRETEXT} is replaced by the alternation of $pretext.
    static $badwords = array(
        '(?<=[_\d]) {RE_PRETEXT}?
         [hхx]_?[уyu]_?[йiеeёяюju]     #хуй, хуя, хую, хуем, хуёвый
         #исключения:
         (?<! _hue(?=_)    #HUE    -- цветовая палитра
            | _hue(?=so_)  #hueso  -- испанское слово
            | _хуе(?=дин)  #Хуедин -- город в Румынии
         )',

        '(?<=[_\d]) {RE_PRETEXT}?
         [пp]_?[иi]_?[зz3]_?[дd]_?[:vowel:]',

        '(?<=[_\d]) {RE_PRETEXT}?
         [eеё]_? (?<!не[её]_) [бb6]_?(?: [уyиi]_                       #ебу, еби
                                       | [ыиiоoaаеeёуy]_?[:consonant:] #ебут, ебать, ебись, ебёт, поеботина, выебываться, ёбарь
                                       | [лl][оoаaыиi]                 #ебло, ебла, ебливая, еблись, еблысь
                                       | [нn]_?[уy]                    #ёбнул, ёбнутый
                                       | [кk]_?[аa]                    #взъёбка
                                      )',

        '(?<=[_\d]) {RE_PRETEXT}
         (?<=[^_\d][^_\d]|[^_\d]_[^_\d]_) [eеё]_?[бb6] (?:_|_?[аa]_?[^_\d])',

        '(?<=[_\d]) {RE_PRETEXT}?
         [бb6]_?[лl]_?(?:я|ya)(?: _       #бля
                                | _?[тд]  #блять, бляди
                              )',

        '(?<=[_\d]) [пp]_?[иieе]_?[дdg]_?[eеaаoо]_?[rpр]',
        '(?<=[_\d]) [мm]_?[уy]_?[дdg]_?[аa]',
        '(?<=[_\d]) [zж]_?h?_?[оo]_?[pп]_?[aаyуыiеeoо]',
        '(?<=[_\d]) [гg]_?[оo]_?[вvb]_?[нnh]_?[оoаaяеeyу]',
        '(?<=[_\d]) f_?u_?[cс]_?k',
    );

    // Placeholders used inside the patterns above.
    static $re_trans = array(
        '_'             => '\x20',
        '[:vowel:]'     => '[аеиоуыэюяёaeioyu]',
        '[:consonant:]' => '[^аеиоуыэюяёaeioyu\x20\d]',
    );

    // The final pattern depends only on the static tables above, so it is
    // assembled once and reused by later calls. {RE_PRETEXT} has to be
    // expanded before the "_" placeholder is translated, because the
    // placeholder name itself contains an underscore.
    static $re_badwords = null;
    if ($re_badwords === null) {
        $re_badwords = strtr(
            str_replace('{RE_PRETEXT}',
                        '(?>' . implode('|', $pretext) . ')',
                        '~' . implode('|', $badwords) . '~sxu'),
            $re_trans
        );
    }

    // Undo common obfuscation before tokenizing.
    static $trans = array(
        "\xc2\xad" => '',                               // soft hyphen (U+00AD)
        "\xcc\x81" => '',                               // combining acute accent (U+0301)
        '/\\'      => 'л',                              // "/\" drawn instead of the letter
        '/|'       => 'л',                              // "/|" drawn instead of the letter
        "\xd0\xb5\xd0\xb5" => "\xd0\xb5\xd1\x91",       // "ее" -> "её"
    );
    $s = strtr($s, $trans);

    // Keep only runs of lowercase Cyrillic/Latin letters and digits, then
    // join them with single spaces and pad both ends with a space so that the
    // look-behinds of the bad-word pattern also work for the first token.
    preg_match_all('/(?> \xd0[\xb0-\xbf]|\xd1[\x80-\x8f\x91]  #[а-я]
                      |  [a-z\d]+
                      )+
                    /sx', $s, $m);
    $s = ' ' . implode(' ', $m[0]) . ' ';

    // Collapse repeated characters ("aaa" -> "a").
    $s = preg_replace('/(  [\xd0\xd1][\x80-\xbf]  #оптимизированное [а-я]
                         | [a-z\d]
                         ) \\1+
                       /sx', '$1', $s);

    if (! preg_match($re_badwords, $s, $m, PREG_OFFSET_CAPTURE)) {
        return false;
    }

    // PREG_OFFSET_CAPTURE yields byte offsets, hence the byte-wise
    // substr()/strlen() below are the right tools here.
    $word   = $m[0][0];
    $offset = $m[0][1];
    $s1 = substr($s, 0, $offset);
    $s2 = substr($s, $offset + strlen($word));

    $delta = intval($delta);
    if ($delta < 1 || $delta > 10) {
        $delta = 3;
    }

    // Up to $delta words immediately before the hit.
    preg_match('/  (?> \x20 (?>[\xd0\xd1][\x80-\xbf]|[a-z\d]+)+ ){1,' . $delta . '}
                       \x20?
                    $/sx', $s1, $m1);
    // The rest of the matched token plus up to $delta words after the hit.
    preg_match('/^ (?>[\xd0\xd1][\x80-\xbf]|[a-z\d]+)*  #окончание
                       \x20?
                       (?> (?>[\xd0\xd1][\x80-\xbf]|[a-z\d]+)+ \x20 ){1,' . $delta . '}
                    /sx', $s2, $m2);

    // Either context regex may legitimately fail (the hit is the first or
    // the last token). Use an explicit empty default instead of "@": the
    // suppressed array read used to feed null into ltrim()/rtrim(), which is
    // deprecated since PHP 8.1.
    $before = isset($m1[0]) ? $m1[0] : '';
    $after  = isset($m2[0]) ? $m2[0] : '';

    // Add the continuation marker only on a side where context was cut.
    $prefix = (ltrim($before) !== ltrim($s1)) ? $continue : '';
    $suffix = (rtrim($after) !== rtrim($s2)) ? $continue : '';

    return $prefix . trim($before . '[' . trim($word) . ']' . $after) . $suffix;
}

?>