en_2stemming_8php_source.html

<?php

global $STEMMING_EN_STEP2A;

// Step 2 suffix table: stemming_en() replaces a matched key with the mapped value.
$STEMMING_EN_STEP2A = [
    'TIONAL' => 'TION',
    'ENCI' => 'ENCE',
    'ANCI' => 'ANCE',
    'ABLI' => 'ABLE',
    'ENTLI' => 'ENT',
    'IZER' => 'IZE',
    'IZATION' => 'IZE',
    'ATIONAL' => 'ATE',
    'ATION' => 'ATE',
    'ATOR' => 'ATE',
    'ALISM' => 'AL',
    'ALITI' => 'AL',
    'ALLI' => 'AL',
    'FULNESS' => 'FUL',
    'OUSLI' => 'OUS',
    'OUSNESS' => 'OUS',
    'IVENESS' => 'IVE',
    'IVITI' => 'IVE',
    'BILITI' => 'BLE',
    'BLI' => 'BLE',
    'FULLI' => 'FUL',
    'LESSLI' => 'LESS',
];

global $STEMMING_EN_STEP2;

// End-anchored alternation of every table key, followed by OGI and LI,
// which stemming_en() treats with dedicated rules instead of a table lookup.
$STEMMING_EN_STEP2 = '/(';
$STEMMING_EN_STEP2 .= implode('|', array_keys($STEMMING_EN_STEP2A));
$STEMMING_EN_STEP2 .= '|OGI|LI)$/';

global $STEMMING_EN_STEP3A;

// Step 3 suffix table: stemming_en() replaces a matched key with the mapped value
// (an empty value means the suffix is simply dropped).
$STEMMING_EN_STEP3A = [
    'TIONAL' => 'TION',
    'ATIONAL' => 'ATE',
    'ALIZE' => 'AL',
    'ICATE' => 'IC',
    'ICITI' => 'IC',
    'ICAL' => 'IC',
    'FUL' => '',
    'NESS' => '',
];

global $STEMMING_EN_STEP3;

// End-anchored alternation of every table key, followed by ATIVE,
// which stemming_en() handles with its own rule.
$STEMMING_EN_STEP3 = '/(';
$STEMMING_EN_STEP3 .= implode('|', array_keys($STEMMING_EN_STEP3A));
$STEMMING_EN_STEP3 .= '|ATIVE)$/';

global $STEMMING_EN_STEP4A;

// Step 4 suffix list: stemming_en() deletes a matched suffix.
$STEMMING_EN_STEP4A = [
    'AL',
    'ANCE',
    'ENCE',
    'ER',
    'IC',
    'ABLE',
    'IBLE',
    'ANT',
    'EMENT',
    'MENT',
    'ENT',
    'ISM',
    'ATE',
    'ITI',
    'OUS',
    'IVE',
    'IZE',
];

global $STEMMING_EN_STEP4;

// End-anchored alternation of every listed suffix, followed by ION,
// which stemming_en() deletes only under an extra condition.
$STEMMING_EN_STEP4 = '/(';
$STEMMING_EN_STEP4 .= implode('|', $STEMMING_EN_STEP4A);
$STEMMING_EN_STEP4 .= '|ION)$/';

global $STEMMING_EN_EX1;

// Whole-word exceptions: stemming_en() returns the mapped value directly
// for these words instead of running the suffix steps.
$STEMMING_EN_EX1 = [
    // Words mapped to a stem that differs from the word itself.
    'SKIS'   => 'SKI',
    'SKIES'  => 'SKY',
    'DYING'  => 'DIE',
    'LYING'  => 'LIE',
    'TYING'  => 'TIE',
    'IDLY'   => 'IDL',
    'GENTLY' => 'GENTL',
    'UGLY'   => 'UGLI',
    'EARLY'  => 'EARLI',
    'ONLY'   => 'ONLI',
    'SINGLY' => 'SINGL',
    // Words returned unchanged.
    'SKY'    => 'SKY',
    'NEWS'   => 'NEWS',
    'HOWE'   => 'HOWE',
    'ATLAS'  => 'ATLAS',
    'COSMOS' => 'COSMOS',
    'BIAS'   => 'BIAS',
    'ANDES'  => 'ANDES',
];

global $STEMMING_EN_EX2;

// Words that stemming_en() returns as they are once step 1a has run.
// Only the keys are consulted there; every value is 1.
$STEMMING_EN_EX2 = array_fill_keys(
    [
        'INNING',
        'OUTING',
        'CANNING',
        'HERRING',
        'EARRING',
        'PROCEED',
        'EXCEED',
        'SUCCEED',
    ],
    1
);


/**
 * Returns the characters treated as English letters: the 26 Latin letters
 * in lower case followed by the same letters in upper case (keyboard order).
 *
 * @return string
 */
function stemming_letter_en()
{
    $lower = 'qwertyuiopasdfghjklzxcvbnm';
    $upper = 'QWERTYUIOPASDFGHJKLZXCVBNM';

    return $lower . $upper;
}


/**
 * Tells whether a word should be kept, i.e. it is not an English stop word.
 *
 * Words shorter than two characters are always rejected. Any other word is looked up,
 * case-sensitively, in a list of upper-case stop words. The list is built once, on first
 * use, and is extended with the comma-separated entries of the STEMMING_STOP_EN constant
 * when that constant is defined.
 *
 * @param string $sWord Word to check.
 * @return bool false for a too-short word or a stop word, true otherwise.
 */
function stemming_stop_en($sWord)
{
    // Cached between calls; false means "not built yet".
    static $stop_list = false;

    if (mb_strlen($sWord) < 2)
    {
        return false;
    }

    if ($stop_list === false)
    {
        $stop_list = array_fill_keys(
            [
                'QUOTE', 'HTTP', 'WWW', 'RU', 'IMG', 'GIF', 'A', 'THE', 'IS',
                'ARE', 'OFF', 'ON', 'AND', 'IN', 'FOR', 'OF', 'BY', 'WITH',
                'BE', 'WAS', 'IT',
            ],
            0
        );

        // Optional project-specific additions, e.g. "FOO, BAR".
        $extra = defined('STEMMING_STOP_EN') ? explode(',', STEMMING_STOP_EN) : [];
        foreach ($extra as $entry)
        {
            $entry = trim($entry);
            if ($entry === '')
            {
                continue;
            }
            $stop_list[$entry] = 0;
        }
    }

    return !array_key_exists($sWord, $stop_list);
}


/**
 * Converts text to upper case with the multibyte-aware mb_strtoupper().
 *
 * @param string $sText Text to convert.
 * @return string The upper-cased text.
 */
function stemming_upper_en($sText)
{
    $upperCased = mb_strtoupper($sText);

    return $upperCased;
}


/**
 * Reduces an English word to its stem, following the steps of the Snowball
 * "English" (Porter2) algorithm quoted in the comments below.
 *
 * The suffix tables and every pattern used here are upper-case and case-sensitive,
 * so the word has to be passed in upper case (presumably produced by
 * stemming_upper_en() - verify against the caller). Internally a lower-case 'y'
 * marks a Y that acts as a consonant; it is turned back into 'Y' before returning.
 *
 * Relies on the global tables $STEMMING_EN_STEP2A, $STEMMING_EN_STEP2,
 * $STEMMING_EN_STEP3A, $STEMMING_EN_STEP3, $STEMMING_EN_STEP4, $STEMMING_EN_EX1
 * and $STEMMING_EN_EX2 being initialised.
 *
 * @param string $word Upper-case word to stem.
 * @return string The stem. Words of one or two characters are returned unchanged;
 *                words listed in $STEMMING_EN_EX1 return their mapped value.
 */
function stemming_en($word)
{
    global $STEMMING_EN_STEP2A;
    global $STEMMING_EN_STEP2;
    global $STEMMING_EN_STEP3A;
    global $STEMMING_EN_STEP3;
    global $STEMMING_EN_STEP4A;
    global $STEMMING_EN_STEP4;
    global $STEMMING_EN_EX1;
    global $STEMMING_EN_EX2;

    //If the word has two letters or less, leave it as it is.
    $word_len = mb_strlen($word);
    if ($word_len <= 2)
    {
        return $word;
    }

    //Whole-word exceptions are answered straight from the table.
    if (array_key_exists($word, $STEMMING_EN_EX1))
    {
        return $STEMMING_EN_EX1[$word];
    }

    //Mark an initial Y, or a Y after a vowel, as a consonant, and then establish the regions R1 and R2.
    //Because the word is upper-case, the consonant marker used here is the lower-case 'y'.
    //Neither replacement changes the length of the word, so $word_len stays valid.
    $vowels = 'AEIOUY';
    $word = preg_replace('/^Y/', 'y', $word);
    $word = preg_replace('/([' . $vowels . '])(Y)/', "\\1y", $word);

    //In any word, R1 is the region after the first non-vowel following a vowel, or the end of the word if it contains no such a non-vowel.
    //$R1 is the character offset at which that region starts.
    $R1 = 0;
    while (($R1 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R1, 1)) === false))
    {
        $R1++;
    }
    while (($R1 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R1, 1)) !== false))
    {
        $R1++;
    }
    if ($R1 < $word_len)
    {
        $R1++;
    }

    //If the word begins with gener, commun or arsen, R1 is the remainder of the word.
    //ARSEN was missing here, which made ARSENAL and ARSENIC both collapse to ARSEN.
    if (preg_match('/^COMMUN/', $word))
    {
        $R1 = 6;
    }
    if (preg_match('/^(GENER|ARSEN)/', $word))
    {
        $R1 = 5;
    }

    //R2 is found by applying the same scan again, starting at R1.
    $R2 = $R1;
    while (($R2 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R2, 1)) === false))
    {
        $R2++;
    }
    while (($R2 < $word_len) && (mb_strpos($vowels, mb_substr($word, $R2, 1)) !== false))
    {
        $R2++;
    }
    if ($R2 < $word_len)
    {
        $R2++;
    }

    //NOTE: $R1 and $R2 stay fixed while the word shrinks in the steps below. When an offset
    //ends up past the end of the word, mb_substr() yields an empty region and the
    //"suffix is in the region" tests simply fail.

    //Step 1a:
    //  Search for the longest among the following suffixes, and perform the action indicated.
    //  (All alternatives are anchored at the end, so the leftmost match is the longest suffix.)
    $found = [];
    if (preg_match('/(SSES|IED|IES|US|SS|S)$/', $word, $found))
    {
        switch ($found[0])
        {
            //sses - replace by ss
        case 'SSES':
            $word = mb_substr($word, 0, $word_len - 4) . 'SS';
            break;
            //ied+   ies* - replace by i if preceded by more than one letter, otherwise by ie  (so ties -> tie, cries -> cri)
        case 'IED':
        case 'IES':
            if (mb_strlen($word) > 4)
            {
                $word = mb_substr($word, 0, $word_len - 3) . 'I';
            }
            else
            {
                $word = mb_substr($word, 0, $word_len - 3) . 'IE';
            }
            break;
            //s  delete if the preceding word part contains a vowel not immediately before the s
            //   (so gas and this retain the s, gaps and kiwis lose it)
        case 'S':
            if (preg_match('/([' . $vowels . '].*.)(S)$/', $word))
            {
                $word = mb_substr($word, 0, $word_len - 1);
            }
            break;
            //us+   ss - do nothing
        }
    }

    //Words in the second exception list are final after step 1a.
    if (array_key_exists($word, $STEMMING_EN_EX2))
    {
        return $word;
    }

    //Step 1b:
    //  Search for the longest among the following suffixes, and perform the action indicated.
    //eed   eedly+ - replace by ee if in R1
    if (preg_match('/(EEDLY|INGLY|EDLY|EED|ING|ED)$/', $word, $found))
    {
        switch ($found[0])
        {
        case 'EEDLY':
        case 'EED':
            if (preg_match('/' . $found[0] . '$/', mb_substr($word, $R1)))
            {
                $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0])) . 'EE';
            }
            break;
        default:
            //delete if the preceding word part contains a vowel, and then
            //(an unchanged result means there was no vowel before the suffix)
            if (($step1b = preg_replace('/([' . $vowels . '].*)(ED|EDLY|ING|INGLY)$/', "\\1", $word)) !== $word)
            {
                //if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or
                if (($step1b1 = preg_replace('/(AT|BL|IZ)$/', "\\1E", $step1b)) === $step1b)
                {
                    //if the word ends with a double remove the last letter (so hopp -> hop), or
                    if (preg_match('/(BB|DD|FF|GG|MM|NN|PP|RR|TT)$/', $step1b))
                    {
                        $step1b1 = mb_substr($step1b, 0, mb_strlen($step1b) - 1);
                    }
                    else
                    {
                        //if the word is short, add e (so hop -> hope)
                        //A word is called short if it consists of a short syllable preceded by zero or more consonants.
                        //Define a short syllable in a word as either (a) a vowel followed by a non-vowel other than w, x or Y
                        //and preceded by a non-vowel, or * (b) a vowel at the beginning of the word followed by a non-vowel.
                        if (preg_match('/^[^' . $vowels . ']+[' . $vowels . '][^WXy' . $vowels . ']$/', $step1b)
                            || preg_match('/^[' . $vowels . '][^' . $vowels . ']$/', $step1b)
                        )
                        {
                            $step1b1 = $step1b . 'E';
                        }
                    }
                }
                $step1b = $step1b1;
            }
            $word = $step1b;
        }
    }

    //Step 1c: *
    //  replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
    $word = preg_replace('/^(.+[^' . $vowels . '])([yY])$/', "\\1I", $word);

    //Step 2:
    //  Search for the longest among the following suffixes, and, if found and in R1, perform the action indicated.
    if (
        preg_match($STEMMING_EN_STEP2, $word, $found)
        && preg_match('/' . $found[0] . '$/', mb_substr($word, $R1))
    )
    {
        switch ($found[0])
        {
            //ogi - replace by og if preceded by l
        case 'OGI':
            if (preg_match('/LOGI$/', $word))
            {
                $word = mb_substr($word, 0, mb_strlen($word) - 3) . 'OG';
            }
            break;
            //li - delete if preceded by one of c d e g h k m n r t
        case 'LI':
            if (preg_match('/[CDEGHKMNRT]LI$/', $word))
            {
                $word = mb_substr($word, 0, mb_strlen($word) - 2);
            }
            break;
            //every other suffix is replaced according to the step 2 table
        default:
            $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0])) . $STEMMING_EN_STEP2A[$found[0]];
        }
    }

    //Step 3:
    //  Search for the longest among the following suffixes, and, if found and in R1, perform the action indicated.
    if (
        preg_match($STEMMING_EN_STEP3, $word, $found)
        && preg_match('/' . $found[0] . '$/', mb_substr($word, $R1))
    )
    {
        switch ($found[0])
        {
            //ative - delete only if it is also in R2
        case 'ATIVE':
            if (preg_match('/ATIVE$/', mb_substr($word, $R2)))
            {
                $word = mb_substr($word, 0, mb_strlen($word) - 5);
            }
            break;
            //every other suffix is replaced according to the step 3 table
        default:
            $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0])) . $STEMMING_EN_STEP3A[$found[0]];
        }
    }

    //Step 4:
    //  Search for the longest among the following suffixes, and, if found and in R2, perform the action indicated.
    if (
        preg_match($STEMMING_EN_STEP4, $word, $found)
        && preg_match('/' . $found[0] . '$/', mb_substr($word, $R2))
    )
    {
        switch ($found[0])
        {
            //ion - delete if preceded by s or t
        case 'ION':
            if (preg_match('/[ST]ION$/', $word))
            {
                $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0]));
            }
            break;
            //every other suffix is deleted
        default:
            $word = mb_substr($word, 0, mb_strlen($word) - mb_strlen($found[0]));
        }
    }

    //Step 5:
    //  e - delete if in R2, or in R1 and not preceded by a short syllable
    //  l - delete if in R2 and preceded by l
    if (
        preg_match('/E$/', mb_substr($word, $R2))
        || (
            preg_match('/E$/', mb_substr($word, $R1))
            //Define a short syllable in a word as either (a) a vowel followed by a non-vowel other than w, x or Y
            //and preceded by a non-vowel, or * (b) a vowel at the beginning of the word followed by a non-vowel.
            && !(
                preg_match('/[^' . $vowels . '][' . $vowels . '][^WXy' . $vowels . '].$/', $word)
                || preg_match('/^[' . $vowels . '][^' . $vowels . '].$/', $word)
            )
        )
    )
    {
        $word = mb_substr($word, 0, mb_strlen($word) - 1);
    }
    elseif (preg_match('/L$/', mb_substr($word, $R2)) && preg_match('/LL$/', $word))
    {
        $word = mb_substr($word, 0, mb_strlen($word) - 1);
    }

    //Turn the consonant markers back into ordinary upper-case letters.
    return str_replace('y', 'Y', $word);
}


$STEMMING_EN_STEP3
global $STEMMING_EN_STEP3
Определения stemming.php:17

$STEMMING_EN_STEP3A
global $STEMMING_EN_STEP3A
Определения stemming.php:12

stemming_letter_en
stemming_letter_en()
Определения stemming.php:61

stemming_upper_en
stemming_upper_en($sText)
Определения stemming.php:95

$STEMMING_EN_EX2
global $STEMMING_EN_EX2
Определения stemming.php:49

$STEMMING_EN_STEP2
global $STEMMING_EN_STEP2
Определения stemming.php:10

$STEMMING_EN_STEP4
global $STEMMING_EN_STEP4
Определения stemming.php:26

stemming_en
stemming_en($word)
Определения stemming.php:100

$STEMMING_EN_STEP4A
global $STEMMING_EN_STEP4A
Определения stemming.php:19

$STEMMING_EN_EX1
global $STEMMING_EN_EX1
Определения stemming.php:28

$STEMMING_EN_STEP2A
global $STEMMING_EN_STEP2A
Определения stemming.php:2

stemming_stop_en
stemming_stop_en($sWord)
Определения stemming.php:66

elseif
if( $daysToExpire >=0 &&$daysToExpire< 60 elseif)( $daysToExpire< 0)
Определения prolog_main_admin.php:393