19 static $arLanguages = [];
21 if (!isset($arLanguages[$sLang]))
24 $class_name = mb_strtolower(
'CSearchLanguage' . $sLang);
25 if (!class_exists($class_name))
28 $strDirName =
$_SERVER[
'DOCUMENT_ROOT'] . BX_PERSONAL_ROOT .
'/php_interface/' . $sLang .
'/search';
29 $strFileName = $strDirName .
'/language.php';
30 if (file_exists($strFileName))
32 $obLanguage = @include $strFileName;
35 if (!is_object($obLanguage))
37 if (!class_exists($class_name))
40 $strDirName =
$_SERVER[
'DOCUMENT_ROOT'] .
'/bitrix/modules/search/tools/' . $sLang;
41 $strFileName = $strDirName .
'/language.php';
42 if (file_exists($strFileName))
44 if (\
Bitrix\Main\Localization\Translation::allowConvertEncoding())
46 \Bitrix\Main\Localization\StreamConverter::include($strFileName, $sLang);
50 @include $strFileName;
53 if (!class_exists($class_name))
55 $class_name =
'CSearchLanguage';
61 if (!is_object($obLanguage))
63 $obLanguage =
new $class_name($sLang);
65 $obLanguage->LoadTrigrams($strDirName);
67 if (is_array($arStemInfo))
69 $obLanguage->_abc = array_flip($obLanguage->StrToArray($arStemInfo[
'abc']));
71 $obLanguage->_has_bigramm_info = is_callable([$obLanguage,
'getbigrammletterfreq']);
73 $arLanguages[$sLang] = $obLanguage;
76 return $arLanguages[$sLang];
82 if (empty($this->_trigrams))
84 $file_name = $dir_name .
'/trigram';
85 if (file_exists($file_name) && is_file($file_name))
87 $cache_id = filemtime($file_name) .
',v1,' . $file_name;
88 $obCache =
new CPHPCache;
89 if ($obCache->StartDataCache(360000, $cache_id,
'search'))
91 $text = file_get_contents($file_name);
93 if (isset($keyboard[
'trigram_charset']))
98 foreach (
$ar as $trigramm)
100 if (mb_strlen($trigramm) == 3)
103 if (mb_strlen($strScanCodesTmp) == 3)
105 $this->_trigrams[$strScanCodesTmp] =
true;
110 $obCache->EndDataCache($this->_trigrams);
114 $this->_trigrams = $obCache->GetVars();
201 static $keyboards = [];
202 $combo = $from .
'|' . $to;
204 if (!isset($keyboards[$combo]))
207 if (!array_key_exists($from, $keyboards))
210 $keyboard = $ob->GetKeyboardLayout();
211 if (is_array($keyboard))
213 $keyboards[$from] = array_merge($ob->StrToArray($keyboard[
'lo']), $ob->StrToArray($keyboard[
'hi']));
217 $keyboards[$from] =
null;
221 if (!array_key_exists($to, $keyboards))
224 $keyboard = $ob->GetKeyboardLayout();
225 if (is_array($keyboard))
227 $keyboards[$to] = array_merge($ob->StrToArray($keyboard[
'lo']), $ob->StrToArray($keyboard[
'hi']));
231 $keyboards[$to] =
null;
236 if (isset($keyboards[$from]) && isset($keyboards[$to]))
238 $keyboards[$combo] = [];
239 foreach ($keyboards[$from] as
$i =>
$ch)
243 $keyboards[$combo][
$ch] = $keyboards[$to][
$i];
249 if (isset($keyboards[$combo]))
252 foreach (
$text as $pos => $char)
254 if (isset($keyboards[$combo][$char]))
256 $text[$pos] = $keyboards[$combo][$char];
259 return implode(
'',
$text);
352 while ($arLanguage = $rsLanguages->Fetch())
354 if ($arLanguage[
'LID'] !=
'en')
356 $cache[] = $arLanguage[
'LID'];
363 $arLanguages =
$lang;
367 $arLanguages = $cache;
370 if (
count($arLanguages) < 2)
376 foreach ($arLanguages as
$lang)
393 $languages_from = [];
394 foreach ($arLanguages as
$lang)
398 $arScanCodesTmp1 = $ob->ConvertToScancode(
$text,
true);
399 $_cnt =
count(array_filter($arScanCodesTmp1));
400 if ($_cnt > $max_len)
404 $languages_from[
$lang] = $arScanCodesTmp1;
407 if (empty($languages_from))
417 $languages_from = array_filter($languages_from,
418 function(
$a) use($max_len)
420 return count(array_filter(
$a)) >= $max_len;
424 uasort($languages_from,
427 return count(array_filter($b)) -
count(array_filter(
$a));
433 $arDetectionFrom = [];
435 foreach ($languages_from as
$lang => $arScanCodes)
440 $deviation = $ob->GetDeviation($arScanCodes);
442 $arDetectionFrom[
$lang] = [
444 $ob->CheckTrigrams($arScanCodes),
446 intval($deviation[0] * 100),
452 uasort($arDetectionFrom, [
'CSearchLanguage',
'cmp']);
457 foreach ($arDetectionFrom as $lang_from => $arTemp)
459 foreach ($arLanguages as
$lang)
461 $lang_from_to = $lang_from .
'=>' .
$lang;
463 $arDetection[$lang_from_to] = [];
468 $arScanCodes = $ob->ConvertToScancode($alt_text,
true);
470 $arDetection[$lang_from_to][] = $ob->HasBigrammInfo() ? 0 : 1;
471 $arDetection[$lang_from_to][] = $ob->CheckTrigrams($arScanCodes);
472 $arDetection[$lang_from_to][] = -
count(array_filter($arScanCodes));
476 $deviation = $ob->GetDeviation($arScanCodes);
477 $arDetection[$lang_from_to][] = $deviation[1];
478 $arDetection[$lang_from_to][] = $deviation[0];
480 $arDetection[$lang_from_to][] =
$i;
481 $arDetection[$lang_from_to][] = $lang_from_to;
486 uasort($arDetection, [
'CSearchLanguage',
'cmp']);
487 $language_from_to = key($arDetection);
489 list($language_from, $language_to) = explode(
'=>', $language_from_to);
492 if ($alt_text ===
$text)
497 return [
'from' => $language_from,
'to' => $language_to];
591 return [
'count' => 1];
594 if (!isset($this->_lang_bigramm_cache))
596 $bigramms = $this->GetBigrammLetterFreq();
598 $keyboard_lo = $keyboard[
'lo'];
599 $keyboard_hi = $keyboard[
'hi'];
602 foreach ($bigramms as $letter1 => $row)
604 $p1 = mb_strpos($keyboard_lo, $letter1);
607 $p1 = mb_strpos($keyboard_hi, $letter1);
611 foreach ($bigramms as $letter2 =>
$tmp)
613 $p2 = mb_strpos($keyboard_lo, $letter2);
616 $p2 = mb_strpos($keyboard_hi, $letter2);
621 $result[$p1 .
' ' . $p2] = $weight;
625 $this->_lang_bigramm_cache =
$result;