1C-Bitrix 25.700.0
Загрузка...
Поиск...
Не найдено
language.php
См. документацию.
1<?php
3{
4 var $_abc = [];
7 var $_trigrams = [];
9 var $_bigrams = null;
10
//Creates a language helper bound to one language.
//$lang_id - language identifier; it is stored unchanged in $this->_lang_id
//and later used as the key of the per-language scan code cache.
function __construct($lang_id)
{
	$this->_lang_id = $lang_id;
}
15
//Returns the language helper object for $sLang, building it on first use.
//
//Objects are memoized per language id in a function-level static array,
//so repeated calls with the same $sLang return the same instance.
//
//Lookup order when class "CSearchLanguage<lang>" is not loaded yet:
// 1. customized file <DOCUMENT_ROOT><BX_PERSONAL_ROOT>/php_interface/<lang>/search/language.php
//    (it may return a ready object from the include);
// 2. module file <DOCUMENT_ROOT>/bitrix/modules/search/tools/<lang>/language.php;
// 3. fall back to the base CSearchLanguage class.
//
//After construction the object gets its trigrams loaded from the directory
//that was probed last, its alphabet from stemming_init() and the
//_has_bigramm_info flag.
//
//NOTE(review): $sLang is concatenated into include paths without any
//validation in this function - callers must pass trusted language ids only.
static function GetLanguage($sLang)
{
	static $arLanguages = [];

	if (!isset($arLanguages[$sLang]))
	{
		$obLanguage = null;
		//Stays null when the class was already loaded and no directory was probed.
		//Previously the variable was simply undefined in that case.
		$strDirName = null;
		$class_name = mb_strtolower('CSearchLanguage' . $sLang);
		if (!class_exists($class_name))
		{
			//First try to load customized class
			$strDirName = $_SERVER['DOCUMENT_ROOT'] . BX_PERSONAL_ROOT . '/php_interface/' . $sLang . '/search';
			$strFileName = $strDirName . '/language.php';
			if (file_exists($strFileName))
			{
				//Best effort: the customized file may return an object
				$obLanguage = @include $strFileName;
			}

			if (!is_object($obLanguage))
			{
				if (!class_exists($class_name))
				{
					//Then module class
					$strDirName = $_SERVER['DOCUMENT_ROOT'] . '/bitrix/modules/search/tools/' . $sLang;
					$strFileName = $strDirName . '/language.php';
					if (file_exists($strFileName))
					{
						if (\Bitrix\Main\Localization\Translation::allowConvertEncoding())
						{
							\Bitrix\Main\Localization\StreamConverter::include($strFileName, $sLang);
						}
						else
						{
							@include $strFileName;
						}
					}
					if (!class_exists($class_name))
					{
						//Nothing language specific found: use the base implementation
						$class_name = 'CSearchLanguage';
					}
				}
			}
		}

		if (!is_object($obLanguage))
		{
			$obLanguage = new $class_name($sLang);
		}
		if ($strDirName !== null)
		{
			//Trigram file is expected next to the language.php that was probed last
			$obLanguage->LoadTrigrams($strDirName);
		}
		$arStemInfo = stemming_init($sLang);
		if (is_array($arStemInfo))
		{
			//Alphabet letters become keys for fast isset() checks
			$obLanguage->_abc = array_flip($obLanguage->StrToArray($arStemInfo['abc']));
		}
		$obLanguage->_has_bigramm_info = is_callable([$obLanguage, 'getbigrammletterfreq']);

		$arLanguages[$sLang] = $obLanguage;
	}

	return $arLanguages[$sLang];
}
78
//Reads the file with trigrams (letter combinations not allowed in words)
//from "<dir_name>/trigram" into $this->_trigrams.
//
//Does nothing when trigrams are already loaded or the file is absent.
//The parsed result is stored through CPHPCache under a key built from the
//file modification time and path, so the file is parsed only on a cache miss.
//Each 3-character line is converted to its binary scan code form and used
//as a key of $this->_trigrams.
function LoadTrigrams($dir_name)
{
	if (!empty($this->_trigrams))
	{
		return;
	}

	$path = $dir_name . '/trigram';
	if (!(file_exists($path) && is_file($path)))
	{
		return;
	}

	$cacheKey = filemtime($path) . ',v1,' . $path;
	$storage = new CPHPCache;
	if (!$storage->StartDataCache(360000, $cacheKey, 'search'))
	{
		//Cache hit: take the previously parsed set
		$this->_trigrams = $storage->GetVars();
		return;
	}

	$contents = file_get_contents($path);
	$layout = $this->GetKeyboardLayout();
	if (isset($layout['trigram_charset']))
	{
		//The layout declares which charset the trigram file is stored in
		$contents = \Bitrix\Main\Text\Encoding::convertEncoding($contents, $layout['trigram_charset'], 'utf8');
	}

	foreach (explode("\n", $contents) as $line)
	{
		if (mb_strlen($line) != 3)
		{
			continue;
		}

		$packed = $this->ConvertToScancode($line, false, true);
		//Keep only trigrams where every letter exists on the keyboard
		if (mb_strlen($packed) == 3)
		{
			$this->_trigrams[$packed] = true;
		}
	}

	$storage->EndDataCache($this->_trigrams);
}
119
//Tells whether any trigram data is present in $this->_trigrams.
function HasTrigrams()
{
	return empty($this->_trigrams) ? false : true;
}
124
//Counts how many 3-letter windows of the phrase are listed in $this->_trigrams.
//
//$arScanCodes - sequence of scan codes; a value of false marks a word
//boundary and restarts the window.
//Each code is packed as chr($code + 1), the same form used for the keys
//of $this->_trigrams.
//Returns the number of matching windows.
function CheckTrigrams($arScanCodes)
{
	$hits = 0;
	$window = '';
	foreach ($arScanCodes as $code)
	{
		if ($code === false)
		{
			//new word starts here
			$window = '';
			continue;
		}

		//running window of the last 3 bytes
		$window = substr($window . chr($code + 1), -3);

		if (strlen($window) === 3 && isset($this->_trigrams[$window]))
		{
			$hits++;
		}
	}

	return $hits;
}
164
//This function returns positions of the letters on the keyboard.
//This one is the default English layout.
//
//Returns an array with two strings, 'lo' and 'hi'. The offset of a
//character inside these strings is what the other methods use as its
//scan code.
//NOTE(review): whitespace inside the literals is position-significant;
//confirm the exact spacing against the upstream file before relying on it.
function GetKeyboardLayout()
{
	return [
		'lo' => '` - ' . 'qwertyuiop[]' . "asdfghjkl;'"
			. 'zxcvbnm,. ',
		'hi' => '~ ' . 'QWERTYUIOP{}' . 'ASDFGHJKL:"' . 'ZXCVBNM<> '
	];
}
175
//Turns a sequence of scan codes back into text.
//Every code is used as a character offset into the 'lo' string of the
//keyboard layout; the picked characters are joined in order.
function ConvertFromScancode($arScancode)
{
	$layout = $this->GetKeyboardLayout();
	$letters = [];
	foreach ($arScancode as $position)
	{
		$letters[] = mb_substr($layout['lo'], $position, 1);
	}
	return implode('', $letters);
}
186
//Splits a string into an array of its characters (multibyte aware).
//
//$str - text to split; an empty string gives an empty array.
//Returns a zero-based list with one character per element.
//
//Uses mb_str_split() instead of calling mb_substr() for every position:
//the per-position lookup re-scans the string each time, which is quadratic
//for multibyte text.
public static function StrToArray($str)
{
	$str = (string)$str;
	if ($str === '')
	{
		return [];
	}

	return mb_str_split($str);
}
197
//This function converts text between keyboard layouts.
//
//$text - text to convert
//$from - language id of the layout the text was typed in
//$to   - language id of the layout the text was meant for
//
//Every character found in the $from layout is replaced with the character
//at the same position of the $to layout; other characters stay untouched.
//Returns the converted text, or $text unchanged when one of the layouts
//is not available.
//
//Flattened layouts and the from|to translation tables are memoized in a
//function-level static array.
public static function ConvertKeyboardLayout($text, $from, $to)
{
	static $keyboards = [];
	$combo = $from . '|' . $to;

	if (!isset($keyboards[$combo]))
	{
		//Fill local cache: each language gets its own object and layout.
		//The target layout must come from the $to language object.
		foreach ([$from, $to] as $langId)
		{
			if (!array_key_exists($langId, $keyboards))
			{
				$ob = CSearchLanguage::GetLanguage($langId);
				$keyboard = $ob->GetKeyboardLayout();
				if (is_array($keyboard))
				{
					$keyboards[$langId] = array_merge($ob->StrToArray($keyboard['lo']), $ob->StrToArray($keyboard['hi']));
				}
				else
				{
					$keyboards[$langId] = null;
				}
			}
		}

		//when both layouts defined
		if (isset($keyboards[$from]) && isset($keyboards[$to]))
		{
			$keyboards[$combo] = [];
			foreach ($keyboards[$from] as $i => $ch)
			{
				//NOTE(review): loose comparison - the character '0' is skipped as well
				if ($ch != false)
				{
					//A shorter target layout yields null, i.e. "no translation"
					$keyboards[$combo][$ch] = $keyboards[$to][$i] ?? null;
				}
			}
		}
	}

	if (isset($keyboards[$combo]))
	{
		$text = static::StrToArray($text);
		foreach ($text as $pos => $char)
		{
			if (isset($keyboards[$combo][$char]))
			{
				$text[$pos] = $keyboards[$combo][$char];
			}
		}
		return implode('', $text);
	}
	else
	{
		return $text;
	}
}
266
//This function converts text into character positions on the keyboard.
//
//$text   - text to convert
//$strict - when true, characters missing from $this->_abc are treated
//          as not defined
//$binary - selects the result form:
//          false: array with one element per character, holding the position
//                 or false for a space / not defined character;
//          true:  string where each defined character becomes chr(position + 1)
//                 and spaces / not defined characters are dropped.
//
//The character => position table is built once per language id from the
//'lo' and 'hi' layout strings and kept in a function-level static array.
function ConvertToScancode($text, $strict=false, $binary=false)
{
	static $cache = [];
	$langId = $this->_lang_id;
	if (!isset($cache[$langId]))
	{
		$cache[$langId] = [];
		$layout = $this->GetKeyboardLayout();

		//'hi' goes second, so its positions win for characters present in both
		foreach (['lo', 'hi'] as $row)
		{
			foreach ($this->StrToArray($layout[$row]) as $pos => $ch)
			{
				$cache[$langId][$ch] = $pos;
			}
		}
	}

	$scancodes = $cache[$langId];

	$result = $binary ? '' : [];
	foreach ($this->StrToArray($text) as $ch)
	{
		$defined = $ch !== ' '
			&& (!$strict || isset($this->_abc[$ch]))
			&& isset($scancodes[$ch]);

		if ($binary)
		{
			if ($defined)
			{
				$result .= chr($scancodes[$ch] + 1);
			}
		}
		else
		{
			$result[] = $defined ? $scancodes[$ch] : false;
		}
	}

	return $result;
}
330
//Hook that lets a language class make its own guess before the generic
//detection runs.
//
//The base implementation returns false, which indicates that there is
//no own guess.
//In subclasses you should return array("from" => lang, "to" => lang) to translate,
//or return true when no translation is needed,
//or parent::GuessLanguage for further processing.
function PreGuessLanguage($text, $lang=false)
{
	return false;
}
339
//Tries to detect that the text was typed in a wrong keyboard layout.
//
//$text - phrase to analyze
//$lang - optional array of language ids to consider; when it is not an
//        array, "en" plus every language returned by CLanguage::GetList()
//        is used
//
//Returns array("from" => lang, "to" => lang) when converting the text from
//one layout to another gives the best match, or false when the text is
//empty, fewer than two languages are available, too few characters are
//recognized, a language reports that no translation is needed, or the
//conversion would not change the text.
//
//Every loop below loads the helper object of the language it is working
//with; without these lookups $ob would be undefined.
public static function GuessLanguage($text, $lang=false)
{
	if ($text == '')
	{
		return false;
	}

	static $cache = [];
	if (empty($cache))
	{
		$cache[] = 'en';//English is always in mind and on the first place
		$rsLanguages = CLanguage::GetList();
		while ($arLanguage = $rsLanguages->Fetch())
		{
			if ($arLanguage['LID'] != 'en')
			{
				$cache[] = $arLanguage['LID'];
			}
		}
	}

	if (is_array($lang))
	{
		$arLanguages = $lang;
	}
	else
	{
		$arLanguages = $cache;
	}

	if (count($arLanguages) < 2)
	{
		return false;
	}

	//Give customized languages a chance to guess
	foreach ($arLanguages as $lang)
	{
		$ob = CSearchLanguage::GetLanguage($lang);

		$res = $ob->PreGuessLanguage($text, $lang);
		if (is_array($res))
		{
			return $res;
		}
		elseif ($res === true)
		{
			return false;
		}
	}

	//First try to detect language which
	//was used to type the phrase
	$max_len = 0;
	$languages_from = [];
	foreach ($arLanguages as $lang)
	{
		$ob = CSearchLanguage::GetLanguage($lang);

		$arScanCodesTmp1 = $ob->ConvertToScancode($text, true);
		$_cnt = count(array_filter($arScanCodesTmp1));
		if ($_cnt > $max_len)
		{
			$max_len = $_cnt;
		}
		$languages_from[$lang] = $arScanCodesTmp1;
	}

	if (empty($languages_from))
	{
		return false;
	}

	if ($max_len < 2)
	{
		return false;
	}

	//Keep only the languages that recognized the most characters
	$languages_from = array_filter($languages_from,
		function($a) use($max_len)
		{
			return count(array_filter($a)) >= $max_len;
		}
	);

	uasort($languages_from,
		function($a, $b)
		{
			return count(array_filter($b)) - count(array_filter($a));
		}
	);

	//If more than one language is detected as input
	//try to get one with best trigram info
	$arDetectionFrom = [];
	$i = 0;
	foreach ($languages_from as $lang => $arScanCodes)
	{
		$ob = CSearchLanguage::GetLanguage($lang);

		//Calculate how far sequence of scan codes
		//is from language model
		$deviation = $ob->GetDeviation($arScanCodes);

		$arDetectionFrom[$lang] = [
			$ob->HasTrigrams(),
			$ob->CheckTrigrams($arScanCodes),
			$deviation[1],
			intval($deviation[0] * 100),
			$i,
		];

		$i++;
	}
	uasort($arDetectionFrom, ['CSearchLanguage', 'cmp']);

	//Now try the best to detect the language
	$arDetection = [];
	$i = 0;
	foreach ($arDetectionFrom as $lang_from => $arTemp)
	{
		foreach ($arLanguages as $lang)
		{
			$lang_from_to = $lang_from . '=>' . $lang;

			$arDetection[$lang_from_to] = [];

			//The converted text is rated by the target language
			$ob = CSearchLanguage::GetLanguage($lang);

			$alt_text = CSearchLanguage::ConvertKeyboardLayout($text, $lang_from, $lang);
			$arScanCodes = $ob->ConvertToScancode($alt_text, true);

			$arDetection[$lang_from_to][] = $ob->HasBigrammInfo() ? 0 : 1;
			$arDetection[$lang_from_to][] = $ob->CheckTrigrams($arScanCodes);
			$arDetection[$lang_from_to][] = -count(array_filter($arScanCodes));

			//Calculate how far sequence of scan codes
			//is from language model
			$deviation = $ob->GetDeviation($arScanCodes);
			$arDetection[$lang_from_to][] = $deviation[1];
			$arDetection[$lang_from_to][] = $deviation[0];

			//Position and name make every row unique, so cmp() never ties
			$arDetection[$lang_from_to][] = $i;
			$arDetection[$lang_from_to][] = $lang_from_to;
			$i++;
		}
	}

	uasort($arDetection, ['CSearchLanguage', 'cmp']);
	$language_from_to = key($arDetection);

	list($language_from, $language_to) = explode('=>', $language_from_to);

	$alt_text = CSearchLanguage::ConvertKeyboardLayout($text, $language_from, $language_to);
	if ($alt_text === $text)
	{
		return false;
	}

	return ['from' => $language_from, 'to' => $language_to];
}
499
//Compares two results of text analysis element by element.
//
//$a, $b - zero-based lists of the same length
//Returns -1 or 1 at the first position where the elements differ
//(smaller element sorts first), 0 when all positions of $a are equal.
static function cmp($a, $b)
{
	$total = count($a);
	$pos = 0;
	while ($pos < $total)
	{
		$left = $a[$pos];
		$right = $b[$pos];

		if ($left < $right)
		{
			return -1;
		}
		if ($left > $right)
		{
			return 1;
		}

		$pos++;
	}
	return 0;//never happens
}
517
//Function returns distance of the text (sequence of scan codes)
//from the language model.
//
//$arScanCodes - sequence of scan codes of the text
//Returns array(deviation, zeroes):
// deviation - accumulated difference between text and language bigram shares
// zeroes    - number of text bigram occurrences unknown to the language model
function GetDeviation($arScanCodes)
{
	//This is language model
	$langModel = $this->GetBigrammScancodeFreq();
	$langTotal = $langModel['count'];
	unset($langModel['count']);

	//This is text model
	$textModel = $this->ConvertToBigramms($arScanCodes);
	$textTotal = $textModel['count'];
	unset($textModel['count']);

	$distance = 0;
	$unknown = 0;
	foreach ($textModel as $pair => $occurrences)
	{
		//Every occurrence of the bigram contributes separately
		for ($n = 0; $n < $occurrences; $n++)
		{
			if (isset($langModel[$pair]))
			{
				$distance += abs(1 / $textTotal - $langModel[$pair] / $langTotal);
				continue;
			}

			$unknown++;
			$distance += 1 / $textTotal;
		}
	}

	return [$distance, $unknown];
}
552
//Function returns bigramms of the text (array of scancodes).
//For example "FAT RAT" gives the pairs "FA", "AT", "RA", "AT".
//This is the model of the text.
//
//$arScancodes - zero-based list of scan codes, false marks a gap
//Returns an array where 'count' is the total number of pairs and every
//"<code1> <code2>" key holds how many times that pair occurred.
//Pairs touching a false element are not counted.
function ConvertToBigramms($arScancodes)
{
	$model = ['count' => 0];

	$last = count($arScancodes) - 1;
	for ($pos = 0; $pos < $last; $pos++)
	{
		$first = $arScancodes[$pos];
		$second = $arScancodes[$pos + 1];
		if ($first === false || $second === false)
		{
			continue;
		}

		$pair = $first . ' ' . $second;
		$model['count']++;
		$model[$pair] = ($model[$pair] ?? 0) + 1;
	}
	return $model;
}
580
//Tells whether this language object can provide bigram frequencies.
//Returns the _has_bigramm_info flag, which GetLanguage() sets to whether
//the object has a callable getbigrammletterfreq method.
function HasBigrammInfo()
{
	return $this->_has_bigramm_info;
}
585
//Function returns model of the language: bigram weights keyed by
//keyboard positions.
//
//Returns an array where 'count' is the sum of all weights and every
//"<pos1> <pos2>" key holds the weight of that pair of positions.
//When the object has no bigram info, array('count' => 1) is returned.
//The computed model is kept in $this->_lang_bigramm_cache.
//
//NOTE(review): the weights are read as $row[$i], where $i is the ordinal
//of the second letter in the outer table - this presumably expects each
//row of GetBigrammLetterFreq() to be a zero-based list in the same letter
//order; confirm against the language classes.
function GetBigrammScancodeFreq()
{
	if (!$this->HasBigrammInfo())
	{
		return ['count' => 1];
	}

	if (!isset($this->_lang_bigramm_cache))
	{
		$bigramms = $this->GetBigrammLetterFreq();
		$keyboard = $this->GetKeyboardLayout();
		$keyboard_lo = $keyboard['lo'];
		$keyboard_hi = $keyboard['hi'];

		$result = ['count' => 0];
		foreach ($bigramms as $letter1 => $row)
		{
			//Position of the first letter: lower row first, then upper
			$p1 = mb_strpos($keyboard_lo, $letter1);
			if ($p1 === false)
			{
				$p1 = mb_strpos($keyboard_hi, $letter1);
			}

			$i = 0;
			foreach ($bigramms as $letter2 => $tmp)
			{
				$p2 = mb_strpos($keyboard_lo, $letter2);
				if ($p2 === false)
				{
					$p2 = mb_strpos($keyboard_hi, $letter2);
				}

				$weight = $row[$i];
				$result['count'] += $weight;
				$result[$p1 . ' ' . $p2] = $weight;
				$i++;
			}
		}
		$this->_lang_bigramm_cache = $result;
	}

	return $this->_lang_bigramm_cache;
}
629}
$count
Определения admin_tab.php:4
static convertEncoding($data, $charsetFrom, $charsetTo)
Определения encoding.php:17
static GetList($by="sort", $order="asc", $arFilter=[])
Определения language.php:12
Определения language.php:3
GetDeviation($arScanCodes)
Определения language.php:520
static ConvertKeyboardLayout($text, $from, $to)
Определения language.php:199
$_trigrams
Определения language.php:7
$_lang_id
Определения language.php:5
HasBigrammInfo()
Определения language.php:581
PreGuessLanguage($text, $lang=false)
Определения language.php:331
static GuessLanguage($text, $lang=false)
Определения language.php:340
CheckTrigrams($arScanCodes)
Определения language.php:126
ConvertToBigramms($arScancodes)
Определения language.php:557
$_has_bigramm_info
Определения language.php:8
GetKeyboardLayout()
Определения language.php:167
ConvertToScancode($text, $strict=false, $binary=false)
Определения language.php:269
static cmp($a, $b)
Определения language.php:501
static GetLanguage($sLang)
Определения language.php:17
$_abc
Определения language.php:4
$_lang_bigramm_cache
Определения language.php:6
HasTrigrams()
Определения language.php:120
GetBigrammScancodeFreq()
Определения language.php:587
__construct($lang_id)
Определения language.php:11
LoadTrigrams($dir_name)
Определения language.php:80
$_bigrams
Определения language.php:9
static StrToArray($str)
Определения language.php:187
ConvertFromScancode($arScancode)
Определения language.php:176
$str
Определения commerceml2.php:63
if(!is_array($prop["VALUES"])) $tmp
Определения component_props.php:203
$res
Определения filter_act.php:7
$result
Определения get_property_values.php:14
else $ch
Определения group_list_element_edit.php:27
$_SERVER["DOCUMENT_ROOT"]
Определения cron_frame.php:9
if(!is_null($config))($config as $configItem)(! $configItem->isVisible()) $code
Определения options.php:195
if(!defined('SITE_ID')) $lang
Определения include.php:91
if( $daysToExpire >=0 &&$daysToExpire< 60 elseif)( $daysToExpire< 0)
Определения prolog_main_admin.php:393
$ar
Определения options.php:199
if(empty($signedUserToken)) $key
Определения quickway.php:257
$text
Определения template_pdf.php:79
$i
Определения factura.php:643
</p ></td >< td valign=top style='border-top:none;border-left:none;border-bottom:solid windowtext 1.0pt;border-right:solid windowtext 1.0pt;padding:0cm 2.0pt 0cm 2.0pt;height:9.0pt'>< p class=Normal align=center style='margin:0cm;margin-bottom:.0001pt;text-align:center;line-height:normal'>< a name=ТекстовоеПоле54 ></a ><?=($taxRate > count( $arTaxList) > 0) ? $taxRate."%"
Определения waybill.php:936
else $a
Определения template.php:137
stemming_init($sLang='ru')
Определения stemming.php:3