Bitrix-D7 23.9
 
Загрузка...
Поиск...
Не найдено
encoding.php
1<?php
2namespace Bitrix\Main\Text;
3
6
8{
/**
 * Converts $data from one charset to another.
 *
 * The input is handed back untouched when either charset name is empty,
 * when both names are equal (case-insensitively) after alias resolution,
 * or when mbstring does not recognise one of them.
 *
 * @param mixed $data Value to convert; the actual work is delegated to convert().
 * @param string $charsetFrom Source charset name (aliases are resolved first).
 * @param string $charsetTo Target charset name (aliases are resolved first).
 * @return mixed Converted value, or $data as-is when no conversion is performed.
 */
public static function convertEncoding($data, $charsetFrom, $charsetTo)
{
	$source = static::resolveAlias($charsetFrom);
	$target = static::resolveAlias($charsetTo);

	// An empty name means an incorrect encoding; identical names need no conversion.
	$hasEmptyName = ((string)$source === '' || (string)$target === '');
	if ($hasEmptyName || strcasecmp($source, $target) == 0)
	{
		return $data;
	}

	// For an unknown encoding mb_encoding_aliases() either returns false
	// (emitting an E_WARNING, hence the @) or throws \ValueError; both mean "unknown".
	try
	{
		$bothKnown = @mb_encoding_aliases($source) !== false
			&& @mb_encoding_aliases($target) !== false;
	}
	catch (\ValueError $e)
	{
		$bothKnown = false;
	}

	if (!$bothKnown)
	{
		return $data;
	}

	return self::convert($data, $source, $target);
}
50
/**
 * Maps a few charset aliases onto the charset names used for conversion.
 *
 * String input is trimmed and lower-cased before the lookup; when no
 * mapping exists the trimmed, lower-cased string itself is returned.
 * Non-string input is returned unchanged.
 *
 * @param mixed $alias Charset name or alias.
 * @return mixed Mapped charset name, the normalised input string, or the original non-string value.
 */
protected static function resolveAlias($alias)
{
	static $knownAliases = [
		'csksc56011987' => 'euc-kr',
		'ks_c_5601-1987' => 'euc-kr',
		'ks_c_5601-1989' => 'euc-kr',
		'ksc5601' => 'euc-kr',
		'ksc_5601' => 'euc-kr',
		'windows-1257' => 'ISO-8859-13',
	];

	if (!is_string($alias))
	{
		return $alias;
	}

	$normalized = strtolower(trim($alias));

	return $knownAliases[$normalized] ?? $normalized;
}
73
/**
 * Recursively converts strings inside $data from one charset to another.
 *
 * - Strings are converted through convertByMbstring() (an empty string is returned as '').
 * - Arrays are rebuilt with both keys and values converted.
 * - \SplFixedArray instances are cloned and their entries overwritten with converted values.
 * - Any other value is returned unchanged.
 *
 * @param mixed $data Value to convert.
 * @param string $charsetFrom Source charset.
 * @param string $charsetTo Target charset.
 * @return mixed Converted value.
 */
protected static function convert($data, $charsetFrom, $charsetTo)
{
	if (is_string($data))
	{
		return ($data === '')
			? ''
			: static::convertByMbstring($data, $charsetFrom, $charsetTo);
	}

	$isFixedArray = $data instanceof \SplFixedArray;
	if (!is_array($data) && !$isFixedArray)
	{
		// Scalars other than strings, objects, null: nothing to convert.
		return $data;
	}

	// A fixed array keeps its type via a clone; a plain array is built from scratch.
	$converted = $isFixedArray ? clone $data : [];

	foreach ($data as $index => $item)
	{
		$convertedIndex = self::convert($index, $charsetFrom, $charsetTo);
		$convertedItem = self::convert($item, $charsetFrom, $charsetTo);

		$converted[$convertedIndex] = $convertedItem;
	}

	return $converted;
}
106
/**
 * Forwards all arguments unchanged to convertEncoding() and returns its result.
 *
 * @param mixed $data Value to convert.
 * @param string $charsetFrom Source charset.
 * @param string $charsetTo Target charset.
 * @return mixed Whatever convertEncoding() returns for the same arguments.
 */
public static function convertEncodingArray($data, $charsetFrom, $charsetTo)
{
	$converted = self::convertEncoding($data, $charsetFrom, $charsetTo);

	return $converted;
}
118
/**
 * Brings $string into line with the application's UTF mode.
 *
 * When Application::isUtfMode() is truthy and the string is not detected as
 * UTF-8, it is converted from the current encoding to UTF-8. When UTF mode is
 * off and the string is detected as UTF-8, it is converted from UTF-8 to the
 * current encoding. In every other case the string is returned unchanged.
 *
 * @param mixed $string Value to check and convert.
 * @return mixed The converted value, or $string as-is.
 */
public static function convertEncodingToCurrent($string)
{
	$stringIsUtf8 = self::detectUtf8($string);
	$siteIsUtf8 = Application::isUtfMode();

	if ($siteIsUtf8)
	{
		if ($stringIsUtf8)
		{
			// Already in the expected encoding.
			return $string;
		}
		$from = static::getCurrentEncoding();
		$to = 'UTF-8';
	}
	else
	{
		if (!$stringIsUtf8)
		{
			// Not UTF-8 and UTF mode is off: leave as is.
			return $string;
		}
		$from = 'UTF-8';
		$to = static::getCurrentEncoding();
	}

	if ($from === $to)
	{
		return $string;
	}

	return self::convertEncoding($string, $from, $to);
}
148
/**
 * Returns $string converted to UTF-8.
 *
 * A value that detectUtf8() already reports as UTF-8 is returned unchanged.
 * Otherwise it is passed to convertEncoding() with the current encoding as
 * the source and 'UTF-8' as the target, unless both names are identical.
 *
 * @param mixed $string Value to convert.
 * @return mixed The converted value, or $string as-is.
 */
153 public static function convertToUtf($string)
154 {
155 if (self::detectUtf8($string))
156 {
// Already valid UTF-8: nothing to do.
157 return $string;
158 }
159
160 $from = '';
161 $to = '';
// NOTE(review): the embedded line numbering jumps from 161 to 163, so a line
// appears to be missing here - presumably a condition guarding the block
// below (the empty defaults above only make sense if it is conditional).
// As shown, the block runs unconditionally. TODO confirm against the real file.
163 {
164 $from = static::getCurrentEncoding();
165 $to = 'UTF-8';
166 }
167
// Skip the call when source and target are the same string.
168 if ($from !== $to)
169 {
170 $string = self::convertEncoding($string, $from, $to);
171 }
172
173 return $string;
174 }
175
/**
 * Determines the charset currently in effect.
 *
 * Lookup order: the charset of the application context's culture, then the
 * "default_charset" configuration value, then the hard-coded "Windows-1251".
 * A step is skipped over when its value loosely equals null.
 *
 * @return string Charset name.
 */
protected static function getCurrentEncoding(): string
{
	$context = Application::getInstance()->getContext();
	$culture = ($context != null) ? $context->getCulture() : null;
	$charset = ($culture != null) ? $culture->getCharset() : null;

	if ($charset == null)
	{
		// No usable culture charset: fall back to the configuration.
		$charset = Configuration::getValue("default_charset");
	}

	// Last resort when neither source yields a value.
	return ($charset == null) ? "Windows-1251" : $charset;
}
202
/**
 * Tells whether $string is valid UTF-8.
 *
 * With $replaceHex enabled, every "%XX" sequence (two hex digits, any case)
 * is first replaced by the byte it encodes, so percent-encoded input is
 * checked by its decoded bytes.
 *
 * @param string $string Text to check.
 * @param bool $replaceHex Whether to decode "%XX" sequences before checking.
 * @return bool True when the (optionally decoded) text matches an empty pattern in UTF-8 mode.
 */
public static function detectUtf8($string, $replaceHex = true)
{
	$subject = $string;

	if ($replaceHex)
	{
		// Group 2 holds the two hex digits following the percent sign.
		$decodeHexPair = function ($parts) {
			return chr(hexdec($parts[2]));
		};

		$subject = preg_replace_callback("/(%)([\\dA-F]{2})/i", $decodeHexPair, $string);
	}

	// An empty pattern with the /u modifier only matches when the subject is valid UTF-8.
	return preg_match('//u', $subject) ? true : false;
}
223
/**
 * Converts a string between charsets with mb_convert_encoding().
 *
 * UTF-16 input gets special treatment because the byte order has to be
 * determined: mbstring treats plain "UTF-16" as big endian, so little
 * endian must be requested explicitly.
 *
 * @param string $data Text to convert.
 * @param string $charsetFrom Source charset.
 * @param string $charsetTo Target charset.
 * @return mixed Result of mb_convert_encoding().
 */
protected static function convertByMbstring($data, $charsetFrom, $charsetTo)
{
	$payload = $data;
	$sourceCharset = $charsetFrom;

	if (strtoupper($charsetFrom) === "UTF-16")
	{
		$bom = substr($data, 0, 2);

		if ($bom === "\xFF\xFE")
		{
			// Little-endian BOM: strip it and name the byte order explicitly.
			$payload = substr($data, 2);
			$sourceCharset = "UTF-16LE";
		}
		elseif ($bom === "\xFE\xFF")
		{
			// Big-endian BOM: strip it and keep the caller's charset.
			$payload = substr($data, 2);
		}
		else
		{
			// No BOM: assume little endian.
			$sourceCharset = "UTF-16LE";
		}
	}

	return mb_convert_encoding($payload, $charsetTo, $sourceCharset);
}
255}
static convertEncoding($data, $charsetFrom, $charsetTo)
Definition encoding.php:17
static resolveAlias($alias)
Definition encoding.php:51
static convertToUtf($string)
Definition encoding.php:153
static convertByMbstring($data, $charsetFrom, $charsetTo)
Definition encoding.php:224
static convertEncodingArray($data, $charsetFrom, $charsetTo)
Definition encoding.php:114
static convertEncodingToCurrent($string)
Definition encoding.php:123
static detectUtf8($string, $replaceHex=true)
Definition encoding.php:208
static convert($data, $charsetFrom, $charsetTo)
Definition encoding.php:74