92 $this->__getter->setStreamTimeout(25);
93 $this->__getter->setRedirect(
true);
95 if (
$result = $this->__getter->get($this->__url))
98 $headers = $this->__getter->getHeaders()->toArray();
100 foreach ($headers as $header)
102 $currHeader =
array();
103 foreach($header[
'values'] as $value)
104 $currHeader[] = $value;
105 $currHeader = implode(
", ", $currHeader);
106 $this->__result_headers[$header[
"name"]] = $currHeader;
111 unset($this->__getter);
112 $this->bError =
false;
115 if(
$errors = $this->__getter->getError())
116 $this->errorString = implode(
', ',
$errors);
117 unset($this->__getter);
118 $this->bError =
true;
136 if($this->pcre_backtrack_limit ===
false)
137 $this->pcre_backtrack_limit = intval(ini_get(
"pcre.backtrack_limit"));
138 $text_len = strlen($this->__result_data);
140 if($this->pcre_backtrack_limit > 0 && $this->pcre_backtrack_limit < $text_len)
142 @ini_set(
"pcre.backtrack_limit", $text_len);
143 $this->pcre_backtrack_limit = intval(ini_get(
"pcre.backtrack_limit"));
146 if($this->__bCheckErrors && $this->pcre_backtrack_limit > 0 && $this->pcre_backtrack_limit < $text_len)
148 $this->__result_errors[] =
array(
149 'CODE' =>
'SEO_PCRE',
152 '#PCRE_BACKTRACK_LIMIT#' => $this->pcre_backtrack_limit,
153 '#TEXT_LEN#' => $text_len,
161 $this->__result_data = preg_replace(
'/<img[^>]*(alt|title)=\"([^\"]*)\".*?>/is',
'\\2', $this->__result_data);
163 if ($this->__bCheckErrors && ($img_cnt = preg_match(
'/<img.*?>/is', $this->__result_data)))
165 $this->__result_errors[] =
array(
166 'CODE' =>
'SEO_IMG_NO_ALT',
169 '#COUNT#' => $img_cnt
175 $this->__index[
'TOTAL'] = $this->
__prepareText($this->__result_data);
179 if(preg_match_all(
"/<(b|strong)>(.*?)<\\/\\1>/is", $this->__result_data,
$arRes))
181 $this->__result_extended[
'BOLD'] =
$arRes[0];
186 if(preg_match_all(
"/<(i|em)>(.*?)<\\/\\1>/is", $this->__result_data,
$arRes))
188 $this->__result_extended[
'ITALIC'] =
$arRes[0];
193 if(preg_match_all(
"/<(noindex)>(.*?)<\\/\\1>/is", $this->__result_data,
$arRes))
195 $this->__result_extended[
'NOINDEX'] =
$arRes[0];
199 if(preg_match_all(
"/<(a) ([^>]*)>(.*?)<\\/\\1>/is", $this->__result_data,
$arRes))
201 $this->__result_extended[
'LINK'] =
$arRes[0];
204 $this->__result_extended[
'NOFOLLOW'] =
array();
205 $this->__result_extended[
'LINK_EXTERNAL'] =
array();
206 $this->__index[
'LINK_EXTERNAL'] =
array();
210 if (
false !== mb_strpos($attrs,
'rel="nofollow"'))
211 $this->__result_extended[
'NOFOLLOW'][] =
$arRes[0][
$key];
212 if (
false !== ($pos = mb_strpos($attrs,
'href="')))
214 $pos1 = mb_strpos($attrs,
'"', $pos + 6);
215 $url = mb_substr($attrs, $pos, $pos1 - $pos);
219 $this->__index[
'LINK_EXTERNAL'] = array_merge($this->__index[
'LINK_EXTERNAL'], $this->
__prepareText(
$arRes[3][
$key]));
220 $this->__result_extended[
'LINK_EXTERNAL'][] =
$arRes[0][
$key];
225 if ($this->__bCheckErrors &&
count(
$arRes[0]) > $this->__qualifier_links_count)
227 $this->__result_errors[] =
array(
228 'CODE' =>
'SEO_LINKS_COUNT',
232 '#COUNT_EXTERNAL#' =>
count($this->__result_extended[
'LINK_EXTERNAL']),
233 '#QUALIFIER#' => $this->__qualifier_links_count,
241 if(preg_match(
'/<meta.*?name=\"description\".*?content=\"([^\"]+)\"[^>]*>/i', $this->__result_data,
$arRes))
243 $this->__result_meta[
'DESCRIPTION'] =
$arRes[1];
244 $this->__result_extended[
'META_DESCRIPTION'] =
$arRes[0];
245 $this->__index[
'DESCRIPTION'] = $this->
__prepareText($this->__result_meta[
'DESCRIPTION']);
249 $this->__result_errors[] =
array(
250 'CODE' =>
'SEO_META_NO_DESCRIPTION',
257 if(preg_match(
'/<meta.*?name=\"keywords\".*?content=\"([^\"]+)\"[^>]*>/i', $this->__result_data,
$arRes))
259 $this->__result_meta[
'KEYWORDS'] =
$arRes[1];
260 $this->__result_extended[
'META_KEYWORDS'] =
$arRes[0];
261 $this->__index[
'KEYWORDS'] = $this->
__prepareText($this->__result_meta[
'KEYWORDS']);
265 $this->__result_errors[] =
array(
266 'CODE' =>
'SEO_META_NO_KEYWORDS',
273 if(preg_match(
"/<(title)>(.*?)<\\/\\1>/is", $this->__result_data,
$arRes))
275 $this->__result_extended[
'TITLE'] =
$arRes[0];
279 if(preg_match_all(
"/<(h[\d]{1}).*?>.*?<\\/\\1>/is", $this->__result_data,
$arRes))
281 $this->__result_extended[
'H'] =
$arRes[0];
284 if(preg_match_all(
"/<(h1).*?>(.*?)<\\/\\1>/is", $this->__result_data,
$arRes))
288 $this->__result_errors[] =
array(
289 'CODE' =>
'SEO_H1_UNIQUE',
300 elseif ($this->__bCheckErrors)
302 $this->__result_errors[] =
array(
303 'CODE' =>
'SEO_H1_ABSENT',
309 if ($this->__bCheckErrors)
315 'URL' => $this->__url,
316 'LANG' => $this->__lang,
317 'SERVER_NAME' => $this->__server_name,
318 'SITE' => $this->__site,
321 'HEADERS' => $this->__result_headers,
322 'BODY' => $this->__result_data,
324 $this->__result_meta,
326 )) && ($ex =
$GLOBALS[
'APPLICATION']->GetException()))
328 $this->__result_errors[] =
array(
329 'CODE' => $ex->GetId(),
331 'TEXT' => $ex->GetString(),
352 if (!is_array($this->__index))
355 if (
null == $this->__index_total_len)
356 $this->__index_total_len = array_sum($this->__index[
'TOTAL']);
359 'URL' => $this->__url,
360 'TOTAL_LENGTH' => strlen($this->__result_data),
361 'TOTAL_WORDS_COUNT' => $this->__index_total_len ? $this->__index_total_len :
'-',
362 'UNIQUE_WORDS_COUNT' => $this->__index_total_len ?
count($this->__index[
'TOTAL']) :
'-',
363 'META_KEYWORDS' => $this->__result_meta[
'KEYWORDS'],
364 'META_DESCRIPTION' => $this->__result_meta[
'DESCRIPTION'],
375 if (!is_array($this->__index))
378 if (is_array($keyword))
382 foreach ($keyword as
$key => $word)
389 if (!$bStemmed && $this->bSearch)
390 $keyword =
stemming($keyword, $this->__lang);
392 if (is_array($keyword))
396 'TOTAL' => intval($this->__index[
'TOTAL'][$keyword]),
397 'BOLD' => intval($this->__index[
'BOLD'][$keyword]),
398 'ITALIC' => intval($this->__index[
'ITALIC'][$keyword]),
399 'LINK' => intval($this->__index[
'LINK'][$keyword]),
400 'LINK_EXTERNAL' => intval($this->__index[
'LINK_EXTERNAL'][$keyword]),
401 'DESCRIPTION' => intval($this->__index[
'DESCRIPTION'][$keyword]),
402 'KEYWORDS' => intval($this->__index[
'KEYWORDS'][$keyword]),
403 'TITLE' => intval($this->__index[
'TITLE'][$keyword]),
404 'H1' => intval($this->__index[
'H1'][$keyword]),
440 if (strncmp(
$url,
'#', 1) === 0)
return false;
441 if (strncmp(
$url,
'mailto:', 7) === 0)
return false;
442 if (strncmp(
$url,
'javascript:', 11) === 0)
return false;
444 $pos = mb_strpos(
$url,
'://');
445 if ($pos ===
false)
return false;
447 static $arDomainNames =
null;
449 if (
null == $arDomainNames)
453 $dbRes = CSite::GetList(
'sort',
'asc',
array(
'ACTIVE' =>
'Y'));
454 while ($arSite =
$dbRes->Fetch())
456 if ($arSite[
'DOMAINS'])
457 $arDomainNames = array_merge($arDomainNames, explode(
"\r\n", $arSite[
'DOMAINS']));
460 $arDomainNames = array_values(array_unique($arDomainNames));
464 $pos = mb_strpos(
$url,
'/');
468 $pos = mb_strlen(
$url);
471 $domain = mb_substr(
$url, 0, $pos);
472 if (mb_substr($domain, 0, 4) ==
'www.')
474 $domain = mb_substr($domain, 4);
478 return !in_array($domain, $arDomainNames);