1C-Bitrix 25.700.0
Загрузка...
Поиск...
Не найдено
schemaorg.php
См. документацию.
1<?php
2
3namespace Bitrix\Main\UrlPreview\Parser;
4
5use Bitrix\Main\Text\Encoding;
6use Bitrix\Main\UrlPreview\HtmlDocument;
7use Bitrix\Main\UrlPreview\Parser;
8
/**
 * URL-preview parser that reads schema.org microdata (itemscope / itemprop
 * attributes) and uses it to fill the title, description and image of an
 * HtmlDocument when those fields are still empty.
 */
class SchemaOrg extends Parser
{
	/** @var \DOMDocument|null DOM tree created by initializeDom(). */
	protected $dom;

	/** @var array Values collected from itemprop nodes, keyed by the lower-cased itemprop attribute. */
	protected $schemaMetadata = [];

	/** @var string|null Encoding reported by the document currently being handled. */
	protected $documentEncoding;

	/**
	 * Parses the document's microdata and copies name / description / image
	 * into the document for every field that is still empty.
	 *
	 * @param HtmlDocument $document Document to inspect and complete.
	 * @return null Always null; the result is written into $document.
	 */
	public function handle(HtmlDocument $document)
	{
		$this->documentEncoding = $document->getEncoding();
		// Forget values collected during an earlier handle() call so they cannot
		// be applied to this document.
		$this->schemaMetadata = [];

		// Cheap substring test first: without the word "itemscope" there is no microdata to parse.
		if (!str_contains($document->getHtml(), 'itemscope'))
		{
			return null;
		}

		if (!$this->initializeDom($document))
		{
			return null;
		}

		if (!$this->getSchemaMetadata())
		{
			return null;
		}

		// isset() also rejects properties whose extracted value was null.
		if ($document->getTitle() == '' && isset($this->schemaMetadata['name']))
		{
			$document->setTitle($this->schemaMetadata['name']);
		}

		if ($document->getDescription() == '' && isset($this->schemaMetadata['description']))
		{
			$document->setDescription($this->schemaMetadata['description']);
		}

		if ($document->getImage() == '' && isset($this->schemaMetadata['image']))
		{
			$document->setImage($this->schemaMetadata['image']);
		}

		return null;
	}

	/**
	 * Locates the first element carrying an itemscope attribute and collects
	 * the properties found beneath it into $this->schemaMetadata.
	 *
	 * @return bool False when no such element exists, true otherwise.
	 */
	protected function getSchemaMetadata()
	{
		// Starting with first node with itemscope attribute, to prevent walking over full document.
		$xpath = new \DOMXPath($this->dom);
		$itemScopeNodes = $xpath->query('//*[@itemscope]');

		if (!($itemScopeNodes instanceof \DOMNodeList) || $itemScopeNodes->length < 1)
		{
			return false;
		}

		$mainNode = $itemScopeNodes->item(0);
		if (!($mainNode instanceof \DOMElement))
		{
			return false;
		}

		$this->walkDomTree($mainNode);

		return true;
	}

	/**
	 * Recursively visits $currentNode and its element descendants, passing
	 * each visited element to handleNode().
	 *
	 * @param \DOMElement $currentNode Element to visit.
	 * @param int $currentDepth Distance from the element the walk started at.
	 * @return void
	 */
	protected function walkDomTree(\DOMElement $currentNode, $currentDepth = 0)
	{
		$this->handleNode($currentNode);

		// Children are visited only when "is the starting node" and "has itemscope"
		// agree: the starting node must carry itemscope, deeper nodes must not
		// (a nested itemscope opens a separate item, so its subtree is skipped).
		$isStartingNode = ($currentDepth == 0);
		if ($isStartingNode !== $currentNode->hasAttribute('itemscope'))
		{
			return;
		}

		foreach ($currentNode->childNodes as $childNode)
		{
			if ($childNode instanceof \DOMElement)
			{
				$this->walkDomTree($childNode, $currentDepth + 1);
			}
		}
	}

	/**
	 * Extracts the property value from an element, choosing the attribute or
	 * content to read according to the element's tag name.
	 *
	 * @param \DOMElement $node Element carrying the property.
	 * @return string|null Trimmed value converted to the document encoding,
	 *                     or null for unsupported tags and empty values.
	 */
	protected function getSchemaPropertyValue(\DOMElement $node)
	{
		$result = match ($node->tagName)
		{
			'img' => $node->getAttribute('src'),
			'meta' => $node->getAttribute('content'),
			'a' => $node->getAttribute('href'),
			'time' => $node->hasAttribute('datetime')
				? $node->getAttribute('datetime')
				: $node->textContent,
			'div' => $this->getNodeInnerHtml($node),
			'p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' => $node->textContent,
			default => null,
		};

		// Unsupported tag: stop here instead of passing null to the string functions below.
		if ($result === null)
		{
			return null;
		}

		// dom extension's internal encoding is always utf-8
		$result = Encoding::convertEncoding($result, 'utf-8', $this->documentEncoding);
		$result = trim($result);

		return ($result <> '' ? $result : null);
	}

	/**
	 * Stores the value of an element that has an itemprop attribute but no
	 * itemscope attribute. A later element with the same property name
	 * overwrites the earlier value.
	 *
	 * @param \DOMElement $node Element being visited.
	 * @return void
	 */
	protected function handleNode(\DOMElement $node)
	{
		if ($node->hasAttribute('itemprop') && !$node->hasAttribute('itemscope'))
		{
			$propertyName = mb_strtolower($node->getAttribute('itemprop'));
			$propertyValue = $this->getSchemaPropertyValue($node);
			$this->schemaMetadata[$propertyName] = $propertyValue;
		}
	}

	/**
	 * Serializes the child nodes of an element to an HTML string.
	 *
	 * @param \DOMElement $element Element whose children are serialized.
	 * @return string Concatenated HTML of all child nodes ("" when there are none).
	 */
	protected function getNodeInnerHtml(\DOMElement $element)
	{
		$innerHTML = '';

		foreach ($element->childNodes as $child)
		{
			$innerHTML .= $element->ownerDocument->saveHTML($child);
		}

		return $innerHTML;
	}

	/**
	 * Builds $this->dom from the document's HTML.
	 *
	 * @param HtmlDocument $document Document providing the HTML and its encoding.
	 * @return bool False when the DOM extension is unavailable or loading failed.
	 */
	protected function initializeDom(HtmlDocument $document)
	{
		if (!class_exists('DOMDocument'))
		{
			return false;
		}

		$this->dom = new \DOMDocument();

		// Prevents parsing errors bubbling
		$previousErrorMode = libxml_use_internal_errors(true);
		try
		{
			// The leading declaration tells the parser which encoding the markup is in.
			$result = $this->dom->loadHTML(
				'<?xml encoding="' . $document->getEncoding() . '">' . $document->getHtml(),
				LIBXML_COMPACT
			);
		}
		finally
		{
			// Release the buffered parse errors and give the global libxml
			// error mode back to whatever the caller had configured.
			libxml_clear_errors();
			libxml_use_internal_errors($previousErrorMode);
		}

		return $result;
	}
}
setDescription($description)
Определения htmldocument.php:144
getNodeInnerHtml(\DOMElement $element)
Определения schemaorg.php:152
handle(HtmlDocument $document)
Определения schemaorg.php:24
initializeDom(HtmlDocument $document)
Определения schemaorg.php:169
handleNode(\DOMElement $node)
Определения schemaorg.php:138
getSchemaPropertyValue(\DOMElement $node)
Определения schemaorg.php:93
walkDomTree(\DOMElement $currentNode, $currentDepth=0)
Определения schemaorg.php:77
$children
Определения sync.php:12
</td ></tr ></table ></td ></tr ><?endif?><? $propertyIndex=0;foreach( $arGlobalProperties as $propertyCode=> $propertyValue
Определения file_new.php:729
</td ></tr ></table ></td ></tr >< tr >< td class="bx-popup-label bx-width30"><?=GetMessage("PAGE_NEW_TAGS")?> array( $site)
Определения file_new.php:804
$result
Определения get_property_values.php:14