Bitrix-D7  20.0.0
sitemapfile.php
См. документацию.
1 <?php
2 /**
3  * Bitrix Framework
4  * @package bitrix
5  * @subpackage seo
6  * @copyright 2001-2013 Bitrix
7  */
8 namespace Bitrix\Seo;
9 
14 
15 /**
16  * Base class for sitemapfile
17  * Class SitemapFile
18  * @package Bitrix\Seo
19  */
21  extends File
22 {
23  const XML_HEADER = '<?xml version="1.0" encoding="UTF-8"?>';
24 
25  const FILE_HEADER = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
26  const FILE_FOOTER = '</urlset>';
27 
28  const ENTRY_TPL = '<url><loc>%s</loc><lastmod>%s</lastmod></url>';
29  const ENTRY_TPL_SEARCH = '<url><loc>%s</loc>';
30 
31  const XPATH_URL = '/urlset/url';
32 
33  const MAX_SIZE = 5000000;
34 
35  const FILE_EXT = '.xml';
36  const FILE_PART_SUFFIX = '.part';
37 
38  protected $documentRoot;
39  protected $settings = array();
40  protected $parser = false;
41 
42  protected $siteRoot = '';
43  protected $partFile = '';
44  protected $partList = array();
45  protected $part = 0;
46  protected $partChanged = false;
47  protected $footerClosed = false;
48 
49  protected $urlToSearch = '';
50  protected $urlFound = false;
51 
/**
 * Creates a sitemap file object bound to a site.
 *
 * Also invoked by reInit() on an already constructed object in order to
 * switch it to another part file.
 *
 * @param string $fileName Sitemap file name relative to the site root. The ".xml" extension is appended if missing.
 * @param array $settings Sitemap settings. The SITE_ID, PROTOCOL and DOMAIN keys are read.
 */
public function __construct($fileName, $settings)
{
	$this->settings = array(
		'SITE_ID' => $settings['SITE_ID'],
		'PROTOCOL' => $settings['PROTOCOL'] == 'https' ? 'https' : 'http',
		'DOMAIN' => $settings['DOMAIN'],
	);

	$site = SiteTable::getRow(array("filter" => array("LID" => $this->settings['SITE_ID'])));

	$this->documentRoot = SiteTable::getDocumentRoot($this->settings['SITE_ID']);
	$this->footerClosed = false;

	$this->siteRoot = Path::combine(
		$this->documentRoot,
		$site['DIR']
	);

	$fileName = $this->prepareFileName($fileName);

	// Remember the base file name only once. split() derives every part name
	// from it, so it must survive the re-construction done by reInit().
	// (The previous ternary was inverted and left the property empty forever.)
	if (!$this->partFile)
	{
		$this->partFile = $fileName;
	}

	$this->pathPhysical = null; // hack for object reconstruction during file splitting
	parent::__construct($this->siteRoot.'/'.$fileName, $this->settings['SITE_ID']);
	$this->partChanged = $this->isExists() && !$this->isSplitNeeded();
}
76 
/**
 * Brings a sitemap file name to its canonical form.
 *
 * The name is normalized with Path::normalize(), gets the ".xml" extension
 * when it does not already end with it, and has every "_" replaced by "-".
 *
 * @param string $fileName Raw file name.
 *
 * @return string
 */
protected function prepareFileName($fileName)
{
	// normalize slashes
	$name = Path::normalize($fileName);

	$extension = self::FILE_EXT;
	if (substr($name, -strlen($extension)) != $extension)
	{
		$name .= $extension;
	}

	// convert the word delimiter: Google doesn't like '_'
	return str_replace('_', '-', $name);
}
91 
/**
 * Re-runs the constructor on this object so that it points to another file.
 *
 * The settings stored on the object are passed through unchanged.
 *
 * @param string $fileName New file name.
 *
 * @return void
 */
protected function reInit($fileName)
{
	$currentSettings = $this->settings;

	$this->__construct($fileName, $currentSettings);
}
101 
/**
 * Writes the XML declaration and the opening <urlset> tag to the current file.
 *
 * putContents() is called without the APPEND flag here. The current part is
 * flagged as changed before the write.
 *
 * @return void
 */
public function addHeader()
{
	$this->partChanged = true;

	$header = self::XML_HEADER.self::FILE_HEADER;
	$this->putContents($header);
}
112 
/**
 * Tells whether the current file has reached the size limit and a new part has to be started.
 *
 * A file that does not exist never needs splitting.
 *
 * @return bool
 * @throws \Bitrix\Main\IO\FileNotFoundException
 */
protected function isSplitNeeded()
{
	if (!$this->isExists())
	{
		return false;
	}

	return $this->getSize() >= self::MAX_SIZE;
}
123 
/**
 * Writes one <url> record at the end of the current sitemap file.
 *
 * When the current file has reached the size limit, a new part is started
 * first and the entry goes there. The header is written automatically if
 * the current part has not been touched yet. No footer is written.
 *
 * Entry array keys
 * XML_LOC - loc field value
 * XML_LASTMOD - lastmod field value
 *
 * @param array $entry Entry array.
 *
 * @return void
 */
public function addEntry($entry)
{
	if ($this->isSplitNeeded())
	{
		$this->split();
		$this->addEntry($entry);

		return;
	}

	if (!$this->partChanged)
	{
		$this->addHeader();
	}

	$loc = Converter::getXmlConverter()->encode($entry['XML_LOC']);
	$lastmod = Converter::getXmlConverter()->encode($entry['XML_LASTMOD']);

	$this->putContents(sprintf(self::ENTRY_TPL, $loc, $lastmod), self::APPEND);
}
158 
/**
 * Closes the current part and switches the object to the next part file.
 *
 * The footer is written only if the current part was changed. The current
 * file name is stored in the part list, the part counter is incremented and
 * the new name is built as "<base>.part<N><ext>" from the stored part file name.
 *
 * @return string Name of the new part file.
 */
public function split()
{
	if ($this->partChanged)
	{
		$this->addFooter();
	}

	$this->partList[] = $this->getName();
	$this->part++;

	$extLength = strlen(self::FILE_EXT);
	$baseName = substr($this->partFile, 0, -$extLength);
	$extension = substr($this->partFile, -$extLength);

	$fileName = $baseName.self::FILE_PART_SUFFIX.$this->part.$extension;

	$this->reInit($fileName);

	$this->partChanged = $this->isExists() && !$this->isSplitNeeded();

	return $fileName;
}
183 
/**
 * Returns the names of all sitemap part files.
 *
 * The current file is included only when it contains something besides the header.
 *
 * @return array
 */
public function getNameList()
{
	if (!$this->isCurrentPartNotEmpty())
	{
		return $this->partList;
	}

	return array_merge($this->partList, array($this->getName()));
}
193 
/**
 * Returns the directory part of the sitemap file path.
 *
 * The site root and the file name are cut out of the normalized path and
 * leading slashes are trimmed from the remainder.
 *
 * @return string
 */
public function getPathDirectory()
{
	// normalize slashes
	$normalizedRoot = Path::normalize($this->siteRoot);
	$name = $this->getName();
	$normalizedPath = Path::normalize($this->path);

	$withoutRootAndName = str_replace(array($normalizedRoot, $name), '', $normalizedPath);

	return ltrim($withoutRootAndName, '/');
}
209 
/**
 * Tells whether the sitemap as a whole (not only the current part) has content.
 *
 * True when at least one finished part is registered or the current part is not empty.
 *
 * @return bool
 */
public function isNotEmpty()
{
	if (count($this->partList) > 0)
	{
		return true;
	}

	return (bool)$this->isCurrentPartNotEmpty();
}
219 
/**
 * Tells whether the current part file holds anything besides the header.
 *
 * A missing file, an empty file and a file consisting of the header only
 * are all treated as empty.
 *
 * @return bool
 */
public function isCurrentPartNotEmpty()
{
	if (!$this->isExists())
	{
		return false;
	}

	$contents = $this->getContents();
	$headerOnly = self::XML_HEADER.self::FILE_HEADER;

	return strlen($contents) > 0 && $contents != $headerOnly;
}
235 
/**
 * Inserts one <url> record into an already finished sitemap file.
 *
 * If the part was already changed, the record is written over the last
 * strlen(FILE_FOOTER) bytes of the file and the footer is written again
 * after it, so the file is expected to end with the footer. Otherwise the
 * header is written first and the record follows it. When the size limit
 * is reached, a new part is started and the record goes there.
 *
 * Entry array keys
 * XML_LOC - loc field value
 * XML_LASTMOD - lastmod field value
 *
 * @param array $entry Entry array.
 *
 * @return void
 */
public function appendEntry($entry)
{
	if ($this->isSplitNeeded())
	{
		$this->split();
		$this->appendEntry($entry);

		return;
	}

	if ($this->partChanged)
	{
		// step back over the closing tag so that it gets overwritten
		$offset = $this->getSize() - strlen(self::FILE_FOOTER);
	}
	else
	{
		$this->addHeader();
		$offset = $this->getSize();
	}

	$fd = $this->open('r+');
	fseek($fd, $offset);

	$record = sprintf(
		self::ENTRY_TPL,
		Converter::getXmlConverter()->encode($entry['XML_LOC']),
		Converter::getXmlConverter()->encode($entry['XML_LASTMOD'])
	);
	fwrite($fd, $record.self::FILE_FOOTER);
	fclose($fd);

	$this->footerClosed = true;
}
279 
/**
 * Searches for an entry in the existing sitemap file(s) and blanks it out.
 *
 * The full URL is built from the PROTOCOL and DOMAIN settings plus $url.
 * The first match is overwritten with spaces; the number of spaces equals
 * the length of a record built from the URL and the current date('c'), so
 * the stored lastmod value is assumed to have the same length.
 * If the URL is not found and the current file has reached the size limit,
 * the search continues in the next part file.
 *
 * @param string $url Entry URL (appended to protocol and domain).
 *
 * @return string Name of the last file name computed during the search
 *                (the stored part file name if no further part was opened).
 */
public function removeEntry($url)
{
	$fileName = $this->partFile;
	$url = $this->settings['PROTOCOL'] . '://' . \CBXPunycode::toASCII($this->settings['DOMAIN'], $e = NULL) . $url;

	// Records are stored XML-encoded (see addEntry()), so the needle has to be
	// encoded the same way, otherwise URLs with '&' etc. are never found.
	$encodedUrl = Converter::getXmlConverter()->encode($url);
	$pattern = sprintf(self::ENTRY_TPL_SEARCH, $encodedUrl);

	while ($this->isExists())
	{
		$c = $this->getContents();
		$p = strpos($c, $pattern);
		unset($c); // the contents may be several megabytes, free them early

		if ($p !== false)
		{
			$fd = $this->open('r+');

			fseek($fd, intval($p));
			fwrite($fd, str_repeat(" ", strlen(sprintf(
				self::ENTRY_TPL,
				$encodedUrl,
				Converter::getXmlConverter()->encode(date('c'))
			))));
			fclose($fd);
			break;
		}

		// a further part can exist only if this one has reached the size limit
		if (!$this->isSplitNeeded())
		{
			break;
		}

		// Build the next part name exactly like split() does: from the part file
		// name stored on the object, not from the name accumulated locally.
		$this->part++;
		$fileName = substr($this->partFile, 0, -strlen(self::FILE_EXT))
			. self::FILE_PART_SUFFIX
			. $this->part
			. substr($this->partFile, -strlen(self::FILE_EXT));
		$this->reInit($fileName);
	}

	return $fileName;
}
331 
/**
 * Adds a record for a file to the current sitemap.
 *
 * Files that do not exist and system files are skipped silently.
 * The loc value is protocol + punycode-encoded domain + file URL,
 * the lastmod value is date('c') of the file modification time.
 *
 * @param File $f File to add.
 *
 * @return void
 * @throws \Bitrix\Main\IO\FileNotFoundException
 */
public function addFileEntry(File $f)
{
	if (!$f->isExists() || $f->isSystem())
	{
		return;
	}

	$loc = $this->settings['PROTOCOL'].'://'.\CBXPunycode::toASCII($this->settings['DOMAIN'], $e = null).$this->getFileUrl($f);
	$lastmod = date('c', $f->getModificationTime());

	$this->addEntry(array(
		'XML_LOC' => $loc,
		'XML_LASTMOD' => $lastmod,
	));
}
350 
/**
 * Adds a record for an IBlock URL to the current sitemap.
 *
 * The loc value is protocol + punycode-encoded domain + $url. The lastmod
 * value is date('c') of $modifiedDate reduced by \CTimeZone::getOffset().
 *
 * @param string $url IBlock entry URL.
 * @param string $modifiedDate IBlock entry modify timestamp.
 *
 * @return void
 */
public function addIBlockEntry($url, $modifiedDate)
{
	$loc = $this->settings['PROTOCOL'].'://'.\CBXPunycode::toASCII($this->settings['DOMAIN'], $e = null).$url;
	$lastmod = date('c', $modifiedDate - \CTimeZone::getOffset());

	$this->addEntry(array(
		'XML_LOC' => $loc,
		'XML_LASTMOD' => $lastmod,
	));
}
366 
/**
 * Adds a record for an IBlock URL to a sitemap that may already be finished.
 *
 * If the file does not exist yet, it is created from scratch: header,
 * the record, footer. Otherwise the record is inserted with appendEntry().
 *
 * @param string $url IBlock entry URL.
 * @param string $modifiedDate IBlock entry modify timestamp.
 *
 * @return void
 */
public function appendIBlockEntry($url, $modifiedDate)
{
	if (!$this->isExists())
	{
		$this->addHeader();
		$this->addIBlockEntry($url, $modifiedDate);
		$this->addFooter();

		return;
	}

	$loc = $this->settings['PROTOCOL'].'://'.\CBXPunycode::toASCII($this->settings['DOMAIN'], $e = null).$url;
	$lastmod = date('c', $modifiedDate - \CTimeZone::getOffset());

	$this->appendEntry(array(
		'XML_LOC' => $loc,
		'XML_LASTMOD' => $lastmod,
	));
}
391 
/**
 * Appends the closing </urlset> tag to the current sitemap part
 * and marks the footer as closed.
 *
 * @return void
 */
public function addFooter()
{
	$footer = self::FILE_FOOTER;

	$this->putContents($footer, self::APPEND);
	$this->footerClosed = true;
}
402 
/**
 * Returns the site root the sitemap belongs to, i.e. the document root
 * combined with the site directory in the constructor.
 *
 * @return mixed|string
 */
public function getSiteRoot()
{
	$root = $this->siteRoot;

	return $root;
}
412 
/**
 * Returns the absolute URL of the sitemap file itself:
 * protocol + punycode-encoded domain + URL of this file.
 *
 * @return string
 */
public function getUrl()
{
	$host = \CBXPunycode::toASCII($this->settings['DOMAIN'], $e = null);
	$relativeUrl = $this->getFileUrl($this);

	return $this->settings['PROTOCOL'].'://'.$host.$relativeUrl;
}
422 
/**
 * Loads the sitemap file into a \CDataXML parser and returns it.
 *
 * The parser is created once and reused on later calls. While the file
 * does not exist, no parser is created and the initial value (false) is returned.
 *
 * @return bool|\CDataXML
 * @throws \Bitrix\Main\IO\FileNotFoundException
 */
public function parse()
{
	if (!$this->parser && $this->isExists())
	{
		$this->parser = new \CDataXML();
		$this->parser->loadString($this->getContents());
	}

	return $this->parser;
}
442 
/**
 * Returns the site-relative URL of a file.
 *
 * The document root prefix is cut from the file path, the rest is converted
 * to a URI, and a trailing directory index file name (as reported by
 * GetDirIndexArray()) is dropped so that the URL points to the directory.
 * A file that is not located under the document root yields "/".
 *
 * @param File $f File object.
 *
 * @return string URL path that always starts with a single "/".
 */
protected function getFileUrl(File $f)
{
	// the list of index file names is fetched once per request
	static $indexNames;
	if (!is_array($indexNames))
	{
		$indexNames = GetDirIndexArray();
	}

	$documentRoot = Path::normalize($this->documentRoot);
	$filePath = $f->getPath();

	// The prefix test has to look at the same path that is sliced below;
	// it used to test the sitemap's own path instead of the path of $f.
	$path = '/';
	if (substr($filePath, 0, strlen($documentRoot)) === $documentRoot)
	{
		$path = '/'.substr($filePath, strlen($documentRoot));
	}

	// encode the logical path for use inside a URL
	$path = Path::convertLogicalToUri($path);

	$path = in_array($f->getName(), $indexNames)
		? str_replace('/'.$f->getName(), '/', $path)
		: $path;

	return '/'.ltrim($path, '/');
}
473 }
Bitrix\Main\SiteTable\getDocumentRoot
static getDocumentRoot($siteId=null)
Definition: main/lib/site.php:16
Bitrix\Seo\SitemapFile\ENTRY_TPL
const ENTRY_TPL
Definition: sitemapfile.php:28
Bitrix\Seo\SitemapFile\MAX_SIZE
const MAX_SIZE
Definition: sitemapfile.php:33
Bitrix\Seo\SitemapFile\isSplitNeeded
isSplitNeeded()
Checks is it needed to create new part of sitemap file.
Definition: sitemapfile.php:119
Bitrix\Main\IO\Path\convertLogicalToUri
static convertLogicalToUri($path)
Definition: main/lib/io/path.php:142
Bitrix\Main\ORM\Data\DataManager\getRow
static getRow(array $parameters)
Returns one row (or null) by parameters for getList()
Definition: main/lib/orm/data/datamanager.php:407
Bitrix\Seo\SitemapFile\addHeader
addHeader()
Adds header to the current sitemap file.
Definition: sitemapfile.php:107
Bitrix\Main\IO\FileSystemEntry\$path
$path
Definition: filesystementry.php:8
Bitrix\Main\IO\File\open
open($mode)
Opens the file and returns the file pointer.
Definition: main/lib/io/file.php:26
Bitrix\Seo\SitemapFile\parse
parse()
Parses sitemap file.
Definition: sitemapfile.php:429
Bitrix\Seo\SitemapFile\removeEntry
removeEntry($url)
Searches and removes entry to the existing and finished sitemap file.
Definition: sitemapfile.php:291
Bitrix\Seo\SitemapFile\getSiteRoot
getSiteRoot()
Returns sitemap site root.
Definition: sitemapfile.php:408
Bitrix\Main\IO\File\isExists
isExists()
Definition: main/lib/io/file.php:51
Bitrix\Seo\SitemapFile\split
split()
Creates next sitemap file part.
Definition: sitemapfile.php:164
Bitrix\Seo\SitemapFile\$urlFound
$urlFound
Definition: sitemapfile.php:50
Bitrix\Seo\SitemapFile\prepareFileName
prepareFileName($fileName)
Definition: sitemapfile.php:77
Bitrix\Main\IO\Path\normalize
static normalize($path)
Definition: main/lib/io/path.php:26
Bitrix\Main\Text\Converter
Definition: main/lib/text/converter.php:4
Bitrix\Main\IO\Path\combine
static combine()
Definition: main/lib/io/path.php:221
Bitrix\Seo\SitemapFile\ENTRY_TPL_SEARCH
const ENTRY_TPL_SEARCH
Definition: sitemapfile.php:29
Bitrix\Seo\SitemapFile
Definition: sitemapfile.php:20
Bitrix\Seo\SitemapFile\isCurrentPartNotEmpty
isCurrentPartNotEmpty()
Returns if current sitemap part contains something besides header.
Definition: sitemapfile.php:225
Bitrix\Seo\SitemapFile\$footerClosed
$footerClosed
Definition: sitemapfile.php:47
Bitrix\Seo\SitemapFile\addFooter
addFooter()
Adds footer to the current sitemap part.
Definition: sitemapfile.php:397
Bitrix\Seo\SitemapFile\getFileUrl
getFileUrl(File $f)
Returns file relative path for URL.
Definition: sitemapfile.php:450
Bitrix\Seo
Bitrix\Seo\SitemapFile\$partList
$partList
Definition: sitemapfile.php:44
Bitrix\Seo\SitemapFile\appendIBlockEntry
appendIBlockEntry($url, $modifiedDate)
Appends new IBlock entry to the existing finished sitemap.
Definition: sitemapfile.php:375
Bitrix\Seo\SitemapFile\$part
$part
Definition: sitemapfile.php:45
Bitrix\Seo\SitemapFile\$documentRoot
$documentRoot
Definition: sitemapfile.php:38
Bitrix\Main\IO\File\getContents
getContents()
Definition: main/lib/io/file.php:57
Bitrix\Seo\SitemapFile\getUrl
getUrl()
Returns sitemap file URL.
Definition: sitemapfile.php:418
Bitrix\Main\IO\Path
Definition: main/lib/io/path.php:10
Bitrix\Seo\SitemapFile\FILE_EXT
const FILE_EXT
Definition: sitemapfile.php:35
Bitrix\Seo\SitemapFile\reInit
reInit($fileName)
Reinitializes current object with new file name.
Definition: sitemapfile.php:97
Bitrix\Seo\SitemapFile\addIBlockEntry
addIBlockEntry($url, $modifiedDate)
Adds new IBlock entry to the current sitemap.
Definition: sitemapfile.php:359
Bitrix\Seo\SitemapFile\XML_HEADER
const XML_HEADER
Definition: sitemapfile.php:23
Bitrix\Seo\SitemapFile\__construct
__construct($fileName, $settings)
Definition: sitemapfile.php:52
Bitrix\Seo\SitemapFile\appendEntry
appendEntry($entry)
Appends new entry to the existing and finished sitemap file.
Definition: sitemapfile.php:247
Bitrix\Main\IO\File\getSize
getSize()
Returns the file size.
Definition: main/lib/io/file.php:86
Bitrix\Seo\SitemapFile\isNotEmpty
isNotEmpty()
Returns if the whole sitemap is empty (not only current part).
Definition: sitemapfile.php:215
Bitrix\Seo\SitemapFile\getNameList
getNameList()
Returns list of file parts.
Definition: sitemapfile.php:189
Bitrix\Seo\SitemapFile\FILE_PART_SUFFIX
const FILE_PART_SUFFIX
Definition: sitemapfile.php:36
Bitrix\Seo\SitemapFile\$partChanged
$partChanged
Definition: sitemapfile.php:46
Bitrix\Sender\Connector\__construct
__construct(Base $connector)
Constructor.
Definition: resultview.php:40
Bitrix\Seo\SitemapFile\addFileEntry
addFileEntry(File $f)
Adds new file entry to the current sitemap.
Definition: sitemapfile.php:340
Bitrix\Main\Text\Converter\getXmlConverter
static getXmlConverter()
Definition: main/lib/text/converter.php:20
Bitrix\Seo\SitemapFile\$parser
$parser
Definition: sitemapfile.php:40
Bitrix\Seo\SitemapFile\$settings
$settings
Definition: sitemapfile.php:39
Bitrix\Main\IO\File\putContents
putContents($data, $flags=self::REWRITE)
Definition: main/lib/io/file.php:65
Bitrix\Seo\SitemapFile\$urlToSearch
$urlToSearch
Definition: sitemapfile.php:49
Bitrix\Main\IO\File
Definition: main/lib/io/file.php:4
Bitrix\Seo\SitemapFile\getPathDirectory
getPathDirectory()
Divide path to directory and filemname.
Definition: sitemapfile.php:198
Bitrix\Seo\SitemapFile\addEntry
addEntry($entry)
Adds new entry to the current sitemap file.
Definition: sitemapfile.php:135
Bitrix\Main\IO\FileSystemEntry\getPath
getPath()
Definition: filesystementry.php:66
Bitrix\Seo\SitemapFile\$partFile
$partFile
Definition: sitemapfile.php:43
Bitrix\Seo\SitemapFile\XPATH_URL
const XPATH_URL
Definition: sitemapfile.php:31
Bitrix\Seo\SitemapFile\FILE_HEADER
const FILE_HEADER
Definition: sitemapfile.php:25
Bitrix\Seo\SitemapFile\FILE_FOOTER
const FILE_FOOTER
Definition: sitemapfile.php:26
Bitrix\Main\IO\File\getModificationTime
getModificationTime()
Definition: main/lib/io/file.php:212
Bitrix\Seo\SitemapFile\$siteRoot
$siteRoot
Definition: sitemapfile.php:42
Bitrix\Main\IO\FileSystemEntry\getName
getName()
Definition: filesystementry.php:56
Bitrix\Main\SiteTable
Definition: main/lib/site.php:12
Bitrix\Main\IO\FileSystemEntry\isSystem
isSystem()
Definition: filesystementry.php:26