<?php
/**
 * Sitemap handling functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Michael Hamann <[email protected]>
 */

// Abort immediately unless the including script has defined DOKU_INC.
if (!defined('DOKU_INC')) {
    die('meh.');
}

/**
 * A class for building sitemaps and pinging search engines with the sitemap URL.
 *
 * All methods are static; they may be called as Sitemapper::method() as well
 * as on an instance.
 *
 * @author Michael Hamann
 */
class Sitemapper {
    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is placed in the cache directory and is named sitemap.xml, or
     * sitemap.xml.gz when compression is enabled (see getFilePath()) - This
     * file needs to be writable!
     *
     * Plugins can modify the item list and the target path, or replace the
     * saving step altogether, through the SITEMAP_GENERATE event.
     *
     * @author Michael Hamann
     * @author Andreas Gohr
     * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
     * @link   http://www.sitemaps.org/
     *
     * @return bool|mixed false when sitemaps are disabled, the target is not
     *                    writable, the existing sitemap is still fresh or the
     *                    default action was prevented by an event handler;
     *                    otherwise the return value of io_saveFile()
     */
    public static function generate() {
        global $conf;

        // $conf['sitemap'] is the regeneration interval in days; anything
        // non-numeric or below 1 disables sitemap generation
        if (!is_numeric($conf['sitemap']) || $conf['sitemap'] < 1) return false;

        $sitemap = self::getFilePath();

        // an existing file must be writable itself, otherwise its directory must be
        if (@file_exists($sitemap)) {
            if (!is_writable($sitemap)) return false;
        } else {
            if (!is_writable(dirname($sitemap))) return false;
        }

        // keep a non-empty sitemap that is younger than the configured interval
        if (@filesize($sitemap) &&
            @filemtime($sitemap) > (time() - ($conf['sitemap'] * 86400))) { // 60*60*24=86400
            dbglog('Sitemapper::generate(): Sitemap up to date'); // FIXME: only in debug mode
            return false;
        }

        dbglog("Sitemapper::generate(): using $sitemap"); // FIXME: Only in debug mode

        $pages = idx_get_indexer()->getPages();
        dbglog('Sitemapper::generate(): creating sitemap using '.count($pages).' pages');
        $items = array();

        // build the sitemap items
        foreach ($pages as $id) {
            //skip hidden, non existing and restricted files
            if (isHiddenPage($id)) continue;
            if (auth_aclcheck($id, '', '') < AUTH_READ) continue;
            $item = SitemapItem::createFromID($id);
            if ($item !== null) {
                $items[] = $item;
            }
        }

        // stays false when an event handler prevents the default action, so
        // the return value is always defined
        $result = false;

        // items and path are passed by reference so handlers can change them
        $eventData = array('items' => &$items, 'sitemap' => &$sitemap);
        $event = new Doku_Event('SITEMAP_GENERATE', $eventData);
        if ($event->advise_before(true)) {
            //save the new sitemap
            $result = io_saveFile($sitemap, self::getXML($items));
        }
        $event->advise_after();

        return $result;
    }

    /**
     * Builds the sitemap XML string from the given array of SitemapItems.
     *
     * @param $items array The SitemapItems that shall be included in the sitemap.
     * @return string The sitemap XML.
     * @author Michael Hamann
     */
    private static function getXML($items) {
        // collect the echoed fragments in an output buffer and return them as one string
        ob_start();
        echo '<?xml version="1.0" encoding="UTF-8"?>'.NL;
        echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
        foreach ($items as $item) {
            echo $item->toXML();
        }
        echo '</urlset>'.NL;
        $result = ob_get_contents();
        ob_end_clean();
        return $result;
    }

    /**
     * Helper function for getting the path to the sitemap file.
     *
     * The file lives in $conf['cachedir']; the name gets a .gz suffix when
     * $conf['compression'] is either 'bz2' or 'gz'.
     *
     * @return string The path to the sitemap file.
     * @author Michael Hamann
     */
    public static function getFilePath() {
        global $conf;

        $sitemap = $conf['cachedir'].'/sitemap.xml';
        if ($conf['compression'] === 'bz2' || $conf['compression'] === 'gz') {
            $sitemap .= '.gz';
        }

        return $sitemap;
    }

    /**
     * Pings search engines with the sitemap url. Plugins can add or remove
     * urls to ping using the SITEMAP_PING event.
     *
     * Request errors are only logged through dbglog(); they do not change
     * the return value.
     *
     * @return bool always true
     * @author Michael Hamann
     */
    public static function pingSearchEngines() {
        //ping search engines...
        $http = new DokuHTTPClient();
        $http->timeout = 8;

        $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&'));
        $ping_urls = array(
            'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url,
            'yahoo'     => 'http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=dokuwiki&url='.$encoded_sitemap_url,
            'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url,
        );

        $data = array('ping_urls' => $ping_urls,
                      'encoded_sitemap_url' => $encoded_sitemap_url
        );
        $event = new Doku_Event('SITEMAP_PING', $data);
        if ($event->advise_before(true)) {
            // iterate over $data (not $ping_urls) to use the list as the event carries it
            foreach ($data['ping_urls'] as $name => $url) {
                dbglog("Sitemapper::PingSearchEngines(): pinging $name");
                $resp = $http->get($url);
                if ($http->error) dbglog("Sitemapper:pingSearchengines(): $http->error");
                dbglog('Sitemapper:pingSearchengines(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)));
            }
        }
        $event->advise_after();

        return true;
    }
}

/**
 * A single URL entry of a sitemap.
 *
 * Holds the values of one <url> element and knows how to serialize itself.
 *
 * @author Michael Hamann
 */
class SitemapItem {
    public $url;
    public $lastmod;
    public $changefreq;
    public $priority;

    /**
     * Set up a sitemap entry from its raw values.
     *
     * @param $url        string       The url of the item
     * @param $lastmod    int          Timestamp of the last modification
     * @param $changefreq string       How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never.
     * @param $priority   float|string The priority of the item relative to other URLs on your site. Valid values range from 0.0 to 1.0.
     */
    public function __construct($url, $lastmod, $changefreq = null, $priority = null) {
        $this->url        = $url;
        $this->lastmod    = $lastmod;
        $this->changefreq = $changefreq;
        $this->priority   = $priority;
    }

    /**
     * Build a sitemap entry for a wiki page.
     *
     * The modification time is read from the file wikiFN() returns for the
     * trimmed id; when it cannot be determined no item is created.
     *
     * @param $id         string       A wikipage id.
     * @param $changefreq string       How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never.
     * @param $priority   float|string The priority of the item relative to other URLs on your site. Valid values range from 0.0 to 1.0.
     * @return SitemapItem|null The sitemap item, or null if the file's modification time is unavailable.
     */
    public static function createFromID($id, $changefreq = null, $priority = null) {
        $pageId   = trim($id);
        $modified = @filemtime(wikiFN($pageId));

        if ($modified) {
            return new self(wl($pageId, '', true), $modified, $changefreq, $priority);
        }

        return null;
    }

    /**
     * Serialize this entry as a sitemap <url> element.
     *
     * <changefreq> and <priority> are only emitted when the respective
     * property is not null. Every line, including the last, ends with NL.
     *
     * @return string The XML representation.
     */
    public function toXML() {
        $lines = array(
            ' <url>',
            ' <loc>'.hsc($this->url).'</loc>',
            ' <lastmod>'.date_iso8601($this->lastmod).'</lastmod>',
        );

        if ($this->changefreq !== null) {
            $lines[] = ' <changefreq>'.hsc($this->changefreq).'</changefreq>';
        }
        if ($this->priority !== null) {
            $lines[] = ' <priority>'.hsc($this->priority).'</priority>';
        }

        $lines[] = ' </url>';

        return implode(NL, $lines).NL;
    }
}