1 | <?php
|
---|
2 | /**
|
---|
3 | * DokuWiki indexer
|
---|
4 | *
|
---|
5 | * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
|
---|
6 | * @author Andreas Gohr <[email protected]>
|
---|
7 | */
|
---|
if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
// (when it did not work, or the setup is flagged as broken, the GIF is
// only sent after the tasks below have finished)
$defer = !@ignore_user_abort() || $conf['broken_iua'];
if(!$defer){
    sendGIF(); // send gif
}

// The id parameter is optional: guard the lookup so that a request without
// it does not raise an undefined-index notice before output buffering has
// been started (which could break the headers of a deferred sendGIF()).
$ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');

// Catch any possible output (e.g. errors)
// Output is only let through when debugging was requested AND is allowed
$output = isset($_REQUEST['debug']) && $conf['allowdebug'];
if(!$output) ob_start();

// run one of the jobs
// The `or` chain short-circuits: the first task returning a true value ends
// the chain, so advise_after() is only reached when no task reported work.
$tmp = array(); // No event data
$evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
if ($evt->advise_before()) {
    runIndexer() or
    runSitemapper() or
    sendDigest() or
    runTrimRecentChanges() or
    runTrimRecentChanges(true) or
    $evt->advise_after();
}
if($defer) sendGIF();

// discard whatever the tasks printed unless debug output was enabled
if(!$output) ob_end_clean();
exit;
|
---|
44 |
|
---|
45 | // --------------------------------------------------------------------
|
---|
46 |
|
---|
/**
 * Trims the recent changes cache as needed.
 *
 * Trimming is attempted at most once a day; the time of the last attempt is
 * tracked through the mtime of the "<changelog>.trimmed" marker file. Entries
 * younger than $conf['recent_days'] days are always kept. Older entries are
 * only kept as far as they are needed to retain at least $conf['recent']
 * lines. Lines that parseChangelogLine() cannot parse are dropped.
 *
 * @param bool $media_changes If the media changelog shall be trimmed instead of
 *                            the page changelog
 * @return bool true if the changelog was rewritten, false if nothing was done
 *
 * @author Ben Coburn <[email protected]>
 */
function runTrimRecentChanges($media_changes = false) {
    global $conf;

    $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, which ever is larger.
    // The trimming is only done once a day.
    // A missing marker file makes filemtime() return false, which counts as
    // "never trimmed". An existing _tmp file means another trim is running.
    if (!@file_exists($fn) ||
        (@filemtime($fn.'.trimmed')+86400) >= time() ||
        @file_exists($fn.'_tmp')) {
        // nothing done
        return false;
    }

    @touch($fn.'.trimmed');
    io_lock($fn);
    $lines = file($fn);
    if ($lines === false || count($lines) <= $conf['recent']) {
        // unreadable changelog or nothing to trim
        io_unlock($fn);
        return false;
    }

    io_saveFile($fn.'_tmp', '');          // presave tmp as 2nd lock
    $trim_time = time() - $conf['recent_days']*86400;
    $out_lines = array();
    // must be initialised: it stays empty when every parsable line is recent
    $old_lines = array();

    foreach ($lines as $i => $line) {
        $log = parseChangelogLine($line);
        if ($log === false) continue;     // discard junk
        $key = $log['date'].".$i";        // append .$i to prevent key collisions
        if ($log['date'] < $trim_time) {
            $old_lines[$key] = $line;     // keep old lines for now
        } else {
            $out_lines[$key] = $line;     // definitely keep these lines
        }
    }

    if (count($lines) == count($out_lines)) {
        // nothing to trim
        @unlink($fn.'_tmp');
        io_unlock($fn);
        return false;
    }

    // sort the final result, it shouldn't be necessary,
    // however the extra robustness in making the changelog cache self-correcting is worth it
    ksort($out_lines);
    $extra = $conf['recent'] - count($out_lines);   // do we need extra lines to bring us up to minimum
    if ($extra > 0) {
        // top up with the newest of the old lines
        ksort($old_lines);
        $out_lines = array_merge(array_slice($old_lines, -$extra), $out_lines);
    }

    // save trimmed changelog
    $trimmed = implode('', $out_lines);
    io_saveFile($fn.'_tmp', $trimmed);
    @unlink($fn);
    if (!rename($fn.'_tmp', $fn)) {
        // rename failed so try another way...
        // the lock is released before io_saveFile() is called on the changelog itself
        io_unlock($fn);
        io_saveFile($fn, $trimmed);
        @unlink($fn.'_tmp');
    } else {
        io_unlock($fn);
    }
    return true;
}
|
---|
123 |
|
---|
/**
 * Runs the indexer for the current page
 *
 * Prints a start marker, then hands the global $ID to idx_addPage().
 *
 * @return mixed false when no page ID is set, otherwise whatever
 *               idx_addPage($ID, true) returns
 *
 * @author Andreas Gohr <[email protected]>
 */
function runIndexer()
{
    global $ID, $conf;

    echo 'runIndexer(): started', NL;

    if ($ID) {
        // do the work
        $indexed = idx_addPage($ID, true);
        return $indexed;
    }

    // no page given, nothing to index
    return false;
}
|
---|
139 |
|
---|
/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The work is delegated to Sitemapper::generate(); only when that call
 * reports success are the search engines pinged through
 * Sitemapper::pingSearchEngines(). Start and finish markers are printed
 * around the calls.
 *
 * NOTE(review): the original note says the map is placed in the root
 * directory as sitemap.xml.gz and that this file needs to be writable - the
 * actual location is decided inside Sitemapper, confirm there.
 *
 * @return bool true only if both the generation and the ping succeeded
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 */
function runSitemapper()
{
    echo 'runSitemapper(): started', NL;

    $done = false;
    if (Sitemapper::generate()) {
        // the ping is skipped entirely when generation did not succeed
        $done = (bool) Sitemapper::pingSearchEngines();
    }

    echo 'runSitemapper(): finished', NL;
    return $done;
}
|
---|
155 |
|
---|
/**
 * Send digest and list mails for all subscriptions which are in effect for the
 * current page
 *
 * For every subscription found for the global $ID, each subscriber whose
 * last notification is older than $conf['subscribe_time'] is impersonated
 * (global $USERINFO and $_SERVER['REMOTE_USER']) so that ACL checks apply to
 * that user, the relevant changes are collected and the mail is sent. The
 * original user info is restored before returning.
 *
 * @return bool always false, so that a boolean `or` chain of tasks continues
 *              with the next task
 *
 * @author Adrian Lang <[email protected]>
 */
function sendDigest() {
    global $ID;
    global $conf;
    global $auth;
    global $USERINFO;

    echo 'sendDigest(): start'.NL;
    if (!$conf['subscribers']) {
        // subscriptions are disabled
        return false;
    }
    $subscriptions = subscription_find($ID, array('style' => '(digest|list)',
                                                  'escaped' => true));

    // remember current user info
    // (REMOTE_USER is not set for anonymous requests, so guard the lookup)
    $olduinfo = $USERINFO;
    $olduser  = isset($_SERVER['REMOTE_USER']) ? $_SERVER['REMOTE_USER'] : null;

    foreach($subscriptions as $id => $users) {
        if (!subscription_lock($id)) {
            // could not get the lock for this target, skip it
            continue;
        }
        foreach($users as $data) {
            list($user, $style, $lastupdate) = $data;
            $lastupdate = (int) $lastupdate;
            if ($lastupdate + $conf['subscribe_time'] > time()) {
                // Less than the configured time period passed since last
                // update.
                continue;
            }

            // Work as the user to make sure ACLs apply correctly
            $USERINFO = $auth->getUserData($user);
            $_SERVER['REMOTE_USER'] = $user;
            if ($USERINFO === false) {
                // no user data available for this subscriber
                continue;
            }

            if (substr($id, -1, 1) === ':') {
                // The subscription target is a namespace
                $changes = getRecentsSince($lastupdate, null, getNS($id));
            } else {
                // A single page: the subscriber needs at least read access
                if(auth_quickaclcheck($id) < AUTH_READ) continue;

                $meta = p_get_metadata($id);
                $changes = array($meta['last_change']);
            }

            // Filter out pages only changed in small and own edits
            $change_ids = array();
            foreach($changes as $rev) {
                $n = 0;
                // Step through further revisions for as long as the current
                // one is the subscriber's own or a minor edit.
                // NOTE(review): the condition indexes $rev with 'date',
                // 'user', 'type' and 'id', so getRevisions() is assumed to
                // return entries of that shape - confirm against its
                // definition.
                while (!is_null($rev) && $rev['date'] >= $lastupdate &&
                       ($_SERVER['REMOTE_USER'] === $rev['user'] ||
                        $rev['type'] === DOKU_CHANGE_TYPE_MINOR_EDIT)) {
                    $rev = getRevisions($rev['id'], $n++, 1);
                    $rev = (count($rev) > 0) ? $rev[0] : null;
                }

                if (!is_null($rev) && $rev['date'] >= $lastupdate) {
                    // Some change was not a minor one and not by myself
                    $change_ids[] = $rev['id'];
                }
            }

            if ($style === 'digest') {
                // one mail per changed page
                foreach($change_ids as $change_id) {
                    subscription_send_digest($USERINFO['mail'], $change_id,
                                             $lastupdate);
                }
            } elseif ($style === 'list') {
                // one mail listing all changed pages
                subscription_send_list($USERINFO['mail'], $change_ids, $id);
            }
            // TODO: Handle duplicate subscriptions.

            // Update notification time.
            subscription_set($user, $id, $style, time(), true);
        }
        subscription_unlock($id);
    }

    // restore current user info
    $USERINFO = $olduinfo;
    $_SERVER['REMOTE_USER'] = $olduser;

    return false;
}
|
---|
247 |
|
---|
/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * In debug mode no image is sent; only a text/plain content type header is
 * set so that the task output can be read. Debug mode requires both the
 * "debug" request parameter and $conf['allowdebug'] - the same condition the
 * main script uses to decide whether output is let through. Without the
 * config check a disallowed debug request would get neither the image nor
 * any output.
 *
 * @author Andreas Gohr <[email protected]>
 * @author Harry Fuecks <[email protected]>
 */
function sendGIF(){
    global $conf;

    if(isset($_REQUEST['debug']) && !empty($conf['allowdebug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    // announce the exact size so the client knows when the image is complete
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}
|
---|
268 |
|
---|
269 | //Setup VIM: ex: et ts=4 :
|
---|
270 | // No trailing PHP closing tag - no output please!
|
---|
271 | // See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php
|
---|