<?php

# Abstract base class
/**
 * Base class for content providers: objects that hand wiki text, template
 * text and image files to the converter.
 *
 * The two text accessors are stubs that return ""; subclasses override them.
 * Configuration is read from the global $xmlg array (keys used here:
 * 'image_destination', 'site_base_url').
 */
class ContentProvider {
	var $load_time = 0 ; # Time to load text and templates, to judge actual parsing speed
	var $article_list = array () ; # Titles registered via add_article(), stored trimmed and urlencoded
	var $authors = array () ; # Not written by this base class
	var $block_file_download = false ; # If true, copyimagefromwiki() never downloads anything

	function get_wiki_text ( $title , $do_cache = false ) { return "" ; } # dummy
	function get_template_text ( $title ) { return "" ; } # dummy

	/**
	 * Registers a title in the article list (trimmed and urlencoded).
	 */
	function add_article ( $title ) {
		$this->article_list[] = urlencode ( trim ( $title ) ) ;
	}

	/**
	 * Tells whether a title was registered through add_article().
	 * The title is normalised the same way add_article() does it.
	 *
	 * @return bool
	 */
	function is_an_article ( $title ) {
		$title = urlencode ( trim ( $title ) ) ;
		# Strict comparison: with loose comparison numeric-looking titles
		# collide (e.g. "1e3" would match "1000", "007" would match "7").
		return in_array ( $title , $this->article_list , true ) ;
	}

	/**
	 * Gets the numeric namespace of a link target, judged by the prefix
	 * before the first ":" (compared case-insensitively).
	 * XXX TODO: why are some negative?
	 *  "0"  = no prefix, empty prefix, or unknown prefix
	 *  "6"  = images (IMAGE, BILD)
	 *  "-8" = category link (CATEGORY, KATEGORIE)
	 *  "-9" = interlanguage link (any prefix shorter than 4 characters, or SIMPLE)
	 *  "11" = templates (TEMPLATE, VORLAGE)
	 *
	 * @return int
	 */
	function get_namespace_id ( $text ) {
		$parts = explode ( ":" , strtoupper ( $text ) , 2 ) ;
		if ( count ( $parts ) != 2 ) return 0 ; # No ":" at all
		$prefix = trim ( $parts[0] ) ;
		if ( $prefix == "" ) return 0 ;

		if ( $prefix == "CATEGORY" || $prefix == "KATEGORIE" ) return -8 ; # Hackish, for category link
		if ( strlen ( $prefix ) < 4 ) return -9 ; # Hackish, for interlanguage link
		if ( $prefix == "SIMPLE" ) return -9 ;

		# Horrible manual hack, for now
		if ( $prefix == "TEMPLATE" || $prefix == "VORLAGE" ) return 11 ;
		if ( $prefix == "IMAGE" || $prefix == "BILD" ) return 6 ;
		return 0 ;
	}

	/**
	 * Downloads an image into $xmlg['image_destination'] unless the target
	 * file already exists or downloads are blocked.
	 *
	 * @param string $name Image name; the local file name is urlencode($name)
	 * @param string $url  Source URL; if empty it is obtained from get_image_url()
	 * @return string The local file name (without directory), whether or not
	 *                a download took place
	 */
	function copyimagefromwiki ( $name , $url = "" ) {
		global $xmlg ;
		$dir = $xmlg['image_destination'] ;
		if ( $url == "" )
			$url = $this->get_image_url ( $name ) ; # was the bare constant `name`
		$fname = urlencode ( $name ) ;
		$target = $dir . "/" . $fname ;
		if ( file_exists ( $target ) || $this->block_file_download ) return $fname ;

		if ( !is_dir ( $dir ) ) @mkdir ( $dir ) ;
		$fh = @fopen ( $target , 'w' ) ;
		# Without a writable target there is nothing to download into; handing
		# `false` to cURL would make it dump the image to the output instead.
		if ( $fh === false ) return $fname ;

		# dub sez... use cURL
		$ch = curl_init () ;
		curl_setopt ( $ch , CURLOPT_URL , $url ) ;
		curl_setopt ( $ch , CURLOPT_FILE , $fh ) ;
		$ok = curl_exec ( $ch ) ;
		curl_close ( $ch ) ;
		fclose ( $fh ) ;
		# A failed transfer leaves an empty/partial file that would block any
		# later retry through the file_exists() check above, so remove it.
		if ( $ok === false && file_exists ( $target ) ) unlink ( $target ) ;
		return $fname ;
	}

	/**
	 * urlencode() variant that first turns spaces into underscores.
	 */
	function myurlencode ( $t ) {
		return urlencode ( str_replace ( " " , "_" , $t ) ) ;
	}

	/**
	 * Builds the download URL of an image for the site in $xmlg['site_base_url'].
	 *
	 * The path is "<h1>/<h2>/<name>", where h1/h2 are the first one/two hex
	 * digits of the md5 of the ucfirst'ed, underscored name. For
	 * "*.wikipedia.org/" and "*.wikibooks.org/" sites the project-local upload
	 * URL is probed by opening it; if that fails, the Wikimedia Commons URL is
	 * returned instead. Any other site yields "http://<site>/images/<path>".
	 *
	 * @return string
	 */
	function get_image_url ( $name ) {
		global $xmlg ;
		$site = $xmlg['site_base_url'] ;

		$hash = md5 ( ucfirst ( str_replace ( " " , "_" , $name ) ) ) ;
		$path = substr ( $hash , 0 , 1 ) . "/" . substr ( $hash , 0 , 2 ) . "/" . $this->myurlencode ( ucfirst ( $name ) ) ;

		foreach ( array ( "wikipedia" , "wikibooks" ) AS $project ) {
			$parts = explode ( ".{$project}.org/" , $site ) ;
			if ( count ( $parts ) < 2 ) continue ; # Not a site of this project
			$lang = array_shift ( $parts ) ;
			$url = "http://upload.wikimedia.org/{$project}/{$lang}/{$path}" ;
			$h = @fopen ( $url , "r" ) ; # Probe: does the image exist locally?
			if ( $h === false ) return "http://upload.wikimedia.org/wikipedia/commons/{$path}" ;
			fclose ( $h ) ;
			return $url ;
		}
		return "http://{$site}/images/{$path}" ;
	}

	/**
	 * Whether images should be shown; always true here, subclasses may override.
	 */
	function do_show_images () {
		return true ;
	}

}

# Access through HTTP protocol
/**
 * Content provider that fetches wiki text over HTTP from the site named in
 * $xmlg['site_base_url'].
 *
 * $xmlg keys read by this class: 'site_base_url', 'useapi',
 * 'use_special_export', 'use_toolserver_url', 'namespace_template'.
 */
class ContentProviderHTTP extends ContentProvider {
	var $article_cache = array () ; # title => text, filled by get_wiki_text() when $do_cache is set
	var $first_title = "" ; # First non-empty title ever requested; served as PAGENAME
	var $load_error ; # Reset to false by get_wiki_text(), set to true when the fetch failed

	/**
	 * Returns the text between the first "<tag ...>" and the following
	 * "</tag>" in $text, or "" if any of the three delimiters is missing.
	 * The opening tag is located by the prefix "<tag", so attributes are
	 * skipped (and a longer tag name starting with $tag would match too).
	 *
	 * @param string $tag  Tag name without brackets
	 * @param string $text Text to search; taken by reference, not modified
	 * @return string
	 */
	function between_tag ( $tag , &$text ) {
		$a = explode ( "<{$tag}" , $text , 2 ) ;
		if ( count ( $a ) == 1 ) return "" ;
		# The prepended blank guarantees a non-empty first piece even when
		# ">" directly follows the tag name.
		$a = explode ( ">" , " " . $a[1] , 2 ) ;
		if ( count ( $a ) == 1 ) return "" ;
		$a = explode ( "</{$tag}>" , $a[1] , 2 ) ;
		if ( count ( $a ) == 1 ) return "" ;
		return $a[0] ;
	}

	/**
	 * Fetches the raw text of one page, without redirect handling or caching.
	 *
	 * Source selection, in this order:
	 *  - $xmlg['useapi']: api.php, PHP-serialized format, templates expanded
	 *  - $xmlg['use_special_export'] == 1: Special:Export with author list;
	 *    also refills $this->authors with the registered contributors
	 *  - $xmlg['use_toolserver_url']: the WikiProxy tool
	 *  - otherwise: index.php?action=raw
	 *
	 * @return string|false The page text; "" if the API or export response
	 *                      lacks the expected data; false if a raw/WikiProxy
	 *                      fetch failed
	 */
	function do_get_contents ( $title ) {
		global $xmlg ;
		$base = "http://" . $xmlg["site_base_url"] ;
		$encoded = urlencode ( $title ) ;

		if ( !empty ( $xmlg["useapi"] ) ) {
			$url = $base . "/api.php?format=php&action=query&prop=revisions&rvexpandtemplates=1&rvprop=timestamp|user|comment|content&titles=" . $encoded ;
			$raw = @file_get_contents ( $url ) ;
			if ( $raw === false ) return "" ;
			# NOTE(review): unserialize() on data received over plain HTTP can
			# instantiate arbitrary classes; consider format=json + json_decode().
			$data = @unserialize ( $raw ) ;
			if ( !is_array ( $data ) || !isset ( $data['query']['pages'] ) || !is_array ( $data['query']['pages'] ) ) return "" ;
			$pages = $data['query']['pages'] ;
			$page = reset ( $pages ) ; # Only one title was requested
			if ( !isset ( $page['revisions'][0]['*'] ) ) return "" ;
			return $page['revisions'][0]['*'] ;
		}

		$use_se = isset ( $xmlg["use_special_export"] ) && $xmlg["use_special_export"] == 1 ;
		if ( $use_se ) {
			$url = $base . "/index.php?listauthors=1&title=Special:Export/" . $encoded ;
		} else if ( !empty ( $xmlg["use_toolserver_url"] ) ) {
			$site_parts = explode ( "/" , $xmlg["site_base_url"] ) ;
			$site = array_shift ( $site_parts ) ; # Host part only
			$url = "http://tools.wikimedia.de/~daniel/WikiSense/WikiProxy.php?wiki={$site}&title={$encoded}&rev=0&go=Fetch" ;
		} else {
			$url = $base . "/index.php?action=raw&title=" . $encoded ;
		}
		$s = @file_get_contents ( $url ) ;
		if ( !$use_se ) return $s ;

		# Special:Export: pull the text and the contributors out of the XML
		if ( $s === false ) $s = "" ;
		$text = html_entity_decode ( $this->between_tag ( "text" , $s ) ) ;
		$this->authors = array () ;
		$contributors = $this->between_tag ( "contributors" , $s ) ;
		foreach ( explode ( "</contributor><contributor>" , $contributors ) AS $author ) {
			$id = $this->between_tag ( "id" , $author ) ;
			if ( $id == '0' || $id == '' ) continue ; # Skipping IPs and (possibly) broken entries
			$this->authors[] = $this->between_tag ( "username" , $author ) ;
		}
		return $text ;
	}

	/**
	 * Returns the wiki text of a page, following one level of #REDIRECT.
	 *
	 * The time spent fetching is added to $this->load_time. A response that
	 * starts with an HTML doctype (wrong title) is turned into "". A failed
	 * fetch yields "" and sets $this->load_error to true.
	 *
	 * @param string $title    Page title; surrounding whitespace is ignored
	 * @param bool   $do_cache Store the result in $this->article_cache
	 * @return string
	 */
	function get_wiki_text ( $title , $do_cache = false ) {
		$this->load_error = false ; # Used to be assigned to a local by mistake
		$title = trim ( $title ) ;
		if ( $title == "" ) return "" ; # Just in case...
		if ( isset ( $this->article_cache[$title] ) ) # Already in the cache
			return $this->article_cache[$title] ;

		if ( $this->first_title == "" ) $this->first_title = $title ;

		# Retrieve it
		$t1 = microtime_float() ;
		$s = $this->do_get_contents ( $title ) ;
		if ( is_string ( $s ) && strtoupper ( substr ( $s , 0 , 9 ) ) == "#REDIRECT" ) {
			# The target is whatever stands between the first "[[" and the next "]]"
			$after = explode ( "[[" , $s , 2 ) ;
			$target = explode ( "]]" , array_pop ( $after ) , 2 ) ;
			$s = $this->do_get_contents ( $target[0] ) ;
		}
		$this->load_time += microtime_float() - $t1 ;

		if ( !is_string ( $s ) ) { # Fetch failed; do not hand (or cache) false as text
			$this->load_error = true ;
			$s = "" ;
		}

		$comp = '<!DOCTYPE html PUBLIC "-//W3C//DTD' ;
		if ( substr ( $s , 0 , strlen ( $comp ) ) == $comp ) $s = "" ; # Catching wrong title error

		if ( $do_cache ) $this->article_cache[$title] = $s ;
		return $s ;
	}

	/**
	 * Server-relative URL of a page: "/" + the part of site_base_url after
	 * its first "/" + "/index.php?title=<urlencoded title>".
	 */
	function get_local_url ( $title ) {
		$parts = explode ( "/" , $this->get_var ( 'site_base_url' ) , 2 ) ;
		return "/" . array_pop ( $parts ) . "/index.php?title=" . urlencode ( $title ) ;
	}

	/**
	 * "http://" + the part of site_base_url before its first "/".
	 */
	function get_server_url () {
		$parts = explode ( "/" , $this->get_var ( 'site_base_url' ) , 2 ) ;
		return "http://" . array_shift ( $parts ) ;
	}

	/**
	 * Absolute URL of a page (server URL + local URL).
	 */
	function get_full_url ( $title ) {
		return $this->get_server_url () . $this->get_local_url ( $title ) ;
	}

	/**
	 * Name of the template namespace, from $xmlg['namespace_template'].
	 */
	function get_namespace_template () {
		return $this->get_var ( 'namespace_template' ) ;
	}

	/**
	 * Reads one entry of the global $xmlg configuration.
	 *
	 * @return mixed The value, or false if the key is not set
	 */
	function get_var ( $var ) {
		global $xmlg ;
		if ( !isset ( $xmlg[$var] ) ) return false ;
		return $xmlg[$var] ;
	}

	/**
	 * Returns the text of a template, or the value of one of the built-in
	 * variables PAGENAME, PAGENAMEE, SERVER, CURRENTDAYNAME and "localurl:".
	 *
	 * A title without ":" is looked up in the template namespace; a leading
	 * ":" selects the main namespace. Template texts are always cached.
	 *
	 * @return string
	 */
	function get_template_text ( $title ) {
		# Check for fix variables
		if ( $title == "PAGENAME" ) return $this->first_title ;
		if ( $title == "PAGENAMEE" ) return urlencode ( $this->first_title ) ;
		if ( $title == "SERVER" ) return $this->get_server_url () ;
		if ( $title == "CURRENTDAYNAME" ) return date ( "l" ) ;
		if ( strtolower ( substr ( $title , 0 , 9 ) ) == "localurl:" )
			return $this->get_local_url ( substr ( $title , 9 ) ) ;

		$title = trim ( $title ) ;
		if ( count ( explode ( ":" , $title , 2 ) ) == 1 ) # Does the template title contain a ":"?
			$title = $this->get_namespace_template() . ":" . $title ;
		else if ( substr ( $title , 0 , 1 ) == ":" ) # Main namespace
			$title = substr ( $title , 1 ) ;
		return $this->get_wiki_text ( $title , true ) ; # Cache template texts
	}

	/**
	 * Rendering of an internal link; this provider returns the bare text.
	 */
	function get_internal_link ( $target , $text ) {
		return $text ; # Dummy
	}
}

# Access through text file structure
/**
 * Content provider that reads pages from text files on disk. File locations
 * come from get_file_location_global() (defined outside this file).
 */
class ContentProviderTextFile extends ContentProviderHTTP {
	var $file_ending = ".txt" ;
	# Root directory handed to get_file_location_global(). It is not assigned
	# anywhere in this file — presumably set by the caller; TODO confirm.
	var $basedir ;

	/**
	 * Same as get_page_text() with redirects allowed.
	 */
	function do_get_contents ( $title ) {
		return $this->get_page_text ( $title ) ;
	}

	/**
	 Called from outside
	 Could probably have remained unchanged from the HTTP class, but this is shorter, and caching is irrelevant for text files (disk cache)

	 $do_cache is accepted for interface compatibility and ignored.
	*/
	function get_wiki_text ( $title , $do_cache = false ) {
		$title = trim ( $title ) ;
		if ( $title == "" ) return "" ; # Just in case...
		if ( $this->first_title == "" ) $this->first_title = $title ;
		return $this->get_page_text ( $title ) ;
	}

	/**
	 * Location object for a title, as returned by get_file_location_global().
	 * This class only uses its "fullname" property.
	 */
	function get_file_location ( $ns , $title ) {
		return get_file_location_global ( $this->basedir , $ns , $title , false ) ;
	}

	/**
	 * Reads the (trimmed) text of a page from its file.
	 *
	 * @param string $page           Page title
	 * @param bool   $allow_redirect Follow a leading #REDIRECT once; the
	 *                               target is read with redirects disabled
	 * @return string The text, or "" if the file does not exist
	 */
	function get_page_text ( $page , $allow_redirect = true ) {
		$location = $this->get_file_location ( 0 , $page ) ;
		$filename = $location->fullname . $this->file_ending ;
		if ( !file_exists ( $filename ) ) return "" ;
		$text = trim ( file_get_contents ( $filename ) ) ;

		# REDIRECT?
		if ( $allow_redirect && strtoupper ( substr ( $text , 0 , 9 ) ) == "#REDIRECT" ) {
			# Target = rest of the first line, brackets stripped
			$lines = explode ( "\n" , substr ( $text , 9 ) , 2 ) ;
			$target = str_replace ( array ( "[[" , "]]" ) , "" , $lines[0] ) ;
			$text = $this->get_page_text ( ucfirst ( trim ( $target ) ) , false ) ;
		}
		return $text ;
	}

	/**
	 * Renders an internal link: an <a> to browse_texts.php if a file for the
	 * target exists, otherwise just the text.
	 */
	function get_internal_link ( $target , $text ) {
		$file = $this->get_file_location ( 0 , $target ) ;
		if ( !file_exists ( $file->fullname.$this->file_ending ) ) return $text ;
		return "<a href='browse_texts.php?title=" . urlencode ( $target ) . "'>{$text}</a>" ;
	}

	/**
	 * Images are not shown by this provider.
	 */
	function do_show_images () {
		return false ;
	}

}

# Access through MySQL interface
# (Used via the extension via Special::wiki2XML)
/**
 * Content provider that reads pages through MediaWiki's Title and Article
 * classes (both defined outside this file).
 */
class ContentProviderMySQL extends ContentProviderHTTP {
	# Both properties are read by get_file_location() / get_internal_link()
	# but were never declared in this class or its parents, so they have
	# always evaluated to null / "". Declared here with those same effective
	# values to avoid undefined-property warnings.
	# NOTE(review): ContentProviderTextFile uses ".txt" as file ending —
	# confirm whether that was intended here as well.
	var $basedir ;
	var $file_ending = "" ;

	/**
	 * Same as get_page_text() with redirects allowed.
	 */
	function do_get_contents ( $title ) {
		return $this->get_page_text ( $title ) ;
	}

	/**
	 Called from outside

	 $do_cache is accepted for interface compatibility and ignored.
	*/
	function get_wiki_text ( $title , $do_cache = false ) {
		$title = trim ( $title ) ;
		if ( $title == "" ) return "" ; # Just in case...
		if ( $this->first_title == "" ) $this->first_title = $title ;
		return $this->get_page_text ( $title ) ;
	}

	/**
	 * Location object for a title, as returned by get_file_location_global()
	 * (defined outside this file).
	 */
	function get_file_location ( $ns , $title ) {
		return get_file_location_global ( $this->basedir , $ns , $title , false ) ;
	}

	/**
	 * Reads the text of a page through the Article class.
	 *
	 * @param string $page           Page title
	 * @param bool   $allow_redirect Follow a leading #REDIRECT once; the
	 *                               target is read with redirects disabled
	 * @return string The text, or "" if the title is invalid or the article
	 *                does not exist
	 */
	function get_page_text ( $page , $allow_redirect = true ) {
		$title = Title::newFromText ( $page ) ;
		# newFromText() yields null for titles it cannot parse
		if ( $title === null ) return "" ;
		$article = new Article ( $title ) ;

		# article does not exist?
		if ( !$article->exists() ) return "" ;
		$text = $article->getContent () ;

		# REDIRECT?
		if ( $allow_redirect && strtoupper ( substr ( $text , 0 , 9 ) ) == "#REDIRECT" ) {
			# Target = rest of the first line, brackets stripped
			$lines = explode ( "\n" , substr ( $text , 9 ) , 2 ) ;
			$target = str_replace ( array ( "[[" , "]]" ) , "" , $lines[0] ) ;
			$text = $this->get_page_text ( ucfirst ( trim ( $target ) ) , false ) ;
		}
		return $text ;
	}

	/**
	 * Renders an internal link: an <a> to browse_texts.php if a file exists
	 * at the target's file location, otherwise just the text.
	 */
	function get_internal_link ( $target , $text ) {
		$file = $this->get_file_location ( 0 , $target ) ;
		if ( !file_exists ( $file->fullname.$this->file_ending ) ) return $text ;
		return "<a href='browse_texts.php?title=" . urlencode ( $target ) . "'>{$text}</a>" ;
	}

	/**
	 * Images are not shown by this provider.
	 */
	function do_show_images () {
		return false ;
	}

}

?>