source: documentation/trunk/php/gs-manual-export.php@ 30117

Last change on this file since 30117 was 30117, checked in by jmt12, 7 years ago

Altered to fix bug where imgcaption tag closed before image actually output. Looks like this was intentional at some stage, but means problems in the Wiki, so I'll repair

File size: 43.1 KB
Line 
1<?php
2
require_once('common.php');
$debug = 1;

// There are some elements that are, in my opinion, incorrectly 'resolved' in
// code blocks just because they are valid HTML. For instance, &lt;i&gt; is
// resolved to <i> in code blocks, while something like &lt;Metadata&gt; is
// not. Set this to true to allow such abominations (for the purpose of
// comparing before and after versions of the XML). [jmt12]
$allow_bad_codeblocks = true;

/** @file gs-manual-export.php
 * This script transforms the series of dokuwiki pages that make up a certain
 * manual (as specified by the 'm' argument) in a certain language ('l') into
 * the XML format required by the rest of the Greenstone manual generation
 * scripts.
 *
 * Request arguments read by this script:
 *  - l: language code (defaults to 'en')
 *  - m: manual name (defaults to 'develop')
 *  - v: version, 'draft' or anything else for approved pages (defaults to
 *       'draft')
 *  - a: action, 'download' or 'store' (defaults to 'store')
 */

// 0. Initialization

// - we have a counter to assign identifiers to text blocks etc without ids
$text_id_counter = 1;
// - we need an array of all the footnotes (footnote text => footnote id)
$footnotes = array();

// Defaults
// - empty() already covers the 'not set' case, so no separate isset() needed
if (empty($_REQUEST['l']))
{
  $_REQUEST['l'] = 'en';
}
if (empty($_REQUEST['m']))
{
  //$_REQUEST['m'] = 'user';
  //$_REQUEST['m'] = 'install';
  $_REQUEST['m'] = 'develop';
  //$_REQUEST['m'] = 'paper';
}
if (empty($_REQUEST['v']))
{
  $_REQUEST['v'] = 'draft';
}
// - the D modifier makes $ match only at the very end of the string; without
//   it a value with a trailing newline (e.g. "download\n") would be accepted
if (!isset($_REQUEST['a']) || !preg_match('/^(download|store)$/D', $_REQUEST['a']))
{
  $_REQUEST['a'] = 'store'; // Try to store the file to disk
}

$var_path = fileCat(array($base_path, 'var'));
$timestamp = time();
//$xml_source_path = '**PATH TO GSDL MANUALS**'
$xml_source_path = '/tmp';
if ($_REQUEST['a'] == 'download')
{
  // Clear out previous exports
  recursiveRemove($var_path, '/greenstone/greenstone-documentation/php/var');
  // New export goes into its own timestamped directory
  $xml_source_path = fileCat(array($var_path, $timestamp));
}

echo '<html>' . "\n";
echo '<head>' . "\n";
echo '<title>GS Manual Export</title>' . "\n";
echo '</head>' . "\n";
echo '<body>' . "\n";

// - validate arguments before we use them (security). These values are later
//   used to build file paths, regular expressions and HTML, so the whitelist
//   must match the whole value: hence the D modifier, which stops $ from
//   also matching just before a trailing newline.
// NOTE(review): the rest of the script assumes printError() halts execution
// when called with a single argument - confirm in common.php
if (!preg_match('/^(develop|install|paper|user)$/D', $_REQUEST['m']))
{
  printError('Unknown manual type requested: ' . htmlspecialchars($_REQUEST['m']));
}

if (!preg_match('/^(ar|en|es|fr|pt-br|ru)$/D', $_REQUEST['l']))
{
  printError('Unknown language requested: ' . htmlspecialchars($_REQUEST['l']));
}
76
echo '<h2>Generating Greenstone Manual XML</h2>' . "\n";
// - escape the request values on output rather than relying solely on the
//   earlier whitelist check
echo '<p><b>Manual:</b> ' . htmlspecialchars($_REQUEST['m']) . ' <b>Language:</b> ' . htmlspecialchars($_REQUEST['l']) . "</p>\n<hr/>\n";
// 1. Create the XML output file handle
// - construct the path using the information we've been provided as arguments
$xml_file_dir = $xml_source_path . '/' . $_REQUEST['l'];
mkAllDir($xml_file_dir);
$xml_file_path = $xml_file_dir . '/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml';
// - backup any existing file (a previous .bak is overwritten by rename)
if (file_exists($xml_file_path))
{
  $xml_backup_file_path = $xml_file_path . '.bak';
  if (!rename($xml_file_path, $xml_backup_file_path))
  {
    printError('Failed to rename existing manual file for backup');
  }
}

// - and create a handle to the new file, complaining straight away if that
//   fails instead of letting every later fwrite() fail on a bad handle
$xml_out = fopen($xml_file_path, 'w');
if (!$xml_out)
{
  printError('Failed to open manual file for writing');
}
96
// 2. Parse the manual's top level wiki page. Its dokuwiki tables supply the
//    metadata for the XML header and cover page, and the bullet list under
//    its 'Contents' heading names the other pages in export order.
echo "<p><b>Frontmatter:</b><br/>\n";
// - metadata field name => list of values, in the order encountered
$cover_metadata = array();
// - page names, in the order they are listed under 'Contents'
$pages_in_order = array();
// - only an approved (non-draft) export of the english manual needs the
//   approved revision looked up; draft exports and all other languages read
//   the current page straight from the dokuwiki data directory
if ($_REQUEST['v'] != 'draft' && $_REQUEST['l'] == 'en')
{
  $top_page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m']);
}
else
{
  $top_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt';
}

if (!file_exists($top_page_path))
{
  printError('Failed to locate top level page for manual');
}
// - walk the file one line at a time, classifying each line by pattern
$top_page_in = fopen($top_page_path, 'r');
if (!$top_page_in)
{
  printError('Failed to open top level page for reading');
}
$in_contents = false;
while (($line = fgets($top_page_in)) !== false)
{
  // - inside the contents listing, a bulletpoint linking to one of this
  //   manual's wiki pages contributes that page name to the export order
  if ($in_contents && preg_match('/^\s+\*\s+\[\[.:' . $_REQUEST['m'] . ':(.+?)\|(.*?)\]\]\s*$/', $line, $matches))
  {
    $pages_in_order[] = $matches[1];
    continue;
  }
  // - a dokuwiki table row of the form "^ Field | value |" is metadata;
  //   repeated fields accumulate their values
  if (preg_match('/^\^\s+([^\s]+)\s+\|\s+(.+?)\s+\|\s*$/', $line, $matches))
  {
    $field = $matches[1];
    $value = $matches[2];
    $cover_metadata[$field][] = $value;
    continue;
  }
  // - any heading switches page order capture on (for 'Contents') or off
  //   (for every other title)
  if (preg_match('/^=+\s(.+)\s=+$/', $line, $matches))
  {
    $in_contents = ($matches[1] == 'Contents');
  }
}
// - fgets() returning false before end of file means a read error
if (!feof($top_page_in))
{
  printError('Unexpected fgets() fail when reading top page');
}
fclose($top_page_in);
// - the cover page cannot be built without these fields
$required_metadata = array('Heading','Title','Affiliation','Version','Date');
foreach ($required_metadata as $required_field)
{
  if (!isset($cover_metadata[$required_field]))
  {
    printError('Missing required metadata: ' . $required_field);
  }
}
// - emit the XML prologue; each ENTITY metadata value becomes one entity
//   declaration inside the DOCTYPE's internal subset
fwrite($xml_out, '<?xml version="1.0" encoding="UTF-8"?>' . "\n");
fwrite($xml_out, '<!DOCTYPE Manual [' . "\n");
if (isset($cover_metadata['ENTITY']))
{
  foreach ($cover_metadata['ENTITY'] as $entity)
  {
    fwrite($xml_out, "\t" . '<!ENTITY ' . $entity . '>' . "\n");
  }
}
fwrite($xml_out, ']>' . "\n");
fwrite($xml_out, '<Manual id="' . ucfirst($_REQUEST['m']) . '" lang="' . $_REQUEST['l'] . '">' . "\n");

// - cover page elements, in the fixed order the manual format expects;
//   fields that may repeat use the 'multiple' writer
outputMetadataSingle($xml_out, $cover_metadata, 'Heading');
outputMetadataSingle($xml_out, $cover_metadata, 'Title');
outputMetadataSingle($xml_out, $cover_metadata, 'Author');
outputMetadataSingle($xml_out, $cover_metadata, 'Affiliation');
outputMetadataMultiple($xml_out, $cover_metadata, 'SupplementaryText');
outputMetadataMultiple($xml_out, $cover_metadata, 'Text');
outputMetadataMultiple($xml_out, $cover_metadata, 'Comment');
outputMetadataSingle($xml_out, $cover_metadata, 'Version');
outputMetadataSingle($xml_out, $cover_metadata, 'Date');
211
// 3. Process each page listed in the contents of the top level page in order
foreach ($pages_in_order as $page)
{
  processPage($xml_out, $page);
}

// 4. Output our list of footnotes (if any). $footnotes maps footnote text to
//    the id that the in-text footnote references use.
if (!empty($footnotes))
{
  fwrite($xml_out, '<FootnoteList>'. "\n");
  foreach ($footnotes as $footnote => $footnote_id)
  {
    outputMetadataSingle($xml_out, $footnote, 'Footnote', $footnote_id);
  }
  fwrite($xml_out, '</FootnoteList>'. "\n");
}

// 5. Finalize and close the XML output
fwrite($xml_out, '</Manual>' . "\n");
fclose($xml_out);
chmod($xml_file_path, 0664);

// 6. Complete!
echo '<p><b>Complete!</b></p>' . "\n<hr/>\n";
if ($_REQUEST['a'] == 'download')
{
  // Archive the manual files as both zip and tgz. The archive paths are
  // passed through escapeshellarg() so that no character in them can be
  // interpreted by the shell.
  $zip_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.zip';
  $zip_path = fileCat(array($xml_source_path, $zip_file));
  $zip_command = 'zip -r ' . escapeshellarg($zip_path) . ' . > /dev/null 2>&1';
  $tgz_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.tgz';
  $tgz_path = fileCat(array($xml_source_path, $tgz_file));
  $tgz_command = 'tar -czf ' . escapeshellarg($tgz_path) . ' * > /dev/null 2>&1';
  // We need to move to the document folder so that archives have sensible
  // paths. Both commands archive the current directory, so they must not run
  // at all if we failed to get there.
  $original_cwd = getcwd();
  if (chdir($xml_file_dir))
  {
    system($zip_command);
    system($tgz_command);
    // Go back
    chdir($original_cwd);
    // Links are ready
    echo '<p>Download XML file plus images as: <a href="var/' . $timestamp . '/' . $zip_file . '">ZIP</a> or <a href="var/' . $timestamp . '/' . $tgz_file . '">TGZ</a></p>' . "\n";
  }
  else
  {
    // NOTE(review): passing false is assumed to make printError() non-fatal,
    // as in processPage() - confirm in common.php
    printError('Failed to enter export directory to create archives', false);
  }
}
echo '<p>Click <a href="' . $dokuwiki_url . '/doku.php?id=' . $_REQUEST['l'] . ':manuals:' . $_REQUEST['m'] . '">here</a> to return to dokuwiki</p>' . "\n";
echo '</body>' . "\n";
echo '</html>';
exit(0);
262
/**
 * Write a single XML element named after $field to $xml_out, containing at
 * most one text block, and echo a progress note to the page.
 *
 * @param resource $xml_out  writable stream receiving the XML
 * @param mixed    $metadata either an array keyed by field name (the first
 *                           value stored under $field is written) or a plain
 *                           value that is itself the text to write
 * @param string   $field    element name, and lookup key for array metadata
 * @param mixed    $mid      optional value for the element's id attribute;
 *                           the attribute is omitted when this is falsy
 */
function outputMetadataSingle($xml_out, $metadata, $field, $mid=false)
{
  echo '[metadata: ' . $field . "] \n";

  // - opening tag, with the id attribute only when one was supplied
  $id_attribute = $mid ? ' id="' . $mid . '"' : '';
  fwrite($xml_out, '<' . $field . $id_attribute . '>' . "\n");

  // - work out what text (if any) belongs inside the element
  $has_content = false;
  $content = null;
  if (is_array($metadata))
  {
    if (isset($metadata[$field][0]))
    {
      $content = $metadata[$field][0];
      $has_content = true;
    }
  }
  elseif (!empty($metadata))
  {
    $content = $metadata;
    $has_content = true;
  }

  if ($has_content)
  {
    outputTextBlock($xml_out, $content);
  }
  else
  {
    echo 'no such field or no metadata';
  }

  fwrite($xml_out, '</' . $field . '>' . "\n");
}
297/** outputMetadataSingle() **/
298
/**
 * Write a single XML element named after $field whose one text block holds
 * every value stored under $field, joined into a single string.
 *
 * @param resource $xml_out         writable stream receiving the XML
 * @param array    $metadata        metadata keyed by field name
 * @param string   $field           element name and lookup key
 * @param string   $separator       placed between values
 * @param mixed    $final_separator when truthy, used instead of $separator
 *                                  in front of the last value
 */
function outputMetadataList($xml_out, $metadata, $field, $separator = ',', $final_separator = false)
{
  echo '[metadata list: ' . $field . "] \n";
  fwrite($xml_out, '<' . $field . '>' . "\n");
  if (!isset($metadata[$field]))
  {
    echo 'no such field or no metadata';
  }
  else
  {
    $entries = $metadata[$field];
    $entry_count = count($entries);
    if ($entry_count == 1)
    {
      // - a lone value needs no joining
      outputTextBlock($xml_out, $entries[0]);
    }
    elseif ($entry_count > 1)
    {
      // - hold back the last value when it gets its own separator
      $trailing_entry = $final_separator ? array_pop($entries) : '';
      $joined = implode($separator, $entries);
      if ($final_separator)
      {
        $joined .= $final_separator . $trailing_entry;
      }
      outputTextBlock($xml_out, $joined);
    }
    // - a field that is set but holds no values writes an empty element
  }
  fwrite($xml_out, '</' . $field . '>' . "\n");
}
330/** outputMetadataList() **/
331
/**
 * Write every value stored under $field as its own text block, wrapped in
 * one XML element named after $field.
 *
 * The 'Text' field is the exception: its text blocks are written without
 * any wrapping element.
 *
 * @param resource $xml_out  writable stream receiving the XML
 * @param array    $metadata metadata keyed by field name
 * @param string   $field    element name and lookup key
 */
function outputMetadataMultiple($xml_out, $metadata, $field)
{
  echo '[metadata multiple: ' . $field . "] \n";
  // - Text blocks don't need to be wrapped in Text element
  $needs_wrapper = ($field != 'Text');
  if ($needs_wrapper)
  {
    fwrite($xml_out, '<' . $field . '>' . "\n");
  }
  if (!isset($metadata[$field]))
  {
    echo 'no such field or no metadata';
  }
  else
  {
    foreach ($metadata[$field] as $entry)
    {
      outputTextBlock($xml_out, $entry);
    }
  }
  if ($needs_wrapper)
  {
    fwrite($xml_out, '</' . $field . '>' . "\n");
  }
}
356
/**
 * Encode a line of code from a table for XML, while turning italic markers
 * that were hidden in HTML comments (<!--i--> and <!--/i-->) into real
 * <i> and </i> tags.
 *
 * @param string $text the raw code line
 * @return string the encoded line
 */
function translateTableCodeline($text)
{
  $hidden_markers = array('<!--i-->', '<!--/i-->');
  $placeholders = array('%!--i--%', '%!--/i--%');
  $italic_tags = array('<i>', '</i>');
  // - swap the hidden markers for placeholders that contain nothing the
  //   encoding step would touch
  $text = str_replace($hidden_markers, $placeholders, $text);
  // - encode entities etc, as for a code block
  $text = translateText($text, true);
  // - the placeholders now become genuine italic tags
  return str_replace($placeholders, $italic_tags, $text);
}
369/** translateTableCodeline() **/
370
/**
 * Encode the XML special characters &, < and > in $text.
 *
 * When called for a code block while the global $allow_bad_codeblocks flag
 * is on, the encoded forms of <i>, </i> and <br/> are turned back into tags.
 *
 * @param string $text          the text to encode
 * @param bool   $in_code_block whether the text comes from a code block
 * @return string the encoded text
 */
function translateText($text, $in_code_block=false)
{
  global $allow_bad_codeblocks;
  // - the ampersand has to go first so that the entities produced for the
  //   angle brackets are not themselves re-encoded
  $text = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $text);
  if (!$in_code_block || !$allow_bad_codeblocks)
  {
    return $text;
  }
  // - restore the handful of tags that are allowed through in code blocks
  return str_replace(
    array('&lt;i&gt;', '&lt;/i&gt;', '&lt;br/&gt;'),
    array('<i>', '</i>', '<br/>'),
    $text
  );
}
386
/**
 * Convert one block of dokuwiki-style text into manual XML markup and write
 * it to $xml_out as a single <Text> element.
 *
 * The steps, in order: footnotes ((...)) are registered in the global
 * $footnotes array and replaced by footnote references; an explicit
 * "<!-- id:N -->" prefix (or else the global $text_id_counter) provides the
 * element id; remaining HTML comments are removed; XML special characters
 * are encoded; wiki links, image/table references, italic, bold and
 * underline markup are rewritten as XML tags; and finally the placeholders
 * used to protect text along the way are restored.
 *
 * The order of the replacements matters, as later patterns rely on the
 * placeholders inserted by earlier ones.
 *
 * @param resource $xml_out       writable stream receiving the XML
 * @param string   $text          the wiki text to convert
 * @param string   $type          optional value for the type attribute of
 *                                the Text element ('code' additionally
 *                                causes < and > to be encoded when
 *                                $in_code_block is set)
 * @param bool     $in_code_block when true, ampersands, entities and angle
 *                                brackets are not encoded (except as noted
 *                                for $type)
 */
function outputTextBlock($xml_out, $text, $type='', $in_code_block = false)
{
  global $cover_metadata;
  global $text_id_counter;
  global $footnotes;
  global $allow_bad_codeblocks;

  // - Start by dealing with any footnotes before anything else
  while (preg_match('/\(\((.*?)\)\)/', $text, $matches))
  {
    $pattern = $matches[0];
    $footnote = $matches[1];
    // - a footnote whose text has been seen before keeps the id it was
    //   first given. Always taking count + 1 would overwrite that id and
    //   then hand the same number to the next new footnote as well.
    if (isset($footnotes[$footnote]))
    {
      $footnote_id = $footnotes[$footnote];
    }
    else
    {
      $footnote_id = count($footnotes) + 1;
      $footnotes[$footnote] = $footnote_id;
    }
    // - note that we have to escape the footnote reference as the following
    //   code will convert any < and > to entities...
    $footnote_reference = '%FootnoteRef id="' . $footnote_id . '"/%';
    $text = str_replace($pattern, $footnote_reference, $text);
  }

  $text_id = '';
  // - check whether the string begins with an explicit id
  if (preg_match('/^\s*<!--\s*id:(.+?)\s*-->(.*)$/', $text, $matches))
  {
    $text_id = $matches[1];
    $text = $matches[2];
    // - automatic numbering continues on from an explicit numeric id
    if (is_numeric($text_id))
    {
      $text_id_counter = $text_id + 1;
    }
  }
  else
  {
    $text_id = $text_id_counter;
    $text_id_counter++;
  }

  // - protect the special case of an HTML comment being actually displayed
  //   in the text (only comments made up of whitespace and dots match)
  $text = preg_replace('/<!--([\s\.]+?)-->/','##lt##!--\1--##gt##',$text);

  // - every other HTML comment is dropped from the output
  $text = preg_replace('/<!--.*?-->/', '', $text);

  // we leave code blocks alone in terms of ampersands
  if (!$in_code_block)
  {
    // - ampersands aren't safe in XML...
    $text = str_replace('&', '&amp;', $text);
    // ...except for the entities that we have registered as metadata
    if (isset($cover_metadata['ENTITY']))
    {
      foreach ($cover_metadata['ENTITY'] as $entity)
      {
        if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches))
        {
          $entity_name = $matches[1];
          if ($entity_name != 'mdash')
          {
            $entity_character = html_entity_decode('&#'.$matches[2].';',ENT_NOQUOTES,'UTF-8');
            $text = str_replace('&amp;' . $entity_name . ';', '&' . $entity_name . ';', $text);
            // - we also convert any characters that match the entity char into
            //   the entity
            $text = str_replace($entity_character, '&' . $entity_name . ';', $text);
          }
        }
      }
    }
    // - protect <br/> tags
    $text = str_replace('<br/>','%%br/%%',$text);
    // - encoding all of the < and > that appear in the text (rather than
    //   true html formatting)
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
    // - restore <br/> tags
    $text = str_replace('%%br/%%','<br/>',$text);
  }
  else if ($type == 'code')
  {
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
  }

  // - links, oh how I hate thee
  // - external links are slightly easier
  $text = preg_replace('/\[\[http:\/\/(.*?)\|(.*?)\]\]/', '<Link url="http://\1">\2</Link>', $text);
  // - internals have to become the horrible <CrossRef> tags. We ignore any
  //   number prefix on the page name as that is just used for ordering within
  //   Dokuwiki
  $text = preg_replace('/\[\[\.\:(.*?)\|[^\]]+\]\]/','<CrossRef target="Chapter" ref="\1"/>', $text);
  // - internal links starting with hash must be on the same page; the
  //   longest hash prefix has to be tried first
  $text = preg_replace('/\[\[###(.*?)\|.*?\]\]/','<CrossRef target="Part" ref="\1"/>', $text);
  $text = preg_replace('/\[\[##(.*?)\|.*?\]\]/','<CrossRef target="Subsection" ref="\1"/>', $text);
  $text = preg_replace('/\[\[#(.*?)\|.*?\]\]/','<CrossRef target="Section" ref="\1"/>', $text);
  // - 'external' internal wiki links are even worse - since we can't know what
  //   the page order number for another manual's chapters might be, we instead
  //   use a search
  $text = preg_replace('/\[\[\?do\=search\&amp;id\=([^\s]+)\s+@([a-z]+):manuals:([a-z]+)\|.*?\]\]/i', '<CrossRef external="\3" lang="\2" target="Chapter" ref="\1"/>', $text);
  // - references to images and tables
  $text = preg_replace('/(?:<|&lt;)imgref\sfigure_(.+?)(?:>|&gt;)/','<CrossRef target="Figure" ref="\1"/>', $text);
  $text = preg_replace('/(?:<|&lt;)tblref\stable_(.+?)(?:>|&gt;)/','<CrossRef target="Table" ref="\1"/>', $text);
  // - explicitly convert URLs as they are a bit messy
  // - first all the cases of URLs in italics, without protocol
  $text = preg_replace('/\/\/\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s(localhost(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  // - now all the protocol ones (with care taken to protect // in protocol)
  $text = preg_replace('/\/\/\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/(localhost(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  // - next we have the underlined URLs sans protocols
  $text = preg_replace('/__\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s(localhost(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  // - and finally the protocol prefixed underlined URLs
  $text = preg_replace('/__\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/(localhost(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  // - lets also protect any other protocols we find floating around
  $text = preg_replace('/(file|ftp|http):\/\//i', '\1:##DOUBLESLASH##', $text);

  // - italic formatting (taking care of protected double slashes)
  $text = preg_replace('/%%\/\/%%/', '##DOUBLESLASH##', $text);
  $text = preg_replace('/\/{5}/', '<i>/</i>', $text); // another special case
  $text = preg_replace('/\/\/(\/.+?)\s*\/\//', '<i>\1</i>', $text); // another special case
  $text = preg_replace('/\/\/\s*(.+?\/)\/\//', '<i>\1</i>', $text); // another special case
  $text = preg_replace('/\/\/\s*(.+?)\s*\/\//', '<i>\1</i>', $text);
  $text = preg_replace('/##DOUBLESLASH##/', '//', $text);
  // - bold formatting
  $text = preg_replace('/\*\*([^"]+?)\*\*/', '<b>\1</b>', $text);
  // - underline formatting
  $text = preg_replace('/__([^"]+?)__/', '<u>\1</u>', $text);

  // - decode certain entities in codeblock (just because they are valid HTML,
  //   derp).
  if ($in_code_block && $allow_bad_codeblocks)
  {
    $text = str_replace('&lt;i&gt;','<i>',$text);
    $text = str_replace('&lt;/i&gt;','</i>',$text);
    //$text = str_replace('&lt;br/&gt;','<br/>',$text);
  }
  // - restore protected entities
  $text = preg_replace('/##(gt|lt)##/','&\1;',$text);
  // - restore protected comment blocks
  $text = str_replace('%!--', '&lt;!--', $text);
  $text = str_replace('--%', '--&gt;', $text);
  // - restore protected footnote refs
  $text = preg_replace('/%FootnoteRef id="([^"]+)"\/%/', '<FootnoteRef id="\1"/>', $text);
  // output the text block
  $text = trim($text);
  // - compare against the empty string rather than using empty(), which
  //   would also discard a text block consisting solely of "0"
  if ($text === '')
  {
    fwrite($xml_out, '<Text id="' . $text_id . '"/>' . "\n");
  }
  else if (!empty($type))
  {
    fwrite($xml_out, '<Text type="' . $type . '" id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
  else
  {
    fwrite($xml_out, '<Text id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
}
551/** outputTextBlock($xml_out, $text) **/
552
553/**
554 */
555function processPage($xml_out, $page_name)
556{
557 global $dokuwiki_path;
558 global $seen_ids;
559 echo "</p>\n<p><b>Export Chapter:</b> " . $page_name . "<br/>\n";
560 // - locate the page in question (taking into account if the user asked for a
561 // draft version or an approved version of the manual)
562 $page_path = '';
563 if ($_REQUEST['v'] == 'draft' || $_REQUEST['l'] != 'en')
564 {
565 $page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $page_name . '.txt';
566 }
567 else
568 {
569 $page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m'] . ':' . $page_name);
570 }
571 $page_in = @fopen($page_path, 'r');
572 if (!$page_in)
573 {
574 printError('Failed to open page for reading:' . $page_name, false);
575 return;
576 }
577 // - once again we read in line-by-line, but this time we are going to output
578 // each line as we go through. We expect to encounter certain lines in a
579 // predefined order, and should complain if we don't find what we expect.
580 $in_chapter = false;
581 $in_section = false;
582 $in_subsection = false;
583 $in_part = false;
584 $in_list = false;
585 $lists = array();
586 $previous_listitem_type = '';
587 $in_figure = false;
588 $in_table = false;
589 $column_widths = array();
590 $in_code_block = false;
591 while (($line = fgets($page_in)) !== false)
592 {
593 // remove newline character
594 $line = preg_replace('/\r?\n$/','',$line);
595 // - we need to know the 'depth' for the bulletpoint lists
596 $depth = 0;
597 while (strlen($line) > 2 && preg_match('/^\s+[\*\-]/', $line) && substr($line, 0, 2) == ' ')
598 {
599 $depth++;
600 $line = substr($line, 2);
601 }
602 $first_character = substr($line, 0, 1);
603 // - special case for the end of bullet lists
604 if ($in_list && ($first_character != "*" && $first_character != "-"))
605 {
606 while (count($lists) > 0)
607 {
608 $list_type = array_pop($lists);
609 if ($list_type == '*')
610 {
611 fwrite($xml_out, '</Bullet>' . "\n");
612 fwrite($xml_out, '</BulletList>' . "\n");
613 }
614 else
615 {
616 fwrite($xml_out, '</NumberedItem>' . "\n");
617 fwrite($xml_out, '</NumberedList>' . "\n");
618 }
619 }
620 $in_list = false;
621 }
622 // - special case for the end of tables
623 if ($in_table && $first_character != '^' && $first_character != '|')
624 {
625 fwrite($xml_out, '</TableContent>' . "\n");
626 fwrite($xml_out, '</Table>' . "\n");
627 $in_table = false;
628 }
629 // - special cases for premature closing of sections, subsections and parts
630 if (preg_match('/<!-- close:(section|subsection|part) -->/', $line, $matches))
631 {
632 // - we always try to do this (regardless of actual flag) as we must
633 // always close the smallest 'granularity' first
634 if ($in_part)
635 {
636 fwrite($xml_out, '</Content>' . "\n");
637 fwrite($xml_out, '</Part>' . "\n");
638 $in_part = false;
639 }
640 if ($in_subsection && ($matches[1] == 'section' || $matches[1] == 'subsection'))
641 {
642 fwrite($xml_out, '</Content>' . "\n");
643 fwrite($xml_out, '</Subsection>' . "\n");
644 $in_subsection = false;
645 }
646 if ($in_section && $matches[1] == 'section')
647 {
648 fwrite($xml_out, '</Content>' . "\n");
649 fwrite($xml_out, '</Section>' . "\n");
650 $in_section = false;
651 }
652 }
653
654 // - if this page is a chapter, then the first thing on the page should be
655 // the chapter title (six equals)
656 if (preg_match('/====== (.+) ======/', $line, $matches))
657 {
658 $chapter_title = $matches[1];
659 $chapter_id = $page_name;
660 if (empty($chapter_id))
661 {
662 $chapter_id = generateID($chapter_title);
663 }
664 // - are we already processing a part? if so end it, end it now
665 if ($in_part)
666 {
667 fwrite($xml_out, '</Content>' . "\n");
668 fwrite($xml_out, '</Part>' . "\n");
669 $in_part = false;
670 }
671 // - are we already processing a subsection? if so end it, end it now
672 if ($in_subsection)
673 {
674 fwrite($xml_out, '</Content>' . "\n");
675 fwrite($xml_out, '</Subsection>' . "\n");
676 $in_subsection = false;
677 }
678 // - are we already processing a section? if so end it, end it now
679 if ($in_section)
680 {
681 fwrite($xml_out, '</Content>' . "\n");
682 fwrite($xml_out, '</Section>' . "\n");
683 $in_section = false;
684 }
685 // - are we already processing a chapter? if so end it, end it now
686 if ($in_chapter)
687 {
688 fwrite($xml_out, '</Content>' . "\n");
689 fwrite($xml_out, '</Chapter>' . "\n");
690 $in_chapter = false;
691 }
692 // - write out this chapter's header
693 fwrite($xml_out, '<Chapter id="' . $chapter_id . '">' . "\n");
694 outputMetadataSingle($xml_out, $chapter_title, 'Title');
695 fwrite($xml_out, '<Content>' . "\n");
696 $in_chapter = true;
697 }
698 // - the next likely thing to encounter is a section heading (five equals)
699 elseif (preg_match('/=====\s+(.+)\s+=====/', $line, $matches))
700 {
701 $section_title = $matches[1];
702 // - check for explicit section id
703 $section_id = '';
704 if (preg_match('/<!-- sid:(.+?) -->(.*)/', $section_title, $matches))
705 {
706 $section_id = $matches[1];
707 $section_title = $matches[2];
708 }
709 if (empty($section_id))
710 {
711 $section_id = generateID($section_title);
712 }
713 // - are we already processing a part? if so end it, end it now
714 if ($in_part)
715 {
716 fwrite($xml_out, '</Content>' . "\n");
717 fwrite($xml_out, '</Part>' . "\n");
718 $in_part = false;
719 }
720 // - are we already processing a subsection? if so end it, end it now
721 if ($in_subsection)
722 {
723 fwrite($xml_out, '</Content>' . "\n");
724 fwrite($xml_out, '</Subsection>' . "\n");
725 $in_subsection = false;
726 }
727 // - are we already processing a section? if so end it, end it now
728 if ($in_section)
729 {
730 fwrite($xml_out, '</Content>' . "\n");
731 fwrite($xml_out, '</Section>' . "\n");
732 $in_section = false;
733 }
734 // - write out this section's header
735 fwrite($xml_out, '<Section id="' . $section_id . '">' . "\n");
736 outputMetadataSingle($xml_out, $section_title, 'Title');
737 fwrite($xml_out, '<Content>' . "\n");
738 $in_section = true;
739 }
740 // - similar for subsection heading (four equals)
741 elseif (preg_match('/==== (.+) ====/', $line, $matches))
742 {
743 $subsection_title = $matches[1];
744 // - check for explicit subsection id
745 $subsection_id = '';
746 if (preg_match('/<!-- sid:(.+?) -->(.*)/', $subsection_title, $matches))
747 {
748 $subsection_id = $matches[1];
749 $subsection_title = $matches[2];
750 }
751 if (empty($subsection_id))
752 {
753 $subsection_id = generateID($subsection_title);
754 }
755 // - are we already processing a part? if so end it, end it now
756 if ($in_part)
757 {
758 fwrite($xml_out, '</Content>' . "\n");
759 fwrite($xml_out, '</Part>' . "\n");
760 $in_part = false;
761 }
762 // - are we already processing a subsection? if so end it, end it now
763 if ($in_subsection)
764 {
765 fwrite($xml_out, '</Content>' . "\n");
766 fwrite($xml_out, '</Subsection>' . "\n");
767 $in_subsection = false;
768 }
769 // - write out this subsection's header
770 fwrite($xml_out, '<Subsection id="' . $subsection_id . '">' . "\n");
771 outputMetadataSingle($xml_out, $subsection_title, 'Title');
772 fwrite($xml_out, '<Content>' . "\n");
773 $in_subsection = true;
774 }
775 // - and part heading (three equals)
776 elseif (preg_match('/=== (.+) ===/', $line, $matches))
777 {
778 $part_title = $matches[1];
779 // - check for explicit part id
780 $part_id = '';
781 if (preg_match('/<!-- sid:(.+?) -->(.*)/', $part_title, $matches))
782 {
783 $part_id = $matches[1];
784 $part_title = $matches[2];
785 }
786 if (empty($part_id))
787 {
788 $part_id = generateID($part_title);
789 }
790 // - are we already processing a part? if so end it, end it now
791 if ($in_part)
792 {
793 fwrite($xml_out, '</Content>' . "\n");
794 fwrite($xml_out, '</Part>' . "\n");
795 $in_part = false;
796 }
797 // - write out this part's header
798 fwrite($xml_out, '<Part id="' . $part_id . '">' . "\n");
799 outputMetadataSingle($xml_out, '**//' . $part_title . '//**', 'Title');
800 fwrite($xml_out, '<Content>' . "\n");
801 $in_part = true;
802 }
803 // - Ignore 5th level heading - they are only used to allow more convenient
804 // editing of figures and tables
805 elseif (preg_match('/== (.+) ==/', $line, $matches))
806 {
807 }
808 // - lists need special handling
809 elseif (preg_match('/^(\*|\-)\s+(.*)/', $line, $matches))
810 {
811 $list_type = $matches[1];
812 $list_text = $matches[2];
813 $list_depth = count($lists);
814 if (!$in_list)
815 {
816 if ($list_type == '*')
817 {
818 fwrite($xml_out, '<BulletList>' . "\n");
819 }
820 else
821 {
822 fwrite($xml_out, '<NumberedList>' . "\n");
823 }
824 $in_list = true;
825 array_push($lists, $list_type);
826 }
827 // - this bullet is at the same depth as previous - close the previous
828 // point
829 elseif ($depth == $list_depth)
830 {
831 $previous_list_type = end($lists);
832 if ($previous_list_type == '*')
833 {
834 fwrite($xml_out, '</Bullet>' . "\n");
835 }
836 else
837 {
838 fwrite($xml_out, '</NumberedItem>' . "\n");
839 }
840 // - we don't match in type anymore... close the previous list and open
841 // a new list of the appropriate type
842 if ($list_type != $previous_list_type)
843 {
844 if ($previous_list_type == '*')
845 {
846 fwrite($xml_out, '</BulletList>' . "\n");
847 fwrite($xml_out, '<NumberedList>' . "\n");
848 }
849 else
850 {
851 fwrite($xml_out, '</NumberedNumbered>' . "\n");
852 fwrite($xml_out, '<BulletList>' . "\n");
853 }
854 array_pop($lists);
855 array_push($lists, $list_type);
856 }
857 }
858 else
859 {
860 // - we have either got deeper...
861 if ($depth > $list_depth)
862 {
863 if ($list_type == '*')
864 {
865 fwrite($xml_out, '<BulletList>' . "\n");
866 }
867 else
868 {
869 fwrite($xml_out, '<NumberedList>' . "\n");
870 }
871 array_push($lists, $list_type);
872 }
873 // ... or shallower in the bullet listing
874 if ($depth < $list_depth)
875 {
876 $previous_list_type = array_pop($lists);
877 if ($previous_list_type == '*')
878 {
879 fwrite($xml_out, '</Bullet>' . "\n");
880 fwrite($xml_out, '</BulletList>' . "\n");
881 }
882 else
883 {
884 fwrite($xml_out, '</NumberedItem>' . "\n");
885 fwrite($xml_out, '</NumberedList>' . "\n");
886 }
887 // - we still have to close the last item too
888 $previous_listitem_type = end($lists);
889 if ($previous_listitem_type == '*')
890 {
891 fwrite($xml_out, '</Bullet>' . "\n");
892 }
893 else
894 {
895 fwrite($xml_out, '</NumberedItem>' . "\n");
896 }
897 }
898 }
899 if ($list_type == '*')
900 {
901 fwrite($xml_out, '<Bullet>' . "\n");
902 }
903 else
904 {
905 fwrite($xml_out, '<NumberedItem>' . "\n");
906 }
907 // Special Case: bullets that contain (start) a code block
908 if (preg_match('/^(.*)<code>\s*$/', $list_text, $matches))
909 {
910 $list_text = $matches[1];
911 $in_code_block = true;
912 }
913
914 outputTextBlock($xml_out, $list_text);
915
916 // - to make things clearer, we'll process any and all code blocks within
917 // bullets here - especially as there may be more text block *after*
918 // the code block finishes
919 if ($in_code_block)
920 {
921 $sub_line = '';
922 while ($in_code_block && ($sub_line = fgets($page_in)) !== false)
923 {
924 $sub_line = trim($sub_line);
925 // - closing code
926 if (preg_match('/^<\/code>(.*)$/', $sub_line, $matches))
927 {
928 $sub_line = $matches[1]; // may be empty string
929 $in_code_block = false;
930 }
931 // - output another plain codeline
932 else
933 {
934 fwrite($xml_out, '<CodeLine>' . $sub_line . "</CodeLine>\n");
935 $sub_line = '';
936 }
937 }
938 // - if sub_line still has anything in it, then add that content as a
939 // text block
940 if (!empty($sub_line))
941 {
942 outputTextBlock($xml_out, $sub_line);
943 }
944 }
945 }
946 // - images start with an image caption 'element'
947 elseif (preg_match('/<imgcaption\s+figure_([a-z0-9_\-]+)\|(.+)>([^<]*?)<\/imgcaption>/', $line, $matches))
948 {
949 $figure_id = $matches[1];
950 $figure_title = $matches[2];
951 $image_content = $matches[3];
952 // - watch for the special withLineNumber flag
953 $class_attribute = '';
954 if (strpos($figure_title, '%!-- withLineNumber --%') != false)
955 {
956 $class_attribute = ' class="withLineNumber"';
957 $figure_title = str_replace('%!-- withLineNumber --%','',$figure_title);
958 }
959 fwrite($xml_out, '<Figure id="' . $figure_id . '"' . $class_attribute . '>' . "\n");
960 echo '[figure: ' . $figure_id . "] \n";
961 fwrite($xml_out, '<Title>' . "\n");
962 // - decode any comments in the title (used to store explicit id
963 // information)
964 $figure_title = str_replace('%!--', '<!--', $figure_title);
965 $figure_title = str_replace('--%', '-->', $figure_title);
966 // - special case: the title may have a subtitle (as a prefix)
967 $figure_subtitle_id = '';
968 $figure_subtitle = '';
969 // - subtitle with explicit id
970 if (preg_match('/^(<!-- id:.+? -->\([a-z]\))\s*(.*)$/', $figure_title, $matches))
971 {
972 $figure_subtitle = $matches[1];
973 $figure_title = $matches[2];
974 }
975 // - subtitle without explicit id
976 else if (preg_match('/^(\([a-z]\))\s*(.*)$/', $figure_title, $matches))
977 {
978 $figure_subtitle = $matches[1];
979 $figure_title = $matches[2];
980 }
981 outputTextBlock($xml_out, $figure_title);
982 if (!empty($figure_subtitle))
983 {
984 fwrite($xml_out, '<SubTitle>' . "\n");
985 outputTextBlock($xml_out, $figure_subtitle);
986 fwrite($xml_out, '</SubTitle>' . "\n");
987 }
988 fwrite($xml_out, '</Title>' . "\n");
989 // Try and find the image itself
990 if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $image_content))
991 {
992 processImage($xml_out, $line);
993 fwrite($xml_out, '</Figure>' . "\n");
994 }
995 // Didn't find an image? Weird, but mark the imgcaption as open, and
996 // we'll chomp up the next image found as the content.
997 else
998 {
999 $in_figure = true;
1000 }
1001 // - record the id to prevent repeating
1002 $seen_ids[$figure_id] = true;
1003 }
1004 // - tables start with a table caption 'element'
1005 elseif (preg_match('/<tblcaption\s+table_([a-z0-9_\-]+)\|([^>]+)>\s*<\/tblcaption>/', $line, $matches))
1006 {
1007 $table_id = $matches[1];
1008 $table_title = $matches[2];
1009 if ($table_title == '##NOCAPTION##')
1010 {
1011 echo '[non-captioned table: ' . $table_id . "] \n";
1012 // - watch for autogenerated ids... no point in outputting them
1013 if (preg_match('/^table(_\d+)?$/', $table_id))
1014 {
1015 fwrite($xml_out, "<Table>\n");
1016 }
1017 else
1018 {
1019 fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
1020 }
1021 fwrite($xml_out, '<Title/>' . "\n");
1022 }
1023 elseif ($table_title == '##HIDDEN##')
1024 {
1025 echo '[hidden table: ' . $table_id . "] \n";
1026 // - watch for autogenerated ids... no point in outputting them
1027 if (preg_match('/^table(_\d+)?$/', $table_id))
1028 {
1029 fwrite($xml_out, "<Table class=\"hidden\">\n");
1030 }
1031 else
1032 {
1033 fwrite($xml_out, '<Table class="hidden" id="' . $table_id . '">' . "\n");
1034 }
1035 fwrite($xml_out, '<Title/>' . "\n");
1036 }
1037 else
1038 {
1039 echo '[table: ' . $table_id . "] \n";
1040 // - watch for autogenerated ids... no point in outputting them
1041 if (preg_match('/^table(_\d+)?$/', $table_id))
1042 {
1043 fwrite($xml_out, "<Table>\n");
1044 }
1045 else
1046 {
1047 fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
1048 }
1049 fwrite($xml_out, '<Title>' . "\n");
1050 outputTextBlock($xml_out, $table_title);
1051 fwrite($xml_out, '</Title>' . "\n");
1052 }
1053 fwrite($xml_out, '<TableContent>' . "\n");
1054 $in_table = true;
1055 // - record the id to prevent repeating
1056 $seen_ids[$table_id] = true;
1057 }
1058 // - the second line in a table should be its column width values
1059 elseif (preg_match('/\|<\s-\s([0-9 ]+?)\s>\|/', $line, $matches))
1060 {
1061 $column_widths = explode(' ', $matches[1]);
1062 }
1063 // - then every row will be made of a number of cells
1064 elseif (preg_match('/^\|(.*?)\|$/', $line, $matches))
1065 {
1066 $row_content = $matches[1];
1067 $cell_contents = preg_split('/(\s+\||\|\s+)/', $row_content);
1068 fwrite($xml_out, '<tr>' . "\n");
1069 foreach ($cell_contents as $index=>$cell_content)
1070 {
1071 $cell_content = trim($cell_content);
1072 $th_text = '';
1073 if (isset($column_widths[$index]))
1074 {
1075 $th_text = '<th width="' . $column_widths[$index] . '"';
1076 }
1077 else
1078 {
1079 $th_text = '<th';
1080 }
1081 // - if the cell would be empty, we use the shorthand
1082 if (empty($cell_content))
1083 {
1084 $th_text .= '/>' . "\n";
1085 fwrite($xml_out, $th_text);
1086 }
1087 else
1088 {
1089 $th_text .= '>' . "\n";
1090 fwrite($xml_out, $th_text);
1091
1092 // GAH - this is proving harder than a hard thing thats hard.
1093 // The issue is that the most straightforward way of fixing this,
1094 // namely using explicit newlines (\\) in the dokuwiki txt causes
1095 // lots a legitimately translated <br/> to also be split up. I
1096 // think the only way forward would be to maybe extend the HTML
1097 // Comment plugin to also respect and process <br/> tags. Then I
1098 // can avoid transforming them, and use the \\ sentinel to
1099 // separate multi-line table cells.
1100 $cell_content_lines = explode('\\\\', $cell_content);
1101 foreach ($cell_content_lines as $cell_content)
1102 {
1103 // - watch out, as the content may be an image
1104 if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $cell_content))
1105 {
1106 processImage($xml_out, $cell_content);
1107 }
1108 elseif (preg_match('/\'\'(.*)\'\'/', $cell_content, $matches))
1109 {
1110 fwrite($xml_out, '<CodeLine>' . translateTableCodeline($matches[1]) . '</CodeLine>' . "\n");
1111 }
1112 // - anything else is text
1113 else
1114 {
1115 outputTextBlock($xml_out, $cell_content);
1116 }
1117 }
1118 fwrite($xml_out, '</th>' . "\n");
1119 }
1120 }
1121 fwrite($xml_out, '</tr>' . "\n");
1122 }
1123 // - links to image media in the wiki!
1124 elseif (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $line))
1125 {
1126 processImage($xml_out, $line);
1127 // - if we were processing a figure, then now is a good time to close it
1128 if ($in_figure)
1129 {
1130 fwrite($xml_out, '</Figure>' . "\n");
1131 $in_figure = false;
1132 }
1133 }
1134 // - if the line starts with a <code> block, then we have a tag
1135 // for that (which is special in that it get a unique text id)
1136 elseif (preg_match('/^<code\s*\d*\s*>(.*?)(<\/code>)?$/', $line, $matches) || ($in_code_block && preg_match('/^(.*?)(<\/code>)?$/', $line, $matches)))
1137 {
1138 $payload = $matches[1];
1139 $found_end = (isset($matches[2]));
1140 $in_code_block = true;
1141 // - be careful with empty lines
1142 if (empty($payload))
1143 {
1144 // - as they may appear in the body of the code (in which case we need
1145 // to output them). The empty lines at the start or end of a code
1146 // block are just an unfortunate consequence of the support for code
1147 // line numbering.
1148 if (!$found_end && strpos($line, '<code') === false)
1149 {
1150 fwrite($xml_out, "<CodeLine/>\n");
1151 }
1152 }
1153 elseif (preg_match('/^<!-- id:([^\s]+) -->/', $payload, $matches))
1154 {
1155 $text_id = $matches[1];
1156 outputTextBlock($xml_out, $payload, 'code', true);
1157 // - record the id to prevent repeating
1158 $seen_ids[$text_id] = true;
1159 }
1160 else
1161 {
1162 fwrite($xml_out, '<CodeLine>' . translateText($payload, true) . '</CodeLine>' . "\n");
1163 }
1164 // - if we didn't find an endtag we have to keep doing code mode until
1165 // we do
1166 $in_code_block = (!$found_end);
1167 if ($found_end)
1168 {
1169 // - if we were processing a figure, then now is a good time to close it
1170 if ($in_figure)
1171 {
1172 fwrite($xml_out, '</Figure>' . "\n");
1173 $in_figure = false;
1174 }
1175 }
1176 }
1177 // - entities on a line by themselves (i.e. references to external files)
1178 // go through verbatim
1179 elseif (preg_match('/^\s*&[a-z0-9_-]+;\s*$/', $line))
1180 {
1181 fwrite($xml_out, $line . "\n");
1182 }
1183 // - lines starting with > are indented text blocks
1184 elseif (preg_match('/^>(.*)$/', $line, $matches))
1185 {
1186 $payload = $matches[1];
1187 fwrite($xml_out, "<Indented>\n");
1188 outputTextBlock($xml_out, $payload);
1189 fwrite($xml_out, "</Indented>\n");
1190 }
1191 // - everything else goes straight through as a text block
1192 // - note that for code blocks, even empty lines count
1193 elseif (!empty($line))
1194 {
1195 // - output the line of text having encoded entities etc
1196 outputTextBlock($xml_out, $line, '', $in_code_block);
1197 }
1198 }
1199 // Complete any open part
1200 if ($in_part)
1201 {
1202 fwrite($xml_out, '</Content>' . "\n");
1203 fwrite($xml_out, '</Part>' . "\n");
1204 $in_part = false;
1205 }
1206 // Complete any open subsection
1207 if ($in_subsection)
1208 {
1209 fwrite($xml_out, '</Content>' . "\n");
1210 fwrite($xml_out, '</Subsection>' . "\n");
1211 $in_subsection = false;
1212 }
1213 // Complete any open section
1214 if ($in_section)
1215 {
1216 fwrite($xml_out, '</Content>' . "\n");
1217 fwrite($xml_out, '</Section>' . "\n");
1218 $in_section = false;
1219 }
1220 // Complete any open chapter
1221 if ($in_chapter)
1222 {
1223 fwrite($xml_out, '</Content>' . "\n");
1224 fwrite($xml_out, '</Chapter>' . "\n");
1225 $in_chapter = false;
1226 }
1227}
1228/** processPage($xml_out, $page_name) **/
1229
/**
 * Copies the image referenced by a DokuWiki media link into the XML export
 * tree and writes a matching <File/> element to the XML output.
 *
 * The link is recognised by the pattern below: '{{', one or more leading
 * characters, a filename containing neither ':' nor '?', then
 * '?<width>x<height>', an optional '&direct', and '}}'. Text that contains no
 * such link is ignored and nothing is written.
 *
 * The image is read from the wiki media directory for the requested language
 * ($_REQUEST['l']) and copied to '<xml_source_path>/<lang>/images/'. A failed
 * copy is reported through printError() but the <File/> element is still
 * written.
 *
 * @param resource $xml_out stream handle that the <File/> element is written to
 * @param string   $text    wiki text expected to contain an image media link
 */
function processImage($xml_out, $text)
{
  global $dokuwiki_path;
  global $xml_source_path;
  if (preg_match('/\{\{.+?([^:?]+)\?(\d+)x(\d+)(&direct)?\}\}/', $text, $matches))
  {
    $filename = $matches[1];
    $width = $matches[2];
    $height = $matches[3];
    // - copy the file into place
    // NOTE(review): the source name is lowercased while the destination keeps
    // the case used in the wiki text - presumably because the wiki stores
    // media under lowercase names; confirm before changing.
    $image_source_path = $dokuwiki_path . '/data/media/' . $_REQUEST['l'] . '/manuals/images/' . strtolower($filename);
    $image_destination_dir = $xml_source_path . '/' . $_REQUEST['l'] . '/images';
    mkAllDir($image_destination_dir);
    $image_destination_path = $image_destination_dir . '/' . $filename;
    if (copy($image_source_path, $image_destination_path))
    {
      echo '[copying file: ' . $filename . "] \n";
      chmod($image_destination_path, 0664);
    }
    else
    {
      printError('Failed to copy image into place: ' . $filename, false);
    }
    // - spit out the XML element
    // - the filename pattern allows characters such as & < > and ", which
    //   would break the attribute value if written raw, so escape them (the
    //   width and height are digits only and need no escaping)
    $escaped_filename = htmlspecialchars($filename, ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8');
    fwrite($xml_out, '<File width="' . $width . '" height="' . $height . '" url="images/' . $escaped_filename . '"/>' . "\n");
  }
}
Note: See TracBrowser for help on using the repository browser.