root/documentation/trunk/php/gs-manual-export.php @ 30117

Revision 30117, 43.1 KB (checked in by jmt12, 4 years ago)

Altered to fix bug where imgcaption tag closed before image actually output. Looks like this was intentional at some stage, but means problems in the Wiki, so I'll repair

Line 
<?php

require_once('common.php');
$debug = 1;

// There are some elements that are, in my opinion, incorrectly 'resolved' in
// code blocks just because they are valid HTML. For instance, &lt;i&gt; is
// resolved to <i> in code blocks, while something like &lt;Metadata&gt; is
// not. Set this to true to allow such abominations (for the purpose of
// comparing before and after versions of the XML). [jmt12]
$allow_bad_codeblocks = true;

/** @file gs-manual-export.php
 *  This script transforms the series of dokuwiki pages that make up a certain
 *  manual (as specified by the 'm' argument) in a certain language ('l') into
 *  the XML format required by the rest of the Greenstone manual generation
 *  scripts.
 *
 *  Request arguments:
 *   - 'l' language code (default 'en')
 *   - 'm' manual name (default 'develop')
 *   - 'v' version selector (default 'draft')
 *   - 'a' action, either 'download' or 'store' (default 'store')
 */

// 0. Initialization

// - we have a counter to assign identifiers to text blocks etc without ids
$text_id_counter = 1;
// - we need an array of all the footnotes
$footnotes = array();

// Defaults
// - an argument that is missing, empty, or not a plain string (for example an
//   array supplied as 'm[]=x') is replaced by its default. Without the
//   is_string() test an array value would reach preg_match() further down,
//   which is a TypeError on PHP 8.
$request_defaults = array(
  'l' => 'en',
  'm' => 'develop', // other manuals: 'user', 'install', 'paper'
  'v' => 'draft',
);
foreach ($request_defaults as $argument => $default_value)
{
  if (empty($_REQUEST[$argument]) || !is_string($_REQUEST[$argument]))
  {
    $_REQUEST[$argument] = $default_value;
  }
}
unset($request_defaults, $argument, $default_value);

// - the action must be exactly 'download' or 'store'; a strict comparison is
//   used so neither arrays nor values with a trailing newline slip through
if (!isset($_REQUEST['a']) || !in_array($_REQUEST['a'], array('download', 'store'), true))
{
  $_REQUEST['a'] = 'store'; // Try to store the file to disk
}
47
// - working locations. $base_path is not set in this section of the script;
//   presumably it comes from common.php (TODO confirm).
$var_path = fileCat(array($base_path, 'var'));
$timestamp = time();
//$xml_source_path = '**PATH TO GSDL MANUALS**'
$xml_source_path = '/tmp';
if ($_REQUEST['a'] == 'download')
{
  // Clear out previous exports
  recursiveRemove($var_path, '/greenstone/greenstone-documentation/php/var');
  // New export - each download gets its own timestamped directory
  $xml_source_path = fileCat(array($var_path, $timestamp));
}

echo '<html>' . "\n";
echo '<head>' . "\n";
echo '<title>GS Manual Export</title>' . "\n";
echo '</head>' . "\n";
echo '<body>' . "\n";

// - validate arguments before we use them (security)
// - the D modifier makes '$' match only at the very end of the subject.
//   Without it 'user' followed by a newline would pass the whitelist and the
//   newline would end up in file paths and in the HTML echoed below.
if (!preg_match('/^(develop|install|paper|user)$/D', $_REQUEST['m']))
{
  printError('Unknown manual type requested: ' . htmlspecialchars($_REQUEST['m']));
}

if (!preg_match('/^(ar|en|es|fr|pt-br|ru)$/D', $_REQUEST['l']))
{
  printError('Unknown language requested: ' . htmlspecialchars($_REQUEST['l']));
}

// - NOTE(review): the values are echoed unescaped, which is only safe if
//   printError() stops the script on a failed whitelist check - confirm.
echo '<h2>Generating Greenstone Manual XML</h2>' . "\n";
echo '<p><b>Manual:</b> ' . $_REQUEST['m'] . ' <b>Language:</b> ' . $_REQUEST['l'] . "</p>\n<hr/>\n";
// 1. Create the XML output file handle
// - construct the path using the information we've been provided as arguments
$xml_file_dir = $xml_source_path . '/' . $_REQUEST['l'];
mkAllDir($xml_file_dir);
$xml_file_path = $xml_file_dir . '/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml';
// - backup any existing file
if (file_exists($xml_file_path))
{
  $xml_backup_file_path = $xml_file_path . '.bak';
  if (!rename($xml_file_path, $xml_backup_file_path))
  {
    printError('Failed to rename existing manual file for backup');
  }
}

// - and create a handle to the new file. Everything below writes to this
//   handle, so report a failure here rather than letting each later
//   fwrite()/fclose() fail on an invalid handle.
$xml_out = fopen($xml_file_path, 'w');
if (!$xml_out)
{
  printError('Failed to open manual file for writing');
}
96
// 2. Read in the top level page - this gives the configuration data for the
//    manual and cover page, and lists the other pages of the manual in order
echo "<p><b>Frontmatter:</b><br/>\n";
// - filled in while scanning the page: cover metadata (field => list of
//   values) and the page names in the order they are listed
$cover_metadata = array();
$pages_in_order = array();
// - a draft export takes the current text of the page; otherwise only the
//   approved version is used. Approval is only consulted for English; every
//   other language always reads the current page file.
$use_current_page = ($_REQUEST['v'] == 'draft' || $_REQUEST['l'] != 'en');
$top_page_path = $use_current_page
  ? $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt'
  : getApprovedVersionPath('en:manuals:' . $_REQUEST['m']);

if (!file_exists($top_page_path))
{
  printError('Failed to locate top level page for manual');
}
// - scan the file line by line, picking out the lines of interest by pattern
$top_page_in = fopen($top_page_path, 'r');
if (!$top_page_in)
{
  printError('Failed to open top level page for reading');
}
// - a bulletpoint holding a wiki link into this manual names one page
$page_entry_pattern = '/^\s+\*\s+\[\[.:' . $_REQUEST['m'] . ':(.+?)\|(.*?)\]\]\s*$/';
$in_contents = false;
while (($line = fgets($top_page_in)) !== false)
{
  // - page entries only count while we are underneath the 'Contents' heading
  if ($in_contents && preg_match($page_entry_pattern, $line, $matches))
  {
    $pages_in_order[] = $matches[1];
    continue;
  }
  // - metadata is encoded as dokuwiki table rows: ^ Field | value |
  //   (a field may occur several times, so values accumulate in a list)
  if (preg_match('/^\^\s+([^\s]+)\s+\|\s+(.+?)\s+\|\s*$/', $line, $matches))
  {
    $cover_metadata[$matches[1]][] = $matches[2];
    continue;
  }
  // - a heading switches page capture on ('Contents') or off (anything else)
  if (preg_match('/^=+\s(.+)\s=+$/', $line, $matches))
  {
    $in_contents = ($matches[1] == 'Contents');
  }
}
// - fgets() returning false before end-of-file means a read error
if (!feof($top_page_in))
{
  printError('Unexpected fgets() fail when reading top page');
}
fclose($top_page_in);
// - ensure we have the required metadata
foreach (array('Heading', 'Title', 'Affiliation', 'Version', 'Date') as $required_field)
{
  if (!isset($cover_metadata[$required_field]))
  {
    printError('Missing required metadata: ' . $required_field);
  }
}
// - with the metadata in hand, build the XML prologue: the XML declaration, a
//   DOCTYPE whose internal subset declares one entity per 'ENTITY' metadata
//   value, and the opening <Manual> tag
$xml_prologue = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
$xml_prologue .= '<!DOCTYPE Manual [' . "\n";
if (isset($cover_metadata['ENTITY']))
{
  foreach ($cover_metadata['ENTITY'] as $entity)
  {
    $xml_prologue .= "\t" . '<!ENTITY ' . $entity . '>' . "\n";
  }
}
$xml_prologue .= ']>' . "\n";
$xml_prologue .= '<Manual id="' . ucfirst($_REQUEST['m']) . '" lang="' . $_REQUEST['l'] . '">' . "\n";
fwrite($xml_out, $xml_prologue);

// - the cover page follows a fixed recipe: four single-valued elements, then
//   the elements that may repeat, then two more single-valued elements
foreach (array('Heading', 'Title', 'Author', 'Affiliation') as $cover_field)
{
  outputMetadataSingle($xml_out, $cover_metadata, $cover_field);
}
foreach (array('SupplementaryText', 'Text', 'Comment') as $cover_field)
{
  outputMetadataMultiple($xml_out, $cover_metadata, $cover_field);
}
foreach (array('Version', 'Date') as $cover_field)
{
  outputMetadataSingle($xml_out, $cover_metadata, $cover_field);
}
211
// 3. Process each page listed in the contents of the top level page in order
foreach ($pages_in_order as $page)
{
  processPage($xml_out, $page);
}

// 4. Output our list of footnotes (if any)
// - $footnotes maps footnote text => footnote id
if (!empty($footnotes))
{
  fwrite($xml_out, '<FootnoteList>'. "\n");
  foreach ($footnotes as $footnote => $footnote_id)
  {
    // - PHP turns numeric-looking array keys into integers, so cast back to
    //   the text that was originally captured
    outputMetadataSingle($xml_out, (string) $footnote, 'Footnote', $footnote_id);
  }
  fwrite($xml_out, '</FootnoteList>'. "\n");
}

// 5. Finalize and close the XML output
fwrite($xml_out, '</Manual>' . "\n");
fclose($xml_out);
chmod($xml_file_path, 0664);

// 6. Complete!
echo '<p><b>Complete!</b></p>' . "\n<hr/>\n";
if ($_REQUEST['a'] == 'download')
{
  // Zip up the manual files
  $zip_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.zip';
  $zip_path = fileCat(array($xml_source_path, $zip_file));
  $zip_command = 'zip -r "' . $zip_path . '" . > /dev/null 2>&1';
  $tgz_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.tgz';
  $tgz_path = fileCat(array($xml_source_path, $tgz_file));
  $tgz_command = 'tar -czf "' . $tgz_path . '" * > /dev/null 2>&1';
  // We need to move to the document folder so that archives have sensible
  // paths. Both commands archive "whatever is in the current directory", so
  // they must not run if the move failed - otherwise the script's own
  // directory would be packed up and offered for download.
  $original_cwd = getcwd();
  if (!chdir($xml_file_dir))
  {
    printError('Failed to change into the export directory to build archives');
  }
  else
  {
    system($zip_command);
    system($tgz_command);
    // Go back
    if ($original_cwd !== false)
    {
      chdir($original_cwd);
    }
    // Links are ready
    echo '<p>Download XML file plus images as: <a href="var/' . $timestamp . '/' . $zip_file . '">ZIP</a> or <a href="var/' . $timestamp . '/' . $tgz_file . '">TGZ</a></p>' . "\n";
  }
}
echo '<p>Click <a href="' . $dokuwiki_url . '/doku.php?id=' . $_REQUEST['l'] . ':manuals:' . $_REQUEST['m'] . '">here</a> to return to dokuwiki</p>' . "\n";
echo '</body>' . "\n";
echo '</html>';
exit(0);
262
/**
 * Write one metadata element, holding at most one text block, to the XML.
 *
 * The element is always opened and closed, even when there is nothing to put
 * inside it. A progress line (and a note when no value was found) is echoed
 * to the page.
 *
 * @param resource     $xml_out  writable handle to the XML output
 * @param array|string $metadata either an array of field => list of values
 *                               (the first value of $field is used), or the
 *                               value itself
 * @param string       $field    element name, and the key looked up when
 *                               $metadata is an array
 * @param mixed        $mid      when truthy, written as the element's id
 *                               attribute
 */
function outputMetadataSingle($xml_out, $metadata, $field, $mid=false)
{
  echo '[metadata: ' . $field . "] \n";

  // - the id attribute is only present when an id was supplied
  $id_attribute = $mid ? ' id="' . $mid . '"' : '';
  fwrite($xml_out, '<' . $field . $id_attribute . '>' . "\n");

  // - decide what (if anything) goes inside the element
  if (is_array($metadata))
  {
    $has_value = isset($metadata[$field][0]);
    $value = $has_value ? $metadata[$field][0] : null;
  }
  else
  {
    $has_value = !empty($metadata);
    $value = $metadata;
  }

  if ($has_value)
  {
    outputTextBlock($xml_out, $value);
  }
  else
  {
    echo 'no such field or no metadata';
  }

  fwrite($xml_out, '</' . $field . '>' . "\n");
}
/** outputMetadataSingle() **/
298
/**
 * Write one metadata element whose values are joined into a single text
 * block.
 *
 * A single value is written as is. Several values are joined with
 * $separator; when $final_separator is given it is used between the last two
 * values instead. A field that is present but holds no values produces an
 * empty element.
 *
 * @param resource     $xml_out         writable handle to the XML output
 * @param array        $metadata        field => list of values
 * @param string       $field           element name and metadata key
 * @param string       $separator       glue between values
 * @param string|false $final_separator glue before the last value, or false
 *                                      to use $separator throughout
 */
function outputMetadataList($xml_out, $metadata, $field, $separator = ',', $final_separator = false)
{
  echo '[metadata list: ' . $field . "] \n";
  fwrite($xml_out, '<' . $field . '>' . "\n");

  if (!isset($metadata[$field]))
  {
    echo 'no such field or no metadata';
  }
  else
  {
    $entries = $metadata[$field];
    $entry_count = count($entries);
    if ($entry_count == 1)
    {
      outputTextBlock($xml_out, $entries[0]);
    }
    elseif ($entry_count > 1)
    {
      if ($final_separator)
      {
        // - hold the last value back so it can be attached with its own glue
        $tail = array_pop($entries);
        $joined = implode($separator, $entries) . $final_separator . $tail;
      }
      else
      {
        $joined = implode($separator, $entries);
      }
      outputTextBlock($xml_out, $joined);
    }
  }

  fwrite($xml_out, '</' . $field . '>' . "\n");
}
/** outputMetadataList() **/
331
/**
 * Write every value of a metadata field as its own text block.
 *
 * The blocks are wrapped in an element named after the field, except for the
 * field 'Text', whose blocks are written without a wrapper.
 *
 * @param resource $xml_out  writable handle to the XML output
 * @param array    $metadata field => list of values
 * @param string   $field    metadata key, and wrapper element name
 */
function outputMetadataMultiple($xml_out, $metadata, $field)
{
  echo '[metadata multiple: ' . $field . "] \n";

  // - Text blocks don't need to be wrapped in a Text element
  $wrap_in_element = ($field != 'Text');

  if ($wrap_in_element)
  {
    fwrite($xml_out, '<' . $field . '>' . "\n");
  }

  if (!isset($metadata[$field]))
  {
    echo 'no such field or no metadata';
  }
  else
  {
    foreach ($metadata[$field] as $entry)
    {
      outputTextBlock($xml_out, $entry);
    }
  }

  if ($wrap_in_element)
  {
    fwrite($xml_out, '</' . $field . '>' . "\n");
  }
}
356
/**
 * Encode a line of code for XML while letting through the italic markers
 * that were hidden in HTML comments.
 *
 * The markers <!--i--> and <!--/i--> are swapped for placeholders before the
 * text is encoded by translateText() (in code-block mode), and come back as
 * real <i> and </i> tags afterwards.
 *
 * @param string $text the raw line
 * @return string the encoded line
 */
function translateTableCodeline($text)
{
  $hidden_markers = array('<!--i-->', '<!--/i-->');
  $placeholders = array('%!--i--%', '%!--/i--%');
  $italic_tags = array('<i>', '</i>');

  // - shield the markers so the encoding step leaves them alone
  $shielded = str_replace($hidden_markers, $placeholders, $text);
  // - encode entities etc
  $encoded = translateText($shielded, true);
  // - turn the placeholders into italic elements
  return str_replace($placeholders, $italic_tags, $encoded);
}
/** translateTableCodeline() **/
370
/**
 * Encode the XML special characters &, < and > in a piece of text.
 *
 * When the text belongs to a code block and the global
 * $allow_bad_codeblocks is set, the tags <i>, </i> and <br/> are decoded
 * again after the encoding step.
 *
 * @param string $text          the text to encode
 * @param bool   $in_code_block whether the text comes from a code block
 * @return string the encoded text
 */
function translateText($text, $in_code_block=false)
{
  global $allow_bad_codeblocks;

  // - the ampersand must go first so the entities added for < and > are not
  //   themselves re-encoded
  $encoded = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $text);

  if (!($in_code_block && $allow_bad_codeblocks))
  {
    return $encoded;
  }

  // - put back the tags that are tolerated inside code blocks
  return str_replace(
    array('&lt;i&gt;', '&lt;/i&gt;', '&lt;br/&gt;'),
    array('<i>', '</i>', '<br/>'),
    $encoded
  );
}
386
/**
 * Convert one block of dokuwiki text into a <Text> element and write it to
 * the XML output.
 *
 * The steps run in a fixed order, because later steps rely on the
 * placeholders left by earlier ones:
 *  1. footnotes "((...))" are registered in the global $footnotes (text =>
 *     id) and replaced by a placeholder reference;
 *  2. an explicit "<!-- id:N -->" prefix sets the block id, otherwise the
 *     global $text_id_counter supplies (and then advances) it;
 *  3. outside code blocks &, < and > are encoded, apart from registered
 *     entities and <br/>; inside code blocks < and > are only encoded when
 *     $type is 'code';
 *  4. wiki links, figure/table references, italics, bold and underline are
 *     rewritten to XML tags, with the "//" of URLs shielded from the italic
 *     rules;
 *  5. placeholders are restored and the element is written.
 *
 * Fixes over the previous revision:
 *  - a footnote text that has been seen before reuses its id. Previously it
 *    was given a new id that replaced the old one, which orphaned earlier
 *    references and let the next new footnote receive a duplicate id;
 *  - a block whose text is exactly "0" is written as text rather than as an
 *    empty element;
 *  - "https://" is handled like "http://" for external links and for the
 *    shielding of "//" from the italic rules.
 *
 * @param resource $xml_out       writable handle to the XML output
 * @param string   $text          the raw wiki text
 * @param string   $type          when not empty, written as the 'type'
 *                                attribute of the element
 * @param bool     $in_code_block whether the text comes from a code block
 */
function outputTextBlock($xml_out, $text, $type='', $in_code_block = false)
{
  global $cover_metadata;
  global $text_id_counter;
  global $footnotes;
  global $allow_bad_codeblocks;

  if (!is_array($footnotes))
  {
    $footnotes = array();
  }

  // - Start by dealing with any footnotes before anything else
  while (preg_match('/\(\((.*?)\)\)/', $text, $matches))
  {
    $pattern = $matches[0];
    $footnote = $matches[1];
    // - a repeated footnote keeps the id it already has; only new footnote
    //   text gets the next id, which keeps ids unique and contiguous
    if (isset($footnotes[$footnote]))
    {
      $footnote_id = $footnotes[$footnote];
    }
    else
    {
      $footnote_id = count($footnotes) + 1;
      $footnotes[$footnote] = $footnote_id;
    }
    // - note that we have to escape the footnote reference as the following
    //   code will convert any < and > to entities...
    $footnote_reference = '%FootnoteRef id="' . $footnote_id . '"/%';
    $text = str_replace($pattern, $footnote_reference, $text);
  }

  $text_id = '';
  // - check whether the string begins with an explicit id
  if (preg_match('/^\s*<!--\s*id:(.+?)\s*-->(.*)$/', $text, $matches))
  {
    $text_id = $matches[1];
    $text = $matches[2];
    // - a numeric explicit id also moves the automatic counter past it
    if (is_numeric($text_id))
    {
      $text_id_counter = $text_id + 1;
    }
  }
  else
  {
    $text_id = $text_id_counter;
    $text_id_counter++;
  }

  // - protect the special case of an HTML comment being actually displayed
  //   in the text
  $text = preg_replace('/<!--([\s\.]+?)-->/','##lt##!--\1--##gt##',$text);

  // - every other HTML comment is dropped
  $text = preg_replace('/<!--.*?-->/', '', $text);

  // we leave code blocks alone in terms of ampersands
  if (!$in_code_block)
  {
    // - ampersands aren't safe in XML...
    $text = str_replace('&', '&amp;', $text);
    // ...except for the entities that we have registered as metadata
    if (isset($cover_metadata['ENTITY']))
    {
      foreach ($cover_metadata['ENTITY'] as $entity)
      {
        if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches))
        {
          $entity_name = $matches[1];
          // - NOTE(review): 'mdash' is skipped on purpose by the existing
          //   code; the reason is not recorded here
          if ($entity_name != 'mdash')
          {
            $entity_character = html_entity_decode('&#'.$matches[2].';',ENT_NOQUOTES,'UTF-8');
            $text = str_replace('&amp;' . $entity_name . ';', '&' . $entity_name . ';', $text);
            // - we also convert any characters that match the entity char into
            //   the entity
            $text = str_replace($entity_character, '&' . $entity_name . ';', $text);
          }
        }
      }
    }
    // - protect <br/> tags
    $text = str_replace('<br/>','%%br/%%',$text);
    // - encoding all of the < and > that appear in the text (rather than
    //   true html formatting)
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
    // - restore <br/> tags
    $text = str_replace('%%br/%%','<br/>',$text);
  }
  else if ($type == 'code')
  {
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
  }

  // - links, oh how I hate thee
  // - external links are slightly easier (http and https alike)
  $text = preg_replace('/\[\[(https?):\/\/(.*?)\|(.*?)\]\]/', '<Link url="\1://\2">\3</Link>', $text);
  // - internals have to become the horrible <CrossRef> tags. We ignore any
  //   number prefix on the page name as that is just used for ordering within
  //   Dokuwiki
  $text = preg_replace('/\[\[\.\:(.*?)\|[^\]]+\]\]/','<CrossRef target="Chapter" ref="\1"/>', $text);
  // - internal links starting with hash must be on the same page
  $text = preg_replace('/\[\[###(.*?)\|.*?\]\]/','<CrossRef target="Part" ref="\1"/>', $text);
  $text = preg_replace('/\[\[##(.*?)\|.*?\]\]/','<CrossRef target="Subsection" ref="\1"/>', $text);
  $text = preg_replace('/\[\[#(.*?)\|.*?\]\]/','<CrossRef target="Section" ref="\1"/>', $text);
  // - 'external' internal wiki links are even worst - since we can't know what
  //   the page order number for another manual's chapters might be, we instead
  //   use a search
  $text = preg_replace('/\[\[\?do\=search\&amp;id\=([^\s]+)\s+@([a-z]+):manuals:([a-z]+)\|.*?\]\]/i', '<CrossRef external="\3" lang="\2" target="Chapter" ref="\1"/>', $text);
  // - references to images and tables
  $text = preg_replace('/(?:<|&lt;)imgref\sfigure_(.+?)(?:>|&gt;)/','<CrossRef target="Figure" ref="\1"/>', $text);
  $text = preg_replace('/(?:<|&lt;)tblref\stable_(.+?)(?:>|&gt;)/','<CrossRef target="Table" ref="\1"/>', $text);
  // - explicitly convert URLs as they are a bit messy
  // - first all the cases of URLs in italics, without protocol
  $text = preg_replace('/\/\/\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s(localhost(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  // - now all the protocol ones (with care taken to protect // in protocol)
  $text = preg_replace('/\/\/\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/(localhost(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  // - next we have the underlined URLs sans protocols
  $text = preg_replace('/__\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s(localhost(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  // - and finally the protocol prefixed underlined URLs
  $text = preg_replace('/__\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/(localhost(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  // - lets also protect any other protocols we find floating around. https
  //   is included so that its "//" is not mistaken for an italic marker
  $text = preg_replace('/(file|ftp|https?):\/\//i', '\1:##DOUBLESLASH##', $text);

  // - italic formatting (taking care of protected double slashes)
  $text = preg_replace('/%%\/\/%%/', '##DOUBLESLASH##', $text);
  $text = preg_replace('/\/{5}/', '<i>/</i>', $text); // another special case
  $text = preg_replace('/\/\/(\/.+?)\s*\/\//', '<i>\1</i>', $text); // another special case
  $text = preg_replace('/\/\/\s*(.+?\/)\/\//', '<i>\1</i>', $text); // another special case
  $text = preg_replace('/\/\/\s*(.+?)\s*\/\//', '<i>\1</i>', $text);
  $text = preg_replace('/##DOUBLESLASH##/', '//', $text);
  // - bold formatting
  $text = preg_replace('/\*\*([^"]+?)\*\*/', '<b>\1</b>', $text);
  // - underline formatting
  $text = preg_replace('/__([^"]+?)__/', '<u>\1</u>', $text);

  // - decode certain entities in codeblock (just because they are valid HTML,
  //   derp).
  if ($in_code_block && $allow_bad_codeblocks)
  {
    $text = str_replace('&lt;i&gt;','<i>',$text);
    $text = str_replace('&lt;/i&gt;','</i>',$text);
  }
  // - restore protected entities
  $text = preg_replace('/##(gt|lt)##/','&\1;',$text);
  // - restore protected comment blocks
  $text = str_replace('%!--', '&lt;!--', $text);
  $text = str_replace('--%', '--&gt;', $text);
  // - restore protected footnote refs
  $text = preg_replace('/%FootnoteRef id="([^"]+)"\/%/', '<FootnoteRef id="\1"/>', $text);
  // output the text block
  $text = trim($text);
  // - compare against the empty string explicitly: empty() would also treat
  //   the legitimate text "0" as nothing
  if ($text === '')
  {
    fwrite($xml_out, '<Text id="' . $text_id . '"/>' . "\n");
  }
  else if (!empty($type))
  {
    fwrite($xml_out, '<Text type="' . $type . '" id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
  else
  {
    fwrite($xml_out, '<Text id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
}
/** outputTextBlock($xml_out, $text) **/
552
553/**
554 */
555function processPage($xml_out, $page_name)
556{
557  global $dokuwiki_path;
558  global $seen_ids;
559  echo "</p>\n<p><b>Export Chapter:</b> " . $page_name . "<br/>\n";
560  // - locate the page in question (taking into account if the user asked for a
561  //   draft version or an approved version of the manual)
562  $page_path = '';
563  if ($_REQUEST['v'] == 'draft' || $_REQUEST['l'] != 'en')
564  {
565    $page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $page_name . '.txt';
566  }
567  else
568  {
569    $page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m'] . ':' . $page_name);
570  }
571  $page_in = @fopen($page_path, 'r');
572  if (!$page_in)
573  {
574    printError('Failed to open page for reading:' . $page_name, false);
575    return;
576  }
577  // - once again we read in line-by-line, but this time we are going to output
578  //   each line as we go through. We expect to encounter certain lines in a
579  //   predefined order, and should complain if we don't find what we expect.
580  $in_chapter = false;
581  $in_section = false;
582  $in_subsection = false;
583  $in_part = false;
584  $in_list = false;
585  $lists = array();
586  $previous_listitem_type = '';
587  $in_figure = false;
588  $in_table = false;
589  $column_widths = array();
590  $in_code_block = false;
591  while (($line = fgets($page_in)) !== false)
592  {
593    // remove newline character
594    $line = preg_replace('/\r?\n$/','',$line);
595    // - we need to know the 'depth' for the bulletpoint lists
596    $depth = 0;
597    while (strlen($line) > 2 && preg_match('/^\s+[\*\-]/', $line) && substr($line, 0, 2) == '  ')
598    {
599      $depth++;
600      $line = substr($line, 2);
601    }
602    $first_character = substr($line, 0, 1);
603    // - special case for the end of bullet lists
604    if ($in_list && ($first_character != "*" && $first_character != "-"))
605    {
606      while (count($lists) > 0)
607      {
608        $list_type = array_pop($lists);
609        if ($list_type == '*')
610        {
611          fwrite($xml_out, '</Bullet>' . "\n");
612          fwrite($xml_out, '</BulletList>' . "\n");
613        }
614        else
615        {
616          fwrite($xml_out, '</NumberedItem>' . "\n");
617          fwrite($xml_out, '</NumberedList>' . "\n");
618        }
619      }
620      $in_list = false;
621    }
622    // - special case for the end of tables
623    if ($in_table && $first_character != '^' && $first_character != '|')
624    {
625      fwrite($xml_out, '</TableContent>' . "\n");
626      fwrite($xml_out, '</Table>' . "\n");
627      $in_table = false;
628    }
629    // - special cases for premature closing of sections, subsections and parts
630    if (preg_match('/<!-- close:(section|subsection|part) -->/', $line, $matches))
631    {
632      // - we always try to do this (regardless of actual flag) as we must
633      //   always close the smallest 'granularity' first
634      if ($in_part)
635      {
636        fwrite($xml_out, '</Content>' . "\n");
637        fwrite($xml_out, '</Part>' . "\n");
638        $in_part = false;
639      }
640      if ($in_subsection && ($matches[1] == 'section' || $matches[1] == 'subsection'))
641      {
642        fwrite($xml_out, '</Content>' . "\n");
643        fwrite($xml_out, '</Subsection>' . "\n");
644        $in_subsection = false;
645      }
646      if ($in_section && $matches[1] == 'section')
647      {
648        fwrite($xml_out, '</Content>' . "\n");
649        fwrite($xml_out, '</Section>' . "\n");
650        $in_section = false;
651      }
652    }
653
654    // - if this page is a chapter, then the first thing on the page should be
655    //   the chapter title (six equals)
656    if (preg_match('/====== (.+) ======/', $line, $matches))
657    {
658      $chapter_title = $matches[1];
659      $chapter_id = $page_name;
660      if (empty($chapter_id))
661      {
662        $chapter_id = generateID($chapter_title);
663      }
664      // - are we already processing a part? if so end it, end it now
665      if ($in_part)
666      {
667        fwrite($xml_out, '</Content>' . "\n");
668        fwrite($xml_out, '</Part>' . "\n");
669        $in_part = false;
670      }
671      // - are we already processing a subsection? if so end it, end it now
672      if ($in_subsection)
673      {
674        fwrite($xml_out, '</Content>' . "\n");
675        fwrite($xml_out, '</Subsection>' . "\n");
676        $in_subsection = false;
677      }
678      // - are we already processing a section? if so end it, end it now
679      if ($in_section)
680      {
681        fwrite($xml_out, '</Content>' . "\n");
682        fwrite($xml_out, '</Section>' . "\n");
683        $in_section = false;
684      }
685      // - are we already processing a chapter? if so end it, end it now
686      if ($in_chapter)
687      {
688        fwrite($xml_out, '</Content>' . "\n");
689        fwrite($xml_out, '</Chapter>' . "\n");
690        $in_chapter = false;
691      }
692      // - write out this chapter's header
693      fwrite($xml_out, '<Chapter id="' . $chapter_id . '">' . "\n");
694      outputMetadataSingle($xml_out, $chapter_title, 'Title');
695      fwrite($xml_out, '<Content>' . "\n");
696      $in_chapter = true;
697    }
698    // - the next likely thing to encounter is a section heading (five equals)
699    elseif (preg_match('/=====\s+(.+)\s+=====/', $line, $matches))
700    {
701      $section_title = $matches[1];
702      // - check for explicit section id
703      $section_id = '';
704      if (preg_match('/<!-- sid:(.+?) -->(.*)/', $section_title, $matches))
705      {
706        $section_id = $matches[1];
707        $section_title = $matches[2];
708      }
709      if (empty($section_id))
710      {
711        $section_id = generateID($section_title);
712      }
713      // - are we already processing a part? if so end it, end it now
714      if ($in_part)
715      {
716        fwrite($xml_out, '</Content>' . "\n");
717        fwrite($xml_out, '</Part>' . "\n");
718        $in_part = false;
719      }
720      // - are we already processing a subsection? if so end it, end it now
721      if ($in_subsection)
722      {
723        fwrite($xml_out, '</Content>' . "\n");
724        fwrite($xml_out, '</Subsection>' . "\n");
725        $in_subsection = false;
726      }
727      // - are we already processing a section? if so end it, end it now
728      if ($in_section)
729      {
730        fwrite($xml_out, '</Content>' . "\n");
731        fwrite($xml_out, '</Section>' . "\n");
732        $in_section = false;
733      }
734      // - write out this section's header
735      fwrite($xml_out, '<Section id="' . $section_id . '">' . "\n");
736      outputMetadataSingle($xml_out, $section_title, 'Title');
737      fwrite($xml_out, '<Content>' . "\n");
738      $in_section = true;
739    }
740    // - similar for subsection heading (four equals)
741    elseif (preg_match('/==== (.+) ====/', $line, $matches))
742    {
743      $subsection_title = $matches[1];
744      // - check for explicit subsection id
745      $subsection_id = '';
746      if (preg_match('/<!-- sid:(.+?) -->(.*)/', $subsection_title, $matches))
747      {
748        $subsection_id = $matches[1];
749        $subsection_title = $matches[2];
750      }
751      if (empty($subsection_id))
752      {
753        $subsection_id = generateID($subsection_title);
754      }
755      // - are we already processing a part? if so end it, end it now
756      if ($in_part)
757      {
758        fwrite($xml_out, '</Content>' . "\n");
759        fwrite($xml_out, '</Part>' . "\n");
760        $in_part = false;
761      }
762      // - are we already processing a subsection? if so end it, end it now
763      if ($in_subsection)
764      {
765        fwrite($xml_out, '</Content>' . "\n");
766        fwrite($xml_out, '</Subsection>' . "\n");
767        $in_subsection = false;
768      }
769      // - write out this subsection's header
770      fwrite($xml_out, '<Subsection id="' . $subsection_id . '">' . "\n");
771      outputMetadataSingle($xml_out, $subsection_title, 'Title');
772      fwrite($xml_out, '<Content>' . "\n");
773      $in_subsection = true;
774    }
775    // - and part heading (three equals)
776    elseif (preg_match('/=== (.+) ===/', $line, $matches))
777    {
778      $part_title = $matches[1];
779      // - check for explicit part id
780      $part_id = '';
781      if (preg_match('/<!-- sid:(.+?) -->(.*)/', $part_title, $matches))
782      {
783        $part_id = $matches[1];
784        $part_title = $matches[2];
785      }
786      if (empty($part_id))
787      {
788        $part_id = generateID($part_title);
789      }
790      // - are we already processing a part? if so end it, end it now
791      if ($in_part)
792      {
793        fwrite($xml_out, '</Content>' . "\n");
794        fwrite($xml_out, '</Part>' . "\n");
795        $in_part = false;
796      }
797      // - write out this part's header
798      fwrite($xml_out, '<Part id="' . $part_id . '">' . "\n");
799      outputMetadataSingle($xml_out, '**//' . $part_title . '//**', 'Title');
800      fwrite($xml_out, '<Content>' . "\n");
801      $in_part = true;
802    }
803    // - Ignore 5th level heading - they are only used to allow more convenient
804    //   editing of figures and tables
805    elseif (preg_match('/== (.+) ==/', $line, $matches))
806    {
807    }
808    // - lists need special handling
809    elseif (preg_match('/^(\*|\-)\s+(.*)/', $line, $matches))
810    {
811      $list_type = $matches[1];
812      $list_text = $matches[2];
813      $list_depth = count($lists);
814      if (!$in_list)
815      {
816        if ($list_type == '*')
817        {
818          fwrite($xml_out, '<BulletList>' . "\n");
819        }
820        else
821        {
822          fwrite($xml_out, '<NumberedList>' . "\n");
823        }
824        $in_list = true;
825        array_push($lists, $list_type);
826      }
827      // - this bullet is at the same depth as previous - close the previous
828      //   point
829      elseif ($depth == $list_depth)
830      {
831        $previous_list_type = end($lists);
832        if ($previous_list_type == '*')
833        {
834          fwrite($xml_out, '</Bullet>' . "\n");
835        }
836        else
837        {
838          fwrite($xml_out, '</NumberedItem>' . "\n");
839        }
840        // - we don't match in type anymore... close the previous list and open
841        //   a new list of the appropriate type
842        if ($list_type != $previous_list_type)
843        {
844          if ($previous_list_type == '*')
845          {
846            fwrite($xml_out, '</BulletList>' . "\n");
847            fwrite($xml_out, '<NumberedList>' . "\n");
848          }
849          else
850          {
851            fwrite($xml_out, '</NumberedNumbered>' . "\n");
852            fwrite($xml_out, '<BulletList>' . "\n");
853          }
854          array_pop($lists);
855          array_push($lists, $list_type);
856        }
857      }
858      else
859      {
860        // - we have either got deeper...
861        if ($depth > $list_depth)
862        {
863          if ($list_type == '*')
864          {
865            fwrite($xml_out, '<BulletList>' . "\n");
866          }
867          else
868          {
869            fwrite($xml_out, '<NumberedList>' . "\n");
870          }
871          array_push($lists, $list_type);
872        }
873        // ... or shallower in the bullet listing
874        if ($depth < $list_depth)
875        {
876          $previous_list_type = array_pop($lists);
877          if ($previous_list_type == '*')
878          {
879            fwrite($xml_out, '</Bullet>' . "\n");
880            fwrite($xml_out, '</BulletList>' . "\n");
881          }
882          else
883          {
884            fwrite($xml_out, '</NumberedItem>' . "\n");
885            fwrite($xml_out, '</NumberedList>' . "\n");
886          }
887          // - we still have to close the last item too
888          $previous_listitem_type = end($lists);
889          if ($previous_listitem_type == '*')
890          {
891            fwrite($xml_out, '</Bullet>' . "\n");
892          }
893          else
894          {
895            fwrite($xml_out, '</NumberedItem>' . "\n");
896          }
897        }
898      }
899      if ($list_type == '*')
900      {
901        fwrite($xml_out, '<Bullet>' . "\n");
902      }
903      else
904      {
905        fwrite($xml_out, '<NumberedItem>' . "\n");
906      }
907      // Special Case: bullets that contain (start) a code block
908      if (preg_match('/^(.*)<code>\s*$/', $list_text, $matches))
909      {
910        $list_text = $matches[1];
911        $in_code_block = true;
912      }
913
914      outputTextBlock($xml_out, $list_text);
915
916      // - to make things clearer, we'll process any and all code blocks within
917      //   bullets here - especially as there may be more text block *after*
918      //   the code block finishes
919      if ($in_code_block)
920      {
921        $sub_line = '';
922        while ($in_code_block && ($sub_line = fgets($page_in)) !== false)
923        {
924          $sub_line = trim($sub_line);
925          // - closing code
926          if (preg_match('/^<\/code>(.*)$/', $sub_line, $matches))
927          {
928            $sub_line = $matches[1]; // may be empty string
929            $in_code_block = false;
930          }
931          // - output another plain codeline
932          else
933          {
934            fwrite($xml_out, '<CodeLine>' . $sub_line . "</CodeLine>\n");
935            $sub_line = '';
936          }
937        }
938        // - if sub_line still has anything in it, then add that content as a
939        //   text block
940        if (!empty($sub_line))
941        {
942          outputTextBlock($xml_out, $sub_line);
943        }
944      }
945    }
946    // - images start with an image caption 'element'
947    elseif (preg_match('/<imgcaption\s+figure_([a-z0-9_\-]+)\|(.+)>([^<]*?)<\/imgcaption>/', $line, $matches))
948    {
949      $figure_id = $matches[1];
950      $figure_title = $matches[2];
951      $image_content = $matches[3];
952      // - watch for the special withLineNumber flag
953      $class_attribute = '';
954      if (strpos($figure_title, '%!-- withLineNumber --%') != false)
955      {
956        $class_attribute = ' class="withLineNumber"';
957        $figure_title = str_replace('%!-- withLineNumber --%','',$figure_title);
958      }
959      fwrite($xml_out, '<Figure id="' . $figure_id . '"' . $class_attribute . '>' . "\n");
960      echo '[figure: ' . $figure_id . "] \n";
961      fwrite($xml_out, '<Title>' . "\n");
962      // - decode any comments in the title (used to store explicit id
963      //   information)
964      $figure_title = str_replace('%!--', '<!--', $figure_title);
965      $figure_title = str_replace('--%', '-->', $figure_title);
966      // - special case: the title may have a subtitle (as a prefix)
967      $figure_subtitle_id = '';
968      $figure_subtitle = '';
969      // - subtitle with explicit id
970      if (preg_match('/^(<!-- id:.+? -->\([a-z]\))\s*(.*)$/', $figure_title, $matches))
971      {
972        $figure_subtitle = $matches[1];
973        $figure_title = $matches[2];
974      }
975      // - subtitle without explicit id
976      else if (preg_match('/^(\([a-z]\))\s*(.*)$/', $figure_title, $matches))
977      {
978        $figure_subtitle = $matches[1];
979        $figure_title = $matches[2];
980      }
981      outputTextBlock($xml_out, $figure_title);
982      if (!empty($figure_subtitle))
983      {
984        fwrite($xml_out, '<SubTitle>' . "\n");
985        outputTextBlock($xml_out, $figure_subtitle);
986        fwrite($xml_out, '</SubTitle>' . "\n");
987      }
988      fwrite($xml_out, '</Title>' . "\n");
989      // Try and find the image itself
990      if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $image_content))
991      {
992        processImage($xml_out, $line);
993        fwrite($xml_out, '</Figure>' . "\n");
994      }
995      // Didn't find an image? Weird, but mark the imgcaption as open, and
996      // we'll chomp up the next image found as the content.
997      else
998      {
999          $in_figure = true;
1000      }
1001      // - record the id to prevent repeating
1002      $seen_ids[$figure_id] = true;
1003    }
1004    // - tables start with a table caption 'element'
1005    elseif (preg_match('/<tblcaption\s+table_([a-z0-9_\-]+)\|([^>]+)>\s*<\/tblcaption>/', $line, $matches))
1006    {
1007      $table_id = $matches[1];
1008      $table_title = $matches[2];
1009      if ($table_title == '##NOCAPTION##')
1010      {
1011        echo '[non-captioned table: ' . $table_id . "] \n";
1012        // - watch for autogenerated ids... no point in outputting them
1013        if (preg_match('/^table(_\d+)?$/', $table_id))
1014        {
1015          fwrite($xml_out, "<Table>\n");
1016        }
1017        else
1018        {
1019          fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
1020        }
1021        fwrite($xml_out, '<Title/>' . "\n");
1022      }
1023      elseif ($table_title == '##HIDDEN##')
1024      {
1025        echo '[hidden table: ' . $table_id . "] \n";
1026        // - watch for autogenerated ids... no point in outputting them
1027        if (preg_match('/^table(_\d+)?$/', $table_id))
1028        {
1029          fwrite($xml_out, "<Table class=\"hidden\">\n");
1030        }
1031        else
1032        {
1033          fwrite($xml_out, '<Table class="hidden" id="' . $table_id . '">' . "\n");
1034        }
1035        fwrite($xml_out, '<Title/>' . "\n");
1036      }
1037      else
1038      {
1039        echo '[table: ' . $table_id . "] \n";
1040        // - watch for autogenerated ids... no point in outputting them
1041        if (preg_match('/^table(_\d+)?$/', $table_id))
1042        {
1043          fwrite($xml_out, "<Table>\n");
1044        }
1045        else
1046        {
1047          fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
1048        }
1049        fwrite($xml_out, '<Title>' . "\n");
1050        outputTextBlock($xml_out, $table_title);
1051        fwrite($xml_out, '</Title>' . "\n");
1052      }
1053      fwrite($xml_out, '<TableContent>' . "\n");
1054      $in_table = true;
1055      // - record the id to prevent repeating
1056      $seen_ids[$table_id] = true;
1057    }
1058    // - the second line in a table should be it's column width values
1059    elseif (preg_match('/\|<\s-\s([0-9 ]+?)\s>\|/', $line, $matches))
1060    {
1061      $column_widths = explode(' ', $matches[1]);
1062    }
1063    // - then every row will be made of a number of cells
1064    elseif (preg_match('/^\|(.*?)\|$/', $line, $matches))
1065    {
1066      $row_content = $matches[1];
1067      $cell_contents = preg_split('/(\s+\||\|\s+)/', $row_content);
1068      fwrite($xml_out, '<tr>' . "\n");
1069      foreach ($cell_contents as $index=>$cell_content)
1070      {
1071        $cell_content = trim($cell_content);
1072        $th_text = '';
1073        if (isset($column_widths[$index]))
1074        {
1075          $th_text = '<th width="' . $column_widths[$index] . '"';
1076        }
1077        else
1078        {
1079          $th_text = '<th';
1080        }
1081        // - if the cell would be empty, we use the shorthand
1082        if (empty($cell_content))
1083        {
1084          $th_text .= '/>' . "\n";
1085          fwrite($xml_out, $th_text);
1086        }
1087        else
1088        {
1089          $th_text .= '>' . "\n";
1090          fwrite($xml_out, $th_text);
1091
1092          // GAH - this is proving harder than a hard thing thats hard.
1093          // The issue is that the most straightforward way of fixing this,
1094          // namely using explicit newlines (\\) in the dokuwiki txt causes
1095          // lots a legitimately translated <br/> to also be split up. I
1096          // think the only way forward would be to maybe extend the HTML
1097          // Comment plugin to also respect and process <br/> tags. Then I
1098          // can avoid transforming them, and use the \\ sentinel to
1099          // separate multi-line table cells.
1100          $cell_content_lines = explode('\\\\', $cell_content);
1101          foreach ($cell_content_lines as $cell_content)
1102          {
1103            // - watch out, as the content may be an image
1104            if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $cell_content))
1105            {
1106              processImage($xml_out, $cell_content);
1107            }
1108            elseif (preg_match('/\'\'(.*)\'\'/', $cell_content, $matches))
1109            {
1110              fwrite($xml_out, '<CodeLine>' . translateTableCodeline($matches[1]) . '</CodeLine>' . "\n");
1111            }
1112            // - anything else it text
1113            else
1114            {
1115              outputTextBlock($xml_out, $cell_content);
1116            }
1117          }
1118          fwrite($xml_out, '</th>' . "\n");
1119        }
1120      }
1121      fwrite($xml_out, '</tr>' . "\n");
1122    }
1123    // - links to image media in the wiki!
1124    elseif (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $line))
1125    {
1126      processImage($xml_out, $line);
1127      // - if we were processing a figure, then now is a good time to close it
1128      if ($in_figure)
1129      {
1130        fwrite($xml_out, '</Figure>' . "\n");
1131        $in_figure = false;
1132      }
1133    }
1134    // - if the line starts with a <code> block, then we have a tag
1135    //   for that (which is special in that it get a unique text id)
1136    elseif (preg_match('/^<code\s*\d*\s*>(.*?)(<\/code>)?$/', $line, $matches) || ($in_code_block && preg_match('/^(.*?)(<\/code>)?$/', $line, $matches)))
1137    {
1138      $payload = $matches[1];
1139      $found_end = (isset($matches[2]));
1140      $in_code_block = true;
1141      // - be careful with empty lines
1142      if (empty($payload))
1143      {
1144        // - as they may appear in the body of the code (in which case we need
1145        //   to output them). The empty lines at the start or end of a code
1146        //   block are just an unfortunate consequence of the support for code
1147        //   line numbering.
1148        if (!$found_end && strpos($line, '<code') === false)
1149        {
1150          fwrite($xml_out, "<CodeLine/>\n");
1151        }
1152      }
1153      elseif (preg_match('/^<!-- id:([^\s]+) -->/', $payload, $matches))
1154      {
1155        $text_id = $matches[1];
1156        outputTextBlock($xml_out, $payload, 'code', true);
1157        // - record the id to prevent repeating
1158        $seen_ids[$text_id] = true;
1159      }
1160      else
1161      {
1162        fwrite($xml_out, '<CodeLine>' . translateText($payload, true) . '</CodeLine>' . "\n");
1163      }
1164      // - if we didn't find an endtag we have to keep doing code mode until
1165      //   we do
1166      $in_code_block = (!$found_end);
1167      if ($found_end)
1168      {
1169        // - if we were processing a figure, then now is a good time to close it
1170        if ($in_figure)
1171        {
1172          fwrite($xml_out, '</Figure>' . "\n");
1173          $in_figure = false;
1174        }
1175      }
1176    }
1177    // - entities on a line by themselves (i.e. references to external files)
1178    //   go through verbatim
1179    elseif (preg_match('/^\s*&[a-z0-9_-]+;\s*$/', $line))
1180    {
1181      fwrite($xml_out, $line . "\n");
1182    }
1183    // - lines starting with > are indented text blocks
1184    elseif (preg_match('/^>(.*)$/', $line, $matches))
1185    {
1186      $payload = $matches[1];
1187      fwrite($xml_out, "<Indented>\n");
1188      outputTextBlock($xml_out, $payload);
1189      fwrite($xml_out, "</Indented>\n");
1190    }
1191    // - everything else goes straight through as a text block
1192    // - note that for code blocks, even empty lines count
1193    elseif (!empty($line))
1194    {
1195      // - output the line of text having encoded entities etc
1196      outputTextBlock($xml_out, $line, '', $in_code_block);
1197    }
1198  }
1199  // Complete any open part
1200  if ($in_part)
1201  {
1202    fwrite($xml_out, '</Content>' . "\n");
1203    fwrite($xml_out, '</Part>' . "\n");
1204    $in_part = false;
1205  }
1206  // Complete any open subsection
1207  if ($in_subsection)
1208  {
1209    fwrite($xml_out, '</Content>' . "\n");
1210    fwrite($xml_out, '</Subsection>' . "\n");
1211    $in_subsection = false;
1212  }
1213  // Complete any open section
1214  if ($in_section)
1215  {
1216    fwrite($xml_out, '</Content>' . "\n");
1217    fwrite($xml_out, '</Section>' . "\n");
1218    $in_section = false;
1219  }
1220  // Complete any open chapter
1221  if ($in_chapter)
1222  {
1223    fwrite($xml_out, '</Content>' . "\n");
1224    fwrite($xml_out, '</Chapter>' . "\n");
1225    $in_chapter = false;
1226  }
1227}
1228/** processPage($xml_out, $page_name) **/
1229
/**
 * Export a single DokuWiki image reference.
 *
 * Searches $text for the first media link of the form {{...name?WxH}}
 * (optionally suffixed with &direct). When one is found, the image is copied
 * from the wiki's media directory into the images directory of the XML source
 * tree for the requested language ($_REQUEST['l']), and a <File/> element
 * carrying its width, height and relative url is written to $xml_out. Text
 * that contains no such link produces no output at all.
 *
 * A failed copy is reported through printError() but does not prevent the
 * <File/> element from being written.
 *
 * @param resource $xml_out  open, writable handle of the XML file being built
 * @param string   $text     wiki text expected to contain the image link
 */
function processImage($xml_out, $text)
{
  global $dokuwiki_path;
  global $xml_source_path;

  // - nothing to do unless the text really contains a sized image link
  if (!preg_match('/\{\{.+?([^:?]+)\?(\d+)x(\d+)(&direct)?\}\}/', $text, $matches))
  {
    return;
  }
  $filename = $matches[1];
  $width = $matches[2];
  $height = $matches[3];
  $language = $_REQUEST['l'];

  // NOTE(review): both the language code and the captured file name end up in
  // filesystem paths without any validation in this function - confirm that
  // they are checked (or trusted) upstream.

  // - copy the file into place. The source name is lower-cased while the
  //   destination keeps the case used in the wiki text (presumably because
  //   DokuWiki stores media under lower-case names - TODO confirm).
  $image_source_path = $dokuwiki_path . '/data/media/' . $language . '/manuals/images/' . strtolower($filename);
  $image_destination_dir = $xml_source_path . '/' . $language . '/images';
  mkAllDir($image_destination_dir);
  $image_destination_path = $image_destination_dir . '/' . $filename;
  // - test for the source first, so that a missing image is reported once
  //   through printError() rather than also raising a PHP warning from copy()
  if (is_file($image_source_path) && copy($image_source_path, $image_destination_path))
  {
    echo '[copying file: ' . $filename . "] \n";
    chmod($image_destination_path, 0664);
  }
  else
  {
    printError('Failed to copy image into place: ' . $filename, false);
  }

  // - spit out the XML element. Width and height can only be digits (see the
  //   pattern above), but the file name is free text and so must be escaped
  //   before being placed inside an attribute value.
  $url = 'images/' . htmlspecialchars($filename, ENT_QUOTES, 'UTF-8');
  fwrite($xml_out, '<File width="' . $width . '" height="' . $height . '" url="' . $url . '"/>' . "\n");
}
Note: See TracBrowser for help on using the browser.