root/documentation/trunk/php/gs-manual-export.php @ 27507

Revision 27507, 42.7 KB (checked in by jmt12, 6 years ago)

Ensuring the downloadable versions of the XML exports are stored in a writable location, are now downloaded as archives (ZIP and TGZ) containing the XML and images, and clean up old files left lying around

Line 
<?php

require_once('common.php');
$debug = 1;

// Some elements are, in the original author's opinion, incorrectly 'resolved'
// in code blocks just because they are valid HTML. For instance, &lt;i&gt; is
// resolved to <i> in code blocks, while something like &lt;Metadata&gt; is
// not. Set this to true to allow that behaviour (for the purpose of comparing
// before and after versions of the XML). [jmt12]
$allow_bad_codeblocks = true;

/** @file gs-manual-export.php
 *  Transforms the series of dokuwiki pages that make up a certain manual (as
 *  specified by the 'm' argument) in a certain language ('l') into the XML
 *  format required by the rest of the Greenstone manual generation scripts.
 */

// 0. Initialization

// - running counter used to label text blocks that carry no explicit id
$text_id_counter = 1;
// - registry of every footnote encountered while exporting
$footnotes = array();

// Request defaults: language, manual and version fall back to these values
// whenever the argument is missing or empty. (Other manuals that have been
// used as the default while testing: 'user', 'install', 'paper'.)
$request_defaults = array('l' => 'en', 'm' => 'develop', 'v' => 'draft');
foreach ($request_defaults as $request_key => $request_default)
{
  if (empty($_REQUEST[$request_key]))
  {
    $_REQUEST[$request_key] = $request_default;
  }
}
// - the action must be one of the two known verbs; anything else means
//   "store the file to disk"
$action_is_known = isset($_REQUEST['a']) && preg_match('/^(download|store)$/', $_REQUEST['a']);
if (!$action_is_known)
{
  $_REQUEST['a'] = 'store';
}

$var_path = filecat(array($base_path, 'var'));
$timestamp = time();
// - the default export location (replace with the path to the GSDL manuals
//   when storing for real)
$xml_source_path = '/tmp';
if ('download' == $_REQUEST['a'])
{
  // Downloads go into a fresh, timestamped directory under var; previous
  // exports are cleared out first
  recursiveRemove($var_path, '/greenstone/greenstone-documentation/php/var');
  $xml_source_path = fileCat(array($var_path, $timestamp));
}
59
// Start the HTML progress report that is shown to the user while exporting
echo '<html>' . "\n";
echo '<head>' . "\n";
echo '<title>GS Manual Export</title>' . "\n";
echo '</head>' . "\n";
echo '<body>' . "\n";

// - validate arguments before we use them (security). The patterns are
//   anchored with \z rather than $ because $ also matches just before a
//   trailing newline, which would let a value such as "user\n" through and
//   into the file paths built below.
if (!preg_match('/^(develop|install|paper|user)\z/', $_REQUEST['m']))
{
  printError('Unknown manual type requested: ' . htmlspecialchars($_REQUEST['m']));
}

if (!preg_match('/^(ar|en|es|fr|pt-br|ru)\z/', $_REQUEST['l']))
{
  printError('Unknown language requested: ' . htmlspecialchars($_REQUEST['l']));
}

echo '<h2>Generating Greenstone Manual XML</h2>' . "\n";
// - the values are escaped even though they passed validation, so the output
//   stays safe should the validation above ever be relaxed
echo '<p><b>Manual:</b> ' . htmlspecialchars($_REQUEST['m']) . ' <b>Language:</b> ' . htmlspecialchars($_REQUEST['l']) . "</p>\n<hr/>\n";

// 1. Create the XML output file handle
// - construct the path using the information we've been provided as arguments
$xml_file_dir = $xml_source_path . '/' . $_REQUEST['l'];
mkAllDir($xml_file_dir);
$xml_file_path = $xml_file_dir . '/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml';
// - backup any existing file
if (file_exists($xml_file_path))
{
  $xml_backup_file_path = $xml_file_path . '.bak';
  if (!rename($xml_file_path, $xml_backup_file_path))
  {
    printError('Failed to rename existing manual file for backup');
  }
}

// - and create a handle to the new file; every later step writes to this
//   handle, so a failure to open it has to be reported here
$xml_out = fopen($xml_file_path, 'w');
if (!$xml_out)
{
  printError('Failed to open manual file for writing');
}
96
// 2. Read in the top level page - it provides the configuration data for the
//    manual and its cover page, and lists the other pages of the manual in
//    the order they should appear
echo "<p><b>Frontmatter:</b><br/>\n";
// - reading the page populates an array of metadata and the sequence of the
//   other pages within this manual
$cover_metadata = array();
$pages_in_order = array();
// - a draft export uses the latest version of each page regardless of
//   approval; otherwise only approved versions are used. Approval only
//   applies to the english manual, as those are the only editable pages.
$use_latest_revision = ($_REQUEST['v'] == 'draft' || $_REQUEST['l'] != 'en');
$top_page_path = $use_latest_revision
  ? $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt'
  : getApprovedVersionPath('en:manuals:' . $_REQUEST['m']);

if (!file_exists($top_page_path))
{
  printError('Failed to locate top level page for manual');
}
// - the file is read line-by-line, watching (by pattern matching) for the
//   lines we are interested in
$top_page_in = fopen($top_page_path, 'r');
if (!$top_page_in)
{
  printError('Failed to open top level page for reading');
}
// - a bulletpoint item that links to a page of this manual
$page_item_pattern = '/^\s+\*\s+\[\[.:' . $_REQUEST['m'] . ':(.+?)\|(.*?)\]\]\s*$/';
$in_contents = false;
while (($line = fgets($top_page_in)) !== false)
{
  if ($in_contents && preg_match($page_item_pattern, $line, $matches))
  {
    // - while capturing page order, each linked page name is appended to
    //   the pages in order array
    $pages_in_order[] = $matches[1];
  }
  elseif (preg_match('/^\^\s+([^\s]+)\s+\|\s+(.+?)\s+\|\s*$/', $line, $matches))
  {
    // - metadata is all encoded within dokuwiki tables; a field may repeat,
    //   so the values of each field are collected in a list
    $field = $matches[1];
    $value = $matches[2];
    if (!isset($cover_metadata[$field]))
    {
      $cover_metadata[$field] = array();
    }
    $cover_metadata[$field][] = $value;
  }
  elseif (preg_match('/^=+\s(.+)\s=+$/', $line, $matches))
  {
    // - the heading 'Contents' starts the capture of page order; any other
    //   heading stops it
    $in_contents = ($matches[1] == 'Contents');
  }
}
// - fgets() returning false before the end of the file means a read error
if (!feof($top_page_in))
{
  printError('Unexpected fgets() fail when reading top page');
}
fclose($top_page_in);
// - ensure we have the required metadata
$required_metadata = array('Heading','Title','Affiliation','Version','Date');
foreach ($required_metadata as $required_field)
{
  $is_present = isset($cover_metadata[$required_field]);
  if (!$is_present)
  {
    printError('Missing required metadata: ' . $required_field);
  }
}
// - the metadata is now used to construct the XML header and the cover page.
//   This follows a set recipe; only the elements that can repeat (such as
//   SupplementaryText) need different handling
$xml_prologue = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
$xml_prologue .= '<!DOCTYPE Manual [' . "\n";
if (isset($cover_metadata['ENTITY']))
{
  // - one entity declaration per ENTITY metadata value
  foreach ($cover_metadata['ENTITY'] as $entity)
  {
    $xml_prologue .= "\t" . '<!ENTITY ' . $entity . '>' . "\n";
  }
}
$xml_prologue .= ']>' . "\n";
$xml_prologue .= '<Manual id="' . ucfirst($_REQUEST['m']) . '" lang="' . $_REQUEST['l'] . '">' . "\n";
fwrite($xml_out, $xml_prologue);

// - cover page elements in output order; the flag marks the fields whose
//   values may repeat
$cover_elements = array(
  array('Heading', false),
  array('Title', false),
  array('Author', false),
  array('Affiliation', false),
  array('SupplementaryText', true),
  array('Text', true),
  array('Comment', true),
  array('Version', false),
  array('Date', false)
);
foreach ($cover_elements as $cover_element)
{
  list($cover_field, $cover_field_repeats) = $cover_element;
  if ($cover_field_repeats)
  {
    outputMetadataMultiple($xml_out, $cover_metadata, $cover_field);
  }
  else
  {
    outputMetadataSingle($xml_out, $cover_metadata, $cover_field);
  }
}

// 3. Process each page listed in the contents of the top level page in order
foreach ($pages_in_order as $page)
{
  processPage($xml_out, $page);
}

// 4. Output the list of footnotes (if any). The registry is keyed by the
//    footnote text, with the footnote id as the value.
if (!empty($footnotes))
{
  fwrite($xml_out, '<FootnoteList>'. "\n");
  foreach ($footnotes as $footnote => $footnote_id)
  {
    outputMetadataSingle($xml_out, $footnote, 'Footnote', $footnote_id);
  }
  fwrite($xml_out, '</FootnoteList>'. "\n");
}

// 5. Finalize and close the XML output
fwrite($xml_out, '</Manual>' . "\n");
fclose($xml_out);
chmod($xml_file_path, 0664);
234
// 6. Complete!
echo '<p><b>Complete!</b></p>' . "\n<hr/>\n";
if ($_REQUEST['a'] == 'download')
{
  // Archive names and locations; both archives are written next to the
  // language directory, inside the timestamped export directory
  $zip_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.zip';
  $zip_path = fileCat(array($xml_source_path, $zip_file));
  $tgz_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.tgz';
  $tgz_path = fileCat(array($xml_source_path, $tgz_file));
  // - the archive paths are passed through escapeshellarg() so that no
  //   character in them can be interpreted by the shell (the trailing '.' and
  //   '*' are deliberate shell arguments and stay unquoted)
  $zip_command = 'zip -r ' . escapeshellarg($zip_path) . ' . > /dev/null 2>&1';
  $tgz_command = 'tar -czf ' . escapeshellarg($tgz_path) . ' * > /dev/null 2>&1';
  // We need to move to the document folder so that archives have sensible
  // paths. If that move fails the commands must NOT run: they archive the
  // current directory, which would then be the script's own directory.
  $original_cwd = getcwd();
  $download_links = array();
  if (chdir($xml_file_dir))
  {
    $zip_status = 1;
    $tgz_status = 1;
    system($zip_command, $zip_status);
    system($tgz_command, $tgz_status);
    // Go back
    if ($original_cwd !== false)
    {
      chdir($original_cwd);
    }
    // - only offer the archives that were actually created (exit status 0)
    if ($zip_status === 0)
    {
      $download_links[] = '<a href="var/' . $timestamp . '/' . $zip_file . '">ZIP</a>';
    }
    if ($tgz_status === 0)
    {
      $download_links[] = '<a href="var/' . $timestamp . '/' . $tgz_file . '">TGZ</a>';
    }
  }
  if (!empty($download_links))
  {
    // Links are ready
    echo '<p>Download XML file plus images as: ' . implode(' or ', $download_links) . '</p>' . "\n";
  }
  else
  {
    echo '<p><b>Error:</b> Failed to create the downloadable archives</p>' . "\n";
  }
}
echo '<p>Click <a href="' . $dokuwiki_url . '/doku.php?id=' . $_REQUEST['l'] . ':manuals:' . $_REQUEST['m'] . '">here</a> to return to dokuwiki</p>' . "\n";
echo '</body>' . "\n";
echo '</html>';
exit(0);
262
/** Write one XML element named after $field, containing at most one text
 *  block, and report progress on standard output.
 *
 *  @param $xml_out  the file handle the XML is written to
 *  @param $metadata either an array of field => list of values (the first
 *                   value of $field is used) or a plain value that is output
 *                   as-is
 *  @param $field    the name of the element to write, and the key looked up
 *                   when $metadata is an array
 *  @param $mid      optional id attribute for the element; omitted when falsy
 */
function outputMetadataSingle($xml_out, $metadata, $field, $mid=false)
{
  echo '[metadata: ' . $field . "] \n";

  // - opening tag, with the id attribute only when one was supplied
  $id_attribute = $mid ? ' id="' . $mid . '"' : '';
  fwrite($xml_out, '<' . $field . $id_attribute . '>' . "\n");

  // - work out which value (if any) is to be written
  if (is_array($metadata))
  {
    $has_value = isset($metadata[$field]) && isset($metadata[$field][0]);
    $value = $has_value ? $metadata[$field][0] : null;
  }
  else
  {
    $has_value = !empty($metadata);
    $value = $metadata;
  }

  if ($has_value)
  {
    outputTextBlock($xml_out, $value);
  }
  else
  {
    echo 'no such field or no metadata';
  }

  fwrite($xml_out, '</' . $field . '>' . "\n");
}
297/** outputMetadataSingle() **/
298
/** Write one XML element named after $field whose single text block holds all
 *  the values of that field joined together.
 *
 *  @param $xml_out         the file handle the XML is written to
 *  @param $metadata        array of field => list of values
 *  @param $field           the element name and the key looked up in $metadata
 *  @param $separator       placed between the values
 *  @param $final_separator when set, used instead of $separator before the
 *                          last value
 */
function outputMetadataList($xml_out, $metadata, $field, $separator = ',', $final_separator = false)
{
  echo '[metadata list: ' . $field . "] \n";
  fwrite($xml_out, '<' . $field . '>' . "\n");
  if (!isset($metadata[$field]))
  {
    echo 'no such field or no metadata';
  }
  else
  {
    $entries = $metadata[$field];
    $entry_count = count($entries);
    if ($entry_count == 1)
    {
      // - a lone value needs no separators
      outputTextBlock($xml_out, $entries[0]);
    }
    elseif ($entry_count > 1)
    {
      if ($final_separator)
      {
        // - hold the last value back so it can be attached with the final
        //   separator
        $tail = array_pop($entries);
        $joined = implode($separator, $entries) . $final_separator . $tail;
      }
      else
      {
        $joined = implode($separator, $entries);
      }
      outputTextBlock($xml_out, $joined);
    }
  }
  fwrite($xml_out, '</' . $field . '>' . "\n");
}
330/** outputMetadataList() **/
331
/** Write every value of $field as its own text block, wrapped in an element
 *  named after the field (except for 'Text', whose blocks are not wrapped).
 *
 *  @param $xml_out  the file handle the XML is written to
 *  @param $metadata array of field => list of values
 *  @param $field    the element name and the key looked up in $metadata
 */
function outputMetadataMultiple($xml_out, $metadata, $field)
{
  echo '[metadata multiple: ' . $field . "] \n";
  // - Text blocks don't need to be wrapped in a Text element
  $needs_wrapper = ($field != 'Text');
  if ($needs_wrapper)
  {
    fwrite($xml_out, '<' . $field . '>' . "\n");
  }
  if (!isset($metadata[$field]))
  {
    echo 'no such field or no metadata';
  }
  else
  {
    foreach ($metadata[$field] as $entry)
    {
      outputTextBlock($xml_out, $entry);
    }
  }
  if ($needs_wrapper)
  {
    fwrite($xml_out, '</' . $field . '>' . "\n");
  }
}
356
/** Encode a line of code for use in a table, while turning the italic markers
 *  hidden in HTML comments (<!--i--> and <!--/i-->) into real <i> tags.
 *
 *  @param $text the line to translate
 *  @return the translated line
 */
function translateTableCodeline($text)
{
  $hidden_markers = array('<!--i-->', '<!--/i-->');
  $placeholders = array('%!--i--%', '%!--/i--%');
  $italic_tags = array('<i>', '</i>');
  // Swap the hidden italic markers for placeholders that the entity encoding
  // leaves untouched
  $protected = str_replace($hidden_markers, $placeholders, $text);
  // Encode entities etc
  $encoded = translateText($protected, true);
  // Turn the placeholders into italics elements
  return str_replace($placeholders, $italic_tags, $encoded);
}
369/** translateTableCodeline() **/
370
/** Encode the characters &, < and > as XML entities.
 *
 *  When the text comes from a code block and the global
 *  $allow_bad_codeblocks flag is set, the encoded forms of <i>, </i> and <br/>
 *  are turned back into tags afterwards.
 *
 *  @param $text          the text to encode
 *  @param $in_code_block true if the text comes from a code block
 *  @return the encoded text
 */
function translateText($text, $in_code_block=false)
{
  global $allow_bad_codeblocks;
  // - the ampersand must be handled first so that the ampersands of the
  //   other two entities are not encoded again
  $encoded = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $text);
  if (!$in_code_block || !$allow_bad_codeblocks)
  {
    return $encoded;
  }
  // - restore the tags that are tolerated within code blocks
  return str_replace(
    array('&lt;i&gt;', '&lt;/i&gt;', '&lt;br/&gt;'),
    array('<i>', '</i>', '<br/>'),
    $encoded
  );
}
386
/** Convert one block of dokuwiki text into a <Text> element and write it to
 *  the XML output.
 *
 *  The conversion registers footnotes in the global $footnotes array (keyed
 *  by footnote text, valued by footnote id), assigns the block an id (either
 *  the explicit <!-- id:... --> prefix or the global $text_id_counter), encodes
 *  XML special characters and rewrites links, URLs and bold/italic/underline
 *  markup as tags. The order of the steps matters: placeholders introduced by
 *  early steps are only resolved by later ones.
 *
 *  @param $xml_out       the file handle the XML is written to
 *  @param $text          the dokuwiki text to convert
 *  @param $type          optional value for the type attribute of <Text>
 *  @param $in_code_block true if the text comes from a code block, in which
 *                        case ampersands are left alone
 */
function outputTextBlock($xml_out, $text, $type='', $in_code_block = false)
{
  global $cover_metadata;
  global $text_id_counter;
  global $footnotes;
  global $allow_bad_codeblocks;

  // - Start by dealing with any footnotes before anything else
  while (preg_match('/\(\((.*?)\)\)/', $text, $matches))
  {
    $pattern = $matches[0];
    $footnote = $matches[1];
    // - a footnote whose text has been seen before keeps its id. Assigning a
    //   fresh id would overwrite the registry entry (leaving the earlier
    //   references without a footnote) and, because ids are derived from the
    //   registry size, make the next new footnote reuse the same id.
    if (isset($footnotes[$footnote]))
    {
      $footnote_id = $footnotes[$footnote];
    }
    else
    {
      $footnote_id = count($footnotes) + 1;
      $footnotes[$footnote] = $footnote_id;
    }
    // - note that we have to escape the footnote reference as the following
    //   code will convert any < and > to entities...
    $footnote_reference = '%FootnoteRef id="' . $footnote_id . '"/%';
    $text = str_replace($pattern, $footnote_reference, $text);
  }

  $text_id = '';
  // - check whether the string begins with an explicit id
  if (preg_match('/^\s*<!--\s*id:(.+?)\s*-->(.*)$/', $text, $matches))
  {
    $text_id = $matches[1];
    $text = $matches[2];
    // - a numeric explicit id also moves the counter on, so the generated
    //   ids continue after it
    if (is_numeric($text_id))
    {
      $text_id_counter = $text_id + 1;
    }
  }
  else
  {
    $text_id = $text_id_counter;
    $text_id_counter++;
  }

  // - protect the special case of an HTML comment being actually displayed
  //   in the text
  $text = preg_replace('/<!--([\s\.]+?)-->/','##lt##!--\1--##gt##',$text);

  // - remove all remaining HTML comments
  $text = preg_replace('/<!--.*?-->/', '', $text);

  // we leave code blocks alone in terms of ampersands
  if (!$in_code_block)
  {
    // - ampersands aren't safe in XML...
    $text = str_replace('&', '&amp;', $text);
    // ...except for the entities that we have registered as metadata
    if (isset($cover_metadata['ENTITY']))
    {
      foreach ($cover_metadata['ENTITY'] as $entity)
      {
        if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches))
        {
          $entity_name = $matches[1];
          if ($entity_name != 'mdash')
          {
            $entity_character = html_entity_decode('&#'.$matches[2].';',ENT_NOQUOTES,'UTF-8');
            $text = str_replace('&amp;' . $entity_name . ';', '&' . $entity_name . ';', $text);
            // - we also convert any characters that match the entity char
            //   into the entity
            $text = str_replace($entity_character, '&' . $entity_name . ';', $text);
          }
        }
      }
    }
    // - protect <br/> tags
    $text = str_replace('<br/>','%%br/%%',$text);
    // - encoding all of the < and > that appear in the text (rather than
    //   true html formatting)
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
    // - restore <br/> tags
    $text = str_replace('%%br/%%','<br/>',$text);
  }
  else if ($type == 'code')
  {
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
  }

  // - links
  // - external links are slightly easier
  $text = preg_replace('/\[\[http:\/\/(.*?)\|(.*?)\]\]/', '<Link url="http://\1">\2</Link>', $text);
  // - internal links become <CrossRef> tags
  $text = preg_replace('/\[\[\.\:(.*?)\|[^\]]+\]\]/','<CrossRef target="Chapter" ref="\1"/>', $text);
  // - internal links starting with hash must be on the same page; the
  //   patterns with more hashes have to be tried first
  $text = preg_replace('/\[\[###(.*?)\|.*?\]\]/','<CrossRef target="Part" ref="\1"/>', $text);
  $text = preg_replace('/\[\[##(.*?)\|.*?\]\]/','<CrossRef target="Subsection" ref="\1"/>', $text);
  $text = preg_replace('/\[\[#(.*?)\|.*?\]\]/','<CrossRef target="Section" ref="\1"/>', $text);
  // - 'external' internal wiki links are even worse - since we can't know
  //   what the page order number for another manual's chapters might be, we
  //   instead use a search
  $text = preg_replace('/\[\[\?do\=search\&amp;id\=([^\s]+)\s+@([a-z]+):manuals:([a-z]+)\|.*?\]\]/i', '<CrossRef external="\3" lang="\2" target="Chapter" ref="\1"/>', $text);
  // - references to images and tables
  $text = preg_replace('/(?:<|&lt;)imgref\sfigure_(.+?)(?:>|&gt;)/','<CrossRef target="Figure" ref="\1"/>', $text);
  $text = preg_replace('/(?:<|&lt;)tblref\stable_(.+?)(?:>|&gt;)/','<CrossRef target="Table" ref="\1"/>', $text);
  // - explicitly convert URLs as they are a bit messy
  // - first all the cases of URLs in italics, without protocol
  $text = preg_replace('/\/\/\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s(localhost(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  // - now all the protocol ones (with care taken to protect // in protocol)
  $text = preg_replace('/\/\/\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/(localhost(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  // - next we have the underlined URLs sans protocols
  $text = preg_replace('/__\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s(localhost(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  // - and finally the protocol prefixed underlined URLs
  $text = preg_replace('/__\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/(localhost(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  // - lets also protect any other protocols we find floating around
  $text = preg_replace('/(file|ftp|http):\/\//i', '\1:##DOUBLESLASH##', $text);

  // - italic formatting (taking care of protected double slashes)
  $text = preg_replace('/%%\/\/%%/', '##DOUBLESLASH##', $text);
  $text = preg_replace('/\/{5}/', '<i>/</i>', $text); // another special case
  $text = preg_replace('/\/\/(\/.+?)\s*\/\//', '<i>\1</i>', $text); // another special case
  $text = preg_replace('/\/\/\s*(.+?\/)\/\//', '<i>\1</i>', $text); // another special case
  $text = preg_replace('/\/\/\s*(.+?)\s*\/\//', '<i>\1</i>', $text);
  $text = preg_replace('/##DOUBLESLASH##/', '//', $text);
  // - bold formatting
  $text = preg_replace('/\*\*([^"]+?)\*\*/', '<b>\1</b>', $text);
  // - underline formatting
  $text = preg_replace('/__([^"]+?)__/', '<u>\1</u>', $text);

  // - decode the italic tags in code blocks (just because they are valid
  //   HTML); <br/> is deliberately not restored here
  if ($in_code_block && $allow_bad_codeblocks)
  {
    $text = str_replace('&lt;i&gt;','<i>',$text);
    $text = str_replace('&lt;/i&gt;','</i>',$text);
  }
  // - restore protected entities
  $text = preg_replace('/##(gt|lt)##/','&\1;',$text);
  // - restore protected comment blocks
  $text = str_replace('%!--', '&lt;!--', $text);
  $text = str_replace('--%', '--&gt;', $text);
  // - restore protected footnote refs
  $text = preg_replace('/%FootnoteRef id="([^"]+)"\/%/', '<FootnoteRef id="\1"/>', $text);
  // output the text block
  $text = trim($text);
  // - compare against the empty string rather than using empty(), which
  //   also treats the text "0" as empty and would drop it from the output
  if ($text === '')
  {
    fwrite($xml_out, '<Text id="' . $text_id . '"/>' . "\n");
  }
  else if (!empty($type))
  {
    fwrite($xml_out, '<Text type="' . $type . '" id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
  else
  {
    fwrite($xml_out, '<Text id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
}
551/** outputTextBlock($xml_out, $text) **/
552
553/**
554 */
555function processPage($xml_out, $page_name)
556{
557  global $dokuwiki_path;
558  global $seen_ids;
559  echo "</p>\n<p><b>Export Chapter:</b> " . $page_name . "<br/>\n";
560  // - locate the page in question (taking into account if the user asked for a
561  //   draft version or an approved version of the manual)
562  $page_path = '';
563  if ($_REQUEST['v'] == 'draft' || $_REQUEST['l'] != 'en')
564  {
565    $page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $page_name . '.txt';
566  }
567  else
568  {
569    $page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m'] . ':' . $page_name);
570  }
571  $page_in = @fopen($page_path, 'r');
572  if (!$page_in)
573  {
574    printError('Failed to open page for reading:' . $page_name, false);
575    return;
576  }
577  // - once again we read in line-by-line, but this time we are going to output
578  //   each line as we go through. We expect to encounter certain lines in a
579  //   predefined order, and should complain if we don't find what we expect.
580  $in_chapter = false;
581  $in_section = false;
582  $in_subsection = false;
583  $in_part = false;
584  $in_list = false;
585  $lists = array();
586  $previous_listitem_type = '';
587  $in_figure = false;
588  $in_table = false;
589  $column_widths = array();
590  $in_code_block = false;
591  while (($line = fgets($page_in)) !== false)
592  {
593    // remove newline character
594    $line = preg_replace('/\r?\n$/','',$line);
595    // - we need to know the 'depth' for the bulletpoint lists
596    $depth = 0;
597    while (strlen($line) > 2 && preg_match('/^\s+[\*\-]/', $line) && substr($line, 0, 2) == '  ')
598    {
599      $depth++;
600      $line = substr($line, 2);
601    }
602    $first_character = substr($line, 0, 1);
603    // - special case for the end of bullet lists
604    if ($in_list && ($first_character != "*" && $first_character != "-"))
605    {
606      while (count($lists) > 0)
607      {
608        $list_type = array_pop($lists);
609        if ($list_type == '*')
610        {
611          fwrite($xml_out, '</Bullet>' . "\n");
612          fwrite($xml_out, '</BulletList>' . "\n");
613        }
614        else
615        {
616          fwrite($xml_out, '</NumberedItem>' . "\n");
617          fwrite($xml_out, '</NumberedList>' . "\n");
618        }
619      }
620      $in_list = false;
621    }
622    // - special case for the end of tables
623    if ($in_table && $first_character != '^' && $first_character != '|')
624    {
625      fwrite($xml_out, '</TableContent>' . "\n");
626      fwrite($xml_out, '</Table>' . "\n");
627      $in_table = false;
628    }
629    // - special cases for premature closing of sections, subsections and parts
630    if (preg_match('/<!-- close:(section|subsection|part) -->/', $line, $matches))
631    {
632      // - we always try to do this (regardless of actual flag) as we must
633      //   always close the smallest 'granularity' first
634      if ($in_part)
635      {
636        fwrite($xml_out, '</Content>' . "\n");
637        fwrite($xml_out, '</Part>' . "\n");
638        $in_part = false;
639      }
640      if ($in_subsection && ($matches[1] == 'section' || $matches[1] == 'subsection'))
641      {
642        fwrite($xml_out, '</Content>' . "\n");
643        fwrite($xml_out, '</Subsection>' . "\n");
644        $in_subsection = false;
645      }
646      if ($in_section && $matches[1] == 'section')
647      {
648        fwrite($xml_out, '</Content>' . "\n");
649        fwrite($xml_out, '</Section>' . "\n");
650        $in_section = false;
651      }
652    }
653
654    // - if this page is a chapter, then the first thing on the page should be
655    //   the chapter title (six equals)
656    if (preg_match('/====== (.+) ======/', $line, $matches))
657    {
658      $chapter_title = $matches[1];
659      $chapter_id = $page_name;
660      if (empty($chapter_id))
661      {
662        $chapter_id = generateID($chapter_title);
663      }
664      // - are we already processing a part? if so end it, end it now
665      if ($in_part)
666      {
667        fwrite($xml_out, '</Content>' . "\n");
668        fwrite($xml_out, '</Part>' . "\n");
669        $in_part = false;
670      }
671      // - are we already processing a subsection? if so end it, end it now
672      if ($in_subsection)
673      {
674        fwrite($xml_out, '</Content>' . "\n");
675        fwrite($xml_out, '</Subsection>' . "\n");
676        $in_subsection = false;
677      }
678      // - are we already processing a section? if so end it, end it now
679      if ($in_section)
680      {
681        fwrite($xml_out, '</Content>' . "\n");
682        fwrite($xml_out, '</Section>' . "\n");
683        $in_section = false;
684      }
685      // - are we already processing a chapter? if so end it, end it now
686      if ($in_chapter)
687      {
688        fwrite($xml_out, '</Content>' . "\n");
689        fwrite($xml_out, '</Chapter>' . "\n");
690        $in_chapter = false;
691      }
692      // - write out this chapter's header
693      fwrite($xml_out, '<Chapter id="' . $chapter_id . '">' . "\n");
694      outputMetadataSingle($xml_out, $chapter_title, 'Title');
695      fwrite($xml_out, '<Content>' . "\n");
696      $in_chapter = true;
697    }
698    // - the next likely thing to encounter is a section heading (five equals)
699    elseif (preg_match('/=====\s+(.+)\s+=====/', $line, $matches))
700    {
701      $section_title = $matches[1];
702      // - check for explicit section id
703      $section_id = '';
704      if (preg_match('/<!-- sid:(.+?) -->(.*)/', $section_title, $matches))
705      {
706        $section_id = $matches[1];
707        $section_title = $matches[2];
708      }
709      if (empty($section_id))
710      {
711        $section_id = generateID($section_title);
712      }
713      // - are we already processing a part? if so end it, end it now
714      if ($in_part)
715      {
716        fwrite($xml_out, '</Content>' . "\n");
717        fwrite($xml_out, '</Part>' . "\n");
718        $in_part = false;
719      }
720      // - are we already processing a subsection? if so end it, end it now
721      if ($in_subsection)
722      {
723        fwrite($xml_out, '</Content>' . "\n");
724        fwrite($xml_out, '</Subsection>' . "\n");
725        $in_subsection = false;
726      }
727      // - are we already processing a section? if so end it, end it now
728      if ($in_section)
729      {
730        fwrite($xml_out, '</Content>' . "\n");
731        fwrite($xml_out, '</Section>' . "\n");
732        $in_section = false;
733      }
734      // - write out this section's header
735      fwrite($xml_out, '<Section id="' . $section_id . '">' . "\n");
736      outputMetadataSingle($xml_out, $section_title, 'Title');
737      fwrite($xml_out, '<Content>' . "\n");
738      $in_section = true;
739    }
740    // - similar for subsection heading (four equals)
741    elseif (preg_match('/==== (.+) ====/', $line, $matches))
742    {
743      $subsection_title = $matches[1];
744      // - check for explicit subsection id
745      $subsection_id = '';
746      if (preg_match('/<!-- sid:(.+?) -->(.*)/', $subsection_title, $matches))
747      {
748        $subsection_id = $matches[1];
749        $subsection_title = $matches[2];
750      }
751      if (empty($subsection_id))
752      {
753        $subsection_id = generateID($subsection_title);
754      }
755      // - are we already processing a part? if so end it, end it now
756      if ($in_part)
757      {
758        fwrite($xml_out, '</Content>' . "\n");
759        fwrite($xml_out, '</Part>' . "\n");
760        $in_part = false;
761      }
762      // - are we already processing a subsection? if so end it, end it now
763      if ($in_subsection)
764      {
765        fwrite($xml_out, '</Content>' . "\n");
766        fwrite($xml_out, '</Subsection>' . "\n");
767        $in_subsection = false;
768      }
769      // - write out this subsection's header
770      fwrite($xml_out, '<Subsection id="' . $subsection_id . '">' . "\n");
771      outputMetadataSingle($xml_out, $subsection_title, 'Title');
772      fwrite($xml_out, '<Content>' . "\n");
773      $in_subsection = true;
774    }
775    // - and part heading (three equals)
776    elseif (preg_match('/=== (.+) ===/', $line, $matches))
777    {
778      $part_title = $matches[1];
779      // - check for explicit part id
780      $part_id = '';
781      if (preg_match('/<!-- sid:(.+?) -->(.*)/', $part_title, $matches))
782      {
783        $part_id = $matches[1];
784        $part_title = $matches[2];
785      }
786      if (empty($part_id))
787      {
788        $part_id = generateID($part_title);
789      }
790      // - are we already processing a part? if so end it, end it now
791      if ($in_part)
792      {
793        fwrite($xml_out, '</Content>' . "\n");
794        fwrite($xml_out, '</Part>' . "\n");
795        $in_part = false;
796      }
797      // - write out this part's header
798      fwrite($xml_out, '<Part id="' . $part_id . '">' . "\n");
799      outputMetadataSingle($xml_out, '**//' . $part_title . '//**', 'Title');
800      fwrite($xml_out, '<Content>' . "\n");
801      $in_part = true;
802    }
803    // - Ignore 5th level heading - they are only used to allow more convenient
804    //   editing of figures and tables
805    elseif (preg_match('/== (.+) ==/', $line, $matches))
806    {
807    }
808    // - lists need special handling
809    elseif (preg_match('/^(\*|\-)\s+(.*)/', $line, $matches))
810    {
811      $list_type = $matches[1];
812      $list_text = $matches[2];
813      $list_depth = count($lists);
814      if (!$in_list)
815      {
816        if ($list_type == '*')
817        {
818          fwrite($xml_out, '<BulletList>' . "\n");
819        }
820        else
821        {
822          fwrite($xml_out, '<NumberedList>' . "\n");
823        }
824        $in_list = true;
825        array_push($lists, $list_type);
826      }
827      // - this bullet is at the same depth as previous - close the previous
828      //   point
829      elseif ($depth == $list_depth)
830      {
831        $previous_list_type = end($lists);
832        if ($previous_list_type == '*')
833        {
834          fwrite($xml_out, '</Bullet>' . "\n");
835        }
836        else
837        {
838          fwrite($xml_out, '</NumberedItem>' . "\n");
839        }
840        // - we don't match in type anymore... close the previous list and open
841        //   a new list of the appropriate type
842        if ($list_type != $previous_list_type)
843        {
844          if ($previous_list_type == '*')
845          {
846            fwrite($xml_out, '</BulletList>' . "\n");
847            fwrite($xml_out, '<NumberedList>' . "\n");
848          }
849          else
850          {
851            fwrite($xml_out, '</NumberedNumbered>' . "\n");
852            fwrite($xml_out, '<BulletList>' . "\n");
853          }
854          array_pop($lists);
855          array_push($lists, $list_type);
856        }
857      }
858      else
859      {
860        // - we have either got deeper...
861        if ($depth > $list_depth)
862        {
863          if ($list_type == '*')
864          {
865            fwrite($xml_out, '<BulletList>' . "\n");
866          }
867          else
868          {
869            fwrite($xml_out, '<NumberedList>' . "\n");
870          }
871          array_push($lists, $list_type);
872        }
873        // ... or shallower in the bullet listing
874        if ($depth < $list_depth)
875        {
876          $previous_list_type = array_pop($lists);
877          if ($previous_list_type == '*')
878          {
879            fwrite($xml_out, '</Bullet>' . "\n");
880            fwrite($xml_out, '</BulletList>' . "\n");
881          }
882          else
883          {
884            fwrite($xml_out, '</NumberedItem>' . "\n");
885            fwrite($xml_out, '</NumberedList>' . "\n");
886          }
887          // - we still have to close the last item too
888          $previous_listitem_type = end($lists);
889          if ($previous_listitem_type == '*')
890          {
891            fwrite($xml_out, '</Bullet>' . "\n");
892          }
893          else
894          {
895            fwrite($xml_out, '</NumberedItem>' . "\n");
896          }
897        }
898      }
899      if ($list_type == '*')
900      {
901        fwrite($xml_out, '<Bullet>' . "\n");
902      }
903      else
904      {
905        fwrite($xml_out, '<NumberedItem>' . "\n");
906      }
907      // Special Case: bullets that contain (start) a code block
908      if (preg_match('/^(.*)<code>\s*$/', $list_text, $matches))
909      {
910        $list_text = $matches[1];
911        $in_code_block = true;
912      }
913
914      outputTextBlock($xml_out, $list_text);
915
916      // - to make things clearer, we'll process any and all code blocks within
917      //   bullets here - especially as there may be more text block *after*
918      //   the code block finishes
919      if ($in_code_block)
920      {
921        $sub_line = '';
922        while ($in_code_block && ($sub_line = fgets($page_in)) !== false)
923        {
924          $sub_line = trim($sub_line);
925          // - closing code
926          if (preg_match('/^<\/code>(.*)$/', $sub_line, $matches))
927          {
928            $sub_line = $matches[1]; // may be empty string
929            $in_code_block = false;
930          }
931          // - output another plain codeline
932          else
933          {
934            fwrite($xml_out, '<CodeLine>' . $sub_line . "</CodeLine>\n");
935            $sub_line = '';
936          }
937        }
938        // - if sub_line still has anything in it, then add that content as a
939        //   text block
940        if (!empty($sub_line))
941        {
942          outputTextBlock($xml_out, $sub_line);
943        }
944      }
945    }
946    // - images start with an image caption 'element'
947    elseif (preg_match('/<imgcaption\s+figure_([a-z0-9_\-]+)\|(.+)>\s*<\/imgcaption>/', $line, $matches))
948    {
949      $figure_id = $matches[1];
950      $figure_title = $matches[2];
951      // - watch for the special withLineNumber flag
952      $class_attribute = '';
953      if (strpos($figure_title, '%!-- withLineNumber --%') != false)
954      {
955        $class_attribute = ' class="withLineNumber"';
956        $figure_title = str_replace('%!-- withLineNumber --%','',$figure_title);
957      }
958      fwrite($xml_out, '<Figure id="' . $figure_id . '"' . $class_attribute . '>' . "\n");
959      echo '[figure: ' . $figure_id . "] \n";
960      fwrite($xml_out, '<Title>' . "\n");
961      // - decode any comments in the title (used to store explicit id
962      //   information)
963      $figure_title = str_replace('%!--', '<!--', $figure_title);
964      $figure_title = str_replace('--%', '-->', $figure_title);
965      // - special case: the title may have a subtitle (as a prefix)
966      $figure_subtitle_id = '';
967      $figure_subtitle = '';
968      // - subtitle with explicit id
969      if (preg_match('/^(<!-- id:.+? -->\([a-z]\))\s*(.*)$/', $figure_title, $matches))
970      {
971        $figure_subtitle = $matches[1];
972        $figure_title = $matches[2];
973      }
974      // - subtitle without explicit id
975      else if (preg_match('/^(\([a-z]\))\s*(.*)$/', $figure_title, $matches))
976      {
977        $figure_subtitle = $matches[1];
978        $figure_title = $matches[2];
979      }
980      outputTextBlock($xml_out, $figure_title);
981      if (!empty($figure_subtitle))
982      {
983        fwrite($xml_out, '<SubTitle>' . "\n");
984        outputTextBlock($xml_out, $figure_subtitle);
985        fwrite($xml_out, '</SubTitle>' . "\n");
986      }
987      fwrite($xml_out, '</Title>' . "\n");
988      $in_figure = true;
989      // - record the id to prevent repeating
990      $seen_ids[$figure_id] = true;
991    }
992    // - tables start with a table caption 'element'
993    elseif (preg_match('/<tblcaption\s+table_([a-z0-9_\-]+)\|([^>]+)>\s*<\/tblcaption>/', $line, $matches))
994    {
995      $table_id = $matches[1];
996      $table_title = $matches[2];
997      if ($table_title == '##NOCAPTION##')
998      {
999        echo '[non-captioned table: ' . $table_id . "] \n";
1000        // - watch for autogenerated ids... no point in outputting them
1001        if (preg_match('/^table(_\d+)?$/', $table_id))
1002        {
1003          fwrite($xml_out, "<Table>\n");
1004        }
1005        else
1006        {
1007          fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
1008        }
1009        fwrite($xml_out, '<Title/>' . "\n");
1010      }
1011      elseif ($table_title == '##HIDDEN##')
1012      {
1013        echo '[hidden table: ' . $table_id . "] \n";
1014        // - watch for autogenerated ids... no point in outputting them
1015        if (preg_match('/^table(_\d+)?$/', $table_id))
1016        {
1017          fwrite($xml_out, "<Table class=\"hidden\">\n");
1018        }
1019        else
1020        {
1021          fwrite($xml_out, '<Table class="hidden" id="' . $table_id . '">' . "\n");
1022        }
1023        fwrite($xml_out, '<Title/>' . "\n");
1024      }
1025      else
1026      {
1027        echo '[table: ' . $table_id . "] \n";
1028        // - watch for autogenerated ids... no point in outputting them
1029        if (preg_match('/^table(_\d+)?$/', $table_id))
1030        {
1031          fwrite($xml_out, "<Table>\n");
1032        }
1033        else
1034        {
1035          fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
1036        }
1037        fwrite($xml_out, '<Title>' . "\n");
1038        outputTextBlock($xml_out, $table_title);
1039        fwrite($xml_out, '</Title>' . "\n");
1040      }
1041      fwrite($xml_out, '<TableContent>' . "\n");
1042      $in_table = true;
1043      // - record the id to prevent repeating
1044      $seen_ids[$table_id] = true;
1045    }
1046    // - the second line in a table should be its column width values
1047    elseif (preg_match('/\|<\s-\s([0-9 ]+?)\s>\|/', $line, $matches))
1048    {
1049      $column_widths = explode(' ', $matches[1]);
1050    }
1051    // - then every row will be made of a number of cells
1052    elseif (preg_match('/^\|(.*?)\|$/', $line, $matches))
1053    {
1054      $row_content = $matches[1];
1055      $cell_contents = preg_split('/(\s+\||\|\s+)/', $row_content);
1056      fwrite($xml_out, '<tr>' . "\n");
1057      foreach ($cell_contents as $index=>$cell_content)
1058      {
1059        $cell_content = trim($cell_content);
1060        $th_text = '';
1061        if (isset($column_widths[$index]))
1062        {
1063          $th_text = '<th width="' . $column_widths[$index] . '"';
1064        }
1065        else
1066        {
1067          $th_text = '<th';
1068        }
1069        // - if the cell would be empty, we use the shorthand
1070        if (empty($cell_content))
1071        {
1072          $th_text .= '/>' . "\n";
1073          fwrite($xml_out, $th_text);
1074        }
1075        else
1076        {
1077          $th_text .= '>' . "\n";
1078          fwrite($xml_out, $th_text);
1079
1080          // GAH - this is proving harder than a hard thing thats hard.
1081          // The issue is that the most straightforward way of fixing this,
1082          // namely using explicit newlines (\\) in the dokuwiki txt causes
1083          // lots a legitimately translated <br/> to also be split up. I
1084          // think the only way forward would be to maybe extend the HTML
1085          // Comment plugin to also respect and process <br/> tags. Then I
1086          // can avoid transforming them, and use the \\ sentinel to
1087          // separate multi-line table cells.
1088          $cell_content_lines = explode('\\\\', $cell_content);
1089          foreach ($cell_content_lines as $cell_content)
1090          {
1091            // - watch out, as the content may be an image
1092            if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $cell_content))
1093            {
1094              processImage($xml_out, $cell_content);
1095            }
1096            elseif (preg_match('/\'\'(.*)\'\'/', $cell_content, $matches))
1097            {
1098              fwrite($xml_out, '<CodeLine>' . translateTableCodeline($matches[1]) . '</CodeLine>' . "\n");
1099            }
1100            // - anything else is text
1101            else
1102            {
1103              outputTextBlock($xml_out, $cell_content);
1104            }
1105          }
1106          fwrite($xml_out, '</th>' . "\n");
1107        }
1108      }
1109      fwrite($xml_out, '</tr>' . "\n");
1110    }
1111    // - links to image media in the wiki!
1112    elseif (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $line))
1113    {
1114      processImage($xml_out, $line);
1115      // - if we were processing a figure, then now is a good time to close it
1116      if ($in_figure)
1117      {
1118        fwrite($xml_out, '</Figure>' . "\n");
1119        $in_figure = false;
1120      }
1121    }
1122    // - if the line starts with a <code> block, then we have a tag
1123    //   for that (which is special in that it get a unique text id)
1124    elseif (preg_match('/^<code\s*\d*\s*>(.*?)(<\/code>)?$/', $line, $matches) || ($in_code_block && preg_match('/^(.*?)(<\/code>)?$/', $line, $matches)))
1125    {
1126      $payload = $matches[1];
1127      $found_end = (isset($matches[2]));
1128      $in_code_block = true;
1129      // - be careful with empty lines
1130      if (empty($payload))
1131      {
1132        // - as they may appear in the body of the code (in which case we need
1133        //   to output them). The empty lines at the start or end of a code
1134        //   block are just an unfortunate consequence of the support for code
1135        //   line numbering.
1136        if (!$found_end && strpos($line, '<code') === false)
1137        {
1138          fwrite($xml_out, "<CodeLine/>\n");
1139        }
1140      }
1141      elseif (preg_match('/^<!-- id:([^\s]+) -->/', $payload, $matches))
1142      {
1143        $text_id = $matches[1];
1144        outputTextBlock($xml_out, $payload, 'code', true);
1145        // - record the id to prevent repeating
1146        $seen_ids[$text_id] = true;
1147      }
1148      else
1149      {
1150        fwrite($xml_out, '<CodeLine>' . translateText($payload, true) . '</CodeLine>' . "\n");
1151      }
1152      // - if we didn't find an endtag we have to keep doing code mode until
1153      //   we do
1154      $in_code_block = (!$found_end);
1155      if ($found_end)
1156      {
1157        // - if we were processing a figure, then now is a good time to close it
1158        if ($in_figure)
1159        {
1160          fwrite($xml_out, '</Figure>' . "\n");
1161          $in_figure = false;
1162        }
1163      }
1164    }
1165    // - entities on a line by themselves (i.e. references to external files)
1166    //   go through verbatim
1167    elseif (preg_match('/^\s*&[a-z0-9_-]+;\s*$/', $line))
1168    {
1169      fwrite($xml_out, $line . "\n");
1170    }
1171    // - lines starting with > are indented text blocks
1172    elseif (preg_match('/^>(.*)$/', $line, $matches))
1173    {
1174      $payload = $matches[1];
1175      fwrite($xml_out, "<Indented>\n");
1176      outputTextBlock($xml_out, $payload);
1177      fwrite($xml_out, "</Indented>\n");
1178    }
1179    // - everything else goes straight through as a text block
1180    // - note that for code blocks, even empty lines count
1181    elseif (!empty($line))
1182    {
1183      // - output the line of text having encoded entities etc
1184      outputTextBlock($xml_out, $line, '', $in_code_block);
1185    }
1186  }
1187  // Complete any open part
1188  if ($in_part)
1189  {
1190    fwrite($xml_out, '</Content>' . "\n");
1191    fwrite($xml_out, '</Part>' . "\n");
1192    $in_part = false;
1193  }
1194  // Complete any open subsection
1195  if ($in_subsection)
1196  {
1197    fwrite($xml_out, '</Content>' . "\n");
1198    fwrite($xml_out, '</Subsection>' . "\n");
1199    $in_subsection = false;
1200  }
1201  // Complete any open section
1202  if ($in_section)
1203  {
1204    fwrite($xml_out, '</Content>' . "\n");
1205    fwrite($xml_out, '</Section>' . "\n");
1206    $in_section = false;
1207  }
1208  // Complete any open chapter
1209  if ($in_chapter)
1210  {
1211    fwrite($xml_out, '</Content>' . "\n");
1212    fwrite($xml_out, '</Chapter>' . "\n");
1213    $in_chapter = false;
1214  }
1215}
1216/** processPage($xml_out, $page_name) **/
1217
/**
 * Copy an image referenced by a dokuwiki media link into the XML export
 * tree and write the matching <File> element.
 *
 * The link is expected to look like {{...name.ext?WIDTHxHEIGHT}}, optionally
 * followed by '&direct' before the closing braces. Text that does not match
 * that pattern is ignored: nothing is copied and nothing is written.
 *
 * @param resource $xml_out  open handle the <File> element is written to
 * @param string   $text     wiki text containing the media link
 */
function processImage($xml_out, $text)
{
  global $dokuwiki_path;
  global $xml_source_path;

  // - nothing to do unless the text really contains a sized media link
  if (!preg_match('/\{\{.+?([^:?]+)\?(\d+)x(\d+)(&direct)?\}\}/', $text, $matches))
  {
    return;
  }
  $filename = $matches[1];
  $width = $matches[2];
  $height = $matches[3];

  // - copy the file into place. The source name is lowercased while the
  //   destination keeps the case used in the wiki text.
  // NOTE(review): both $_REQUEST['l'] and the filename taken from the wiki
  // text are concatenated into filesystem paths without validation in this
  // function - confirm they are constrained upstream.
  $image_source_path = $dokuwiki_path . '/data/media/' . $_REQUEST['l'] . '/manuals/images/' . strtolower($filename);
  $image_destination_dir = $xml_source_path . '/' . $_REQUEST['l'] . '/images';
  mkAllDir($image_destination_dir);
  $image_destination_path = $image_destination_dir . '/' . $filename;
  if (copy($image_source_path, $image_destination_path))
  {
    echo '[copying file: ' . $filename . "] \n";
    chmod($image_destination_path, 0664);
  }
  else
  {
    // - second argument presumably marks the error as non-fatal; the <File>
    //   element is still written below either way
    printError('Failed to copy image into place: ' . $filename, false);
  }

  // - the filename comes straight from wiki text, so escape the characters
  //   that would otherwise terminate the url attribute or break the XML.
  //   strtr() works on bytes in a single pass, so it neither depends on the
  //   string's encoding nor double-escapes its own output.
  $xml_filename = strtr($filename, array('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;'));

  // - spit out the XML element (width and height are digits only, as
  //   guaranteed by the pattern above)
  fwrite($xml_out, '<File width="' . $width . '" height="' . $height . '" url="images/' . $xml_filename . '"/>' . "\n");
}
Note: See TracBrowser for help on using the browser.