Context Navigation

gs-manual-export.php@ 30195

Last change on this file since 30195 was 30117, checked in by jmt12, 9 years ago
Altered to fix bug where imgcaption tag closed before image actually output. Looks like this was intentional at some stage, but means problems in the Wiki, so I'll repair
File size: 43.1 KB

Line
1	<?php
2
require_once('common.php');
$debug = 1;

// There are some elements that are, in my opinion, incorrectly 'resolved' in
// code blocks just because they are valid HTML. For instance, &lt;i&gt; is
// resolved to <i> in code blocks, while something like &lt;Metadata&gt; is
// not. Set this to true to allow such abominations (for the purpose of
// comparing before and after versions of the XML). [jmt12]
$allow_bad_codeblocks = true;

/** @file gs-manual-export.php
 * This script transforms the series of dokuwiki pages that make up a certain
 * manual (as specified by the 'm' argument) in a certain language ('l') into
 * the XML format required by the rest of the Greenstone manual generation
 * scripts.
 *
 * Other request arguments read below: 'v' (version, defaults to 'draft') and
 * 'a' (action, either 'download' or 'store', defaults to 'store').
 */

// 0. Initialization

// - we have a counter to assign identifiers to text blocks etc without ids
$text_id_counter = 1;
// - we need an array of all the footnotes (footnote text => footnote id)
$footnotes = array();

// Defaults - empty() already covers the "not set" case
if (empty($_REQUEST['l']))
{
  $_REQUEST['l'] = 'en';
}
if (empty($_REQUEST['m']))
{
  //$_REQUEST['m'] = 'user';
  //$_REQUEST['m'] = 'install';
  $_REQUEST['m'] = 'develop';
  //$_REQUEST['m'] = 'paper';
}
if (empty($_REQUEST['v']))
{
  $_REQUEST['v'] = 'draft';
}
// - strict whitelist: a regex anchored with '$' would also accept a trailing
//   newline, and an array-valued argument must not reach a string function
if (!isset($_REQUEST['a']) || !in_array($_REQUEST['a'], array('download', 'store'), true))
{
  $_REQUEST['a'] = 'store'; // Try to store the file to disk
}

// NOTE(review): $base_path, fileCat() and recursiveRemove() are not defined in
// this file - presumably they come from common.php; confirm.
$var_path = fileCat(array($base_path, 'var'));
$timestamp = time();
//$xml_source_path = 'PATH TO GSDL MANUALS'
$xml_source_path = '/tmp';
if ($_REQUEST['a'] == 'download')
{
  // Clear out previous exports
  recursiveRemove($var_path, '/greenstone/greenstone-documentation/php/var');
  // New export goes into a per-request (timestamped) directory
  $xml_source_path = fileCat(array($var_path, $timestamp));
}
59
echo '<html>' . "\n";
echo '<head>' . "\n";
echo '<title>GS Manual Export</title>' . "\n";
echo '</head>' . "\n";
echo '<body>' . "\n";

// - validate arguments before we use them (security). The patterns end with
//   \z rather than $ because $ also matches before a trailing newline, which
//   would let a value like "en\n" through into the file paths built below.
$requested_manual = is_string($_REQUEST['m']) ? $_REQUEST['m'] : '';
if (!preg_match('/^(develop|install|paper|user)\z/', $requested_manual))
{
  printError('Unknown manual type requested: ' . htmlspecialchars($requested_manual));
}

$requested_language = is_string($_REQUEST['l']) ? $_REQUEST['l'] : '';
if (!preg_match('/^(ar|en|es|fr|pt-br|ru)\z/', $requested_language))
{
  printError('Unknown language requested: ' . htmlspecialchars($requested_language));
}

// NOTE(review): the code below assumes printError() terminates the script
// unless told otherwise - it is defined outside this file; confirm. The
// values are escaped on output regardless.
echo '<h2>Generating Greenstone Manual XML</h2>' . "\n";
echo '<p><b>Manual:</b> ' . htmlspecialchars($requested_manual) . ' <b>Language:</b> ' . htmlspecialchars($requested_language) . "</p>\n<hr/>\n";
// 1. Create the XML output file handle
// - construct the path using the information we've been provided as arguments
$xml_file_dir = $xml_source_path . '/' . $_REQUEST['l'];
mkAllDir($xml_file_dir);
$xml_file_path = $xml_file_dir . '/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml';
// - backup any existing file
if (file_exists($xml_file_path))
{
  $xml_backup_file_path = $xml_file_path . '.bak';
  if (!rename($xml_file_path, $xml_backup_file_path))
  {
    printError('Failed to rename existing manual file for backup');
  }
}

// - and create a handle to the new file; every later step writes to this
//   handle, so fail loudly here rather than on the first fwrite()
$xml_out = fopen($xml_file_path, 'w');
if (!$xml_out)
{
  printError('Failed to open manual file for writing');
}
96
// 2. Read in the top level page - this will give configuration data for the
//    manual and cover page, as well as specifying the order for the other
//    pages in the manual
echo "<p><b>Frontmatter:</b><br/>\n";
// - by reading this page we hope to populate an array of metadata (field name
//   => list of values), and also extract the sequence of other pages within
//   this manual
$cover_metadata = array();
$pages_in_order = array();
// - we now need to consider if the user has asked for a draft version (i.e.
//   includes the latest version of pages regardless of approval) or if only
//   the approved versions of pages should be included
// - only necessary for english version of manual, as those are the only pages
//   editable
$top_page_path = '';
if ($_REQUEST['v'] == 'draft' || $_REQUEST['l'] != 'en')
{
  // - again, we can construct the path to the top level page given the
  //   arguments provided
  $top_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt';
}
else
{
  $top_page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m']);
}

if (!file_exists($top_page_path))
{
  printError('Failed to locate top level page for manual');
}
// - we're going to open a handle to the file, then read it in line-by-line
//   watching for the lines we are interested in (using pattern matching)
$top_page_in = fopen($top_page_path, 'r');
if (!$top_page_in)
{
  printError('Failed to open top level page for reading');
}
// - a contents entry is a bullet whose link looks like [[.:<manual>:<page>|<label>]]
//   NOTE(review): pattern reconstructed from a damaged copy of this file;
//   check it against a real top level page.
$contents_item_pattern = '/^\s+\*\s+\[\[.:' . preg_quote($_REQUEST['m'], '/') . ':(.+?)\|(.*?)\]\]\s*$/';
// - a metadata row is a dokuwiki table line of the form: ^ Field | Value |
$metadata_row_pattern = '/^\^\s+([^\s]+)\s+\|\s+(.+?)\s+\|\s*$/';
$in_contents = false;
while (($line = fgets($top_page_in)) !== false)
{
  // - if we are capturing page order, and we encounter something that looks
  //   like a bulletpoint item pointing to a wiki page, then we append the
  //   name of that page to our pages in order array
  if ($in_contents && preg_match($contents_item_pattern, $line, $matches))
  {
    $pages_in_order[] = $matches[1];
  }
  // - metadata is all encoded within dokuwiki tables; a field may repeat, so
  //   every field maps to a list of values
  elseif (preg_match($metadata_row_pattern, $line, $matches))
  {
    $field = $matches[1];
    $value = $matches[2];
    if (!isset($cover_metadata[$field]))
    {
      $cover_metadata[$field] = array();
    }
    $cover_metadata[$field][] = $value;
  }
  // - watch for the heading 'Contents' to begin extracting page order
  //   information; any other heading means we aren't capturing page order
  //   (anymore)
  elseif (preg_match('/^=+\s(.+)\s=+$/', $line, $matches))
  {
    $in_contents = ($matches[1] == 'Contents');
  }
}
// - fgets() returns false both at end of file and on error; tell them apart
if (!feof($top_page_in))
{
  printError('Unexpected fgets() fail when reading top page');
}
fclose($top_page_in);
// - ensure we have the required metadata
$required_metadata = array('Heading','Title','Affiliation','Version','Date');
foreach ($required_metadata as $required_field)
{
  if (!isset($cover_metadata[$required_field]))
  {
    printError('Missing required metadata: ' . $required_field);
  }
}
// - now we can use the metadata to construct the XML header and the cover
//   page. This follows a pretty set recipe; only the elements that can
//   repeat---like Author, SupplementaryText etc---are at all tricky
fwrite($xml_out, '<?xml version="1.0" encoding="UTF-8"?>' . "\n");
fwrite($xml_out, '<!DOCTYPE Manual [' . "\n");
// - each ENTITY metadata value is copied verbatim into the internal DTD subset
if (isset($cover_metadata['ENTITY']))
{
  foreach ($cover_metadata['ENTITY'] as $entity)
  {
    fwrite($xml_out, "\t" . '<!ENTITY ' . $entity . '>' . "\n");
  }
}
fwrite($xml_out, ']>' . "\n");
fwrite($xml_out, '<Manual id="' . ucfirst($_REQUEST['m']) . '" lang="' . $_REQUEST['l'] . '">' . "\n");

///cho "<p>[Debug] metadata: " . print_r($cover_metadata, true) . "</p>\n\n";

// - cover page elements, in the order they are written to the XML
outputMetadataSingle($xml_out, $cover_metadata, 'Heading');
outputMetadataSingle($xml_out, $cover_metadata, 'Title');
outputMetadataSingle($xml_out, $cover_metadata, 'Author');
outputMetadataSingle($xml_out, $cover_metadata, 'Affiliation');
outputMetadataMultiple($xml_out, $cover_metadata, 'SupplementaryText');
outputMetadataMultiple($xml_out, $cover_metadata, 'Text');
outputMetadataMultiple($xml_out, $cover_metadata, 'Comment');
outputMetadataSingle($xml_out, $cover_metadata, 'Version');
outputMetadataSingle($xml_out, $cover_metadata, 'Date');

// 3. Process each page listed in the contents of the top level page in order
foreach ($pages_in_order as $page)
{
  processPage($xml_out, $page);
}

// 4. Output our list of footnotes (if any). $footnotes maps the footnote text
//    to its id, and is filled in by outputTextBlock() while pages are processed
if (!empty($footnotes))
{
  fwrite($xml_out, '<FootnoteList>'. "\n");
  foreach ($footnotes as $footnote=>$footnote_id)
  {
    ///cho '[debug] footnotes: (' . $footnote_id . ') ' . $footnote . '<br />'. "\n";
    outputMetadataSingle($xml_out, $footnote, 'Footnote', $footnote_id);
  }
  fwrite($xml_out, '</FootnoteList>'. "\n");
}

// 5. Finalize and close the XML output
fwrite($xml_out, '</Manual>' . "\n");
fclose($xml_out);
chmod($xml_file_path, 0664);
234
// 6. Complete!
echo '<p><b>Complete!</b></p>' . "\n<hr/>\n";
if ($_REQUEST['a'] == 'download')
{
  // Zip up the manual files. Archive paths are passed through
  // escapeshellarg() so that nothing in them can be interpreted by the shell.
  $zip_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.zip';
  $zip_path = fileCat(array($xml_source_path, $zip_file));
  $zip_command = 'zip -r ' . escapeshellarg($zip_path) . ' . > /dev/null 2>&1';
  //cho '<p><b>[DEBUG]</b> zip_command:' . $zip_command . '</p>';
  $tgz_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.tgz';
  $tgz_path = fileCat(array($xml_source_path, $tgz_file));
  $tgz_command = 'tar -czf ' . escapeshellarg($tgz_path) . ' * > /dev/null 2>&1';
  //cho '<p><b>[DEBUG]</b> tgz_command:' . $tgz_command . '</p>';
  // We need to move to the document folder so that archives have sensible
  // paths. Both commands archive "the current directory", so they must not
  // run at all if the directory change failed.
  $original_cwd = getcwd();
  if (chdir($xml_file_dir))
  {
    system($zip_command);
    system($tgz_command);
    // Go back
    if ($original_cwd !== false)
    {
      chdir($original_cwd);
    }
    // Links are ready
    echo '<p>Download XML file plus images as: <a href="var/' . $timestamp . '/' . $zip_file . '">ZIP</a> or <a href="var/' . $timestamp . '/' . $tgz_file . '">TGZ</a></p>' . "\n";
  }
  else
  {
    // NOTE(review): passing false as the second argument is assumed to make
    // printError() non-fatal, as the call in processPage() suggests; confirm.
    printError('Failed to enter manual directory to build archives', false);
  }
}
echo '<p>Click <a href="' . $dokuwiki_url . '/doku.php?id=' . $_REQUEST['l'] . ':manuals:' . $_REQUEST['m'] . '">here</a> to return to dokuwiki</p>' . "\n";
echo '</body>' . "\n";
echo '</html>';
exit(0);
262
/**
 * Writes one <$field> element containing a single text block.
 *
 * A progress marker is echoed to the page, then the element is written to
 * $xml_out. When no value can be found the element is still written (empty)
 * and a notice is echoed.
 *
 * @param resource     $xml_out  writable handle for the manual XML
 * @param array|string $metadata either an array of field name => list of
 *                               values (the first value under $field is used)
 *                               or the literal text to output
 * @param string       $field    element name, and lookup key when $metadata
 *                               is an array
 * @param mixed        $mid      value for the element's id attribute; no
 *                               attribute is written when this is falsy
 */
function outputMetadataSingle($xml_out, $metadata, $field, $mid=false)
{
  echo '[metadata: ' . $field . "] \n";
  $id_attribute = $mid ? ' id="' . $mid . '"' : '';
  fwrite($xml_out, '<' . $field . $id_attribute . '>' . "\n");

  // - work out the one value to output (null means "nothing found")
  $value = null;
  if (is_array($metadata))
  {
    if (isset($metadata[$field][0]))
    {
      $value = $metadata[$field][0];
    }
  }
  // - explicit comparisons rather than empty(), so that a literal "0" is
  //   still output
  elseif ($metadata !== null && $metadata !== false && $metadata !== '')
  {
    $value = $metadata;
  }

  if ($value !== null)
  {
    outputTextBlock($xml_out, $value);
  }
  else
  {
    echo 'no such field or no metadata';
  }
  fwrite($xml_out, '</' . $field . '>' . "\n");
}
/** outputMetadataSingle() **/
298
/**
 * Writes one <$field> element whose text block is all the values of the field
 * joined into a single string.
 *
 * @param resource $xml_out         writable handle for the manual XML
 * @param array    $metadata        field name => list of values
 * @param string   $field           element name and lookup key
 * @param string   $separator       placed between values
 * @param mixed    $final_separator when truthy, used instead of $separator
 *                                  before the last value (e.g. ' and ')
 */
function outputMetadataList($xml_out, $metadata, $field, $separator = ',', $final_separator = false)
{
  echo '[metadata list: ' . $field . "] \n";
  fwrite($xml_out, '<' . $field . '>' . "\n");
  if (isset($metadata[$field]) && count($metadata[$field]) > 0)
  {
    // - $metadata was passed by value, so popping from this copy does not
    //   affect the caller's array
    $values = $metadata[$field];
    if ($final_separator && count($values) > 1)
    {
      $last_value = array_pop($values);
      $joined = implode($separator, $values) . $final_separator . $last_value;
    }
    else
    {
      // - a single value comes through implode() unchanged
      $joined = implode($separator, $values);
    }
    outputTextBlock($xml_out, $joined);
  }
  else
  {
    // - also reached when the field is present but holds no values
    echo 'no such field or no metadata';
  }
  fwrite($xml_out, '</' . $field . '>' . "\n");
}
/** outputMetadataList() **/
331
/**
 * Writes every value of a field as its own text block, wrapped in a single
 * <$field> element - except for the field 'Text', whose blocks are written
 * without any wrapper.
 *
 * @param resource $xml_out  writable handle for the manual XML
 * @param array    $metadata field name => list of values
 * @param string   $field    element name and lookup key
 */
function outputMetadataMultiple($xml_out, $metadata, $field)
{
  echo '[metadata multiple: ' . $field . "] \n";
  // - Text blocks don't need to be wrapped in Text element
  $needs_wrapper = ($field != 'Text');
  if ($needs_wrapper)
  {
    fwrite($xml_out, '<' . $field . '>' . "\n");
  }
  // - is_array() keeps a malformed (non-list) value away from foreach
  if (isset($metadata[$field]) && is_array($metadata[$field]))
  {
    foreach ($metadata[$field] as $value)
    {
      outputTextBlock($xml_out, $value);
    }
  }
  else
  {
    echo 'no such field or no metadata';
  }
  if ($needs_wrapper)
  {
    fwrite($xml_out, '</' . $field . '>' . "\n");
  }
}
356
/**
 * Encodes a line of code that appears inside a table, while letting italics
 * that were written as the comment markers <!--i--> and <!--/i--> come out
 * as real <i> and </i> tags.
 *
 * @param  string $text raw code line
 * @return string the line encoded by translateText(), with the italic
 *                markers turned into tags
 */
function translateTableCodeline($text)
{
  // Escape any italic tags hidden in HTML comments, using placeholders that
  // contain none of the characters translateText() encodes
  $text = str_replace('<!--i-->', '%!--i--%', $text);
  $text = str_replace('<!--/i-->', '%!--/i--%', $text);
  // Encode entities etc
  $text = translateText($text, true);
  // Restore any italics elements hidden above
  $text = str_replace('%!--i--%', '<i>', $text);
  $text = str_replace('%!--/i--%', '</i>', $text);
  return $text;
}
/** translateTableCodeline() **/
370
/**
 * Makes a string safe to embed in XML by encoding &, < and >.
 *
 * When called for a code block while the global $allow_bad_codeblocks is
 * true, the encoded forms of <i>, </i> and <br/> are turned back into real
 * tags afterwards.
 *
 * @param  string $text          raw text
 * @param  bool   $in_code_block whether the text belongs to a code block
 * @return string the encoded text
 */
function translateText($text, $in_code_block=false)
{
  global $allow_bad_codeblocks;
  // - the ampersand must be encoded first, otherwise the ampersands of the
  //   entities produced for < and > would themselves be encoded (str_replace
  //   applies array replacements in order)
  $text = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $text);
  if ($in_code_block && $allow_bad_codeblocks)
  {
    ///cho "** [debug] restoring bogus decoded tags in: |$text| **\n";
    $text = str_replace(array('&lt;i&gt;', '&lt;/i&gt;', '&lt;br/&gt;'), array('<i>', '</i>', '<br/>'), $text);
  }
  return $text;
}
386
/**
 * Converts one run of dokuwiki text into a <Text> element and writes it.
 *
 * Steps, in order: footnotes are pulled out into the global $footnotes and
 * replaced by references; an id is taken from a leading "<!-- id:N -->"
 * comment or from the global counter; &, < and > are encoded; links,
 * cross-references and italic/bold/underline markup are converted to tags;
 * protected placeholders are restored; the element is written.
 *
 * Globals: reads $cover_metadata['ENTITY'] and $allow_bad_codeblocks; reads
 * and updates $text_id_counter and $footnotes.
 *
 * NOTE(review): several patterns in this function were reconstructed from a
 * copy of the file in which '*', '|' and HTML entities had been damaged.
 * Each uncertain pattern is marked below.
 *
 * @param resource $xml_out       writable handle for the manual XML
 * @param string   $text          dokuwiki text for this block
 * @param string   $type          value for the type attribute (omitted when
 *                                empty); 'code' triggers < > encoding in the
 *                                code block path
 * @param bool     $in_code_block true when the text is part of a code block
 */
function outputTextBlock($xml_out, $text, $type='', $in_code_block = false)
{
  global $cover_metadata;
  global $text_id_counter;
  global $footnotes;
  global $allow_bad_codeblocks;

  // - Start by dealing with any footnotes before anything else. Dokuwiki
  //   writes a footnote as ((footnote text)).
  while (preg_match('/\(\((.*?)\)\)/', $text, $matches))
  {
    $pattern = $matches[0];
    $footnote = $matches[1];
    // - a footnote text we have already seen keeps its id; handing out
    //   count+1 again would renumber it and collide with the next new one
    if (isset($footnotes[$footnote]))
    {
      $footnote_id = $footnotes[$footnote];
    }
    else
    {
      $footnote_id = count($footnotes) + 1;
      $footnotes[$footnote] = $footnote_id;
    }
    // - note that we have to escape the footnote reference as the following
    //   code will convert any < and > to entities...
    $footnote_reference = '%FootnoteRef id="' . $footnote_id . '"/%';
    $text = str_replace($pattern, $footnote_reference, $text);
  }

  $text_id = '';
  // - check whether the string begins with an explicit id
  //   NOTE(review): the optional whitespace (\s*) is reconstructed
  if (preg_match('/^\s*<!--\s*id:(.+?)\s*-->(.*)$/', $text, $matches))
  {
    $text_id = $matches[1];
    $text = $matches[2];
    // - later automatic ids continue on from an explicit numeric id
    if (is_numeric($text_id))
    {
      $text_id_counter = $text_id + 1;
    }
  }
  else
  {
    $text_id = $text_id_counter;
    $text_id_counter++;
  }

  // - protect the special case of an HTML comment being actually displayed
  //   in the text (a comment containing only dots and whitespace)
  //   NOTE(review): it is unclear whether the source text spells this with
  //   real angle brackets or with entities, so both are accepted
  $text = preg_replace('/(?:<|&lt;)!--([\s\.]+?)--(?:>|&gt;)/','##lt##!--\1--##gt##',$text);

  // - strip any remaining HTML comments
  $text = preg_replace('/<!--.*?-->/', '', $text);

  // we leave code blocks alone in terms of ampersands
  if (!$in_code_block)
  {
    // - ampersands aren't safe in XML...
    $text = str_replace('&', '&amp;', $text);
    // ...except for the entities that we have registered as metadata
    if (isset($cover_metadata['ENTITY']))
    {
      foreach ($cover_metadata['ENTITY'] as $entity)
      {
        if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches))
        {
          $entity_name = $matches[1];
          if ($entity_name != 'mdash')
          {
            $entity_character = html_entity_decode('&#'.$matches[2].';',ENT_NOQUOTES,'UTF-8');
            // - undo the encoding just applied to this entity's ampersand
            $text = str_replace('&amp;' . $entity_name . ';', '&' . $entity_name . ';', $text);
            // - we also convert any characters that match the entity char into
            //   the entity
            $text = str_replace($entity_character, '&' . $entity_name . ';', $text);
          }
        }
      }
    }
    // - protect <br/> tags
    $text = str_replace('<br/>','%%br/%%',$text);
    // - encoding all of the < and > that appear in the text (rather than
    //   true html formatting)
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
    // - restore <br/> tags
    $text = str_replace('%%br/%%','<br/>',$text);
  }
  else if ($type == 'code')
  {
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
  }

  // - links, oh how I hate thee
  // - external links are slightly easier
  $text = preg_replace('/\[\[http:\/\/(.*?)\|(.*?)\]\]/', '<Link url="http://\1">\2</Link>', $text);
  // - internals have to become the horrible <CrossRef> tags
  $text = preg_replace('/\[\[\.\:(.*?)\|[^\]]+\]\]/','<CrossRef target="Chapter" ref="\1"/>', $text);
  // - internal links starting with hash must be on the same page; the
  //   longest hash prefix has to be tried first
  $text = preg_replace('/\[\[###(.*?)\|.*?\]\]/','<CrossRef target="Part" ref="\1"/>', $text);
  $text = preg_replace('/\[\[##(.*?)\|.*?\]\]/','<CrossRef target="Subsection" ref="\1"/>', $text);
  $text = preg_replace('/\[\[#(.*?)\|.*?\]\]/','<CrossRef target="Section" ref="\1"/>', $text);
  // - 'external' internal wiki links are even worse - since we can't know what
  //   the page order number for another manual's chapters might be, we instead
  //   use a search
  //   NOTE(review): outside code blocks the ampersand has already been
  //   encoded by this point, so both forms are accepted
  $text = preg_replace('/\[\[\?do\=search\&(?:amp;)?id\=([^\s]+)\s+@([a-z]+):manuals:([a-z]+)\|.*?\]\]/i', '<CrossRef external="\3" lang="\2" target="Chapter" ref="\1"/>', $text);
  // - references to images and tables (angle brackets may or may not have
  //   been encoded above, depending on the code block path)
  $text = preg_replace('/(?:<|&lt;)imgref\sfigure_(.+?)(?:>|&gt;)/','<CrossRef target="Figure" ref="\1"/>', $text);
  $text = preg_replace('/(?:<|&lt;)tblref\stable_(.+?)(?:>|&gt;)/','<CrossRef target="Table" ref="\1"/>', $text);
  // - explicitly convert URLs as they are a bit messy
  // - first all the cases of URLs in italics, without protocol
  $text = preg_replace('/\/\/\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s(localhost(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  // - now all the protocol ones (with care taken to protect // in protocol)
  $text = preg_replace('/\/\/\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/(localhost(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  // - next we have the underlined URLs sans protocols
  $text = preg_replace('/__\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s(localhost(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  // - and finally the protocol prefixed underlined URLs
  $text = preg_replace('/__\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/(localhost(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  // - lets also protect any other protocols we find floating around
  $text = preg_replace('/(file|ftp|http):\/\//i', '\1:##DOUBLESLASH##', $text);

  // - italic formatting (taking care of protected double slashes)
  $text = preg_replace('/%%\/\/%%/', '##DOUBLESLASH##', $text);
  $text = preg_replace('/\/{5}/', '<i>/</i>', $text); // another special case
  $text = preg_replace('/\/\/(\/.+?)\s*\/\//', '<i>\1</i>', $text); // another special case
  $text = preg_replace('/\/\/\s*(.+?\/)\/\//', '<i>\1</i>', $text); // another special case
  // NOTE(review): the optional whitespace here is reconstructed; it is needed
  // for part titles, which processPage() passes in as '//' . $title . '//'
  $text = preg_replace('/\/\/\s*(.+?)\s*\/\//', '<i>\1</i>', $text);
  $text = preg_replace('/##DOUBLESLASH##/', '//', $text);
  // - bold formatting
  $text = preg_replace('/\*\*([^"]+?)\*\*/', '<b>\1</b>', $text);
  // - underline formatting
  $text = preg_replace('/__([^"]+?)__/', '<u>\1</u>', $text);

  // - decode certain entities in codeblock (just because they are valid HTML,
  //   derp).
  if ($in_code_block && $allow_bad_codeblocks)
  {
    ///cho "** [debug] restoring bogus decoded tags in: |$text| **\n";
    $text = str_replace('&lt;i&gt;','<i>',$text);
    $text = str_replace('&lt;/i&gt;','</i>',$text);
    //$text = str_replace('&lt;br/&gt;','<br/>',$text);
  }
  // - restore protected entities
  $text = preg_replace('/##(gt|lt)##/','&\1;',$text);
  // - restore protected comment blocks
  $text = str_replace('%!--', '<!--', $text);
  $text = str_replace('--%', '-->', $text);
  // - restore protected footnote refs
  $text = preg_replace('/%FootnoteRef id="([^"]+)"\/%/', '<FootnoteRef id="\1"/>', $text);
  // output the text block
  $text = trim($text);
  // - compare against '' rather than using empty(), which would also throw
  //   away a block whose whole text is "0"
  if ($text === '')
  {
    fwrite($xml_out, '<Text id="' . $text_id . '"/>' . "\n");
  }
  else if (!empty($type))
  {
    fwrite($xml_out, '<Text type="' . $type . '" id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
  else
  {
    fwrite($xml_out, '<Text id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
}
/** outputTextBlock($xml_out, $text) **/
552
553	/**
554	*/
555	function processPage($xml_out, $page_name)
556	{
557	global $dokuwiki_path;
558	global $seen_ids;
559	echo "</p>\n<p><b>Export Chapter:</b> " . $page_name . "<br/>\n";
560	// - locate the page in question (taking into account if the user asked for a
561	// draft version or an approved version of the manual)
562	$page_path = '';
563	if ($_REQUEST['v'] == 'draft' \|\| $_REQUEST['l'] != 'en')
564	{
565	$page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $page_name . '.txt';
566	}
567	else
568	{
569	$page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m'] . ':' . $page_name);
570	}
571	$page_in = @fopen($page_path, 'r');
572	if (!$page_in)
573	{
574	printError('Failed to open page for reading:' . $page_name, false);
575	return;
576	}
577	// - once again we read in line-by-line, but this time we are going to output
578	// each line as we go through. We expect to encounter certain lines in a
579	// predefined order, and should complain if we don't find what we expect.
580	$in_chapter = false;
581	$in_section = false;
582	$in_subsection = false;
583	$in_part = false;
584	$in_list = false;
585	$lists = array();
586	$previous_listitem_type = '';
587	$in_figure = false;
588	$in_table = false;
589	$column_widths = array();
590	$in_code_block = false;
591	while (($line = fgets($page_in)) !== false)
592	{
593	// remove newline character
594	$line = preg_replace('/\r?\n$/','',$line);
595	// - we need to know the 'depth' for the bulletpoint lists
596	$depth = 0;
597	while (strlen($line) > 2 && preg_match('/^\s+[\*\-]/', $line) && substr($line, 0, 2) == ' ')
598	{
599	$depth++;
600	$line = substr($line, 2);
601	}
602	$first_character = substr($line, 0, 1);
603	// - special case for the end of bullet lists
604	if ($in_list && ($first_character != "*" && $first_character != "-"))
605	{
606	while (count($lists) > 0)
607	{
608	$list_type = array_pop($lists);
609	if ($list_type == '*')
610	{
611	fwrite($xml_out, '</Bullet>' . "\n");
612	fwrite($xml_out, '</BulletList>' . "\n");
613	}
614	else
615	{
616	fwrite($xml_out, '</NumberedItem>' . "\n");
617	fwrite($xml_out, '</NumberedList>' . "\n");
618	}
619	}
620	$in_list = false;
621	}
622	// - special case for the end of tables
623	if ($in_table && $first_character != '^' && $first_character != '\|')
624	{
625	fwrite($xml_out, '</TableContent>' . "\n");
626	fwrite($xml_out, '</Table>' . "\n");
627	$in_table = false;
628	}
629	// - special cases for premature closing of sections, subsections and parts
630	if (preg_match('/<!-- close:(section\|subsection\|part) -->/', $line, $matches))
631	{
632	// - we always try to do this (regardless of actual flag) as we must
633	// always close the smallest 'granularity' first
634	if ($in_part)
635	{
636	fwrite($xml_out, '</Content>' . "\n");
637	fwrite($xml_out, '</Part>' . "\n");
638	$in_part = false;
639	}
640	if ($in_subsection && ($matches[1] == 'section' \|\| $matches[1] == 'subsection'))
641	{
642	fwrite($xml_out, '</Content>' . "\n");
643	fwrite($xml_out, '</Subsection>' . "\n");
644	$in_subsection = false;
645	}
646	if ($in_section && $matches[1] == 'section')
647	{
648	fwrite($xml_out, '</Content>' . "\n");
649	fwrite($xml_out, '</Section>' . "\n");
650	$in_section = false;
651	}
652	}
653
654	// - if this page is a chapter, then the first thing on the page should be
655	// the chapter title (six equals)
656	if (preg_match('/====== (.+) ======/', $line, $matches))
657	{
658	$chapter_title = $matches[1];
659	$chapter_id = $page_name;
660	if (empty($chapter_id))
661	{
662	$chapter_id = generateID($chapter_title);
663	}
664	// - are we already processing a part? if so end it, end it now
665	if ($in_part)
666	{
667	fwrite($xml_out, '</Content>' . "\n");
668	fwrite($xml_out, '</Part>' . "\n");
669	$in_part = false;
670	}
671	// - are we already processing a subsection? if so end it, end it now
672	if ($in_subsection)
673	{
674	fwrite($xml_out, '</Content>' . "\n");
675	fwrite($xml_out, '</Subsection>' . "\n");
676	$in_subsection = false;
677	}
678	// - are we already processing a section? if so end it, end it now
679	if ($in_section)
680	{
681	fwrite($xml_out, '</Content>' . "\n");
682	fwrite($xml_out, '</Section>' . "\n");
683	$in_section = false;
684	}
685	// - are we already processing a chapter? if so end it, end it now
686	if ($in_chapter)
687	{
688	fwrite($xml_out, '</Content>' . "\n");
689	fwrite($xml_out, '</Chapter>' . "\n");
690	$in_chapter = false;
691	}
692	// - write out this chapter's header
693	fwrite($xml_out, '<Chapter id="' . $chapter_id . '">' . "\n");
694	outputMetadataSingle($xml_out, $chapter_title, 'Title');
695	fwrite($xml_out, '<Content>' . "\n");
696	$in_chapter = true;
697	}
698	// - the next likely thing to encounter is a section heading (five equals)
699	elseif (preg_match('/=====\s+(.+)\s+=====/', $line, $matches))
700	{
701	$section_title = $matches[1];
702	// - check for explicit section id
703	$section_id = '';
704	if (preg_match('/<!-- sid:(.+?) -->(.*)/', $section_title, $matches))
705	{
706	$section_id = $matches[1];
707	$section_title = $matches[2];
708	}
709	if (empty($section_id))
710	{
711	$section_id = generateID($section_title);
712	}
713	// - are we already processing a part? if so end it, end it now
714	if ($in_part)
715	{
716	fwrite($xml_out, '</Content>' . "\n");
717	fwrite($xml_out, '</Part>' . "\n");
718	$in_part = false;
719	}
720	// - are we already processing a subsection? if so end it, end it now
721	if ($in_subsection)
722	{
723	fwrite($xml_out, '</Content>' . "\n");
724	fwrite($xml_out, '</Subsection>' . "\n");
725	$in_subsection = false;
726	}
727	// - are we already processing a section? if so end it, end it now
728	if ($in_section)
729	{
730	fwrite($xml_out, '</Content>' . "\n");
731	fwrite($xml_out, '</Section>' . "\n");
732	$in_section = false;
733	}
734	// - write out this section's header
735	fwrite($xml_out, '<Section id="' . $section_id . '">' . "\n");
736	outputMetadataSingle($xml_out, $section_title, 'Title');
737	fwrite($xml_out, '<Content>' . "\n");
738	$in_section = true;
739	}
740	// - similar for subsection heading (four equals)
741	elseif (preg_match('/==== (.+) ====/', $line, $matches))
742	{
743	$subsection_title = $matches[1];
744	// - check for explicit subsection id
745	$subsection_id = '';
746	if (preg_match('/<!-- sid:(.+?) -->(.*)/', $subsection_title, $matches))
747	{
748	$subsection_id = $matches[1];
749	$subsection_title = $matches[2];
750	}
751	if (empty($subsection_id))
752	{
753	$subsection_id = generateID($subsection_title);
754	}
755	// - are we already processing a part? if so end it, end it now
756	if ($in_part)
757	{
758	fwrite($xml_out, '</Content>' . "\n");
759	fwrite($xml_out, '</Part>' . "\n");
760	$in_part = false;
761	}
762	// - are we already processing a subsection? if so end it, end it now
763	if ($in_subsection)
764	{
765	fwrite($xml_out, '</Content>' . "\n");
766	fwrite($xml_out, '</Subsection>' . "\n");
767	$in_subsection = false;
768	}
769	// - write out this subsection's header
770	fwrite($xml_out, '<Subsection id="' . $subsection_id . '">' . "\n");
771	outputMetadataSingle($xml_out, $subsection_title, 'Title');
772	fwrite($xml_out, '<Content>' . "\n");
773	$in_subsection = true;
774	}
775	// - and part heading (three equals)
776	elseif (preg_match('/=== (.+) ===/', $line, $matches))
777	{
778	$part_title = $matches[1];
779	// - check for explicit part id
780	$part_id = '';
781	if (preg_match('/<!-- sid:(.+?) -->(.*)/', $part_title, $matches))
782	{
783	$part_id = $matches[1];
784	$part_title = $matches[2];
785	}
786	if (empty($part_id))
787	{
788	$part_id = generateID($part_title);
789	}
790	// - are we already processing a part? if so end it, end it now
791	if ($in_part)
792	{
793	fwrite($xml_out, '</Content>' . "\n");
794	fwrite($xml_out, '</Part>' . "\n");
795	$in_part = false;
796	}
797	// - write out this part's header
798	fwrite($xml_out, '<Part id="' . $part_id . '">' . "\n");
799	outputMetadataSingle($xml_out, '//' . $part_title . '//', 'Title');
800	fwrite($xml_out, '<Content>' . "\n");
801	$in_part = true;
802	}
803	// - Ignore 5th level heading - they are only used to allow more convenient
804	// editing of figures and tables
805	elseif (preg_match('/== (.+) ==/', $line, $matches))
806	{
807	}
808	// - lists need special handling
809	elseif (preg_match('/^(\\|\-)\s+(.)/', $line, $matches))
810	{
811	$list_type = $matches[1];
812	$list_text = $matches[2];
813	$list_depth = count($lists);
814	if (!$in_list)
815	{
816	if ($list_type == '*')
817	{
818	fwrite($xml_out, '<BulletList>' . "\n");
819	}
820	else
821	{
822	fwrite($xml_out, '<NumberedList>' . "\n");
823	}
824	$in_list = true;
825	array_push($lists, $list_type);
826	}
827	// - this bullet is at the same depth as previous - close the previous
828	// point
829	elseif ($depth == $list_depth)
830	{
831	$previous_list_type = end($lists);
832	if ($previous_list_type == '*')
833	{
834	fwrite($xml_out, '</Bullet>' . "\n");
835	}
836	else
837	{
838	fwrite($xml_out, '</NumberedItem>' . "\n");
839	}
840	// - we don't match in type anymore... close the previous list and open
841	// a new list of the appropriate type
842	if ($list_type != $previous_list_type)
843	{
844	if ($previous_list_type == '*')
845	{
846	fwrite($xml_out, '</BulletList>' . "\n");
847	fwrite($xml_out, '<NumberedList>' . "\n");
848	}
849	else
850	{
851	fwrite($xml_out, '</NumberedNumbered>' . "\n");
852	fwrite($xml_out, '<BulletList>' . "\n");
853	}
854	array_pop($lists);
855	array_push($lists, $list_type);
856	}
857	}
858	else
859	{
860	// - we have either got deeper...
861	if ($depth > $list_depth)
862	{
863	if ($list_type == '*')
864	{
865	fwrite($xml_out, '<BulletList>' . "\n");
866	}
867	else
868	{
869	fwrite($xml_out, '<NumberedList>' . "\n");
870	}
871	array_push($lists, $list_type);
872	}
873	// ... or shallower in the bullet listing
874	if ($depth < $list_depth)
875	{
876	$previous_list_type = array_pop($lists);
877	if ($previous_list_type == '*')
878	{
879	fwrite($xml_out, '</Bullet>' . "\n");
880	fwrite($xml_out, '</BulletList>' . "\n");
881	}
882	else
883	{
884	fwrite($xml_out, '</NumberedItem>' . "\n");
885	fwrite($xml_out, '</NumberedList>' . "\n");
886	}
887	// - we still have to close the last item too
888	$previous_listitem_type = end($lists);
889	if ($previous_listitem_type == '*')
890	{
891	fwrite($xml_out, '</Bullet>' . "\n");
892	}
893	else
894	{
895	fwrite($xml_out, '</NumberedItem>' . "\n");
896	}
897	}
898	}
899	if ($list_type == '*')
900	{
901	fwrite($xml_out, '<Bullet>' . "\n");
902	}
903	else
904	{
905	fwrite($xml_out, '<NumberedItem>' . "\n");
906	}
907	// Special Case: bullets that contain (start) a code block
908	if (preg_match('/^(.)<code>\s$/', $list_text, $matches))
909	{
910	$list_text = $matches[1];
911	$in_code_block = true;
912	}
913
914	outputTextBlock($xml_out, $list_text);
915
916	// - to make things clearer, we'll process any and all code blocks within
917	// bullets here - especially as there may be more text block after
918	// the code block finishes
919	if ($in_code_block)
920	{
921	$sub_line = '';
922	while ($in_code_block && ($sub_line = fgets($page_in)) !== false)
923	{
924	$sub_line = trim($sub_line);
925	// - closing code
926	if (preg_match('/^<\/code>(.*)$/', $sub_line, $matches))
927	{
928	$sub_line = $matches[1]; // may be empty string
929	$in_code_block = false;
930	}
931	// - output another plain codeline
932	else
933	{
934	fwrite($xml_out, '<CodeLine>' . $sub_line . "</CodeLine>\n");
935	$sub_line = '';
936	}
937	}
938	// - if sub_line still has anything in it, then add that content as a
939	// text block
940	if (!empty($sub_line))
941	{
942	outputTextBlock($xml_out, $sub_line);
943	}
944	}
945	}
946	// - images start with an image caption 'element'
947	elseif (preg_match('/<imgcaption\s+figure_([a-z0-9_\-]+)\\|(.+)>([^<]*?)<\/imgcaption>/', $line, $matches))
948	{
949	$figure_id = $matches[1];
950	$figure_title = $matches[2];
951	$image_content = $matches[3];
952	// - watch for the special withLineNumber flag
953	$class_attribute = '';
954	if (strpos($figure_title, '%!-- withLineNumber --%') != false)
955	{
956	$class_attribute = ' class="withLineNumber"';
957	$figure_title = str_replace('%!-- withLineNumber --%','',$figure_title);
958	}
959	fwrite($xml_out, '<Figure id="' . $figure_id . '"' . $class_attribute . '>' . "\n");
960	echo '[figure: ' . $figure_id . "] \n";
961	fwrite($xml_out, '<Title>' . "\n");
962	// - decode any comments in the title (used to store explicit id
963	// information)
964	$figure_title = str_replace('%!--', '<!--', $figure_title);
965	$figure_title = str_replace('--%', '-->', $figure_title);
966	// - special case: the title may have a subtitle (as a prefix)
967	$figure_subtitle_id = '';
968	$figure_subtitle = '';
969	// - subtitle with explicit id
970	if (preg_match('/^(<!-- id:.+? -->$[a-z]$)\s(.)$/', $figure_title, $matches))
971	{
972	$figure_subtitle = $matches[1];
973	$figure_title = $matches[2];
974	}
975	// - subtitle without explicit id
976	else if (preg_match('/^($[a-z]$)\s(.)$/', $figure_title, $matches))
977	{
978	$figure_subtitle = $matches[1];
979	$figure_title = $matches[2];
980	}
981	outputTextBlock($xml_out, $figure_title);
982	if (!empty($figure_subtitle))
983	{
984	fwrite($xml_out, '<SubTitle>' . "\n");
985	outputTextBlock($xml_out, $figure_subtitle);
986	fwrite($xml_out, '</SubTitle>' . "\n");
987	}
988	fwrite($xml_out, '</Title>' . "\n");
989	// Try and find the image itself
990	if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $image_content))
991	{
992	processImage($xml_out, $line);
993	fwrite($xml_out, '</Figure>' . "\n");
994	}
995	// Didn't find an image? Weird, but mark the imgcaption as open, and
996	// we'll chomp up the next image found as the content.
997	else
998	{
999	$in_figure = true;
1000	}
1001	// - record the id to prevent repeating
1002	$seen_ids[$figure_id] = true;
1003	}
1004	// - tables start with a table caption 'element'
1005	elseif (preg_match('/<tblcaption\s+table_([a-z0-9_\-]+)\\|([^>]+)>\s*<\/tblcaption>/', $line, $matches))
1006	{
1007	$table_id = $matches[1];
1008	$table_title = $matches[2];
1009	if ($table_title == '##NOCAPTION##')
1010	{
1011	echo '[non-captioned table: ' . $table_id . "] \n";
1012	// - watch for autogenerated ids... no point in outputting them
1013	if (preg_match('/^table(_\d+)?$/', $table_id))
1014	{
1015	fwrite($xml_out, "<Table>\n");
1016	}
1017	else
1018	{
1019	fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
1020	}
1021	fwrite($xml_out, '<Title/>' . "\n");
1022	}
1023	elseif ($table_title == '##HIDDEN##')
1024	{
1025	echo '[hidden table: ' . $table_id . "] \n";
1026	// - watch for autogenerated ids... no point in outputting them
1027	if (preg_match('/^table(_\d+)?$/', $table_id))
1028	{
1029	fwrite($xml_out, "<Table class=\"hidden\">\n");
1030	}
1031	else
1032	{
1033	fwrite($xml_out, '<Table class="hidden" id="' . $table_id . '">' . "\n");
1034	}
1035	fwrite($xml_out, '<Title/>' . "\n");
1036	}
1037	else
1038	{
1039	echo '[table: ' . $table_id . "] \n";
1040	// - watch for autogenerated ids... no point in outputting them
1041	if (preg_match('/^table(_\d+)?$/', $table_id))
1042	{
1043	fwrite($xml_out, "<Table>\n");
1044	}
1045	else
1046	{
1047	fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
1048	}
1049	fwrite($xml_out, '<Title>' . "\n");
1050	outputTextBlock($xml_out, $table_title);
1051	fwrite($xml_out, '</Title>' . "\n");
1052	}
1053	fwrite($xml_out, '<TableContent>' . "\n");
1054	$in_table = true;
1055	// - record the id to prevent repeating
1056	$seen_ids[$table_id] = true;
1057	}
1058	// - the second line in a table should be it's column width values
1059	elseif (preg_match('/\\|<\s-\s([0-9 ]+?)\s>\\|/', $line, $matches))
1060	{
1061	$column_widths = explode(' ', $matches[1]);
1062	}
1063	// - then every row will be made of a number of cells
1064	elseif (preg_match('/^\\|(.*?)\\|$/', $line, $matches))
1065	{
1066	$row_content = $matches[1];
1067	$cell_contents = preg_split('/(\s+\\|\|\\|\s+)/', $row_content);
1068	fwrite($xml_out, '<tr>' . "\n");
1069	foreach ($cell_contents as $index=>$cell_content)
1070	{
1071	$cell_content = trim($cell_content);
1072	$th_text = '';
1073	if (isset($column_widths[$index]))
1074	{
1075	$th_text = '<th width="' . $column_widths[$index] . '"';
1076	}
1077	else
1078	{
1079	$th_text = '<th';
1080	}
1081	// - if the cell would be empty, we use the shorthand
1082	if (empty($cell_content))
1083	{
1084	$th_text .= '/>' . "\n";
1085	fwrite($xml_out, $th_text);
1086	}
1087	else
1088	{
1089	$th_text .= '>' . "\n";
1090	fwrite($xml_out, $th_text);
1091
1092	// GAH - this is proving harder than a hard thing thats hard.
1093	// The issue is that the most straightforward way of fixing this,
1094	// namely using explicit newlines (\\) in the dokuwiki txt causes
1095	// lots a legitimately translated <br/> to also be split up. I
1096	// think the only way forward would be to maybe extend the HTML
1097	// Comment plugin to also respect and process <br/> tags. Then I
1098	// can avoid transforming them, and use the \\ sentinel to
1099	// separate multi-line table cells.
1100	$cell_content_lines = explode('\\\\', $cell_content);
1101	foreach ($cell_content_lines as $cell_content)
1102	{
1103	// - watch out, as the content may be an image
1104	if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $cell_content))
1105	{
1106	processImage($xml_out, $cell_content);
1107	}
1108	elseif (preg_match('/\'\'(.*)\'\'/', $cell_content, $matches))
1109	{
1110	fwrite($xml_out, '<CodeLine>' . translateTableCodeline($matches[1]) . '</CodeLine>' . "\n");
1111	}
1112	// - anything else it text
1113	else
1114	{
1115	outputTextBlock($xml_out, $cell_content);
1116	}
1117	}
1118	fwrite($xml_out, '</th>' . "\n");
1119	}
1120	}
1121	fwrite($xml_out, '</tr>' . "\n");
1122	}
1123	// - links to image media in the wiki!
1124	elseif (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $line))
1125	{
1126	processImage($xml_out, $line);
1127	// - if we were processing a figure, then now is a good time to close it
1128	if ($in_figure)
1129	{
1130	fwrite($xml_out, '</Figure>' . "\n");
1131	$in_figure = false;
1132	}
1133	}
1134	// - if the line starts with a <code> block, then we have a tag
1135	// for that (which is special in that it get a unique text id)
1136	elseif (preg_match('/^<code\s\d\s>(.?)(<\/code>)?$/', $line, $matches) \|\| ($in_code_block && preg_match('/^(.*?)(<\/code>)?$/', $line, $matches)))
1137	{
1138	$payload = $matches[1];
1139	$found_end = (isset($matches[2]));
1140	$in_code_block = true;
1141	// - be careful with empty lines
1142	if (empty($payload))
1143	{
1144	// - as they may appear in the body of the code (in which case we need
1145	// to output them). The empty lines at the start or end of a code
1146	// block are just an unfortunate consequence of the support for code
1147	// line numbering.
1148	if (!$found_end && strpos($line, '<code') === false)
1149	{
1150	fwrite($xml_out, "<CodeLine/>\n");
1151	}
1152	}
1153	elseif (preg_match('/^<!-- id:([^\s]+) -->/', $payload, $matches))
1154	{
1155	$text_id = $matches[1];
1156	outputTextBlock($xml_out, $payload, 'code', true);
1157	// - record the id to prevent repeating
1158	$seen_ids[$text_id] = true;
1159	}
1160	else
1161	{
1162	fwrite($xml_out, '<CodeLine>' . translateText($payload, true) . '</CodeLine>' . "\n");
1163	}
1164	// - if we didn't find an endtag we have to keep doing code mode until
1165	// we do
1166	$in_code_block = (!$found_end);
1167	if ($found_end)
1168	{
1169	// - if we were processing a figure, then now is a good time to close it
1170	if ($in_figure)
1171	{
1172	fwrite($xml_out, '</Figure>' . "\n");
1173	$in_figure = false;
1174	}
1175	}
1176	}
1177	// - entities on a line by themselves (i.e. references to external files)
1178	// go through verbatim
1179	elseif (preg_match('/^\s&[a-z0-9_-]+;\s$/', $line))
1180	{
1181	fwrite($xml_out, $line . "\n");
1182	}
1183	// - lines starting with > are indented text blocks
1184	elseif (preg_match('/^>(.*)$/', $line, $matches))
1185	{
1186	$payload = $matches[1];
1187	fwrite($xml_out, "<Indented>\n");
1188	outputTextBlock($xml_out, $payload);
1189	fwrite($xml_out, "</Indented>\n");
1190	}
1191	// - everything else goes straight through as a text block
1192	// - note that for code blocks, even empty lines count
1193	elseif (!empty($line))
1194	{
1195	// - output the line of text having encoded entities etc
1196	outputTextBlock($xml_out, $line, '', $in_code_block);
1197	}
1198	}
1199	// Complete any open part
1200	if ($in_part)
1201	{
1202	fwrite($xml_out, '</Content>' . "\n");
1203	fwrite($xml_out, '</Part>' . "\n");
1204	$in_part = false;
1205	}
1206	// Complete any open subsection
1207	if ($in_subsection)
1208	{
1209	fwrite($xml_out, '</Content>' . "\n");
1210	fwrite($xml_out, '</Subsection>' . "\n");
1211	$in_subsection = false;
1212	}
1213	// Complete any open section
1214	if ($in_section)
1215	{
1216	fwrite($xml_out, '</Content>' . "\n");
1217	fwrite($xml_out, '</Section>' . "\n");
1218	$in_section = false;
1219	}
1220	// Complete any open chapter
1221	if ($in_chapter)
1222	{
1223	fwrite($xml_out, '</Content>' . "\n");
1224	fwrite($xml_out, '</Chapter>' . "\n");
1225	$in_chapter = false;
1226	}
1227	}
1228	/** processPage($xml_out, $page_name) **/
1229
/**
 * Copy a wiki image into the XML source tree and emit its <File> element.
 *
 * Searches $text for a dokuwiki media link of the form
 * {{...:filename?WIDTHxHEIGHT}} (optionally followed by &direct). When one
 * is found, the image is copied from the dokuwiki media directory of the
 * requested language into <xml_source_path>/<lang>/images and a
 * <File width=".." height=".." url="images/<filename>"/> element is written
 * to $xml_out. The element is written whether or not the copy succeeded; a
 * failed copy is reported through printError(). When $text contains no such
 * link, nothing is copied and nothing is written.
 *
 * @param resource $xml_out handle the XML is written to with fwrite()
 * @param string   $text    wiki text expected to contain the media link
 */
function processImage($xml_out, $text)
{
  global $dokuwiki_path;
  global $xml_source_path;

  // - nothing to do unless the text actually holds a sized media link
  if (!preg_match('/\{\{.+?([^:?]+)\?(\d+)x(\d+)(&direct)?\}\}/', $text, $matches))
  {
    return;
  }
  $filename = $matches[1];
  $width = $matches[2];
  $height = $matches[3];

  // NOTE(review): the language code is taken straight from $_REQUEST and is
  // used unvalidated in both filesystem paths below - confirm that it is
  // restricted to known language codes before this function is reached.
  $language = $_REQUEST['l'];

  // - copy the file into place. The source is looked up under the
  //   lower-cased filename, while the destination (and the url attribute)
  //   keep the case used in the wiki text - presumably because dokuwiki
  //   stores its media lower-cased; TODO confirm.
  $image_source_path = $dokuwiki_path . '/data/media/' . $language . '/manuals/images/' . strtolower($filename);
  $image_destination_dir = $xml_source_path . '/' . $language . '/images';
  mkAllDir($image_destination_dir);
  $image_destination_path = $image_destination_dir . '/' . $filename;
  if (copy($image_source_path, $image_destination_path))
  {
    echo '[copying file: ' . $filename . "] \n";
    chmod($image_destination_path, 0664);
  }
  else
  {
    printError('Failed to copy image into place: ' . $filename, false);
  }

  // - spit out the XML element. The filename comes from free wiki text and
  //   may contain characters that are special inside an XML attribute value
  //   (&, <, ", '), so it has to be escaped to keep the document well-formed.
  //   Width and height are digits only (see the pattern above) and need no
  //   escaping.
  $escaped_filename = htmlspecialchars($filename, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
  fwrite($xml_out, '<File width="' . $width . '" height="' . $height . '" url="images/' . $escaped_filename . '"/>' . "\n");
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: documentation/trunk/php/gs-manual-export.php@ 30195

Download in other formats: