Context Navigation

gs-manual-export.php@ 30117

Last change on this file since 30117 was 30117, checked in by jmt12, 9 years ago
Altered to fix bug where imgcaption tag closed before image actually output. Looks like this was intentional at some stage, but means problems in the Wiki, so I'll repair
File size: 43.1 KB

Rev	Line
<?php

require_once('common.php');
$debug = 1;

// There are some elements that are, in my opinion, incorrectly 'resolved' in
// code blocks just because they are valid HTML. For instance, &lt;i&gt; is
// resolved to <i> in code blocks, while something like &lt;Metadata&gt; is
// not. Set this to true to allow such abominations (for the purpose of
// comparing before and after versions of the XML). [jmt12]
$allow_bad_codeblocks = true;

/** @file gs-manual-export.php
 * This script transforms the series of dokuwiki pages that make up a certain
 * manual (as specified by the 'm' argument) in a certain language ('l') into
 * the XML format required by the rest of the Greenstone manual generation
 * scripts.
 */

// 0. Initialization

// - we have a counter to assign identifiers to text blocks etc without ids
$text_id_counter = 1;
// - we need an array of all the footnotes
$footnotes = array();

// Defaults
// - empty() is already false-safe for unset keys, so no separate isset() is
//   needed
if (empty($_REQUEST['l']))
{
  $_REQUEST['l'] = 'en';
}
if (empty($_REQUEST['m']))
{
  //$_REQUEST['m'] = 'user';
  //$_REQUEST['m'] = 'install';
  $_REQUEST['m'] = 'develop';
  //$_REQUEST['m'] = 'paper';
}
if (empty($_REQUEST['v']))
{
  $_REQUEST['v'] = 'draft';
}
// - the action must be exactly 'download' or 'store'; anything else
//   (including non-string input such as a[]=x) falls back to 'store'. The D
//   modifier stops '$' from accepting a trailing newline.
if (!isset($_REQUEST['a']) || !is_string($_REQUEST['a']) || !preg_match('/^(download|store)$/D', $_REQUEST['a']))
{
  $_REQUEST['a'] = 'store'; // Try to store the file to disk
}

// NOTE(review): $base_path is not defined in this file - presumably set by
// common.php; confirm.
$var_path = fileCat(array($base_path, 'var'));
$timestamp = time();
//$xml_source_path = 'PATH TO GSDL MANUALS'
$xml_source_path = '/tmp';
if ($_REQUEST['a'] === 'download')
{
  // Clear out previous exports
  recursiveRemove($var_path, '/greenstone/greenstone-documentation/php/var');
  // New export
  $xml_source_path = fileCat(array($var_path, $timestamp));
}
	59
echo '<html>' . "\n";
echo '<head>' . "\n";
echo '<title>GS Manual Export</title>' . "\n";
echo '</head>' . "\n";
echo '<body>' . "\n";

// - validate arguments before we use them (security)
// - non-string input (e.g. m[]=x) can never be valid; blank it so the checks
//   below reject it without passing an array to preg_match/htmlspecialchars
foreach (array('m', 'l') as $argument_name)
{
  if (!is_string($_REQUEST[$argument_name]))
  {
    $_REQUEST[$argument_name] = '';
  }
}
// - the D modifier makes '$' match only at the very end of the string, so a
//   value such as "en\n" is rejected rather than ending up in file paths
// NOTE(review): printError() is defined elsewhere; it is assumed to abort the
// script unless its second argument is false - confirm.
if (!preg_match('/^(develop|install|paper|user)$/D', $_REQUEST['m']))
{
  printError('Unknown manual type requested: ' . htmlspecialchars($_REQUEST['m']));
}

if (!preg_match('/^(ar|en|es|fr|pt-br|ru)$/D', $_REQUEST['l']))
{
  printError('Unknown language requested: ' . htmlspecialchars($_REQUEST['l']));
}

echo '<h2>Generating Greenstone Manual XML</h2>' . "\n";
// - escaping is a no-op for validated values, but keeps the page safe should
//   the validation above ever be non-fatal
echo '<p><b>Manual:</b> ' . htmlspecialchars($_REQUEST['m']) . ' <b>Language:</b> ' . htmlspecialchars($_REQUEST['l']) . "</p>\n<hr/>\n";

// 1. Create the XML output file handle
// - construct the path using the information we've been provided as arguments
$xml_file_dir = $xml_source_path . '/' . $_REQUEST['l'];
mkAllDir($xml_file_dir);
$xml_file_path = $xml_file_dir . '/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml';
// - backup any existing file
if (file_exists($xml_file_path))
{
  $xml_backup_file_path = $xml_file_path . '.bak';
  if (!rename($xml_file_path, $xml_backup_file_path))
  {
    printError('Failed to rename existing manual file for backup');
  }
}

// - and create a handle to the new file
$xml_out = fopen($xml_file_path, 'w');
if (!$xml_out)
{
  // - every later step writes to this handle, so there is no point continuing
  printError('Failed to open manual file for writing');
}
	96
	97	// 2. Read in the top level page - this will give configuration data for the
	98	// manual and cover page, as well as specifying the order for the other
	99	// pages in the manual
	100	echo "<p><b>Frontmatter:</b><br/>\n";
	101	// - by reading this page we hope to populate an array of metadata, and also
	102	// extract the sequence of other pages within this manual
	103	$cover_metadata = array();
	104	$pages_in_order = array();
	105	// - we now need to consider if the user has asked for a draft version (i.e.
	106	// includes the latest version of pages regardless of approval) or if only
	107	// the approved versions of pages should be included
	108	// - only necessary for english version of manual, as those are the only pages
	109	// editable
	110	$top_page_path = '';
	111	if ($_REQUEST['v'] == 'draft' \|\| $_REQUEST['l'] != 'en')
	112	{
	113	// - again, we can construct the path to the top level page given the arguments
	114	// provided
	115	$top_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt';
	116	}
	117	else
	118	{
	119	$top_page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m']);
	120	}
	121
	122	if (!file_exists($top_page_path))
	123	{
	124	printError('Failed to locate top level page for manual');
	125	}
	126	// - we're going to open a handle to the file, then read it in line-by-line
	127	// watching for the lines we are interested in (using pattern matching)
	128	$top_page_in = fopen($top_page_path, 'r');
	129	if (!$top_page_in)
	130	{
	131	printError('Failed to open top level page for reading');
	132	}
	133	$in_contents = false;
	134	while (($line = fgets($top_page_in)) !== false)
	135	{
	136	// - if we are capturing page order, and we encounter something that looks
	137	// like a bulletpoint item pointing to a wiki page, then we append the
	138	// name of that page to our pages in order array
	139	if ($in_contents && preg_match('/^\s+\\s+\[\[.:' . $_REQUEST['m'] . ':(.+?)\\|(.?)\]\]\s*$/', $line, $matches))
	140	{
	141	array_push($pages_in_order, $matches[1]);
	142	}
	143	// - metadata is all encoded within dokuwiki tables
	144	elseif (preg_match('/^\^\s+([^\s]+)\s+\\|\s+(.+?)\s+\\|\s*$/', $line, $matches))
	145	{
	146	$field = $matches[1];
	147	$value = $matches[2];
	148	$values = array();
	149	if (isset($cover_metadata[$field]))
	150	{
	151	$values = $cover_metadata[$field];
	152	}
	153	array_push($values, $value);
	154	$cover_metadata[$field] = $values;
	155	}
	156	// - watch for the heading 'Contents' to begin extracting page order
	157	// information
	158	elseif (preg_match('/^=+\s(.+)\s=+$/', $line, $matches))
	159	{
	160	if ($matches[1] == 'Contents')
	161	{
	162	$in_contents = true;
	163	}
	164	// - any other title means we aren't capturing page order (anymore)
	165	else
	166	{
	167	$in_contents = false;
	168	}
	169	}
	170	}
	171	if (!feof($top_page_in))
	172	{
	173	printError('Unexpected fgets() fail when reading top page');
	174	}
	175	fclose($top_page_in);
	176	// - ensure we have the required metadata
	177	$required_metadata = array('Heading','Title','Affiliation','Version','Date');
	178	foreach ($required_metadata as $required_field)
	179	{
	180	if (!isset($cover_metadata[$required_field]))
	181	{
	182	printError('Missing required metadata: ' . $required_field);
	183	}
	184	}
	185	// - now we can use the metadata to construct the XML header and the cover page.
	186	// This follows a pretty set recipe with only elements that can repeat---like
	187	// Author, SupplementaryText etc---are at all tricky
	188	fwrite($xml_out, '<?xml version="1.0" encoding="UTF-8"?>' . "\n");
	189	fwrite($xml_out, '<!DOCTYPE Manual [' . "\n");
	190	if (isset($cover_metadata['ENTITY']))
	191	{
	192	foreach ($cover_metadata['ENTITY'] as $entity)
	193	{
	194	fwrite($xml_out, "\t" . '<!ENTITY ' . $entity . '>' . "\n");
	195	}
	196	}
	197	fwrite($xml_out, ']>' . "\n");
	198	fwrite($xml_out, '<Manual id="' . ucfirst($_REQUEST['m']) . '" lang="' . $_REQUEST['l'] . '">' . "\n");
	199
	200	///cho "<p>[Debug] metadata: " . print_r($cover_metadata, true) . "</p>\n\n";
	201
	202	outputMetadataSingle($xml_out, $cover_metadata, 'Heading');
	203	outputMetadataSingle($xml_out, $cover_metadata, 'Title');
	204	outputMetadataSingle($xml_out, $cover_metadata, 'Author');
	205	outputMetadataSingle($xml_out, $cover_metadata, 'Affiliation');
	206	outputMetadataMultiple($xml_out, $cover_metadata, 'SupplementaryText');
	207	outputMetadataMultiple($xml_out, $cover_metadata, 'Text');
	208	outputMetadataMultiple($xml_out, $cover_metadata, 'Comment');
	209	outputMetadataSingle($xml_out, $cover_metadata, 'Version');
	210	outputMetadataSingle($xml_out, $cover_metadata, 'Date');
	211
	212	// 3. Process each page listed in the contents of the top level page in order
	213	foreach ($pages_in_order as $page)
	214	{
	215	processPage($xml_out, $page);
	216	}
	217
	218	// 4. Output out list of footnotes (if any)
	219	if (!empty($footnotes))
	220	{
	221	fwrite($xml_out, '<FootnoteList>'. "\n");
	222	foreach ($footnotes as $footnote=>$footnote_id)
	223	{
	224	///cho '[debug] footnotes: (' . $footnote_id . ') ' . $footnote . '<br />'. "\n";
	225	outputMetadataSingle($xml_out, $footnote, 'Footnote', $footnote_id);
	226	}
	227	fwrite($xml_out, '</FootnoteList>'. "\n");
	228	}
	229
	230	// 5. Finalize and close the XML output
	231	fwrite($xml_out, '</Manual>' . "\n");
	232	fclose($xml_out);
[27507]	233	chmod($xml_file_path, 0664);
[25026]	234
	235	// 6. Complete!
	236	echo '<p><b>Complete!</b></p>' . "\n<hr/>\n";
	237	if ($_REQUEST['a'] == 'download')
	238	{
[27507]	239	// Zip up the manual files
	240	$zip_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.zip';
	241	$zip_path = fileCat(array($xml_source_path, $zip_file));
	242	$zip_command = 'zip -r "' . $zip_path . '" . > /dev/null 2>&1';
	243	//cho '<p><b>[DEBUG]</b> zip_command:' . $zip_command . '</p>';
	244	$tgz_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.tgz';
	245	$tgz_path = fileCat(array($xml_source_path, $tgz_file));
	246	$tgz_command = 'tar -czf "' . $tgz_path . '" * > /dev/null 2>&1';
	247	//cho '<p><b>[DEBUG]</b> tgz_command:' . $tgz_command . '</p>';
	248	// We need to move to the document folder so that archives have sensible paths
	249	$original_cwd = getcwd();
	250	chdir($xml_file_dir);
	251	system($zip_command);
	252	system($tgz_command);
	253	// Go back
	254	chdir($original_cwd);
	255	// Links are ready
	256	echo '<p>Download XML file plus images as: <a href="var/' . $timestamp . '/' . $zip_file . '">ZIP</a> or <a href="var/' . $timestamp . '/' . $tgz_file . '">TGZ</a></p>' . "\n";
[25026]	257	}
	258	echo '<p>Click <a href="' . $dokuwiki_url . '/doku.php?id=' . $_REQUEST['l'] . ':manuals:' . $_REQUEST['m'] . '">here</a> to return to dokuwiki</p>' . "\n";
	259	echo '</body>' . "\n";
	260	echo '</html>';
	261	exit(0);
	262
	263	/**
	264	*/
	265	function outputMetadataSingle($xml_out, $metadata, $field, $mid=false)
	266	{
	267	echo '[metadata: ' . $field . "] \n";
	268	if ($mid)
	269	{
	270	fwrite($xml_out, '<' . $field . ' id="' . $mid . '">' . "\n");
	271	}
	272	else
	273	{
	274	fwrite($xml_out, '<' . $field . '>' . "\n");
	275	}
	276	if (is_array($metadata))
	277	{
	278	if (isset($metadata[$field]) && isset($metadata[$field][0]))
	279	{
	280	outputTextBlock($xml_out, $metadata[$field][0]);
	281	}
	282	else
	283	{
	284	echo 'no such field or no metadata';
	285	}
	286	}
	287	elseif (!empty($metadata))
	288	{
	289	outputTextBlock($xml_out, $metadata);
	290	}
	291	else
	292	{
	293	echo 'no such field or no metadata';
	294	}
	295	fwrite($xml_out, '</' . $field . '>' . "\n");
	296	}
	297	/ outputMetadataSingle() /
	298
	299	function outputMetadataList($xml_out, $metadata, $field, $separator = ',', $final_separator = false)
	300	{
	301	echo '[metadata list: ' . $field . "] \n";
	302	fwrite($xml_out, '<' . $field . '>' . "\n");
	303	if (isset($metadata[$field]))
	304	{
	305	if (count($metadata[$field]) == 1)
	306	{
	307	outputTextBlock($xml_out, $metadata[$field][0]);
	308	}
	309	if (count($metadata[$field]) > 1)
	310	{
	311	$last_value = '';
	312	if ($final_separator)
	313	{
	314	$last_value = array_pop($metadata[$field]);
	315	}
	316	$values = implode($separator, $metadata[$field]);
	317	if ($final_separator)
	318	{
	319	$values .= $final_separator . $last_value;
	320	}
	321	outputTextBlock($xml_out, $values);
	322	}
	323	}
	324	else
	325	{
	326	echo 'no such field or no metadata';
	327	}
	328	fwrite($xml_out, '</' . $field . '>' . "\n");
	329	}
	330	/ outputMetadataList() /
	331
	332	function outputMetadataMultiple($xml_out, $metadata, $field)
	333	{
	334	echo '[metadata multiple: ' . $field . "] \n";
	335	// - Text blocks don't need to be wrapped in Text element
	336	if ($field != 'Text')
	337	{
	338	fwrite($xml_out, '<' . $field . '>' . "\n");
	339	}
	340	if (isset($metadata[$field]))
	341	{
	342	foreach ($metadata[$field] as $value)
	343	{
	344	outputTextBlock($xml_out, $value);
	345	}
	346	}
	347	else
	348	{
	349	echo 'no such field or no metadata';
	350	}
	351	if ($field != 'Text')
	352	{
	353	fwrite($xml_out, '</' . $field . '>' . "\n");
	354	}
	355	}
	356
	357	function translateTableCodeline($text)
	358	{
	359	// Escape any italic tags hidden in HTML comments
	360	$text = str_replace('<!--i-->', '%!--i--%', $text);
	361	$text = str_replace('<!--/i-->', '%!--/i--%', $text);
	362	// Encode entities etc
	363	$text = translateText($text, true);
	364	// Restore any italics elements hidden above
	365	$text = str_replace('%!--i--%', '<i>', $text);
	366	$text = str_replace('%!--/i--%', '</i>', $text);
	367	return $text;
	368	}
	369	/ translateTableCodeline() /
	370
	371	function translateText($text, $in_code_block=false)
	372	{
	373	global $allow_bad_codeblocks;
	374	$text = str_replace('&','&',$text);
	375	$text = str_replace('<','<',$text);
	376	$text = str_replace('>','>',$text);
	377	if ($in_code_block && $allow_bad_codeblocks)
	378	{
	379	///cho "** [debug] restoring bogus decoded tags in: \|$text\| **\n";
	380	$text = str_replace('<i>','<i>',$text);
	381	$text = str_replace('</i>','</i>',$text);
	382	$text = str_replace('<br/>','<br/>',$text);
	383	}
	384	return $text;
	385	}
	386
	387	function outputTextBlock($xml_out, $text, $type='', $in_code_block = false)
	388	{
	389	global $cover_metadata;
	390	global $text_id_counter;
	391	global $footnotes;
	392	global $allow_bad_codeblocks;
	393
	394	// - Start by dealing with any footnotes before anything else
	395	while (preg_match('/$\((.*?)$\)/', $text, $matches))
	396	{
	397	$pattern = $matches[0];
	398	$footnote = $matches[1];
	399	$footnote_id = count($footnotes) + 1;
	400	$footnotes[$footnote] = $footnote_id;
	401	// - note that we have to escape the footnote reference as the following
	402	// code will convert any < and > to entities...
	403	$footnote_reference = '%FootnoteRef id="' . $footnote_id . '"/%';
	404	$text = str_replace($pattern, $footnote_reference, $text);
	405	}
	406
	407	$text_id = '';
	408	// - check whether the string begins with an explicit id
	409	if (preg_match('/^\s<!--\sid:(.+?)\s-->(.)$/', $text, $matches))
	410	{
	411	$text_id = $matches[1];
	412	$text = $matches[2];
	413	if (is_numeric($text_id))
	414	{
	415	$text_id_counter = $text_id + 1;
	416	}
	417	}
	418	else
	419	{
	420	$text_id = $text_id_counter;
	421	$text_id_counter++;
	422	}
	423
	424	// - protect the special case of an HTML comment being actually displayed
	425	// in the text
	426	$text = preg_replace('/<!--([\s\.]+?)-->/','##lt##!--\1--##gt##',$text);
	427
	428	// - reformat dokuwiki syntax to HTML tag syntax
	429	$text = preg_replace('/<!--.*?-->/', '', $text);
	430
	431	// we leave code blocks alone in terms of ampersands
	432	if (!$in_code_block)
	433	{
	434	// - ampersands aren't safe in XML...
	435	$text = str_replace('&', '&', $text);
	436	// ...except for the entities that we have registered as metadata
	437	if (isset($cover_metadata['ENTITY']))
	438	{
	439	foreach ($cover_metadata['ENTITY'] as $entity)
	440	{
	441	if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches))
	442	{
	443	$entity_name = $matches[1];
	444	if ($entity_name != 'mdash')
	445	{
	446	$entity_character = html_entity_decode('&#'.$matches[2].';',ENT_NOQUOTES,'UTF-8');
	447	$text = str_replace('&' . $entity_name . ';', '&' . $entity_name . ';', $text);
	448	// - we also convert any characters that match the entity char into
	449	// the entity
	450	$text = str_replace($entity_character, '&' . $entity_name . ';', $text);
	451	}
	452	}
	453	}
	454	}
	455	// - protect <br/> tags
	456	$text = str_replace('<br/>','%%br/%%',$text);
	457	// - encoding all of the < and > that appear in the text (rather than
	458	// true html formatting)
	459	$text = str_replace('<','<',$text);
	460	$text = str_replace('>','>',$text);
	461	// - restore <br/> tags
	462	$text = str_replace('%%br/%%','<br/>',$text);
	463	}
	464	else if ($type == 'code')
	465	{
	466	$text = str_replace('<','<',$text);
	467	$text = str_replace('>','>',$text);
	468	}
	469
	470	// - links, oh how I hate thee
	471	// - external links are slightly easier
	472	$text = preg_replace('/\[\[http:\/\/(.?)\\|(.?)\]\]/', '<Link url="http://\1">\2</Link>', $text);
	473	// - internals have to become the horrible <CrossRef> tags. We ignore any
	474	// number prefix on the page name as that is just used for ordering within
	475	// Dokuwiki
	476	$text = preg_replace('/\[\[\.\:(.*?)\\|[^\]]+\]\]/','<CrossRef target="Chapter" ref="\1"/>', $text);
	477	// - internal links starting with hash must be on the same page
	478	$text = preg_replace('/\[\[###(.?)\\|.?\]\]/','<CrossRef target="Part" ref="\1"/>', $text);
	479	$text = preg_replace('/\[\[##(.?)\\|.?\]\]/','<CrossRef target="Subsection" ref="\1"/>', $text);
	480	$text = preg_replace('/\[\[#(.?)\\|.?\]\]/','<CrossRef target="Section" ref="\1"/>', $text);
	481	// - 'external' internal wiki links are even worst - since we can't know what
	482	// the page order number for another manual's chapters might be, we instead
	483	// use a search
	484	$text = preg_replace('/\[\[\?do\=search\&id\=([^\s]+)\s+@([a-z]+):manuals:([a-z]+)\\|.*?\]\]/i', '<CrossRef external="\3" lang="\2" target="Chapter" ref="\1"/>', $text);
	485	// - references to images and tables
	486	$text = preg_replace('/(?:<\|<)imgref\sfigure_(.+?)(?:>\|>)/','<CrossRef target="Figure" ref="\1"/>', $text);
	487	$text = preg_replace('/(?:<\|<)tblref\stable_(.+?)(?:>\|>)/','<CrossRef target="Table" ref="\1"/>', $text);
	488	// - explicitly convert URLs as they are a bit messy
	489	// - first all the cases of URLs in italics, without protocol
	490	$text = preg_replace('/\/\/\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
	491	$text = preg_replace('/\/\/\s([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
	492	$text = preg_replace('/\/\/\s(localhost(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
	493	// - now all the protocol ones (with care taken to protect // in protocol)
	494	$text = preg_replace('/\/\/\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
	495	$text = preg_replace('/\/\/\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
	496	$text = preg_replace('/\/\/\shttp:\/\/(localhost(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
	497	// - next we have the underlined URLs sans protocols
	498	$text = preg_replace('/__\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
	499	$text = preg_replace('/__\s([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
	500	$text = preg_replace('/__\s(localhost(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
	501	// - and finally the protocol prefixed underlined URLs
	502	$text = preg_replace('/__\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
	503	$text = preg_replace('/__\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
	504	$text = preg_replace('/__\shttp:\/\/(localhost(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
	505	// - lets also protect any other protocols we find floating around
	506	$text = preg_replace('/(file\|ftp\|http):\/\//i', '\1:##DOUBLESLASH##', $text);
	507
	508	// - italic formatting (taking care of protected double slashes)
	509	$text = preg_replace('/%%\/\/%%/', '##DOUBLESLASH##', $text);
	510	$text = preg_replace('/\/{5}/', '<i>/</i>', $text); // another special case
	511	$text = preg_replace('/\/\/(\/.+?)\s*\/\//', '<i>\1</i>', $text); // another special case
	512	$text = preg_replace('/\/\/\s*(.+?\/)\/\//', '<i>\1</i>', $text); // another special case
	513	$text = preg_replace('/\/\/\s(.+?)\s\/\//', '<i>\1</i>', $text);
	514	$text = preg_replace('/##DOUBLESLASH##/', '//', $text);
	515	// - bold formatting
	516	$text = preg_replace('/\\([^"]+?)\\/', '<b>\1</b>', $text);
	517	// - underline formatting
	518	$text = preg_replace('/__([^"]+?)__/', '<u>\1</u>', $text);
	519
	520	// - decode certain entities in codeblock (just because they are valid HTML,
	521	// derp).
	522	if ($in_code_block && $allow_bad_codeblocks)
	523	{
	524	///cho "** [debug] restoring bogus decoded tags in: \|$text\| **\n";
	525	$text = str_replace('<i>','<i>',$text);
	526	$text = str_replace('</i>','</i>',$text);
	527	//$text = str_replace('<br/>','<br/>',$text);
	528	}
	529	// - restore protected entities
	530	$text = preg_replace('/##(gt\|lt)##/','&\1;',$text);
	531	// - restore protected comment blocks
	532	$text = str_replace('%!--', '<!--', $text);
	533	$text = str_replace('--%', '-->', $text);
	534	// - restore protected footnote refs
	535	$text = preg_replace('/%FootnoteRef id="([^"]+)"\/%/', '<FootnoteRef id="\1"/>', $text);
	536	// output the text block
	537	$text = trim($text);
	538	if (empty($text))
	539	{
	540	fwrite($xml_out, '<Text id="' . $text_id . '"/>' . "\n");
	541	}
	542	else if (!empty($type))
	543	{
	544	fwrite($xml_out, '<Text type="' . $type . '" id="' . $text_id . '">' . $text . '</Text>' . "\n");
	545	}
	546	else
	547	{
	548	fwrite($xml_out, '<Text id="' . $text_id . '">' . $text . '</Text>' . "\n");
	549	}
	550	}
	551	/ outputTextBlock($xml_out, $text) /
	552
	553	/**
	554	*/
	555	function processPage($xml_out, $page_name)
	556	{
	557	global $dokuwiki_path;
	558	global $seen_ids;
	559	echo "</p>\n<p><b>Export Chapter:</b> " . $page_name . "<br/>\n";
	560	// - locate the page in question (taking into account if the user asked for a
	561	// draft version or an approved version of the manual)
	562	$page_path = '';
	563	if ($_REQUEST['v'] == 'draft' \|\| $_REQUEST['l'] != 'en')
	564	{
	565	$page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $page_name . '.txt';
	566	}
	567	else
	568	{
	569	$page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m'] . ':' . $page_name);
	570	}
	571	$page_in = @fopen($page_path, 'r');
	572	if (!$page_in)
	573	{
	574	printError('Failed to open page for reading:' . $page_name, false);
	575	return;
	576	}
	577	// - once again we read in line-by-line, but this time we are going to output
	578	// each line as we go through. We expect to encounter certain lines in a
	579	// predefined order, and should complain if we don't find what we expect.
	580	$in_chapter = false;
	581	$in_section = false;
	582	$in_subsection = false;
	583	$in_part = false;
	584	$in_list = false;
	585	$lists = array();
	586	$previous_listitem_type = '';
	587	$in_figure = false;
	588	$in_table = false;
	589	$column_widths = array();
	590	$in_code_block = false;
	591	while (($line = fgets($page_in)) !== false)
	592	{
	593	// remove newline character
	594	$line = preg_replace('/\r?\n$/','',$line);
	595	// - we need to know the 'depth' for the bulletpoint lists
	596	$depth = 0;
	597	while (strlen($line) > 2 && preg_match('/^\s+[\*\-]/', $line) && substr($line, 0, 2) == ' ')
	598	{
	599	$depth++;
	600	$line = substr($line, 2);
	601	}
	602	$first_character = substr($line, 0, 1);
	603	// - special case for the end of bullet lists
	604	if ($in_list && ($first_character != "*" && $first_character != "-"))
	605	{
	606	while (count($lists) > 0)
	607	{
	608	$list_type = array_pop($lists);
	609	if ($list_type == '*')
	610	{
	611	fwrite($xml_out, '</Bullet>' . "\n");
	612	fwrite($xml_out, '</BulletList>' . "\n");
	613	}
	614	else
	615	{
	616	fwrite($xml_out, '</NumberedItem>' . "\n");
	617	fwrite($xml_out, '</NumberedList>' . "\n");
	618	}
	619	}
	620	$in_list = false;
	621	}
	622	// - special case for the end of tables
	623	if ($in_table && $first_character != '^' && $first_character != '\|')
	624	{
	625	fwrite($xml_out, '</TableContent>' . "\n");
	626	fwrite($xml_out, '</Table>' . "\n");
	627	$in_table = false;
	628	}
	629	// - special cases for premature closing of sections, subsections and parts
	630	if (preg_match('/<!-- close:(section\|subsection\|part) -->/', $line, $matches))
	631	{
	632	// - we always try to do this (regardless of actual flag) as we must
	633	// always close the smallest 'granularity' first
	634	if ($in_part)
	635	{
	636	fwrite($xml_out, '</Content>' . "\n");
	637	fwrite($xml_out, '</Part>' . "\n");
	638	$in_part = false;
	639	}
	640	if ($in_subsection && ($matches[1] == 'section' \|\| $matches[1] == 'subsection'))
	641	{
	642	fwrite($xml_out, '</Content>' . "\n");
	643	fwrite($xml_out, '</Subsection>' . "\n");
	644	$in_subsection = false;
	645	}
	646	if ($in_section && $matches[1] == 'section')
	647	{
	648	fwrite($xml_out, '</Content>' . "\n");
	649	fwrite($xml_out, '</Section>' . "\n");
	650	$in_section = false;
	651	}
	652	}
	653
	654	// - if this page is a chapter, then the first thing on the page should be
	655	// the chapter title (six equals)
	656	if (preg_match('/====== (.+) ======/', $line, $matches))
	657	{
	658	$chapter_title = $matches[1];
	659	$chapter_id = $page_name;
	660	if (empty($chapter_id))
	661	{
	662	$chapter_id = generateID($chapter_title);
	663	}
	664	// - are we already processing a part? if so end it, end it now
	665	if ($in_part)
	666	{
	667	fwrite($xml_out, '</Content>' . "\n");
	668	fwrite($xml_out, '</Part>' . "\n");
	669	$in_part = false;
	670	}
	671	// - are we already processing a subsection? if so end it, end it now
	672	if ($in_subsection)
	673	{
	674	fwrite($xml_out, '</Content>' . "\n");
	675	fwrite($xml_out, '</Subsection>' . "\n");
	676	$in_subsection = false;
	677	}
	678	// - are we already processing a section? if so end it, end it now
	679	if ($in_section)
	680	{
	681	fwrite($xml_out, '</Content>' . "\n");
	682	fwrite($xml_out, '</Section>' . "\n");
	683	$in_section = false;
	684	}
	685	// - are we already processing a chapter? if so end it, end it now
	686	if ($in_chapter)
	687	{
	688	fwrite($xml_out, '</Content>' . "\n");
	689	fwrite($xml_out, '</Chapter>' . "\n");
	690	$in_chapter = false;
	691	}
	692	// - write out this chapter's header
	693	fwrite($xml_out, '<Chapter id="' . $chapter_id . '">' . "\n");
	694	outputMetadataSingle($xml_out, $chapter_title, 'Title');
	695	fwrite($xml_out, '<Content>' . "\n");
	696	$in_chapter = true;
	697	}
	698	// - the next likely thing to encounter is a section heading (five equals)
	699	elseif (preg_match('/=====\s+(.+)\s+=====/', $line, $matches))
	700	{
	701	$section_title = $matches[1];
	702	// - check for explicit section id
	703	$section_id = '';
	704	if (preg_match('/<!-- sid:(.+?) -->(.*)/', $section_title, $matches))
	705	{
	706	$section_id = $matches[1];
	707	$section_title = $matches[2];
	708	}
	709	if (empty($section_id))
	710	{
	711	$section_id = generateID($section_title);
	712	}
	713	// - are we already processing a part? if so end it, end it now
	714	if ($in_part)
	715	{
	716	fwrite($xml_out, '</Content>' . "\n");
	717	fwrite($xml_out, '</Part>' . "\n");
	718	$in_part = false;
	719	}
	720	// - are we already processing a subsection? if so end it, end it now
	721	if ($in_subsection)
	722	{
	723	fwrite($xml_out, '</Content>' . "\n");
	724	fwrite($xml_out, '</Subsection>' . "\n");
	725	$in_subsection = false;
	726	}
	727	// - are we already processing a section? if so end it, end it now
	728	if ($in_section)
	729	{
	730	fwrite($xml_out, '</Content>' . "\n");
	731	fwrite($xml_out, '</Section>' . "\n");
	732	$in_section = false;
	733	}
	734	// - write out this section's header
	735	fwrite($xml_out, '<Section id="' . $section_id . '">' . "\n");
	736	outputMetadataSingle($xml_out, $section_title, 'Title');
	737	fwrite($xml_out, '<Content>' . "\n");
	738	$in_section = true;
	739	}
	740	// - similar for subsection heading (four equals)
	741	elseif (preg_match('/==== (.+) ====/', $line, $matches))
	742	{
	743	$subsection_title = $matches[1];
	744	// - check for explicit subsection id
	745	$subsection_id = '';
	746	if (preg_match('/<!-- sid:(.+?) -->(.*)/', $subsection_title, $matches))
	747	{
	748	$subsection_id = $matches[1];
	749	$subsection_title = $matches[2];
	750	}
	751	if (empty($subsection_id))
	752	{
	753	$subsection_id = generateID($subsection_title);
	754	}
	755	// - are we already processing a part? if so end it, end it now
	756	if ($in_part)
	757	{
	758	fwrite($xml_out, '</Content>' . "\n");
	759	fwrite($xml_out, '</Part>' . "\n");
	760	$in_part = false;
	761	}
	762	// - are we already processing a subsection? if so end it, end it now
	763	if ($in_subsection)
	764	{
	765	fwrite($xml_out, '</Content>' . "\n");
	766	fwrite($xml_out, '</Subsection>' . "\n");
	767	$in_subsection = false;
	768	}
	769	// - write out this subsection's header
	770	fwrite($xml_out, '<Subsection id="' . $subsection_id . '">' . "\n");
	771	outputMetadataSingle($xml_out, $subsection_title, 'Title');
	772	fwrite($xml_out, '<Content>' . "\n");
	773	$in_subsection = true;
	774	}
	775	// - and part heading (three equals)
	776	elseif (preg_match('/=== (.+) ===/', $line, $matches))
	777	{
	778	$part_title = $matches[1];
	779	// - check for explicit part id
	780	$part_id = '';
	781	if (preg_match('/<!-- sid:(.+?) -->(.*)/', $part_title, $matches))
	782	{
	783	$part_id = $matches[1];
	784	$part_title = $matches[2];
	785	}
	786	if (empty($part_id))
	787	{
	788	$part_id = generateID($part_title);
	789	}
	790	// - are we already processing a part? if so end it, end it now
	791	if ($in_part)
	792	{
	793	fwrite($xml_out, '</Content>' . "\n");
	794	fwrite($xml_out, '</Part>' . "\n");
	795	$in_part = false;
	796	}
	797	// - write out this part's header
	798	fwrite($xml_out, '<Part id="' . $part_id . '">' . "\n");
	799	outputMetadataSingle($xml_out, '//' . $part_title . '//', 'Title');
	800	fwrite($xml_out, '<Content>' . "\n");
	801	$in_part = true;
	802	}
	803	// - Ignore 5th level heading - they are only used to allow more convenient
	804	// editing of figures and tables
	805	elseif (preg_match('/== (.+) ==/', $line, $matches))
	806	{
	807	}
	808	// - lists need special handling
	809	elseif (preg_match('/^(\\|\-)\s+(.)/', $line, $matches))
	810	{
	811	$list_type = $matches[1];
	812	$list_text = $matches[2];
	813	$list_depth = count($lists);
	814	if (!$in_list)
	815	{
	816	if ($list_type == '*')
	817	{
	818	fwrite($xml_out, '<BulletList>' . "\n");
	819	}
	820	else
	821	{
	822	fwrite($xml_out, '<NumberedList>' . "\n");
	823	}
	824	$in_list = true;
	825	array_push($lists, $list_type);
	826	}
	827	// - this bullet is at the same depth as previous - close the previous
	828	// point
	829	elseif ($depth == $list_depth)
	830	{
	831	$previous_list_type = end($lists);
	832	if ($previous_list_type == '*')
	833	{
	834	fwrite($xml_out, '</Bullet>' . "\n");
	835	}
	836	else
	837	{
	838	fwrite($xml_out, '</NumberedItem>' . "\n");
	839	}
	840	// - we don't match in type anymore... close the previous list and open
	841	// a new list of the appropriate type
	842	if ($list_type != $previous_list_type)
	843	{
	844	if ($previous_list_type == '*')
	845	{
	846	fwrite($xml_out, '</BulletList>' . "\n");
	847	fwrite($xml_out, '<NumberedList>' . "\n");
	848	}
	849	else
	850	{
	851	fwrite($xml_out, '</NumberedNumbered>' . "\n");
	852	fwrite($xml_out, '<BulletList>' . "\n");
	853	}
	854	array_pop($lists);
	855	array_push($lists, $list_type);
	856	}
	857	}
	858	else
	859	{
	860	// - we have either got deeper...
	861	if ($depth > $list_depth)
	862	{
	863	if ($list_type == '*')
	864	{
	865	fwrite($xml_out, '<BulletList>' . "\n");
	866	}
	867	else
	868	{
	869	fwrite($xml_out, '<NumberedList>' . "\n");
	870	}
	871	array_push($lists, $list_type);
	872	}
	873	// ... or shallower in the bullet listing
	874	if ($depth < $list_depth)
	875	{
	876	$previous_list_type = array_pop($lists);
	877	if ($previous_list_type == '*')
	878	{
	879	fwrite($xml_out, '</Bullet>' . "\n");
	880	fwrite($xml_out, '</BulletList>' . "\n");
	881	}
	882	else
	883	{
	884	fwrite($xml_out, '</NumberedItem>' . "\n");
	885	fwrite($xml_out, '</NumberedList>' . "\n");
	886	}
	887	// - we still have to close the last item too
	888	$previous_listitem_type = end($lists);
	889	if ($previous_listitem_type == '*')
	890	{
	891	fwrite($xml_out, '</Bullet>' . "\n");
	892	}
	893	else
	894	{
	895	fwrite($xml_out, '</NumberedItem>' . "\n");
	896	}
	897	}
	898	}
	899	if ($list_type == '*')
	900	{
	901	fwrite($xml_out, '<Bullet>' . "\n");
	902	}
	903	else
	904	{
	905	fwrite($xml_out, '<NumberedItem>' . "\n");
	906	}
	907	// Special Case: bullets that contain (start) a code block
	908	if (preg_match('/^(.)<code>\s$/', $list_text, $matches))
	909	{
	910	$list_text = $matches[1];
	911	$in_code_block = true;
	912	}
	913
	914	outputTextBlock($xml_out, $list_text);
	915
	916	// - to make things clearer, we'll process any and all code blocks within
	917	// bullets here - especially as there may be more text block after
	918	// the code block finishes
	919	if ($in_code_block)
	920	{
	921	$sub_line = '';
	922	while ($in_code_block && ($sub_line = fgets($page_in)) !== false)
	923	{
	924	$sub_line = trim($sub_line);
	925	// - closing code
	926	if (preg_match('/^<\/code>(.*)$/', $sub_line, $matches))
	927	{
	928	$sub_line = $matches[1]; // may be empty string
	929	$in_code_block = false;
	930	}
	931	// - output another plain codeline
	932	else
	933	{
	934	fwrite($xml_out, '<CodeLine>' . $sub_line . "</CodeLine>\n");
	935	$sub_line = '';
	936	}
	937	}
	938	// - if sub_line still has anything in it, then add that content as a
	939	// text block
	940	if (!empty($sub_line))
	941	{
	942	outputTextBlock($xml_out, $sub_line);
	943	}
	944	}
	945	}
	946	// - images start with an image caption 'element'
[30117]	947	elseif (preg_match('/<imgcaption\s+figure_([a-z0-9_\-]+)\\|(.+)>([^<]*?)<\/imgcaption>/', $line, $matches))
[25026]	948	{
	949	$figure_id = $matches[1];
	950	$figure_title = $matches[2];
[30117]	951	$image_content = $matches[3];
[25026]	952	// - watch for the special withLineNumber flag
	953	$class_attribute = '';
	954	if (strpos($figure_title, '%!-- withLineNumber --%') != false)
	955	{
	956	$class_attribute = ' class="withLineNumber"';
	957	$figure_title = str_replace('%!-- withLineNumber --%','',$figure_title);
	958	}
	959	fwrite($xml_out, '<Figure id="' . $figure_id . '"' . $class_attribute . '>' . "\n");
	960	echo '[figure: ' . $figure_id . "] \n";
	961	fwrite($xml_out, '<Title>' . "\n");
	962	// - decode any comments in the title (used to store explicit id
	963	// information)
	964	$figure_title = str_replace('%!--', '<!--', $figure_title);
	965	$figure_title = str_replace('--%', '-->', $figure_title);
	966	// - special case: the title may have a subtitle (as a prefix)
	967	$figure_subtitle_id = '';
	968	$figure_subtitle = '';
	969	// - subtitle with explicit id
	970	if (preg_match('/^(<!-- id:.+? -->$[a-z]$)\s(.)$/', $figure_title, $matches))
	971	{
	972	$figure_subtitle = $matches[1];
	973	$figure_title = $matches[2];
	974	}
	975	// - subtitle without explicit id
	976	else if (preg_match('/^($[a-z]$)\s(.)$/', $figure_title, $matches))
	977	{
	978	$figure_subtitle = $matches[1];
	979	$figure_title = $matches[2];
	980	}
	981	outputTextBlock($xml_out, $figure_title);
	982	if (!empty($figure_subtitle))
	983	{
	984	fwrite($xml_out, '<SubTitle>' . "\n");
	985	outputTextBlock($xml_out, $figure_subtitle);
	986	fwrite($xml_out, '</SubTitle>' . "\n");
	987	}
	988	fwrite($xml_out, '</Title>' . "\n");
[30117]	989	// Try and find the image itself
	990	if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $image_content))
	991	{
	992	processImage($xml_out, $line);
	993	fwrite($xml_out, '</Figure>' . "\n");
	994	}
	995	// Didn't find an image? Weird, but mark the imgcaption as open, and
	996	// we'll chomp up the next image found as the content.
	997	else
	998	{
	999	$in_figure = true;
	1000	}
[25026]	1001	// - record the id to prevent repeating
	1002	$seen_ids[$figure_id] = true;
	1003	}
	1004	// - tables start with a table caption 'element'
	1005	elseif (preg_match('/<tblcaption\s+table_([a-z0-9_\-]+)\\|([^>]+)>\s*<\/tblcaption>/', $line, $matches))
	1006	{
	1007	$table_id = $matches[1];
	1008	$table_title = $matches[2];
	1009	if ($table_title == '##NOCAPTION##')
	1010	{
	1011	echo '[non-captioned table: ' . $table_id . "] \n";
	1012	// - watch for autogenerated ids... no point in outputting them
	1013	if (preg_match('/^table(_\d+)?$/', $table_id))
	1014	{
	1015	fwrite($xml_out, "<Table>\n");
	1016	}
	1017	else
	1018	{
	1019	fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
	1020	}
	1021	fwrite($xml_out, '<Title/>' . "\n");
	1022	}
	1023	elseif ($table_title == '##HIDDEN##')
	1024	{
	1025	echo '[hidden table: ' . $table_id . "] \n";
	1026	// - watch for autogenerated ids... no point in outputting them
	1027	if (preg_match('/^table(_\d+)?$/', $table_id))
	1028	{
	1029	fwrite($xml_out, "<Table class=\"hidden\">\n");
	1030	}
	1031	else
	1032	{
	1033	fwrite($xml_out, '<Table class="hidden" id="' . $table_id . '">' . "\n");
	1034	}
	1035	fwrite($xml_out, '<Title/>' . "\n");
	1036	}
	1037	else
	1038	{
	1039	echo '[table: ' . $table_id . "] \n";
	1040	// - watch for autogenerated ids... no point in outputting them
	1041	if (preg_match('/^table(_\d+)?$/', $table_id))
	1042	{
	1043	fwrite($xml_out, "<Table>\n");
	1044	}
	1045	else
	1046	{
	1047	fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
	1048	}
	1049	fwrite($xml_out, '<Title>' . "\n");
	1050	outputTextBlock($xml_out, $table_title);
	1051	fwrite($xml_out, '</Title>' . "\n");
	1052	}
	1053	fwrite($xml_out, '<TableContent>' . "\n");
	1054	$in_table = true;
	1055	// - record the id to prevent repeating
	1056	$seen_ids[$table_id] = true;
	1057	}
	1058	// - the second line in a table should be it's column width values
	1059	elseif (preg_match('/\\|<\s-\s([0-9 ]+?)\s>\\|/', $line, $matches))
	1060	{
	1061	$column_widths = explode(' ', $matches[1]);
	1062	}
	1063	// - then every row will be made of a number of cells
	1064	elseif (preg_match('/^\\|(.*?)\\|$/', $line, $matches))
	1065	{
	1066	$row_content = $matches[1];
	1067	$cell_contents = preg_split('/(\s+\\|\|\\|\s+)/', $row_content);
	1068	fwrite($xml_out, '<tr>' . "\n");
	1069	foreach ($cell_contents as $index=>$cell_content)
	1070	{
	1071	$cell_content = trim($cell_content);
	1072	$th_text = '';
	1073	if (isset($column_widths[$index]))
	1074	{
	1075	$th_text = '<th width="' . $column_widths[$index] . '"';
	1076	}
	1077	else
	1078	{
	1079	$th_text = '<th';
	1080	}
	1081	// - if the cell would be empty, we use the shorthand
	1082	if (empty($cell_content))
	1083	{
	1084	$th_text .= '/>' . "\n";
	1085	fwrite($xml_out, $th_text);
	1086	}
	1087	else
	1088	{
	1089	$th_text .= '>' . "\n";
	1090	fwrite($xml_out, $th_text);
	1091
	1092	// GAH - this is proving harder than a hard thing thats hard.
	1093	// The issue is that the most straightforward way of fixing this,
	1094	// namely using explicit newlines (\\) in the dokuwiki txt causes
	1095	// lots a legitimately translated <br/> to also be split up. I
	1096	// think the only way forward would be to maybe extend the HTML
	1097	// Comment plugin to also respect and process <br/> tags. Then I
	1098	// can avoid transforming them, and use the \\ sentinel to
	1099	// separate multi-line table cells.
	1100	$cell_content_lines = explode('\\\\', $cell_content);
	1101	foreach ($cell_content_lines as $cell_content)
	1102	{
	1103	// - watch out, as the content may be an image
	1104	if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $cell_content))
	1105	{
	1106	processImage($xml_out, $cell_content);
	1107	}
	1108	elseif (preg_match('/\'\'(.*)\'\'/', $cell_content, $matches))
	1109	{
	1110	fwrite($xml_out, '<CodeLine>' . translateTableCodeline($matches[1]) . '</CodeLine>' . "\n");
	1111	}
	1112	// - anything else it text
	1113	else
	1114	{
	1115	outputTextBlock($xml_out, $cell_content);
	1116	}
	1117	}
	1118	fwrite($xml_out, '</th>' . "\n");
	1119	}
	1120	}
	1121	fwrite($xml_out, '</tr>' . "\n");
	1122	}
	1123	// - links to image media in the wiki!
	1124	elseif (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $line))
	1125	{
	1126	processImage($xml_out, $line);
	1127	// - if we were processing a figure, then now is a good time to close it
	1128	if ($in_figure)
	1129	{
	1130	fwrite($xml_out, '</Figure>' . "\n");
	1131	$in_figure = false;
	1132	}
	1133	}
	1134	// - if the line starts with a <code> block, then we have a tag
	1135	// for that (which is special in that it get a unique text id)
	1136	elseif (preg_match('/^<code\s\d\s>(.?)(<\/code>)?$/', $line, $matches) \|\| ($in_code_block && preg_match('/^(.*?)(<\/code>)?$/', $line, $matches)))
	1137	{
	1138	$payload = $matches[1];
	1139	$found_end = (isset($matches[2]));
	1140	$in_code_block = true;
	1141	// - be careful with empty lines
	1142	if (empty($payload))
	1143	{
	1144	// - as they may appear in the body of the code (in which case we need
	1145	// to output them). The empty lines at the start or end of a code
	1146	// block are just an unfortunate consequence of the support for code
	1147	// line numbering.
	1148	if (!$found_end && strpos($line, '<code') === false)
	1149	{
	1150	fwrite($xml_out, "<CodeLine/>\n");
	1151	}
	1152	}
	1153	elseif (preg_match('/^<!-- id:([^\s]+) -->/', $payload, $matches))
	1154	{
	1155	$text_id = $matches[1];
	1156	outputTextBlock($xml_out, $payload, 'code', true);
	1157	// - record the id to prevent repeating
	1158	$seen_ids[$text_id] = true;
	1159	}
	1160	else
	1161	{
	1162	fwrite($xml_out, '<CodeLine>' . translateText($payload, true) . '</CodeLine>' . "\n");
	1163	}
	1164	// - if we didn't find an endtag we have to keep doing code mode until
	1165	// we do
	1166	$in_code_block = (!$found_end);
	1167	if ($found_end)
	1168	{
	1169	// - if we were processing a figure, then now is a good time to close it
	1170	if ($in_figure)
	1171	{
	1172	fwrite($xml_out, '</Figure>' . "\n");
	1173	$in_figure = false;
	1174	}
	1175	}
	1176	}
	1177	// - entities on a line by themselves (i.e. references to external files)
	1178	// go through verbatim
	1179	elseif (preg_match('/^\s&[a-z0-9_-]+;\s$/', $line))
	1180	{
	1181	fwrite($xml_out, $line . "\n");
	1182	}
	1183	// - lines starting with > are indented text blocks
	1184	elseif (preg_match('/^>(.*)$/', $line, $matches))
	1185	{
	1186	$payload = $matches[1];
	1187	fwrite($xml_out, "<Indented>\n");
	1188	outputTextBlock($xml_out, $payload);
	1189	fwrite($xml_out, "</Indented>\n");
	1190	}
	1191	// - everything else goes straight through as a text block
	1192	// - note that for code blocks, even empty lines count
	1193	elseif (!empty($line))
	1194	{
	1195	// - output the line of text having encoded entities etc
	1196	outputTextBlock($xml_out, $line, '', $in_code_block);
	1197	}
	1198	}
	1199	// Complete any open part
	1200	if ($in_part)
	1201	{
	1202	fwrite($xml_out, '</Content>' . "\n");
	1203	fwrite($xml_out, '</Part>' . "\n");
	1204	$in_part = false;
	1205	}
	1206	// Complete any open subsection
	1207	if ($in_subsection)
	1208	{
	1209	fwrite($xml_out, '</Content>' . "\n");
	1210	fwrite($xml_out, '</Subsection>' . "\n");
	1211	$in_subsection = false;
	1212	}
	1213	// Complete any open section
	1214	if ($in_section)
	1215	{
	1216	fwrite($xml_out, '</Content>' . "\n");
	1217	fwrite($xml_out, '</Section>' . "\n");
	1218	$in_section = false;
	1219	}
	1220	// Complete any open chapter
	1221	if ($in_chapter)
	1222	{
	1223	fwrite($xml_out, '</Content>' . "\n");
	1224	fwrite($xml_out, '</Chapter>' . "\n");
	1225	$in_chapter = false;
	1226	}
	1227	}
	1228	/** processPage($xml_out, $page_name) **/
	1229
	/**
	 * Copy an image referenced in wiki text into the XML source tree and write
	 * the matching <File> element.
	 *
	 * Searches $text for a media link of the form {{...name?WIDTHxHEIGHT}}
	 * (optionally followed by "&direct"). When one is found, the image is copied
	 * from "<dokuwiki_path>/data/media/<l>/manuals/images" (using the lowercased
	 * file name) to "<xml_source_path>/<l>/images", where <l> is $_REQUEST['l'].
	 * A <File> element carrying the width, height and relative url is then
	 * written to $xml_out, whether or not the copy succeeded. Text without such
	 * a link is ignored and nothing is written.
	 *
	 * @param resource $xml_out Open, writable handle for the XML being generated.
	 * @param string   $text    Wiki text that may contain an image link.
	 * @return void
	 */
	function processImage($xml_out, $text)
	{
		global $dokuwiki_path;
		global $xml_source_path;

		// - nothing to do unless the text holds a sized image link
		if (!preg_match('/\{\{.+?([^:?]+)\?(\d+)x(\d+)(&direct)?\}\}/', $text, $matches))
		{
			return;
		}
		$filename = $matches[1];
		$width = $matches[2];
		$height = $matches[3];

		// - copy the file into place; the source name is lowercased while the
		//   destination keeps the spelling used in the wiki text
		$image_source_path = $dokuwiki_path . '/data/media/' . $_REQUEST['l'] . '/manuals/images/' . strtolower($filename);
		$image_destination_dir = $xml_source_path . '/' . $_REQUEST['l'] . '/images';
		mkAllDir($image_destination_dir);
		$image_destination_path = $image_destination_dir . '/' . $filename;
		if (copy($image_source_path, $image_destination_path))
		{
			echo '[copying file: ' . $filename . "] \n";
			chmod($image_destination_path, 0664);
		}
		else
		{
			printError('Failed to copy image into place: ' . $filename, false);
		}

		// - spit out the XML element. The file name comes straight from the wiki
		//   text, so escape the characters that would otherwise break the
		//   attribute value (width and height are digits only and need none).
		$xml_attribute_escapes = array('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;');
		$url = strtr('images/' . $filename, $xml_attribute_escapes);
		fwrite($xml_out, '<File width="' . $width . '" height="' . $height . '" url="' . $url . '"/>' . "\n");
	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: documentation/trunk/php/gs-manual-export.php@ 30117

Download in other formats: