Context Navigation

source: documentation/trunk/php/gs-manual-export.php@ 27507

Last change on this file since 27507 was 27507, checked in by jmt12, 11 years ago
Ensuring the downloadable versions of the XML exports are stored in a writable location, are now downloaded as archives (ZIP and TGZ) containing the XML and images, and clean up old files left lying around
File size: 42.7 KB

Rev	Line
<?php

require_once('common.php');
$debug = 1;

// There are some elements that are, in my opinion, incorrectly 'resolved' in
// code blocks just because they are valid HTML. For instance, &lt;i&gt; is
// resolved to <i> in code blocks, while something like &lt;Metadata&gt; is
// not. Set this to true to allow such abominations (for the purpose of
// comparing before and after versions of the XML). [jmt12]
$allow_bad_codeblocks = true;

/** @file gs-manual-export.php
 * This script transforms the series of dokuwiki pages that make up a certain
 * manual (as specified by the 'm' argument) in a certain language ('l') into
 * the XML format required by the rest of the Greenstone manual generation
 * scripts.
 *
 * Request arguments read by this script:
 *  - l: language code (defaults to 'en')
 *  - m: manual name (defaults to 'develop')
 *  - v: version; 'draft' uses the latest page text (defaults to 'draft')
 *  - a: action; 'download' or 'store' (defaults to 'store')
 */

// 0. Initialization

// - we have a counter to assign identifiers to text blocks etc without ids
$text_id_counter = 1;
// - we need an array of all the footnotes
$footnotes = array();

// Defaults
// - a request value that is missing, empty, or not a plain string (e.g. an
//   array from "?l[]=x") is replaced by the default so that the later
//   preg_match() and path building only ever see strings
if (empty($_REQUEST['l']) || !is_string($_REQUEST['l']))
{
  $_REQUEST['l'] = 'en';
}
if (empty($_REQUEST['m']) || !is_string($_REQUEST['m']))
{
  // - the other manuals accepted further down are 'user', 'install' and
  //   'paper'
  $_REQUEST['m'] = 'develop';
}
if (empty($_REQUEST['v']) || !is_string($_REQUEST['v']))
{
  $_REQUEST['v'] = 'draft';
}
// - the D modifier stops '$' from matching before a trailing newline
if (!isset($_REQUEST['a']) || !is_string($_REQUEST['a']) || !preg_match('/^(download|store)$/D', $_REQUEST['a']))
{
  $_REQUEST['a'] = 'store'; // Try to store the file to disk
}

// - $base_path is not defined in this file; presumably it comes from
//   common.php
$var_path = filecat(array($base_path, 'var'));
$timestamp = time();
//$xml_source_path = 'PATH TO GSDL MANUALS'
$xml_source_path = '/tmp';
if ($_REQUEST['a'] == 'download')
{
  // Clear out previous exports
  // NOTE(review): the meaning of the hard-coded second argument depends on
  // recursiveRemove(), which is defined elsewhere - confirm before changing
  recursiveRemove($var_path, '/greenstone/greenstone-documentation/php/var');
  // New export goes into a per-request (timestamped) directory under var
  $xml_source_path = fileCat(array($var_path, $timestamp));
}
	59
echo '<html>' . "\n";
echo '<head>' . "\n";
echo '<title>GS Manual Export</title>' . "\n";
echo '</head>' . "\n";
echo '<body>' . "\n";

// - validate arguments before we use them (security). Both values are later
//   used to build file paths, so the patterns are anchored with the D
//   modifier: without it '$' also matches just before a trailing newline and
//   a value such as "en\n" would be accepted.
if (!is_string($_REQUEST['m']) || !preg_match('/^(develop|install|paper|user)$/D', $_REQUEST['m']))
{
  printError('Unknown manual type requested: ' . htmlspecialchars(is_string($_REQUEST['m']) ? $_REQUEST['m'] : ''));
}

if (!is_string($_REQUEST['l']) || !preg_match('/^(ar|en|es|fr|pt-br|ru)$/D', $_REQUEST['l']))
{
  printError('Unknown language requested: ' . htmlspecialchars(is_string($_REQUEST['l']) ? $_REQUEST['l'] : ''));
}

echo '<h2>Generating Greenstone Manual XML</h2>' . "\n";
// - escaped on output as well, so this line stays safe even if printError()
//   (defined elsewhere) does not stop the script
echo '<p><b>Manual:</b> ' . htmlspecialchars($_REQUEST['m']) . ' <b>Language:</b> ' . htmlspecialchars($_REQUEST['l']) . "</p>\n<hr/>\n";
	// 1. Create the XML output file handle
	// - construct the path using the information we've been provided as arguments
	$xml_file_dir = $xml_source_path . '/' . $_REQUEST['l'];
	mkAllDir($xml_file_dir);
	$xml_file_path = $xml_file_dir . '/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml';
	// - backup any existing file
	if (file_exists($xml_file_path))
	{
	  $xml_backup_file_path = $xml_file_path . '.bak';
	  if (!rename($xml_file_path, $xml_backup_file_path))
	  {
	    printError('Failed to rename existing manual file for backup');
	  }
	}

	// - and create a handle to the new file
	$xml_out = fopen($xml_file_path, 'w');
	// - every later step writes to this handle, so report a failure here
	//   rather than letting each fwrite() fail on a false handle
	if (!$xml_out)
	{
	  printError('Failed to open manual file for writing');
	}
	96
	// 2. Read in the top level page - this will give configuration data for the
	//    manual and cover page, as well as specifying the order for the other
	//    pages in the manual
	echo "<p><b>Frontmatter:</b><br/>\n";
	// - by reading this page we hope to populate an array of metadata (field
	//   name => list of values), and also extract the sequence of other pages
	//   within this manual
	$cover_metadata = array();
	$pages_in_order = array();
	// - we now need to consider if the user has asked for a draft version (i.e.
	//   includes the latest version of pages regardless of approval) or if only
	//   the approved versions of pages should be included
	// - only necessary for english version of manual, as those are the only pages
	//   editable
	$top_page_path = '';
	if ($_REQUEST['v'] == 'draft' || $_REQUEST['l'] != 'en')
	{
	  // - again, we can construct the path to the top level page given the
	  //   arguments provided
	  $top_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt';
	}
	else
	{
	  $top_page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m']);
	}

	if (!file_exists($top_page_path))
	{
	  printError('Failed to locate top level page for manual');
	}
	// - we're going to open a handle to the file, then read it in line-by-line
	//   watching for the lines we are interested in (using pattern matching)
	$top_page_in = fopen($top_page_path, 'r');
	if (!$top_page_in)
	{
	  printError('Failed to open top level page for reading');
	}
	$in_contents = false;
	while (($line = fgets($top_page_in)) !== false)
	{
	  // - if we are capturing page order, and we encounter something that looks
	  //   like a bulletpoint item pointing to a wiki page of this manual, i.e.
	  //     "  * [[.:<manual>:<page>|<label>]]"
	  //   then we append the name of that page to our pages in order array
	  if ($in_contents && preg_match('/^\s+\*\s+\[\[.:' . $_REQUEST['m'] . ':(.+?)\|(.*?)\]\]\s*$/', $line, $matches))
	  {
	    array_push($pages_in_order, $matches[1]);
	  }
	  // - metadata is all encoded within dokuwiki tables, one row per value:
	  //     "^ <field> | <value> |"
	  //   a field may appear on several rows, so values accumulate in a list
	  elseif (preg_match('/^\^\s+([^\s]+)\s+\|\s+(.+?)\s+\|\s*$/', $line, $matches))
	  {
	    $field = $matches[1];
	    $value = $matches[2];
	    if (!isset($cover_metadata[$field]))
	    {
	      $cover_metadata[$field] = array();
	    }
	    array_push($cover_metadata[$field], $value);
	  }
	  // - watch for the heading 'Contents' to begin extracting page order
	  //   information; any other heading means we aren't capturing page order
	  //   (anymore)
	  elseif (preg_match('/^=+\s(.+)\s=+$/', $line, $matches))
	  {
	    $in_contents = ($matches[1] == 'Contents');
	  }
	}
	// - fgets() returns false both at end of file and on error; only the former
	//   is acceptable
	if (!feof($top_page_in))
	{
	  printError('Unexpected fgets() fail when reading top page');
	}
	fclose($top_page_in);
	// - every one of these fields must have been found on the top level page
	$required_metadata = array('Heading','Title','Affiliation','Version','Date');
	foreach ($required_metadata as $required_field)
	{
	  if (isset($cover_metadata[$required_field]))
	  {
	    continue;
	  }
	  printError('Missing required metadata: ' . $required_field);
	}
	// - with the metadata in hand, emit the XML prolog and the DOCTYPE (whose
	//   internal subset declares one entity per ENTITY metadata value),
	//   followed by the opening Manual element
	$xml_header = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
	$xml_header .= '<!DOCTYPE Manual [' . "\n";
	if (isset($cover_metadata['ENTITY']))
	{
	  foreach ($cover_metadata['ENTITY'] as $entity)
	  {
	    $xml_header .= "\t" . '<!ENTITY ' . $entity . '>' . "\n";
	  }
	}
	$xml_header .= ']>' . "\n";
	$xml_header .= '<Manual id="' . ucfirst($_REQUEST['m']) . '" lang="' . $_REQUEST['l'] . '">' . "\n";
	fwrite($xml_out, $xml_header);

	///cho "<p>[Debug] metadata: " . print_r($cover_metadata, true) . "</p>\n\n";

	// - the cover page: single-valued fields first, then the repeatable
	//   fields, then the version and date
	foreach (array('Heading', 'Title', 'Author', 'Affiliation') as $cover_field)
	{
	  outputMetadataSingle($xml_out, $cover_metadata, $cover_field);
	}
	foreach (array('SupplementaryText', 'Text', 'Comment') as $cover_field)
	{
	  outputMetadataMultiple($xml_out, $cover_metadata, $cover_field);
	}
	foreach (array('Version', 'Date') as $cover_field)
	{
	  outputMetadataSingle($xml_out, $cover_metadata, $cover_field);
	}
	211
	// 3. Export every page named in the contents list, in the listed order
	foreach ($pages_in_order as $page)
	{
	  processPage($xml_out, $page);
	}

	// 4. Append the list of footnotes gathered while exporting (if any); the
	//    array maps footnote text => footnote id
	if (count($footnotes) > 0)
	{
	  fwrite($xml_out, "<FootnoteList>\n");
	  foreach ($footnotes as $footnote=>$footnote_id)
	  {
	    ///cho '[debug] footnotes: (' . $footnote_id . ') ' . $footnote . '<br />'. "\n";
	    outputMetadataSingle($xml_out, $footnote, 'Footnote', $footnote_id);
	  }
	  fwrite($xml_out, "</FootnoteList>\n");
	}

	// 5. Close the root element and the file, then open up its permissions
	fwrite($xml_out, "</Manual>\n");
	fclose($xml_out);
	chmod($xml_file_path, 0664);
[25026]	234
	// 6. Complete!
	echo '<p><b>Complete!</b></p>' . "\n<hr/>\n";
	if ($_REQUEST['a'] == 'download')
	{
	  // Zip up the manual files
	  // - the archive paths are passed through escapeshellarg() rather than
	  //   wrapped in double quotes, so no character in the path can be
	  //   interpreted by the shell
	  $zip_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.zip';
	  $zip_path = fileCat(array($xml_source_path, $zip_file));
	  $zip_command = 'zip -r ' . escapeshellarg($zip_path) . ' . > /dev/null 2>&1';
	  //cho '<p><b>[DEBUG]</b> zip_command:' . $zip_command . '</p>';
	  $tgz_file = ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.tgz';
	  $tgz_path = fileCat(array($xml_source_path, $tgz_file));
	  $tgz_command = 'tar -czf ' . escapeshellarg($tgz_path) . ' * > /dev/null 2>&1';
	  //cho '<p><b>[DEBUG]</b> tgz_command:' . $tgz_command . '</p>';
	  // We need to move to the document folder so that archives have sensible
	  // paths. Both commands archive "whatever is in the current directory", so
	  // they must only run if the directory change actually succeeded.
	  $original_cwd = getcwd();
	  if (chdir($xml_file_dir))
	  {
	    system($zip_command);
	    system($tgz_command);
	    // Go back
	    if ($original_cwd !== false)
	    {
	      chdir($original_cwd);
	    }
	    // Links are ready
	    echo '<p>Download XML file plus images as: <a href="var/' . $timestamp . '/' . $zip_file . '">ZIP</a> or <a href="var/' . $timestamp . '/' . $tgz_file . '">TGZ</a></p>' . "\n";
	  }
	  else
	  {
	    printError('Failed to change to the manual directory to create archives');
	  }
	}
	echo '<p>Click <a href="' . $dokuwiki_url . '/doku.php?id=' . $_REQUEST['l'] . ':manuals:' . $_REQUEST['m'] . '">here</a> to return to dokuwiki</p>' . "\n";
	echo '</body>' . "\n";
	echo '</html>';
	exit(0);
	262
	/**
	 * Write one metadata element, "<$field>...</$field>", to the XML output.
	 *
	 * A progress marker is echoed to the page, then the element is written with
	 * its content produced by outputTextBlock().
	 *
	 * @param resource     $xml_out  open handle of the XML file being written
	 * @param array|string $metadata either the metadata array (field name =>
	 *                               list of values), in which case the first
	 *                               value of $field is used, or the text itself
	 * @param string       $field    name of the element to write (and, for the
	 *                               array form, of the metadata field to read)
	 * @param mixed        $mid      if truthy, written as the element's id
	 *                               attribute
	 */
	function outputMetadataSingle($xml_out, $metadata, $field, $mid=false)
	{
	  echo '[metadata: ' . $field . "] \n";
	  if ($mid)
	  {
	    fwrite($xml_out, '<' . $field . ' id="' . $mid . '">' . "\n");
	  }
	  else
	  {
	    fwrite($xml_out, '<' . $field . '>' . "\n");
	  }
	  if (is_array($metadata))
	  {
	    if (isset($metadata[$field]) && isset($metadata[$field][0]))
	    {
	      outputTextBlock($xml_out, $metadata[$field][0]);
	    }
	    else
	    {
	      echo 'no such field or no metadata';
	    }
	  }
	  // - compare against the empty string instead of using empty(), which
	  //   would also discard the literal text "0" (array keys made of digits,
	  //   such as a footnote whose whole text is "0", arrive here as integers)
	  elseif (is_scalar($metadata) && (string) $metadata !== '')
	  {
	    outputTextBlock($xml_out, $metadata);
	  }
	  else
	  {
	    echo 'no such field or no metadata';
	  }
	  fwrite($xml_out, '</' . $field . '>' . "\n");
	}
	/** outputMetadataSingle() **/
	298
	/**
	 * Write a metadata element whose values are joined into one text block.
	 *
	 * A single value is written as is. Several values are joined with
	 * $separator; when $final_separator is given, it is used instead of
	 * $separator between the last two values (e.g. ', ' and ' and ').
	 *
	 * @param resource     $xml_out         open handle of the XML file
	 * @param array        $metadata        field name => list of values
	 * @param string       $field           field to read and element to write
	 * @param string       $separator       placed between values
	 * @param string|false $final_separator placed before the last value, or
	 *                                      false to use $separator throughout
	 */
	function outputMetadataList($xml_out, $metadata, $field, $separator = ',', $final_separator = false)
	{
	  echo '[metadata list: ' . $field . "] \n";
	  fwrite($xml_out, '<' . $field . '>' . "\n");
	  if (isset($metadata[$field]))
	  {
	    // - work on a copy of the value list; nothing is written for a field
	    //   that is present but has no values
	    $field_values = $metadata[$field];
	    $value_count = count($field_values);
	    if ($value_count == 1)
	    {
	      outputTextBlock($xml_out, $field_values[0]);
	    }
	    elseif ($value_count > 1)
	    {
	      if ($final_separator)
	      {
	        // - hold the last value back so it can be attached with the final
	        //   separator
	        $last_value = array_pop($field_values);
	        $values = implode($separator, $field_values) . $final_separator . $last_value;
	      }
	      else
	      {
	        $values = implode($separator, $field_values);
	      }
	      outputTextBlock($xml_out, $values);
	    }
	  }
	  else
	  {
	    echo 'no such field or no metadata';
	  }
	  fwrite($xml_out, '</' . $field . '>' . "\n");
	}
	/** outputMetadataList() **/
	331
	/**
	 * Write every value of a metadata field as its own text block.
	 *
	 * The blocks are wrapped in a "<$field>" element, except for the field
	 * 'Text' whose blocks are written without a wrapper.
	 *
	 * @param resource $xml_out  open handle of the XML file being written
	 * @param array    $metadata field name => list of values
	 * @param string   $field    field to read (and wrapper element to write)
	 */
	function outputMetadataMultiple($xml_out, $metadata, $field)
	{
	  echo '[metadata multiple: ' . $field . "] \n";
	  // - text blocks are already Text elements, so 'Text' gets no wrapper
	  $needs_wrapper = ($field != 'Text');
	  if ($needs_wrapper)
	  {
	    fwrite($xml_out, '<' . $field . '>' . "\n");
	  }
	  if (!isset($metadata[$field]))
	  {
	    echo 'no such field or no metadata';
	  }
	  else
	  {
	    foreach ($metadata[$field] as $block_text)
	    {
	      outputTextBlock($xml_out, $block_text);
	    }
	  }
	  if ($needs_wrapper)
	  {
	    fwrite($xml_out, '</' . $field . '>' . "\n");
	  }
	}
	356
	/**
	 * Encode a line of code from a table for XML, keeping hidden italics.
	 *
	 * Italic markers written as the HTML comments <!--i--> and <!--/i--> are
	 * swapped for placeholders before the line is passed to translateText()
	 * (in code block mode), and turned into real <i> and </i> tags afterwards.
	 *
	 * @param string $text the raw code line
	 * @return string the encoded line
	 */
	function translateTableCodeline($text)
	{
	  // Escape any italic tags hidden in HTML comments
	  $text = str_replace('<!--i-->', '%!--i--%', $text);
	  $text = str_replace('<!--/i-->', '%!--/i--%', $text);
	  // Encode entities etc
	  $text = translateText($text, true);
	  // Restore any italics elements hidden above
	  $text = str_replace('%!--i--%', '<i>', $text);
	  $text = str_replace('%!--/i--%', '</i>', $text);
	  return $text;
	}
	/** translateTableCodeline() **/
	370
	/**
	 * Encode the XML special characters &, < and > in a piece of text.
	 *
	 * When called for a code block while the global $allow_bad_codeblocks is
	 * true, the encoded forms of <i>, </i> and <br/> are turned back into real
	 * tags.
	 *
	 * @param string $text          the raw text
	 * @param bool   $in_code_block whether the text comes from a code block
	 * @return string the encoded text
	 */
	function translateText($text, $in_code_block=false)
	{
	  global $allow_bad_codeblocks;
	  // - the ampersand must be encoded first, otherwise the ampersands of
	  //   the entities produced below would be encoded a second time
	  $text = str_replace('&','&amp;',$text);
	  $text = str_replace('<','&lt;',$text);
	  $text = str_replace('>','&gt;',$text);
	  if ($in_code_block && $allow_bad_codeblocks)
	  {
	    ///cho "** [debug] restoring bogus decoded tags in: |$text| **\n";
	    $text = str_replace('&lt;i&gt;','<i>',$text);
	    $text = str_replace('&lt;/i&gt;','</i>',$text);
	    $text = str_replace('&lt;br/&gt;','<br/>',$text);
	  }
	  return $text;
	}
	386
	/**
	 * Convert one block of dokuwiki text to XML and write it as a Text element.
	 *
	 * In order, this: replaces ((footnotes)) with FootnoteRef elements and
	 * records them in the global $footnotes (footnote text => id); determines
	 * the id of the block (an explicit "<!-- id:N -->" prefix, otherwise the
	 * global $text_id_counter); removes HTML comments; encodes &, < and >;
	 * converts wiki links, figure/table references, italic, bold and underline
	 * markup to tags; and finally writes "<Text id=...>...</Text>".
	 *
	 * @param resource $xml_out       open handle of the XML file being written
	 * @param string   $text          the dokuwiki text
	 * @param string   $type          if not empty, written as the type attribute
	 * @param bool     $in_code_block whether the text comes from a code block;
	 *                                such text is not entity-encoded unless
	 *                                $type is 'code'
	 */
	function outputTextBlock($xml_out, $text, $type='', $in_code_block = false)
	{
	  global $cover_metadata;
	  global $text_id_counter;
	  global $footnotes;
	  global $allow_bad_codeblocks;

	  // - Start by dealing with any footnotes before anything else
	  while (preg_match('/\(\((.*?)\)\)/', $text, $matches))
	  {
	    $pattern = $matches[0];
	    $footnote = $matches[1];
	    // - a footnote text seen before keeps the id it was given; handing out
	    //   count + 1 again would later give a new footnote the same id
	    if (isset($footnotes[$footnote]))
	    {
	      $footnote_id = $footnotes[$footnote];
	    }
	    else
	    {
	      $footnote_id = count($footnotes) + 1;
	      $footnotes[$footnote] = $footnote_id;
	    }
	    // - note that we have to escape the footnote reference as the following
	    //   code will convert any < and > to entities...
	    $footnote_reference = '%FootnoteRef id="' . $footnote_id . '"/%';
	    $text = str_replace($pattern, $footnote_reference, $text);
	  }

	  $text_id = '';
	  // - check whether the string begins with an explicit id
	  if (preg_match('/^\s*<!--\s*id:(.+?)\s*-->(.*)$/', $text, $matches))
	  {
	    $text_id = $matches[1];
	    $text = $matches[2];
	    // - automatic numbering continues after an explicit numeric id
	    if (is_numeric($text_id))
	    {
	      $text_id_counter = $text_id + 1;
	    }
	  }
	  else
	  {
	    $text_id = $text_id_counter;
	    $text_id_counter++;
	  }

	  // - protect the special case of an HTML comment being actually displayed
	  //   in the text (a comment holding only whitespace and dots)
	  $text = preg_replace('/<!--([\s\.]+?)-->/','##lt##!--\1--##gt##',$text);

	  // - remove all remaining HTML comments
	  $text = preg_replace('/<!--.*?-->/', '', $text);

	  // we leave code blocks alone in terms of ampersands
	  if (!$in_code_block)
	  {
	    // - ampersands aren't safe in XML...
	    $text = str_replace('&', '&amp;', $text);
	    // ...except for the entities that we have registered as metadata
	    if (isset($cover_metadata['ENTITY']))
	    {
	      foreach ($cover_metadata['ENTITY'] as $entity)
	      {
	        if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches))
	        {
	          $entity_name = $matches[1];
	          if ($entity_name != 'mdash')
	          {
	            $entity_character = html_entity_decode('&#'.$matches[2].';',ENT_NOQUOTES,'UTF-8');
	            // - undo the ampersand encoding for references to this entity
	            $text = str_replace('&amp;' . $entity_name . ';', '&' . $entity_name . ';', $text);
	            // - we also convert any characters that match the entity char
	            //   into the entity
	            $text = str_replace($entity_character, '&' . $entity_name . ';', $text);
	          }
	        }
	      }
	    }
	    // - protect <br/> tags
	    $text = str_replace('<br/>','%%br/%%',$text);
	    // - encoding all of the < and > that appear in the text (rather than
	    //   true html formatting)
	    $text = str_replace('<','&lt;',$text);
	    $text = str_replace('>','&gt;',$text);
	    // - restore <br/> tags
	    $text = str_replace('%%br/%%','<br/>',$text);
	  }
	  else if ($type == 'code')
	  {
	    $text = str_replace('<','&lt;',$text);
	    $text = str_replace('>','&gt;',$text);
	  }

	  // - links, oh how I hate thee
	  // - external links are slightly easier
	  $text = preg_replace('/\[\[http:\/\/(.*?)\|(.*?)\]\]/', '<Link url="http://\1">\2</Link>', $text);
	  // - internals have to become the horrible <CrossRef> tags
	  $text = preg_replace('/\[\[\.\:(.*?)\|[^\]]+\]\]/','<CrossRef target="Chapter" ref="\1"/>', $text);
	  // - internal links starting with hash must be on the same page; the
	  //   longest hash prefix has to be tried first
	  $text = preg_replace('/\[\[###(.*?)\|.*?\]\]/','<CrossRef target="Part" ref="\1"/>', $text);
	  $text = preg_replace('/\[\[##(.*?)\|.*?\]\]/','<CrossRef target="Subsection" ref="\1"/>', $text);
	  $text = preg_replace('/\[\[#(.*?)\|.*?\]\]/','<CrossRef target="Section" ref="\1"/>', $text);
	  // - 'external' internal wiki links are even worst - since we can't know
	  //   what the page order number for another manual's chapters might be, we
	  //   instead use a search. The ampersand is accepted both raw and in the
	  //   encoded form produced above.
	  $text = preg_replace('/\[\[\?do\=search\&(?:amp;)?id\=([^\s]+)\s+@([a-z]+):manuals:([a-z]+)\|.*?\]\]/i', '<CrossRef external="\3" lang="\2" target="Chapter" ref="\1"/>', $text);
	  // - references to images and tables (the angle brackets may or may not
	  //   have been encoded by now)
	  $text = preg_replace('/(?:<|&lt;)imgref\sfigure_(.+?)(?:>|&gt;)/','<CrossRef target="Figure" ref="\1"/>', $text);
	  $text = preg_replace('/(?:<|&lt;)tblref\stable_(.+?)(?:>|&gt;)/','<CrossRef target="Table" ref="\1"/>', $text);
	  // - explicitly convert URLs as they are a bit messy. These patterns
	  //   require whitespace between the markup and the URL, which keeps the
	  //   protocol-less patterns from matching the // inside "http://"
	  // - first all the cases of URLs in italics, without protocol
	  $text = preg_replace('/\/\/\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
	  $text = preg_replace('/\/\/\s([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
	  $text = preg_replace('/\/\/\s(localhost(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
	  // - now all the protocol ones (with care taken to protect // in protocol)
	  $text = preg_replace('/\/\/\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
	  $text = preg_replace('/\/\/\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
	  $text = preg_replace('/\/\/\shttp:\/\/(localhost(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
	  // - next we have the underlined URLs sans protocols
	  $text = preg_replace('/__\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
	  $text = preg_replace('/__\s([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
	  $text = preg_replace('/__\s(localhost(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
	  // - and finally the protocol prefixed underlined URLs
	  $text = preg_replace('/__\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
	  $text = preg_replace('/__\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
	  $text = preg_replace('/__\shttp:\/\/(localhost(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
	  // - lets also protect any other protocols we find floating around
	  $text = preg_replace('/(file|ftp|http):\/\//i', '\1:##DOUBLESLASH##', $text);

	  // - italic formatting (taking care of protected double slashes)
	  $text = preg_replace('/%%\/\/%%/', '##DOUBLESLASH##', $text);
	  $text = preg_replace('/\/{5}/', '<i>/</i>', $text); // another special case
	  $text = preg_replace('/\/\/(\/.+?)\s*\/\//', '<i>\1</i>', $text); // another special case
	  $text = preg_replace('/\/\/\s*(.+?\/)\/\//', '<i>\1</i>', $text); // another special case
	  $text = preg_replace('/\/\/\s*(.+?)\s*\/\//', '<i>\1</i>', $text);
	  $text = preg_replace('/##DOUBLESLASH##/', '//', $text);
	  // - bold formatting
	  $text = preg_replace('/\*\*([^"]+?)\*\*/', '<b>\1</b>', $text);
	  // - underline formatting
	  $text = preg_replace('/__([^"]+?)__/', '<u>\1</u>', $text);

	  // - decode certain entities in codeblock (just because they are valid
	  //   HTML, derp).
	  if ($in_code_block && $allow_bad_codeblocks)
	  {
	    ///cho "** [debug] restoring bogus decoded tags in: |$text| **\n";
	    $text = str_replace('&lt;i&gt;','<i>',$text);
	    $text = str_replace('&lt;/i&gt;','</i>',$text);
	    //$text = str_replace('&lt;br/&gt;','<br/>',$text);
	  }
	  // - restore protected entities
	  $text = preg_replace('/##(gt|lt)##/','&\1;',$text);
	  // - restore protected comment blocks
	  $text = str_replace('%!--', '<!--', $text);
	  $text = str_replace('--%', '-->', $text);
	  // - restore protected footnote refs
	  $text = preg_replace('/%FootnoteRef id="([^"]+)"\/%/', '<FootnoteRef id="\1"/>', $text);
	  // output the text block
	  $text = trim($text);
	  // - compare against the empty string: empty() would also treat the text
	  //   "0" as empty and silently drop it
	  if ($text === '')
	  {
	    fwrite($xml_out, '<Text id="' . $text_id . '"/>' . "\n");
	  }
	  else if (!empty($type))
	  {
	    fwrite($xml_out, '<Text type="' . $type . '" id="' . $text_id . '">' . $text . '</Text>' . "\n");
	  }
	  else
	  {
	    fwrite($xml_out, '<Text id="' . $text_id . '">' . $text . '</Text>' . "\n");
	  }
	}
	/** outputTextBlock($xml_out, $text) **/
	552
	553	/**
	554	*/
	555	function processPage($xml_out, $page_name)
	556	{
	557	global $dokuwiki_path;
	558	global $seen_ids;
	559	echo "</p>\n<p><b>Export Chapter:</b> " . $page_name . "<br/>\n";
	560	// - locate the page in question (taking into account if the user asked for a
	561	// draft version or an approved version of the manual)
	562	$page_path = '';
	563	if ($_REQUEST['v'] == 'draft' \|\| $_REQUEST['l'] != 'en')
	564	{
	565	$page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $page_name . '.txt';
	566	}
	567	else
	568	{
	569	$page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m'] . ':' . $page_name);
	570	}
	571	$page_in = @fopen($page_path, 'r');
	572	if (!$page_in)
	573	{
	574	printError('Failed to open page for reading:' . $page_name, false);
	575	return;
	576	}
	577	// - once again we read in line-by-line, but this time we are going to output
	578	// each line as we go through. We expect to encounter certain lines in a
	579	// predefined order, and should complain if we don't find what we expect.
	580	$in_chapter = false;
	581	$in_section = false;
	582	$in_subsection = false;
	583	$in_part = false;
	584	$in_list = false;
	585	$lists = array();
	586	$previous_listitem_type = '';
	587	$in_figure = false;
	588	$in_table = false;
	589	$column_widths = array();
	590	$in_code_block = false;
	591	while (($line = fgets($page_in)) !== false)
	592	{
	593	// remove newline character
	594	$line = preg_replace('/\r?\n$/','',$line);
	595	// - we need to know the 'depth' for the bulletpoint lists
	596	$depth = 0;
	597	while (strlen($line) > 2 && preg_match('/^\s+[\*\-]/', $line) && substr($line, 0, 2) == ' ')
	598	{
	599	$depth++;
	600	$line = substr($line, 2);
	601	}
	602	$first_character = substr($line, 0, 1);
	603	// - special case for the end of bullet lists
	604	if ($in_list && ($first_character != "*" && $first_character != "-"))
	605	{
	606	while (count($lists) > 0)
	607	{
	608	$list_type = array_pop($lists);
	609	if ($list_type == '*')
	610	{
	611	fwrite($xml_out, '</Bullet>' . "\n");
	612	fwrite($xml_out, '</BulletList>' . "\n");
	613	}
	614	else
	615	{
	616	fwrite($xml_out, '</NumberedItem>' . "\n");
	617	fwrite($xml_out, '</NumberedList>' . "\n");
	618	}
	619	}
	620	$in_list = false;
	621	}
	622	// - special case for the end of tables
	623	if ($in_table && $first_character != '^' && $first_character != '\|')
	624	{
	625	fwrite($xml_out, '</TableContent>' . "\n");
	626	fwrite($xml_out, '</Table>' . "\n");
	627	$in_table = false;
	628	}
	629	// - special cases for premature closing of sections, subsections and parts
	630	if (preg_match('/<!-- close:(section\|subsection\|part) -->/', $line, $matches))
	631	{
	632	// - we always try to do this (regardless of actual flag) as we must
	633	// always close the smallest 'granularity' first
	634	if ($in_part)
	635	{
	636	fwrite($xml_out, '</Content>' . "\n");
	637	fwrite($xml_out, '</Part>' . "\n");
	638	$in_part = false;
	639	}
	640	if ($in_subsection && ($matches[1] == 'section' \|\| $matches[1] == 'subsection'))
	641	{
	642	fwrite($xml_out, '</Content>' . "\n");
	643	fwrite($xml_out, '</Subsection>' . "\n");
	644	$in_subsection = false;
	645	}
	646	if ($in_section && $matches[1] == 'section')
	647	{
	648	fwrite($xml_out, '</Content>' . "\n");
	649	fwrite($xml_out, '</Section>' . "\n");
	650	$in_section = false;
	651	}
	652	}
	653
	654	// - if this page is a chapter, then the first thing on the page should be
	655	// the chapter title (six equals)
	656	if (preg_match('/====== (.+) ======/', $line, $matches))
	657	{
	658	$chapter_title = $matches[1];
	659	$chapter_id = $page_name;
	660	if (empty($chapter_id))
	661	{
	662	$chapter_id = generateID($chapter_title);
	663	}
	664	// - are we already processing a part? if so end it, end it now
	665	if ($in_part)
	666	{
	667	fwrite($xml_out, '</Content>' . "\n");
	668	fwrite($xml_out, '</Part>' . "\n");
	669	$in_part = false;
	670	}
	671	// - are we already processing a subsection? if so end it, end it now
	672	if ($in_subsection)
	673	{
	674	fwrite($xml_out, '</Content>' . "\n");
	675	fwrite($xml_out, '</Subsection>' . "\n");
	676	$in_subsection = false;
	677	}
	678	// - are we already processing a section? if so end it, end it now
	679	if ($in_section)
	680	{
	681	fwrite($xml_out, '</Content>' . "\n");
	682	fwrite($xml_out, '</Section>' . "\n");
	683	$in_section = false;
	684	}
	685	// - are we already processing a chapter? if so end it, end it now
	686	if ($in_chapter)
	687	{
	688	fwrite($xml_out, '</Content>' . "\n");
	689	fwrite($xml_out, '</Chapter>' . "\n");
	690	$in_chapter = false;
	691	}
	692	// - write out this chapter's header
	693	fwrite($xml_out, '<Chapter id="' . $chapter_id . '">' . "\n");
	694	outputMetadataSingle($xml_out, $chapter_title, 'Title');
	695	fwrite($xml_out, '<Content>' . "\n");
	696	$in_chapter = true;
	697	}
	698	// - the next likely thing to encounter is a section heading (five equals)
	699	elseif (preg_match('/=====\s+(.+)\s+=====/', $line, $matches))
	700	{
	701	$section_title = $matches[1];
	702	// - check for explicit section id
	703	$section_id = '';
	704	if (preg_match('/<!-- sid:(.+?) -->(.*)/', $section_title, $matches))
	705	{
	706	$section_id = $matches[1];
	707	$section_title = $matches[2];
	708	}
	709	if (empty($section_id))
	710	{
	711	$section_id = generateID($section_title);
	712	}
	713	// - are we already processing a part? if so end it, end it now
	714	if ($in_part)
	715	{
	716	fwrite($xml_out, '</Content>' . "\n");
	717	fwrite($xml_out, '</Part>' . "\n");
	718	$in_part = false;
	719	}
	720	// - are we already processing a subsection? if so end it, end it now
	721	if ($in_subsection)
	722	{
	723	fwrite($xml_out, '</Content>' . "\n");
	724	fwrite($xml_out, '</Subsection>' . "\n");
	725	$in_subsection = false;
	726	}
	727	// - are we already processing a section? if so end it, end it now
	728	if ($in_section)
	729	{
	730	fwrite($xml_out, '</Content>' . "\n");
	731	fwrite($xml_out, '</Section>' . "\n");
	732	$in_section = false;
	733	}
	734	// - write out this section's header
	735	fwrite($xml_out, '<Section id="' . $section_id . '">' . "\n");
	736	outputMetadataSingle($xml_out, $section_title, 'Title');
	737	fwrite($xml_out, '<Content>' . "\n");
	738	$in_section = true;
	739	}
	740	// - similar for subsection heading (four equals)
	741	elseif (preg_match('/==== (.+) ====/', $line, $matches))
	742	{
	743	$subsection_title = $matches[1];
	744	// - check for explicit subsection id
	745	$subsection_id = '';
	746	if (preg_match('/<!-- sid:(.+?) -->(.*)/', $subsection_title, $matches))
	747	{
	748	$subsection_id = $matches[1];
	749	$subsection_title = $matches[2];
	750	}
	751	if (empty($subsection_id))
	752	{
	753	$subsection_id = generateID($subsection_title);
	754	}
	755	// - are we already processing a part? if so end it, end it now
	756	if ($in_part)
	757	{
	758	fwrite($xml_out, '</Content>' . "\n");
	759	fwrite($xml_out, '</Part>' . "\n");
	760	$in_part = false;
	761	}
	762	// - are we already processing a subsection? if so end it, end it now
	763	if ($in_subsection)
	764	{
	765	fwrite($xml_out, '</Content>' . "\n");
	766	fwrite($xml_out, '</Subsection>' . "\n");
	767	$in_subsection = false;
	768	}
	769	// - write out this subsection's header
	770	fwrite($xml_out, '<Subsection id="' . $subsection_id . '">' . "\n");
	771	outputMetadataSingle($xml_out, $subsection_title, 'Title');
	772	fwrite($xml_out, '<Content>' . "\n");
	773	$in_subsection = true;
	774	}
	775	// - and part heading (three equals)
	776	elseif (preg_match('/=== (.+) ===/', $line, $matches))
	777	{
	778	$part_title = $matches[1];
	779	// - check for explicit part id
	780	$part_id = '';
	781	if (preg_match('/<!-- sid:(.+?) -->(.*)/', $part_title, $matches))
	782	{
	783	$part_id = $matches[1];
	784	$part_title = $matches[2];
	785	}
	786	if (empty($part_id))
	787	{
	788	$part_id = generateID($part_title);
	789	}
	790	// - are we already processing a part? if so end it, end it now
	791	if ($in_part)
	792	{
	793	fwrite($xml_out, '</Content>' . "\n");
	794	fwrite($xml_out, '</Part>' . "\n");
	795	$in_part = false;
	796	}
	797	// - write out this part's header
	798	fwrite($xml_out, '<Part id="' . $part_id . '">' . "\n");
	799	outputMetadataSingle($xml_out, '//' . $part_title . '//', 'Title');
	800	fwrite($xml_out, '<Content>' . "\n");
	801	$in_part = true;
	802	}
	803	// - Ignore 5th level heading - they are only used to allow more convenient
	804	// editing of figures and tables
	805	elseif (preg_match('/== (.+) ==/', $line, $matches))
	806	{
	807	}
	808	// - lists need special handling
	809	elseif (preg_match('/^(\\|\-)\s+(.)/', $line, $matches))
	810	{
	811	$list_type = $matches[1];
	812	$list_text = $matches[2];
	813	$list_depth = count($lists);
	814	if (!$in_list)
	815	{
	816	if ($list_type == '*')
	817	{
	818	fwrite($xml_out, '<BulletList>' . "\n");
	819	}
	820	else
	821	{
	822	fwrite($xml_out, '<NumberedList>' . "\n");
	823	}
	824	$in_list = true;
	825	array_push($lists, $list_type);
	826	}
	827	// - this bullet is at the same depth as previous - close the previous
	828	// point
	829	elseif ($depth == $list_depth)
	830	{
	831	$previous_list_type = end($lists);
	832	if ($previous_list_type == '*')
	833	{
	834	fwrite($xml_out, '</Bullet>' . "\n");
	835	}
	836	else
	837	{
	838	fwrite($xml_out, '</NumberedItem>' . "\n");
	839	}
	840	// - we don't match in type anymore... close the previous list and open
	841	// a new list of the appropriate type
	842	if ($list_type != $previous_list_type)
	843	{
	844	if ($previous_list_type == '*')
	845	{
	846	fwrite($xml_out, '</BulletList>' . "\n");
	847	fwrite($xml_out, '<NumberedList>' . "\n");
	848	}
	849	else
	850	{
	851	fwrite($xml_out, '</NumberedNumbered>' . "\n");
	852	fwrite($xml_out, '<BulletList>' . "\n");
	853	}
	854	array_pop($lists);
	855	array_push($lists, $list_type);
	856	}
	857	}
	858	else
	859	{
	860	// - we have either got deeper...
	861	if ($depth > $list_depth)
	862	{
	863	if ($list_type == '*')
	864	{
	865	fwrite($xml_out, '<BulletList>' . "\n");
	866	}
	867	else
	868	{
	869	fwrite($xml_out, '<NumberedList>' . "\n");
	870	}
	871	array_push($lists, $list_type);
	872	}
	873	// ... or shallower in the bullet listing
	874	if ($depth < $list_depth)
	875	{
	876	$previous_list_type = array_pop($lists);
	877	if ($previous_list_type == '*')
	878	{
	879	fwrite($xml_out, '</Bullet>' . "\n");
	880	fwrite($xml_out, '</BulletList>' . "\n");
	881	}
	882	else
	883	{
	884	fwrite($xml_out, '</NumberedItem>' . "\n");
	885	fwrite($xml_out, '</NumberedList>' . "\n");
	886	}
	887	// - we still have to close the last item too
	888	$previous_listitem_type = end($lists);
	889	if ($previous_listitem_type == '*')
	890	{
	891	fwrite($xml_out, '</Bullet>' . "\n");
	892	}
	893	else
	894	{
	895	fwrite($xml_out, '</NumberedItem>' . "\n");
	896	}
	897	}
	898	}
	899	if ($list_type == '*')
	900	{
	901	fwrite($xml_out, '<Bullet>' . "\n");
	902	}
	903	else
	904	{
	905	fwrite($xml_out, '<NumberedItem>' . "\n");
	906	}
	907	// Special Case: bullets that contain (start) a code block
	908	if (preg_match('/^(.)<code>\s$/', $list_text, $matches))
	909	{
	910	$list_text = $matches[1];
	911	$in_code_block = true;
	912	}
	913
	914	outputTextBlock($xml_out, $list_text);
	915
	916	// - to make things clearer, we'll process any and all code blocks within
	917	// bullets here - especially as there may be more text block after
	918	// the code block finishes
	919	if ($in_code_block)
	920	{
	921	$sub_line = '';
	922	while ($in_code_block && ($sub_line = fgets($page_in)) !== false)
	923	{
	924	$sub_line = trim($sub_line);
	925	// - closing code
	926	if (preg_match('/^<\/code>(.*)$/', $sub_line, $matches))
	927	{
	928	$sub_line = $matches[1]; // may be empty string
	929	$in_code_block = false;
	930	}
	931	// - output another plain codeline
	932	else
	933	{
	934	fwrite($xml_out, '<CodeLine>' . $sub_line . "</CodeLine>\n");
	935	$sub_line = '';
	936	}
	937	}
	938	// - if sub_line still has anything in it, then add that content as a
	939	// text block
	940	if (!empty($sub_line))
	941	{
	942	outputTextBlock($xml_out, $sub_line);
	943	}
	944	}
	945	}
	946	// - images start with an image caption 'element'
	947	elseif (preg_match('/<imgcaption\s+figure_([a-z0-9_\-]+)\\|(.+)>\s*<\/imgcaption>/', $line, $matches))
	948	{
	949	$figure_id = $matches[1];
	950	$figure_title = $matches[2];
	951	// - watch for the special withLineNumber flag
	952	$class_attribute = '';
	953	if (strpos($figure_title, '%!-- withLineNumber --%') != false)
	954	{
	955	$class_attribute = ' class="withLineNumber"';
	956	$figure_title = str_replace('%!-- withLineNumber --%','',$figure_title);
	957	}
	958	fwrite($xml_out, '<Figure id="' . $figure_id . '"' . $class_attribute . '>' . "\n");
	959	echo '[figure: ' . $figure_id . "] \n";
	960	fwrite($xml_out, '<Title>' . "\n");
	961	// - decode any comments in the title (used to store explicit id
	962	// information)
	963	$figure_title = str_replace('%!--', '<!--', $figure_title);
	964	$figure_title = str_replace('--%', '-->', $figure_title);
	965	// - special case: the title may have a subtitle (as a prefix)
	966	$figure_subtitle_id = '';
	967	$figure_subtitle = '';
	968	// - subtitle with explicit id
	969	if (preg_match('/^(<!-- id:.+? -->\([a-z]\))\s(.)$/', $figure_title, $matches))
	970	{
	971	$figure_subtitle = $matches[1];
	972	$figure_title = $matches[2];
	973	}
	974	// - subtitle without explicit id
	975	else if (preg_match('/^(\([a-z]\))\s(.)$/', $figure_title, $matches))
	976	{
	977	$figure_subtitle = $matches[1];
	978	$figure_title = $matches[2];
	979	}
	980	outputTextBlock($xml_out, $figure_title);
	981	if (!empty($figure_subtitle))
	982	{
	983	fwrite($xml_out, '<SubTitle>' . "\n");
	984	outputTextBlock($xml_out, $figure_subtitle);
	985	fwrite($xml_out, '</SubTitle>' . "\n");
	986	}
	987	fwrite($xml_out, '</Title>' . "\n");
	988	$in_figure = true;
	989	// - record the id to prevent repeating
	990	$seen_ids[$figure_id] = true;
	991	}
	992	// - tables start with a table caption 'element'
	993	elseif (preg_match('/<tblcaption\s+table_([a-z0-9_\-]+)\\|([^>]+)>\s*<\/tblcaption>/', $line, $matches))
	994	{
	995	$table_id = $matches[1];
	996	$table_title = $matches[2];
	997	if ($table_title == '##NOCAPTION##')
	998	{
	999	echo '[non-captioned table: ' . $table_id . "] \n";
	1000	// - watch for autogenerated ids... no point in outputting them
	1001	if (preg_match('/^table(_\d+)?$/', $table_id))
	1002	{
	1003	fwrite($xml_out, "<Table>\n");
	1004	}
	1005	else
	1006	{
	1007	fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
	1008	}
	1009	fwrite($xml_out, '<Title/>' . "\n");
	1010	}
	1011	elseif ($table_title == '##HIDDEN##')
	1012	{
	1013	echo '[hidden table: ' . $table_id . "] \n";
	1014	// - watch for autogenerated ids... no point in outputting them
	1015	if (preg_match('/^table(_\d+)?$/', $table_id))
	1016	{
	1017	fwrite($xml_out, "<Table class=\"hidden\">\n");
	1018	}
	1019	else
	1020	{
	1021	fwrite($xml_out, '<Table class="hidden" id="' . $table_id . '">' . "\n");
	1022	}
	1023	fwrite($xml_out, '<Title/>' . "\n");
	1024	}
	1025	else
	1026	{
	1027	echo '[table: ' . $table_id . "] \n";
	1028	// - watch for autogenerated ids... no point in outputting them
	1029	if (preg_match('/^table(_\d+)?$/', $table_id))
	1030	{
	1031	fwrite($xml_out, "<Table>\n");
	1032	}
	1033	else
	1034	{
	1035	fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
	1036	}
	1037	fwrite($xml_out, '<Title>' . "\n");
	1038	outputTextBlock($xml_out, $table_title);
	1039	fwrite($xml_out, '</Title>' . "\n");
	1040	}
	1041	fwrite($xml_out, '<TableContent>' . "\n");
	1042	$in_table = true;
	1043	// - record the id to prevent repeating
	1044	$seen_ids[$table_id] = true;
	1045	}
	1046	// - the second line in a table should be its column width values
	1047	elseif (preg_match('/\\|<\s-\s([0-9 ]+?)\s>\\|/', $line, $matches))
	1048	{
	1049	$column_widths = explode(' ', $matches[1]);
	1050	}
	1051	// - then every row will be made of a number of cells
	1052	elseif (preg_match('/^\\|(.*?)\\|$/', $line, $matches))
	1053	{
	1054	$row_content = $matches[1];
	1055	$cell_contents = preg_split('/(\s+\\|\|\\|\s+)/', $row_content);
	1056	fwrite($xml_out, '<tr>' . "\n");
	1057	foreach ($cell_contents as $index=>$cell_content)
	1058	{
	1059	$cell_content = trim($cell_content);
	1060	$th_text = '';
	1061	if (isset($column_widths[$index]))
	1062	{
	1063	$th_text = '<th width="' . $column_widths[$index] . '"';
	1064	}
	1065	else
	1066	{
	1067	$th_text = '<th';
	1068	}
	1069	// - if the cell would be empty, we use the shorthand
	1070	if (empty($cell_content))
	1071	{
	1072	$th_text .= '/>' . "\n";
	1073	fwrite($xml_out, $th_text);
	1074	}
	1075	else
	1076	{
	1077	$th_text .= '>' . "\n";
	1078	fwrite($xml_out, $th_text);
	1079
	1080	// GAH - this is proving harder than a hard thing thats hard.
	1081	// The issue is that the most straightforward way of fixing this,
	1082	// namely using explicit newlines (\\) in the dokuwiki txt causes
	1083	// lots a legitimately translated <br/> to also be split up. I
	1084	// think the only way forward would be to maybe extend the HTML
	1085	// Comment plugin to also respect and process <br/> tags. Then I
	1086	// can avoid transforming them, and use the \\ sentinel to
	1087	// separate multi-line table cells.
	1088	$cell_content_lines = explode('\\\\', $cell_content);
	1089	foreach ($cell_content_lines as $cell_content)
	1090	{
	1091	// - watch out, as the content may be an image
	1092	if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $cell_content))
	1093	{
	1094	processImage($xml_out, $cell_content);
	1095	}
	1096	elseif (preg_match('/\'\'(.*)\'\'/', $cell_content, $matches))
	1097	{
	1098	fwrite($xml_out, '<CodeLine>' . translateTableCodeline($matches[1]) . '</CodeLine>' . "\n");
	1099	}
	1100	// - anything else is text
	1101	else
	1102	{
	1103	outputTextBlock($xml_out, $cell_content);
	1104	}
	1105	}
	1106	fwrite($xml_out, '</th>' . "\n");
	1107	}
	1108	}
	1109	fwrite($xml_out, '</tr>' . "\n");
	1110	}
	1111	// - links to image media in the wiki!
	1112	elseif (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $line))
	1113	{
	1114	processImage($xml_out, $line);
	1115	// - if we were processing a figure, then now is a good time to close it
	1116	if ($in_figure)
	1117	{
	1118	fwrite($xml_out, '</Figure>' . "\n");
	1119	$in_figure = false;
	1120	}
	1121	}
	1122	// - if the line starts with a <code> block, then we have a tag
	1123	// for that (which is special in that it get a unique text id)
	1124	elseif (preg_match('/^<code\s\d\s>(.?)(<\/code>)?$/', $line, $matches) \|\| ($in_code_block && preg_match('/^(.*?)(<\/code>)?$/', $line, $matches)))
	1125	{
	1126	$payload = $matches[1];
	1127	$found_end = (isset($matches[2]));
	1128	$in_code_block = true;
	1129	// - be careful with empty lines
	1130	if (empty($payload))
	1131	{
	1132	// - as they may appear in the body of the code (in which case we need
	1133	// to output them). The empty lines at the start or end of a code
	1134	// block are just an unfortunate consequence of the support for code
	1135	// line numbering.
	1136	if (!$found_end && strpos($line, '<code') === false)
	1137	{
	1138	fwrite($xml_out, "<CodeLine/>\n");
	1139	}
	1140	}
	1141	elseif (preg_match('/^<!-- id:([^\s]+) -->/', $payload, $matches))
	1142	{
	1143	$text_id = $matches[1];
	1144	outputTextBlock($xml_out, $payload, 'code', true);
	1145	// - record the id to prevent repeating
	1146	$seen_ids[$text_id] = true;
	1147	}
	1148	else
	1149	{
	1150	fwrite($xml_out, '<CodeLine>' . translateText($payload, true) . '</CodeLine>' . "\n");
	1151	}
	1152	// - if we didn't find an endtag we have to keep doing code mode until
	1153	// we do
	1154	$in_code_block = (!$found_end);
	1155	if ($found_end)
	1156	{
	1157	// - if we were processing a figure, then now is a good time to close it
	1158	if ($in_figure)
	1159	{
	1160	fwrite($xml_out, '</Figure>' . "\n");
	1161	$in_figure = false;
	1162	}
	1163	}
	1164	}
	1165	// - entities on a line by themselves (i.e. references to external files)
	1166	// go through verbatim
	1167	elseif (preg_match('/^\s&[a-z0-9_-]+;\s$/', $line))
	1168	{
	1169	fwrite($xml_out, $line . "\n");
	1170	}
	1171	// - lines starting with > are indented text blocks
	1172	elseif (preg_match('/^>(.*)$/', $line, $matches))
	1173	{
	1174	$payload = $matches[1];
	1175	fwrite($xml_out, "<Indented>\n");
	1176	outputTextBlock($xml_out, $payload);
	1177	fwrite($xml_out, "</Indented>\n");
	1178	}
	1179	// - everything else goes straight through as a text block
	1180	// - note that for code blocks, even empty lines count
	1181	elseif (!empty($line))
	1182	{
	1183	// - output the line of text having encoded entities etc
	1184	outputTextBlock($xml_out, $line, '', $in_code_block);
	1185	}
	1186	}
	1187	// Complete any open part
	1188	if ($in_part)
	1189	{
	1190	fwrite($xml_out, '</Content>' . "\n");
	1191	fwrite($xml_out, '</Part>' . "\n");
	1192	$in_part = false;
	1193	}
	1194	// Complete any open subsection
	1195	if ($in_subsection)
	1196	{
	1197	fwrite($xml_out, '</Content>' . "\n");
	1198	fwrite($xml_out, '</Subsection>' . "\n");
	1199	$in_subsection = false;
	1200	}
	1201	// Complete any open section
	1202	if ($in_section)
	1203	{
	1204	fwrite($xml_out, '</Content>' . "\n");
	1205	fwrite($xml_out, '</Section>' . "\n");
	1206	$in_section = false;
	1207	}
	1208	// Complete any open chapter
	1209	if ($in_chapter)
	1210	{
	1211	fwrite($xml_out, '</Content>' . "\n");
	1212	fwrite($xml_out, '</Chapter>' . "\n");
	1213	$in_chapter = false;
	1214	}
	1215	}
	1216	/** processPage($xml_out, $page_name) **/
	1217
	/**
	 * Copy an image referenced from wiki text into the XML export's image
	 * directory and write the matching <File> element.
	 *
	 * The text is searched for a media link of the form
	 * {{...name?WIDTHxHEIGHT}} (optionally followed by "&direct"). If no such
	 * link is present, nothing is copied and nothing is written.
	 *
	 * The <File> element is written whether or not the copy succeeded; a failed
	 * copy is only reported through printError().
	 *
	 * @param resource $xml_out  stream the <File> element is written to
	 * @param string   $text     wiki text to search for the media link
	 */
	function processImage($xml_out, $text)
	{
	  global $dokuwiki_path;
	  global $xml_source_path;

	  // - nothing to do unless the text contains a sized media link
	  if (!preg_match('/\{\{.+?([^:?]+)\?(\d+)x(\d+)(&direct)?\}\}/', $text, $matches))
	  {
	    return;
	  }
	  $filename = $matches[1];
	  $width = $matches[2];
	  $height = $matches[3];

	  // - work out where the image lives and where it should end up
	  // NOTE(review): $_REQUEST['l'] is used verbatim in both paths - confirm it
	  // is validated before this function is reached
	  $language = $_REQUEST['l'];
	  // - the source name is lower-cased while the destination keeps the case
	  //   used in the wiki text (presumably the wiki stores media in lower case
	  //   - TODO confirm)
	  $image_source_path = $dokuwiki_path . '/data/media/' . $language . '/manuals/images/' . strtolower($filename);
	  $image_destination_dir = $xml_source_path . '/' . $language . '/images';
	  $image_destination_path = $image_destination_dir . '/' . $filename;
	  mkAllDir($image_destination_dir);

	  // - the filename pattern accepts directory separators, so refuse any name
	  //   that could make the copy read or write outside the image directories
	  $is_plain_name = (strpbrk($filename, '/\\') === false);

	  // - copy the file into place; checking for the source first avoids the
	  //   PHP warning copy() raises for a missing file (the failure is still
	  //   reported below)
	  if ($is_plain_name && is_file($image_source_path) && copy($image_source_path, $image_destination_path))
	  {
	    echo '[copying file: ' . $filename . "] \n";
	    chmod($image_destination_path, 0664);
	  }
	  else
	  {
	    // - second argument passed as in the rest of the file (presumably marks
	    //   the error as non-fatal - TODO confirm against printError)
	    printError('Failed to copy image into place: ' . $filename, false);
	  }

	  // - spit out the XML element; the filename is escaped so characters such
	  //   as & or " cannot break the attribute (width and height are digits
	  //   only, as guaranteed by the pattern above)
	  $url = 'images/' . htmlspecialchars($filename, ENT_QUOTES, 'UTF-8');
	  fwrite($xml_out, '<File width="' . $width . '" height="' . $height . '" url="' . $url . '"/>' . "\n");
	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: