<?php

require_once('common.php');
$debug = 1;

// Certain elements are (arguably wrongly) 'resolved' inside code blocks merely
// because they happen to be valid HTML: <i> turns into a real <i> element
// whereas something like <Metadata> does not. Leave this set to true to permit
// that behaviour, which is handy when comparing the XML generated before and
// after a change. [jmt12]
$allow_bad_codeblocks = true;

/** @file gs-manual-export.php
 * Converts the dokuwiki pages belonging to one manual (request argument 'm')
 * in one language (request argument 'l') into the XML format consumed by the
 * remaining Greenstone manual generation scripts.
 */

// 0. Initialization
$xml_source_path = '/research/jmt12/gsdl-docs/temp';
// - running counter used to label text blocks that carry no explicit id
$text_id_counter = 1;
// - registry of every footnote encountered during the export
$footnotes = array();

// Defaults - empty() also covers the "argument not supplied" case
// - language
if (empty($_REQUEST['l']))
{
  $_REQUEST['l'] = 'en';
}
// - manual (the other accepted values are 'user', 'install' and 'paper')
if (empty($_REQUEST['m']))
{
  $_REQUEST['m'] = 'develop';
}
// - version
if (empty($_REQUEST['v']))
{
  $_REQUEST['v'] = 'draft';
}
// - action: anything other than 'download' or 'store' falls back to trying to
//   store the file to disk
if (!(isset($_REQUEST['a']) && preg_match('/^(download|store)$/', $_REQUEST['a'])))
{
  $_REQUEST['a'] = 'store';
}

// Opening of the HTML progress report
echo "<html>\n<head>\n<title>GS Manual Export</title>\n</head>\n<body>\n";

// - validate arguments before we use them (security)
if (preg_match('/^(develop|install|paper|user)$/', $_REQUEST['m']) !== 1)
{
  printError('Unknown manual type requested: ' . htmlspecialchars($_REQUEST['m']));
}

if (preg_match('/^(ar|en|es|fr|pt-br|ru)$/', $_REQUEST['l']) !== 1)
{
  printError('Unknown language requested: ' . htmlspecialchars($_REQUEST['l']));
}
echo '<h2>Generating Greenstone Manual XML</h2>' . "\n";
// - the arguments were checked against a whitelist above, but escape them
//   anyway so the report stays safe even if printError() does not halt the
//   script (NOTE(review): printError() lives in common.php - confirm)
echo '<p><b>Manual:</b> ' . htmlspecialchars($_REQUEST['m']) . ' <b>Language:</b> ' . htmlspecialchars($_REQUEST['l']) . "</p>\n<hr/>\n";

// 1. Create the XML output file handle
$xml_file_path = '';
if ($_REQUEST['a'] == 'download')
{
  // - downloads are written beneath the 'var' directory of $base_path
  $xml_file_path = fileCat(array($base_path, 'var', ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml'));
}
else
{
  // - construct the path using the information we've been provided as arguments
  $xml_file_dir = $xml_source_path . '/' . $_REQUEST['l'];
  mkAllDir($xml_file_dir, 0755);
  $xml_file_path = $xml_file_dir . '/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml';
  // - backup any existing file
  if (file_exists($xml_file_path))
  {
    $xml_backup_file_path = $xml_file_path . '.bak';
    if (!rename($xml_file_path, $xml_backup_file_path))
    {
      printError('Failed to rename existing manual file for backup');
    }
  }
}
// - and create a handle to the new file
$xml_out = fopen($xml_file_path, 'w');
// - every later step writes to this handle, so report a failure to open it
//   in the same way the script reports a failure to open the top level page
if (!$xml_out)
{
  printError('Failed to open manual file for writing');
}
| 91 |
|
---|
// 2. Read in the top level page. It supplies the configuration data for the
//    manual and its cover page, and lists the remaining pages of the manual in
//    the order they should appear.
echo "<p><b>Frontmatter:</b><br/>\n";
// - filled in while reading: metadata values grouped by field name, and the
//   names of the other pages in sequence
$cover_metadata = array();
$pages_in_order = array();
// - a draft export takes the latest revision of each page regardless of
//   approval; otherwise only approved revisions are used. This distinction is
//   only made for the english manual, as those are the only editable pages.
if ($_REQUEST['v'] != 'draft' && $_REQUEST['l'] == 'en')
{
  $top_page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m']);
}
else
{
  // - the path to the latest revision follows directly from the arguments
  $top_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt';
}

if (!file_exists($top_page_path))
{
  printError('Failed to locate top level page for manual');
}
// - read the file line by line, pattern matching for the lines of interest
$top_page_in = fopen($top_page_path, 'r');
if (!$top_page_in)
{
  printError('Failed to open top level page for reading');
}
$in_contents = false;
while (($line = fgets($top_page_in)) !== false)
{
  // - inside the contents section a bulletpoint linking to a wiki page names
  //   the next page of the manual
  if ($in_contents && preg_match('/^\s+\*\s+\[\[.:' . $_REQUEST['m'] . ':(.+?)\|(.*?)\]\]\s*$/', $line, $matches))
  {
    $pages_in_order[] = $matches[1];
    continue;
  }
  // - metadata is all encoded within dokuwiki tables; a field may occur more
  //   than once, so its values are collected in order
  if (preg_match('/^\^\s+([^\s]+)\s+\|\s+(.+?)\s+\|\s*$/', $line, $matches))
  {
    $field = $matches[1];
    $value = $matches[2];
    $cover_metadata[$field][] = $value;
    continue;
  }
  // - the heading 'Contents' starts the capture of page order, and any other
  //   heading stops it again
  if (preg_match('/^=+\s(.+)\s=+$/', $line, $matches))
  {
    $in_contents = ($matches[1] == 'Contents');
  }
}
if (!feof($top_page_in))
{
  printError('Unexpected fgets() fail when reading top page');
}
fclose($top_page_in);
// - ensure we have the required metadata
foreach (array('Heading', 'Title', 'Affiliation', 'Version', 'Date') as $required_field)
{
  if (!isset($cover_metadata[$required_field]))
  {
    printError('Missing required metadata: ' . $required_field);
  }
}
// - with the metadata in hand, write the XML header and the cover page. The
//   recipe is fixed; only the elements that may repeat (SupplementaryText,
//   Text, Comment) need different handling.
fwrite($xml_out, '<?xml version="1.0" encoding="UTF-8"?>' . "\n" . '<!DOCTYPE Manual [' . "\n");
if (isset($cover_metadata['ENTITY']))
{
  // - one declaration per ENTITY row found in the top level page
  foreach ($cover_metadata['ENTITY'] as $entity)
  {
    fwrite($xml_out, "\t" . '<!ENTITY ' . $entity . '>' . "\n");
  }
}
fwrite($xml_out, ']>' . "\n");
fwrite($xml_out, '<Manual id="' . ucfirst($_REQUEST['m']) . '" lang="' . $_REQUEST['l'] . '">' . "\n");

// - cover page elements in output order; the flag marks fields that may repeat
$cover_layout = array(
  'Heading' => false,
  'Title' => false,
  'Author' => false,
  'Affiliation' => false,
  'SupplementaryText' => true,
  'Text' => true,
  'Comment' => true,
  'Version' => false,
  'Date' => false,
);
foreach ($cover_layout as $cover_field => $is_repeatable)
{
  if ($is_repeatable)
  {
    outputMetadataMultiple($xml_out, $cover_metadata, $cover_field);
  }
  else
  {
    outputMetadataSingle($xml_out, $cover_metadata, $cover_field);
  }
}

// 3. Process each page listed in the contents of the top level page in order
foreach ($pages_in_order as $page)
{
  processPage($xml_out, $page);
}

// 4. Output the list of footnotes (if any) - the registry maps footnote text
//    to the identifier used by the references in the body
if (count($footnotes) > 0)
{
  fwrite($xml_out, '<FootnoteList>'. "\n");
  foreach ($footnotes as $footnote => $footnote_id)
  {
    outputMetadataSingle($xml_out, $footnote, 'Footnote', $footnote_id);
  }
  fwrite($xml_out, '</FootnoteList>'. "\n");
}

// 5. Finalize and close the XML output
fwrite($xml_out, '</Manual>' . "\n");
fclose($xml_out);

// 6. Complete!
echo '<p><b>Complete!</b></p>' . "\n<hr/>\n";
if ($_REQUEST['a'] == 'download')
{
  echo '<p>Click <a href="var/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml">here</a> to download XML file</p>' . "\n";
}
echo '<p>Click <a href="' . $dokuwiki_url . '/doku.php?id=' . $_REQUEST['l'] . ':manuals:' . $_REQUEST['m'] . '">here</a> to return to dokuwiki</p>' . "\n";
echo "</body>\n</html>";
exit(0);
| 239 |
|
---|
/**
 * Write a single-valued element <$field>...</$field> to the XML output.
 *
 * @param resource     $xml_out  handle the XML is written to
 * @param array|string $metadata either an array of value lists keyed by field
 *                               name (the first value of $field is written),
 *                               or a plain value that is written as is
 * @param string       $field    name of the element to emit
 * @param mixed        $mid      optional value for the element's id attribute
 *
 * A progress note is echoed for every call; when there is nothing to write
 * the element is still emitted, just without a text block inside it.
 */
function outputMetadataSingle($xml_out, $metadata, $field, $mid=false)
{
  echo '[metadata: ' . $field . "] \n";

  // - opening tag, carrying the id attribute only when one was supplied
  $id_attribute = $mid ? ' id="' . $mid . '"' : '';
  fwrite($xml_out, '<' . $field . $id_attribute . '>' . "\n");

  // - work out which value (if any) goes inside the element
  if (is_array($metadata))
  {
    $has_value = isset($metadata[$field][0]);
    $content = $has_value ? $metadata[$field][0] : '';
  }
  else
  {
    $has_value = !empty($metadata);
    $content = $metadata;
  }

  if ($has_value)
  {
    outputTextBlock($xml_out, $content);
  }
  else
  {
    echo 'no such field or no metadata';
  }

  fwrite($xml_out, '</' . $field . '>' . "\n");
}
/** outputMetadataSingle() **/
| 275 |
|
---|
/**
 * Write <$field>...</$field> containing all values of a field joined into a
 * single text block.
 *
 * @param resource     $xml_out         handle the XML is written to
 * @param array        $metadata        value lists keyed by field name
 * @param string       $field           name of the field / element
 * @param string       $separator       placed between consecutive values
 * @param string|false $final_separator when set, placed before the last value
 *                                      instead of $separator
 *
 * A lone value is written unchanged; an empty value list writes nothing
 * between the tags.
 */
function outputMetadataList($xml_out, $metadata, $field, $separator = ',', $final_separator = false)
{
  echo '[metadata list: ' . $field . "] \n";
  fwrite($xml_out, '<' . $field . '>' . "\n");
  if (!isset($metadata[$field]))
  {
    echo 'no such field or no metadata';
  }
  else
  {
    $entries = $metadata[$field];
    $entry_count = count($entries);
    if ($entry_count == 1)
    {
      outputTextBlock($xml_out, $entries[0]);
    }
    elseif ($entry_count > 1)
    {
      if ($final_separator)
      {
        // - hold the last value back so it can be attached with its own
        //   separator
        $trailing_entry = array_pop($entries);
        $joined = implode($separator, $entries) . $final_separator . $trailing_entry;
      }
      else
      {
        $joined = implode($separator, $entries);
      }
      outputTextBlock($xml_out, $joined);
    }
  }
  fwrite($xml_out, '</' . $field . '>' . "\n");
}
/** outputMetadataList() **/
| 308 |
|
---|
/**
 * Write every value of a repeatable field as its own text block.
 *
 * @param resource $xml_out  handle the XML is written to
 * @param array    $metadata value lists keyed by field name
 * @param string   $field    name of the field; all fields except 'Text' are
 *                           wrapped in an element of the same name
 */
function outputMetadataMultiple($xml_out, $metadata, $field)
{
  echo '[metadata multiple: ' . $field . "] \n";
  // - text blocks already produce Text elements, so 'Text' gets no wrapper
  $needs_wrapper = ($field != 'Text');
  if ($needs_wrapper)
  {
    fwrite($xml_out, '<' . $field . '>' . "\n");
  }
  if (!isset($metadata[$field]))
  {
    echo 'no such field or no metadata';
  }
  else
  {
    foreach ($metadata[$field] as $entry)
    {
      outputTextBlock($xml_out, $entry);
    }
  }
  if ($needs_wrapper)
  {
    fwrite($xml_out, '</' . $field . '>' . "\n");
  }
}
| 333 |
|
---|
/**
 * Translate one line of code held in a table cell, keeping italics that were
 * smuggled in as HTML comments.
 *
 * @param string $text the raw line
 * @return string the line after translateText() (in code block mode), with
 *                the comment markers turned into real <i> and </i> tags
 */
function translateTableCodeline($text)
{
  // - swap the comment-wrapped italic markers for placeholders so they pass
  //   through the translation untouched
  $shielded = str_replace(array('<!--i-->', '<!--/i-->'), array('%!--i--%', '%!--/i--%'), $text);
  // - encode entities etc
  $translated = translateText($shielded, true);
  // - turn the placeholders into the italic elements they stand for
  return str_replace(array('%!--i--%', '%!--/i--%'), array('<i>', '</i>'), $translated);
}
/** translateTableCodeline() **/
| 347 |
|
---|
/**
 * Escape the characters that are special in XML.
 *
 * '&', '<' and '>' are replaced by their entities (the ampersand first, so
 * the entities produced for the other two are not escaped again). Previously
 * each character was replaced by itself, leaving the text unescaped.
 *
 * @param string $text          the raw text
 * @param bool   $in_code_block true when the text comes from a code block
 * @return string the escaped text. When $in_code_block is set and the global
 *                $allow_bad_codeblocks is enabled, <i>, </i> and <br/> are
 *                turned back into real tags afterwards.
 */
function translateText($text, $in_code_block=false)
{
  global $allow_bad_codeblocks;
  $text = str_replace('&', '&amp;', $text);
  $text = str_replace('<', '&lt;', $text);
  $text = str_replace('>', '&gt;', $text);
  if ($in_code_block && $allow_bad_codeblocks)
  {
    // - undo the escaping for the handful of tags that are tolerated inside
    //   code blocks
    $text = str_replace('&lt;i&gt;', '<i>', $text);
    $text = str_replace('&lt;/i&gt;', '</i>', $text);
    $text = str_replace('&lt;br/&gt;', '<br/>', $text);
  }
  return $text;
}
| 363 |
|
---|
/**
 * Convert one block of dokuwiki text into a <Text> element and write it to
 * the XML output.
 *
 * Steps, in order: footnotes are registered and replaced by references; an
 * explicit "<!-- id:... -->" prefix (or else the running counter) supplies the
 * element id; HTML comments are stripped; special characters are escaped;
 * dokuwiki links, references, URLs and italic/bold/underline markup are
 * rewritten as XML elements; finally the placeholders used along the way are
 * restored and the element is written.
 *
 * Changes from the previous version:
 *  - '&', '<' and '>' are now really escaped (each used to be replaced by
 *    itself), and the patterns that look at escaped text accept the escaped
 *    form as well as the raw one
 *  - a footnote that was already registered reuses its identifier instead of
 *    being renumbered, which used to let a later footnote take the same id
 *  - a block whose text is "0" is no longer written as an empty element
 *
 * @param resource $xml_out       handle the XML is written to
 * @param string   $text          the dokuwiki text
 * @param string   $type          optional value for the type attribute; 'code'
 *                                also enables escaping of < and > inside code
 *                                blocks
 * @param bool     $in_code_block true when the text belongs to a code block
 *
 * Reads the globals $cover_metadata and $allow_bad_codeblocks, and updates
 * the globals $text_id_counter and $footnotes.
 */
function outputTextBlock($xml_out, $text, $type='', $in_code_block = false)
{
  global $cover_metadata;
  global $text_id_counter;
  global $footnotes;
  global $allow_bad_codeblocks;

  // - Start by dealing with any footnotes before anything else
  while (preg_match('/\(\((.*?)\)\)/', $text, $matches))
  {
    $pattern = $matches[0];
    $footnote = $matches[1];
    if (isset($footnotes[$footnote]))
    {
      // - seen before (in an earlier block): point at the existing footnote
      $footnote_id = $footnotes[$footnote];
    }
    else
    {
      $footnote_id = count($footnotes) + 1;
      $footnotes[$footnote] = $footnote_id;
    }
    // - note that we have to escape the footnote reference as the following
    //   code will convert any < and > to entities...
    $footnote_reference = '%FootnoteRef id="' . $footnote_id . '"/%';
    // - str_replace() swaps every occurrence of this footnote in the block,
    //   which is what lets the loop terminate
    $text = str_replace($pattern, $footnote_reference, $text);
  }

  $text_id = '';
  // - check whether the string begins with an explicit id
  if (preg_match('/^\s*<!--\s*id:(.+?)\s*-->(.*)$/', $text, $matches))
  {
    $text_id = $matches[1];
    $text = $matches[2];
    // - a numeric explicit id also moves the running counter on
    if (is_numeric($text_id))
    {
      $text_id_counter = $text_id + 1;
    }
  }
  else
  {
    $text_id = $text_id_counter;
    $text_id_counter++;
  }

  // - protect the special case of an HTML comment being actually displayed
  //   in the text
  // NOTE(review): as written this only protects comments consisting of
  // whitespace and dots; confirm the intended pattern against the repository
  $text = preg_replace('/<!--([\s\.]+?)-->/','##lt##!--\1--##gt##',$text);

  // - strip the remaining HTML comments
  $text = preg_replace('/<!--.*?-->/', '', $text);

  // we leave code blocks alone in terms of ampersands
  if (!$in_code_block)
  {
    // - ampersands aren't safe in XML...
    $text = str_replace('&', '&amp;', $text);
    // ...except for the entities that we have registered as metadata
    if (isset($cover_metadata['ENTITY']))
    {
      foreach ($cover_metadata['ENTITY'] as $entity)
      {
        if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches))
        {
          $entity_name = $matches[1];
          if ($entity_name != 'mdash')
          {
            $entity_character = html_entity_decode('&#'.$matches[2].';',ENT_NOQUOTES,'UTF-8');
            // - undo the escaping applied above for this registered entity
            $text = str_replace('&amp;' . $entity_name . ';', '&' . $entity_name . ';', $text);
            // - we also convert any characters that match the entity char into
            //   the entity
            $text = str_replace($entity_character, '&' . $entity_name . ';', $text);
          }
        }
      }
    }
    // - protect <br/> tags
    $text = str_replace('<br/>','%%br/%%',$text);
    // - encoding all of the < and > that appear in the text (rather than
    //   true html formatting)
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
    // - restore <br/> tags
    $text = str_replace('%%br/%%','<br/>',$text);
  }
  else if ($type == 'code')
  {
    $text = str_replace('<','&lt;',$text);
    $text = str_replace('>','&gt;',$text);
  }

  // - links, oh how I hate thee
  // - external links are slightly easier
  $text = preg_replace('/\[\[http:\/\/(.*?)\|(.*?)\]\]/', '<Link url="http://\1">\2</Link>', $text);
  // - internals have to become the horrible <CrossRef> tags. We ignore any
  //   number prefix on the page name as that is just used for ordering within
  //   Dokuwiki
  $text = preg_replace('/\[\[\.\:(.*?)\|[^\]]+\]\]/','<CrossRef target="Chapter" ref="\1"/>', $text);
  // - internal links starting with hash must be on the same page
  $text = preg_replace('/\[\[###(.*?)\|.*?\]\]/','<CrossRef target="Part" ref="\1"/>', $text);
  $text = preg_replace('/\[\[##(.*?)\|.*?\]\]/','<CrossRef target="Subsection" ref="\1"/>', $text);
  $text = preg_replace('/\[\[#(.*?)\|.*?\]\]/','<CrossRef target="Section" ref="\1"/>', $text);
  // - 'external' internal wiki links are even worse - since we can't know what
  //   the page order number for another manual's chapters might be, we instead
  //   use a search. The ampersand has been escaped by now unless we are in a
  //   code block, so accept both forms.
  $text = preg_replace('/\[\[\?do\=search(?:&amp;|&)id\=([^\s]+)\s+@([a-z]+):manuals:([a-z]+)\|.*?\]\]/i', '<CrossRef external="\3" lang="\2" target="Chapter" ref="\1"/>', $text);
  // - references to images and tables (raw or escaped angle brackets)
  $text = preg_replace('/(?:<|&lt;)imgref\sfigure_(.+?)(?:>|&gt;)/','<CrossRef target="Figure" ref="\1"/>', $text);
  $text = preg_replace('/(?:<|&lt;)tblref\stable_(.+?)(?:>|&gt;)/','<CrossRef target="Table" ref="\1"/>', $text);
  // - explicitly convert URLs as they are a bit messy
  // - first all the cases of URLs in italics, without protocol
  $text = preg_replace('/\/\/\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  $text = preg_replace('/\/\/\s(localhost(?:\/.*?)?)\s\/\//i','<i>\1</i>', $text);
  // - now all the protocol ones (with care taken to protect // in protocol)
  $text = preg_replace('/\/\/\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  $text = preg_replace('/\/\/\shttp:\/\/(localhost(?:\/.*?)?)\s\/\//i','<i>http:##DOUBLESLASH##\1</i>', $text);
  // - next we have the underlined URLs sans protocols
  $text = preg_replace('/__\s([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  $text = preg_replace('/__\s(localhost(?:\/.*?)?)\s__/i','<u>\1</u>', $text);
  // - and finally the protocol prefixed underlined URLs
  $text = preg_replace('/__\shttp:\/\/([a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/([a-z0-9\-]+\.org(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  $text = preg_replace('/__\shttp:\/\/(localhost(?:\/.*?)?)\s__/i','<u>http:##DOUBLESLASH##\1</u>', $text);
  // - lets also protect any other protocols we find floating around
  $text = preg_replace('/(file|ftp|http):\/\//i', '\1:##DOUBLESLASH##', $text);

  // - italic formatting (taking care of protected double slashes)
  $text = preg_replace('/%%\/\/%%/', '##DOUBLESLASH##', $text);
  $text = preg_replace('/\/{5}/', '<i>/</i>', $text); // another special case
  $text = preg_replace('/\/\/(\/.+?)\s*\/\//', '<i>\1</i>', $text); // another special case
  $text = preg_replace('/\/\/\s*(.+?\/)\/\//', '<i>\1</i>', $text); // another special case
  $text = preg_replace('/\/\/\s*(.+?)\s*\/\//', '<i>\1</i>', $text);
  $text = preg_replace('/##DOUBLESLASH##/', '//', $text);
  // - bold formatting
  $text = preg_replace('/\*\*([^"]+?)\*\*/', '<b>\1</b>', $text);
  // - underline formatting
  $text = preg_replace('/__([^"]+?)__/', '<u>\1</u>', $text);

  // - decode certain entities in codeblock (just because they are valid HTML,
  //   derp).
  if ($in_code_block && $allow_bad_codeblocks)
  {
    $text = str_replace('&lt;i&gt;','<i>',$text);
    $text = str_replace('&lt;/i&gt;','</i>',$text);
  }
  // - restore protected entities
  $text = preg_replace('/##(gt|lt)##/','&\1;',$text);
  // - restore protected comment blocks
  $text = str_replace('%!--', '<!--', $text);
  $text = str_replace('--%', '-->', $text);
  // - restore protected footnote refs
  $text = preg_replace('/%FootnoteRef id="([^"]+)"\/%/', '<FootnoteRef id="\1"/>', $text);
  // output the text block
  $text = trim($text);
  // - compare against '' explicitly: empty() would also discard the text "0"
  if ($text === '')
  {
    fwrite($xml_out, '<Text id="' . $text_id . '"/>' . "\n");
  }
  else if (!empty($type))
  {
    fwrite($xml_out, '<Text type="' . $type . '" id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
  else
  {
    fwrite($xml_out, '<Text id="' . $text_id . '">' . $text . '</Text>' . "\n");
  }
}
/** outputTextBlock($xml_out, $text) **/
| 529 |
|
---|
| 530 | /**
|
---|
| 531 | */
|
---|
| 532 | function processPage($xml_out, $page_name)
|
---|
| 533 | {
|
---|
| 534 | global $dokuwiki_path;
|
---|
| 535 | global $seen_ids;
|
---|
| 536 | echo "</p>\n<p><b>Export Chapter:</b> " . $page_name . "<br/>\n";
|
---|
| 537 | // - locate the page in question (taking into account if the user asked for a
|
---|
| 538 | // draft version or an approved version of the manual)
|
---|
| 539 | $page_path = '';
|
---|
| 540 | if ($_REQUEST['v'] == 'draft' || $_REQUEST['l'] != 'en')
|
---|
| 541 | {
|
---|
| 542 | $page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $page_name . '.txt';
|
---|
| 543 | }
|
---|
| 544 | else
|
---|
| 545 | {
|
---|
| 546 | $page_path = getApprovedVersionPath('en:manuals:' . $_REQUEST['m'] . ':' . $page_name);
|
---|
| 547 | }
|
---|
| 548 | $page_in = @fopen($page_path, 'r');
|
---|
| 549 | if (!$page_in)
|
---|
| 550 | {
|
---|
| 551 | printError('Failed to open page for reading:' . $page_name, false);
|
---|
| 552 | return;
|
---|
| 553 | }
|
---|
| 554 | // - once again we read in line-by-line, but this time we are going to output
|
---|
| 555 | // each line as we go through. We expect to encounter certain lines in a
|
---|
| 556 | // predefined order, and should complain if we don't find what we expect.
|
---|
| 557 | $in_chapter = false;
|
---|
| 558 | $in_section = false;
|
---|
| 559 | $in_subsection = false;
|
---|
| 560 | $in_part = false;
|
---|
| 561 | $in_list = false;
|
---|
| 562 | $lists = array();
|
---|
| 563 | $previous_listitem_type = '';
|
---|
| 564 | $in_figure = false;
|
---|
| 565 | $in_table = false;
|
---|
| 566 | $column_widths = array();
|
---|
| 567 | $in_code_block = false;
|
---|
| 568 | while (($line = fgets($page_in)) !== false)
|
---|
| 569 | {
|
---|
| 570 | // remove newline character
|
---|
| 571 | $line = preg_replace('/\r?\n$/','',$line);
|
---|
| 572 | // - we need to know the 'depth' for the bulletpoint lists
|
---|
| 573 | $depth = 0;
|
---|
| 574 | while (strlen($line) > 2 && preg_match('/^\s+[\*\-]/', $line) && substr($line, 0, 2) == ' ')
|
---|
| 575 | {
|
---|
| 576 | $depth++;
|
---|
| 577 | $line = substr($line, 2);
|
---|
| 578 | }
|
---|
| 579 | $first_character = substr($line, 0, 1);
|
---|
| 580 | // - special case for the end of bullet lists
|
---|
| 581 | if ($in_list && ($first_character != "*" && $first_character != "-"))
|
---|
| 582 | {
|
---|
| 583 | while (count($lists) > 0)
|
---|
| 584 | {
|
---|
| 585 | $list_type = array_pop($lists);
|
---|
| 586 | if ($list_type == '*')
|
---|
| 587 | {
|
---|
| 588 | fwrite($xml_out, '</Bullet>' . "\n");
|
---|
| 589 | fwrite($xml_out, '</BulletList>' . "\n");
|
---|
| 590 | }
|
---|
| 591 | else
|
---|
| 592 | {
|
---|
| 593 | fwrite($xml_out, '</NumberedItem>' . "\n");
|
---|
| 594 | fwrite($xml_out, '</NumberedList>' . "\n");
|
---|
| 595 | }
|
---|
| 596 | }
|
---|
| 597 | $in_list = false;
|
---|
| 598 | }
|
---|
| 599 | // - special case for the end of tables
|
---|
| 600 | if ($in_table && $first_character != '^' && $first_character != '|')
|
---|
| 601 | {
|
---|
| 602 | fwrite($xml_out, '</TableContent>' . "\n");
|
---|
| 603 | fwrite($xml_out, '</Table>' . "\n");
|
---|
| 604 | $in_table = false;
|
---|
| 605 | }
|
---|
| 606 | // - special cases for premature closing of sections, subsections and parts
|
---|
| 607 | if (preg_match('/<!-- close:(section|subsection|part) -->/', $line, $matches))
|
---|
| 608 | {
|
---|
| 609 | // - we always try to do this (regardless of actual flag) as we must
|
---|
| 610 | // always close the smallest 'granularity' first
|
---|
| 611 | if ($in_part)
|
---|
| 612 | {
|
---|
| 613 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 614 | fwrite($xml_out, '</Part>' . "\n");
|
---|
| 615 | $in_part = false;
|
---|
| 616 | }
|
---|
| 617 | if ($in_subsection && ($matches[1] == 'section' || $matches[1] == 'subsection'))
|
---|
| 618 | {
|
---|
| 619 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 620 | fwrite($xml_out, '</Subsection>' . "\n");
|
---|
| 621 | $in_subsection = false;
|
---|
| 622 | }
|
---|
| 623 | if ($in_section && $matches[1] == 'section')
|
---|
| 624 | {
|
---|
| 625 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 626 | fwrite($xml_out, '</Section>' . "\n");
|
---|
| 627 | $in_section = false;
|
---|
| 628 | }
|
---|
| 629 | }
|
---|
| 630 |
|
---|
| 631 | // - if this page is a chapter, then the first thing on the page should be
|
---|
| 632 | // the chapter title (six equals)
|
---|
| 633 | if (preg_match('/====== (.+) ======/', $line, $matches))
|
---|
| 634 | {
|
---|
| 635 | $chapter_title = $matches[1];
|
---|
| 636 | $chapter_id = $page_name;
|
---|
| 637 | if (empty($chapter_id))
|
---|
| 638 | {
|
---|
| 639 | $chapter_id = generateID($chapter_title);
|
---|
| 640 | }
|
---|
| 641 | // - are we already processing a part? if so end it, end it now
|
---|
| 642 | if ($in_part)
|
---|
| 643 | {
|
---|
| 644 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 645 | fwrite($xml_out, '</Part>' . "\n");
|
---|
| 646 | $in_part = false;
|
---|
| 647 | }
|
---|
| 648 | // - are we already processing a subsection? if so end it, end it now
|
---|
| 649 | if ($in_subsection)
|
---|
| 650 | {
|
---|
| 651 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 652 | fwrite($xml_out, '</Subsection>' . "\n");
|
---|
| 653 | $in_subsection = false;
|
---|
| 654 | }
|
---|
| 655 | // - are we already processing a section? if so end it, end it now
|
---|
| 656 | if ($in_section)
|
---|
| 657 | {
|
---|
| 658 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 659 | fwrite($xml_out, '</Section>' . "\n");
|
---|
| 660 | $in_section = false;
|
---|
| 661 | }
|
---|
| 662 | // - are we already processing a chapter? if so end it, end it now
|
---|
| 663 | if ($in_chapter)
|
---|
| 664 | {
|
---|
| 665 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 666 | fwrite($xml_out, '</Chapter>' . "\n");
|
---|
| 667 | $in_chapter = false;
|
---|
| 668 | }
|
---|
| 669 | // - write out this chapter's header
|
---|
| 670 | fwrite($xml_out, '<Chapter id="' . $chapter_id . '">' . "\n");
|
---|
| 671 | outputMetadataSingle($xml_out, $chapter_title, 'Title');
|
---|
| 672 | fwrite($xml_out, '<Content>' . "\n");
|
---|
| 673 | $in_chapter = true;
|
---|
| 674 | }
|
---|
| 675 | // - the next likely thing to encounter is a section heading (five equals)
|
---|
| 676 | elseif (preg_match('/=====\s+(.+)\s+=====/', $line, $matches))
|
---|
| 677 | {
|
---|
| 678 | $section_title = $matches[1];
|
---|
| 679 | // - check for explicit section id
|
---|
| 680 | $section_id = '';
|
---|
| 681 | if (preg_match('/<!-- sid:(.+?) -->(.*)/', $section_title, $matches))
|
---|
| 682 | {
|
---|
| 683 | $section_id = $matches[1];
|
---|
| 684 | $section_title = $matches[2];
|
---|
| 685 | }
|
---|
| 686 | if (empty($section_id))
|
---|
| 687 | {
|
---|
| 688 | $section_id = generateID($section_title);
|
---|
| 689 | }
|
---|
| 690 | // - are we already processing a part? if so end it, end it now
|
---|
| 691 | if ($in_part)
|
---|
| 692 | {
|
---|
| 693 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 694 | fwrite($xml_out, '</Part>' . "\n");
|
---|
| 695 | $in_part = false;
|
---|
| 696 | }
|
---|
| 697 | // - are we already processing a subsection? if so end it, end it now
|
---|
| 698 | if ($in_subsection)
|
---|
| 699 | {
|
---|
| 700 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 701 | fwrite($xml_out, '</Subsection>' . "\n");
|
---|
| 702 | $in_subsection = false;
|
---|
| 703 | }
|
---|
| 704 | // - are we already processing a section? if so end it, end it now
|
---|
| 705 | if ($in_section)
|
---|
| 706 | {
|
---|
| 707 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 708 | fwrite($xml_out, '</Section>' . "\n");
|
---|
| 709 | $in_section = false;
|
---|
| 710 | }
|
---|
| 711 | // - write out this section's header
|
---|
| 712 | fwrite($xml_out, '<Section id="' . $section_id . '">' . "\n");
|
---|
| 713 | outputMetadataSingle($xml_out, $section_title, 'Title');
|
---|
| 714 | fwrite($xml_out, '<Content>' . "\n");
|
---|
| 715 | $in_section = true;
|
---|
| 716 | }
|
---|
| 717 | // - similar for subsection heading (four equals)
|
---|
| 718 | elseif (preg_match('/==== (.+) ====/', $line, $matches))
|
---|
| 719 | {
|
---|
| 720 | $subsection_title = $matches[1];
|
---|
| 721 | // - check for explicit subsection id
|
---|
| 722 | $subsection_id = '';
|
---|
| 723 | if (preg_match('/<!-- sid:(.+?) -->(.*)/', $subsection_title, $matches))
|
---|
| 724 | {
|
---|
| 725 | $subsection_id = $matches[1];
|
---|
| 726 | $subsection_title = $matches[2];
|
---|
| 727 | }
|
---|
| 728 | if (empty($subsection_id))
|
---|
| 729 | {
|
---|
| 730 | $subsection_id = generateID($subsection_title);
|
---|
| 731 | }
|
---|
| 732 | // - are we already processing a part? if so end it, end it now
|
---|
| 733 | if ($in_part)
|
---|
| 734 | {
|
---|
| 735 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 736 | fwrite($xml_out, '</Part>' . "\n");
|
---|
| 737 | $in_part = false;
|
---|
| 738 | }
|
---|
| 739 | // - are we already processing a subsection? if so end it, end it now
|
---|
| 740 | if ($in_subsection)
|
---|
| 741 | {
|
---|
| 742 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 743 | fwrite($xml_out, '</Subsection>' . "\n");
|
---|
| 744 | $in_subsection = false;
|
---|
| 745 | }
|
---|
| 746 | // - write out this subsection's header
|
---|
| 747 | fwrite($xml_out, '<Subsection id="' . $subsection_id . '">' . "\n");
|
---|
| 748 | outputMetadataSingle($xml_out, $subsection_title, 'Title');
|
---|
| 749 | fwrite($xml_out, '<Content>' . "\n");
|
---|
| 750 | $in_subsection = true;
|
---|
| 751 | }
|
---|
| 752 | // - and part heading (three equals)
|
---|
| 753 | elseif (preg_match('/=== (.+) ===/', $line, $matches))
|
---|
| 754 | {
|
---|
| 755 | $part_title = $matches[1];
|
---|
| 756 | // - check for explicit part id
|
---|
| 757 | $part_id = '';
|
---|
| 758 | if (preg_match('/<!-- sid:(.+?) -->(.*)/', $part_title, $matches))
|
---|
| 759 | {
|
---|
| 760 | $part_id = $matches[1];
|
---|
| 761 | $part_title = $matches[2];
|
---|
| 762 | }
|
---|
| 763 | if (empty($part_id))
|
---|
| 764 | {
|
---|
| 765 | $part_id = generateID($part_title);
|
---|
| 766 | }
|
---|
| 767 | // - are we already processing a part? if so end it, end it now
|
---|
| 768 | if ($in_part)
|
---|
| 769 | {
|
---|
| 770 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 771 | fwrite($xml_out, '</Part>' . "\n");
|
---|
| 772 | $in_part = false;
|
---|
| 773 | }
|
---|
| 774 | // - write out this part's header
|
---|
| 775 | fwrite($xml_out, '<Part id="' . $part_id . '">' . "\n");
|
---|
| 776 | outputMetadataSingle($xml_out, '**//' . $part_title . '//**', 'Title');
|
---|
| 777 | fwrite($xml_out, '<Content>' . "\n");
|
---|
| 778 | $in_part = true;
|
---|
| 779 | }
|
---|
| 780 | // - Ignore 5th level heading - they are only used to allow more convenient
|
---|
| 781 | // editing of figures and tables
|
---|
| 782 | elseif (preg_match('/== (.+) ==/', $line, $matches))
|
---|
| 783 | {
|
---|
| 784 | }
|
---|
| 785 | // - lists need special handling
|
---|
| 786 | elseif (preg_match('/^(\*|\-)\s+(.*)/', $line, $matches))
|
---|
| 787 | {
|
---|
| 788 | $list_type = $matches[1];
|
---|
| 789 | $list_text = $matches[2];
|
---|
| 790 | $list_depth = count($lists);
|
---|
| 791 | if (!$in_list)
|
---|
| 792 | {
|
---|
| 793 | if ($list_type == '*')
|
---|
| 794 | {
|
---|
| 795 | fwrite($xml_out, '<BulletList>' . "\n");
|
---|
| 796 | }
|
---|
| 797 | else
|
---|
| 798 | {
|
---|
| 799 | fwrite($xml_out, '<NumberedList>' . "\n");
|
---|
| 800 | }
|
---|
| 801 | $in_list = true;
|
---|
| 802 | array_push($lists, $list_type);
|
---|
| 803 | }
|
---|
| 804 | // - this bullet is at the same depth as previous - close the previous
|
---|
| 805 | // point
|
---|
| 806 | elseif ($depth == $list_depth)
|
---|
| 807 | {
|
---|
| 808 | $previous_list_type = end($lists);
|
---|
| 809 | if ($previous_list_type == '*')
|
---|
| 810 | {
|
---|
| 811 | fwrite($xml_out, '</Bullet>' . "\n");
|
---|
| 812 | }
|
---|
| 813 | else
|
---|
| 814 | {
|
---|
| 815 | fwrite($xml_out, '</NumberedItem>' . "\n");
|
---|
| 816 | }
|
---|
| 817 | // - we don't match in type anymore... close the previous list and open
|
---|
| 818 | // a new list of the appropriate type
|
---|
| 819 | if ($list_type != $previous_list_type)
|
---|
| 820 | {
|
---|
| 821 | if ($previous_list_type == '*')
|
---|
| 822 | {
|
---|
| 823 | fwrite($xml_out, '</BulletList>' . "\n");
|
---|
| 824 | fwrite($xml_out, '<NumberedList>' . "\n");
|
---|
| 825 | }
|
---|
| 826 | else
|
---|
| 827 | {
|
---|
| 828 | fwrite($xml_out, '</NumberedNumbered>' . "\n");
|
---|
| 829 | fwrite($xml_out, '<BulletList>' . "\n");
|
---|
| 830 | }
|
---|
| 831 | array_pop($lists);
|
---|
| 832 | array_push($lists, $list_type);
|
---|
| 833 | }
|
---|
| 834 | }
|
---|
| 835 | else
|
---|
| 836 | {
|
---|
| 837 | // - we have either got deeper...
|
---|
| 838 | if ($depth > $list_depth)
|
---|
| 839 | {
|
---|
| 840 | if ($list_type == '*')
|
---|
| 841 | {
|
---|
| 842 | fwrite($xml_out, '<BulletList>' . "\n");
|
---|
| 843 | }
|
---|
| 844 | else
|
---|
| 845 | {
|
---|
| 846 | fwrite($xml_out, '<NumberedList>' . "\n");
|
---|
| 847 | }
|
---|
| 848 | array_push($lists, $list_type);
|
---|
| 849 | }
|
---|
| 850 | // ... or shallower in the bullet listing
|
---|
| 851 | if ($depth < $list_depth)
|
---|
| 852 | {
|
---|
| 853 | $previous_list_type = array_pop($lists);
|
---|
| 854 | if ($previous_list_type == '*')
|
---|
| 855 | {
|
---|
| 856 | fwrite($xml_out, '</Bullet>' . "\n");
|
---|
| 857 | fwrite($xml_out, '</BulletList>' . "\n");
|
---|
| 858 | }
|
---|
| 859 | else
|
---|
| 860 | {
|
---|
| 861 | fwrite($xml_out, '</NumberedItem>' . "\n");
|
---|
| 862 | fwrite($xml_out, '</NumberedList>' . "\n");
|
---|
| 863 | }
|
---|
| 864 | // - we still have to close the last item too
|
---|
| 865 | $previous_listitem_type = end($lists);
|
---|
| 866 | if ($previous_listitem_type == '*')
|
---|
| 867 | {
|
---|
| 868 | fwrite($xml_out, '</Bullet>' . "\n");
|
---|
| 869 | }
|
---|
| 870 | else
|
---|
| 871 | {
|
---|
| 872 | fwrite($xml_out, '</NumberedItem>' . "\n");
|
---|
| 873 | }
|
---|
| 874 | }
|
---|
| 875 | }
|
---|
| 876 | if ($list_type == '*')
|
---|
| 877 | {
|
---|
| 878 | fwrite($xml_out, '<Bullet>' . "\n");
|
---|
| 879 | }
|
---|
| 880 | else
|
---|
| 881 | {
|
---|
| 882 | fwrite($xml_out, '<NumberedItem>' . "\n");
|
---|
| 883 | }
|
---|
| 884 | // Special Case: bullets that contain (start) a code block
|
---|
| 885 | if (preg_match('/^(.*)<code>\s*$/', $list_text, $matches))
|
---|
| 886 | {
|
---|
| 887 | $list_text = $matches[1];
|
---|
| 888 | $in_code_block = true;
|
---|
| 889 | }
|
---|
| 890 |
|
---|
| 891 | outputTextBlock($xml_out, $list_text);
|
---|
| 892 |
|
---|
| 893 | // - to make things clearer, we'll process any and all code blocks within
|
---|
| 894 | // bullets here - especially as there may be more text block *after*
|
---|
| 895 | // the code block finishes
|
---|
| 896 | if ($in_code_block)
|
---|
| 897 | {
|
---|
| 898 | $sub_line = '';
|
---|
| 899 | while ($in_code_block && ($sub_line = fgets($page_in)) !== false)
|
---|
| 900 | {
|
---|
| 901 | $sub_line = trim($sub_line);
|
---|
| 902 | // - closing code
|
---|
| 903 | if (preg_match('/^<\/code>(.*)$/', $sub_line, $matches))
|
---|
| 904 | {
|
---|
| 905 | $sub_line = $matches[1]; // may be empty string
|
---|
| 906 | $in_code_block = false;
|
---|
| 907 | }
|
---|
| 908 | // - output another plain codeline
|
---|
| 909 | else
|
---|
| 910 | {
|
---|
| 911 | fwrite($xml_out, '<CodeLine>' . $sub_line . "</CodeLine>\n");
|
---|
| 912 | $sub_line = '';
|
---|
| 913 | }
|
---|
| 914 | }
|
---|
| 915 | // - if sub_line still has anything in it, then add that content as a
|
---|
| 916 | // text block
|
---|
| 917 | if (!empty($sub_line))
|
---|
| 918 | {
|
---|
| 919 | outputTextBlock($xml_out, $sub_line);
|
---|
| 920 | }
|
---|
| 921 | }
|
---|
| 922 | }
|
---|
| 923 | // - images start with an image caption 'element'
|
---|
| 924 | elseif (preg_match('/<imgcaption\s+figure_([a-z0-9_\-]+)\|(.+)>\s*<\/imgcaption>/', $line, $matches))
|
---|
| 925 | {
|
---|
| 926 | $figure_id = $matches[1];
|
---|
| 927 | $figure_title = $matches[2];
|
---|
| 928 | // - watch for the special withLineNumber flag
|
---|
| 929 | $class_attribute = '';
|
---|
| 930 | if (strpos($figure_title, '%!-- withLineNumber --%') != false)
|
---|
| 931 | {
|
---|
| 932 | $class_attribute = ' class="withLineNumber"';
|
---|
| 933 | $figure_title = str_replace('%!-- withLineNumber --%','',$figure_title);
|
---|
| 934 | }
|
---|
| 935 | fwrite($xml_out, '<Figure id="' . $figure_id . '"' . $class_attribute . '>' . "\n");
|
---|
| 936 | echo '[figure: ' . $figure_id . "] \n";
|
---|
| 937 | fwrite($xml_out, '<Title>' . "\n");
|
---|
| 938 | // - decode any comments in the title (used to store explicit id
|
---|
| 939 | // information)
|
---|
| 940 | $figure_title = str_replace('%!--', '<!--', $figure_title);
|
---|
| 941 | $figure_title = str_replace('--%', '-->', $figure_title);
|
---|
| 942 | // - special case: the title may have a subtitle (as a prefix)
|
---|
| 943 | $figure_subtitle_id = '';
|
---|
| 944 | $figure_subtitle = '';
|
---|
| 945 | // - subtitle with explicit id
|
---|
| 946 | if (preg_match('/^(<!-- id:.+? -->\([a-z]\))\s*(.*)$/', $figure_title, $matches))
|
---|
| 947 | {
|
---|
| 948 | $figure_subtitle = $matches[1];
|
---|
| 949 | $figure_title = $matches[2];
|
---|
| 950 | }
|
---|
| 951 | // - subtitle without explicit id
|
---|
| 952 | else if (preg_match('/^(\([a-z]\))\s*(.*)$/', $figure_title, $matches))
|
---|
| 953 | {
|
---|
| 954 | $figure_subtitle = $matches[1];
|
---|
| 955 | $figure_title = $matches[2];
|
---|
| 956 | }
|
---|
| 957 | outputTextBlock($xml_out, $figure_title);
|
---|
| 958 | if (!empty($figure_subtitle))
|
---|
| 959 | {
|
---|
| 960 | fwrite($xml_out, '<SubTitle>' . "\n");
|
---|
| 961 | outputTextBlock($xml_out, $figure_subtitle);
|
---|
| 962 | fwrite($xml_out, '</SubTitle>' . "\n");
|
---|
| 963 | }
|
---|
| 964 | fwrite($xml_out, '</Title>' . "\n");
|
---|
| 965 | $in_figure = true;
|
---|
| 966 | // - record the id to prevent repeating
|
---|
| 967 | $seen_ids[$figure_id] = true;
|
---|
| 968 | }
|
---|
| 969 | // - tables start with a table caption 'element'
|
---|
| 970 | elseif (preg_match('/<tblcaption\s+table_([a-z0-9_\-]+)\|([^>]+)>\s*<\/tblcaption>/', $line, $matches))
|
---|
| 971 | {
|
---|
| 972 | $table_id = $matches[1];
|
---|
| 973 | $table_title = $matches[2];
|
---|
| 974 | if ($table_title == '##NOCAPTION##')
|
---|
| 975 | {
|
---|
| 976 | echo '[non-captioned table: ' . $table_id . "] \n";
|
---|
| 977 | // - watch for autogenerated ids... no point in outputting them
|
---|
| 978 | if (preg_match('/^table(_\d+)?$/', $table_id))
|
---|
| 979 | {
|
---|
| 980 | fwrite($xml_out, "<Table>\n");
|
---|
| 981 | }
|
---|
| 982 | else
|
---|
| 983 | {
|
---|
| 984 | fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
|
---|
| 985 | }
|
---|
| 986 | fwrite($xml_out, '<Title/>' . "\n");
|
---|
| 987 | }
|
---|
| 988 | elseif ($table_title == '##HIDDEN##')
|
---|
| 989 | {
|
---|
| 990 | echo '[hidden table: ' . $table_id . "] \n";
|
---|
| 991 | // - watch for autogenerated ids... no point in outputting them
|
---|
| 992 | if (preg_match('/^table(_\d+)?$/', $table_id))
|
---|
| 993 | {
|
---|
| 994 | fwrite($xml_out, "<Table class=\"hidden\">\n");
|
---|
| 995 | }
|
---|
| 996 | else
|
---|
| 997 | {
|
---|
| 998 | fwrite($xml_out, '<Table class="hidden" id="' . $table_id . '">' . "\n");
|
---|
| 999 | }
|
---|
| 1000 | fwrite($xml_out, '<Title/>' . "\n");
|
---|
| 1001 | }
|
---|
| 1002 | else
|
---|
| 1003 | {
|
---|
| 1004 | echo '[table: ' . $table_id . "] \n";
|
---|
| 1005 | // - watch for autogenerated ids... no point in outputting them
|
---|
| 1006 | if (preg_match('/^table(_\d+)?$/', $table_id))
|
---|
| 1007 | {
|
---|
| 1008 | fwrite($xml_out, "<Table>\n");
|
---|
| 1009 | }
|
---|
| 1010 | else
|
---|
| 1011 | {
|
---|
| 1012 | fwrite($xml_out, '<Table id="' . $table_id . '">' . "\n");
|
---|
| 1013 | }
|
---|
| 1014 | fwrite($xml_out, '<Title>' . "\n");
|
---|
| 1015 | outputTextBlock($xml_out, $table_title);
|
---|
| 1016 | fwrite($xml_out, '</Title>' . "\n");
|
---|
| 1017 | }
|
---|
| 1018 | fwrite($xml_out, '<TableContent>' . "\n");
|
---|
| 1019 | $in_table = true;
|
---|
| 1020 | // - record the id to prevent repeating
|
---|
| 1021 | $seen_ids[$table_id] = true;
|
---|
| 1022 | }
|
---|
| 1023 | // - the second line in a table should be its column width values
|
---|
| 1024 | elseif (preg_match('/\|<\s-\s([0-9 ]+?)\s>\|/', $line, $matches))
|
---|
| 1025 | {
|
---|
| 1026 | $column_widths = explode(' ', $matches[1]);
|
---|
| 1027 | }
|
---|
| 1028 | // - then every row will be made of a number of cells
|
---|
| 1029 | elseif (preg_match('/^\|(.*?)\|$/', $line, $matches))
|
---|
| 1030 | {
|
---|
| 1031 | $row_content = $matches[1];
|
---|
| 1032 | $cell_contents = preg_split('/(\s+\||\|\s+)/', $row_content);
|
---|
| 1033 | fwrite($xml_out, '<tr>' . "\n");
|
---|
| 1034 | foreach ($cell_contents as $index=>$cell_content)
|
---|
| 1035 | {
|
---|
| 1036 | $cell_content = trim($cell_content);
|
---|
| 1037 | $th_text = '';
|
---|
| 1038 | if (isset($column_widths[$index]))
|
---|
| 1039 | {
|
---|
| 1040 | $th_text = '<th width="' . $column_widths[$index] . '"';
|
---|
| 1041 | }
|
---|
| 1042 | else
|
---|
| 1043 | {
|
---|
| 1044 | $th_text = '<th';
|
---|
| 1045 | }
|
---|
| 1046 | // - if the cell would be empty, we use the shorthand
|
---|
| 1047 | if (empty($cell_content))
|
---|
| 1048 | {
|
---|
| 1049 | $th_text .= '/>' . "\n";
|
---|
| 1050 | fwrite($xml_out, $th_text);
|
---|
| 1051 | }
|
---|
| 1052 | else
|
---|
| 1053 | {
|
---|
| 1054 | $th_text .= '>' . "\n";
|
---|
| 1055 | fwrite($xml_out, $th_text);
|
---|
| 1056 |
|
---|
| 1057 | // GAH - this is proving harder than a hard thing thats hard.
|
---|
| 1058 | // The issue is that the most straightforward way of fixing this,
|
---|
| 1059 | // namely using explicit newlines (\\) in the dokuwiki txt causes
|
---|
| 1060 | // lots a legitimately translated <br/> to also be split up. I
|
---|
| 1061 | // think the only way forward would be to maybe extend the HTML
|
---|
| 1062 | // Comment plugin to also respect and process <br/> tags. Then I
|
---|
| 1063 | // can avoid transforming them, and use the \\ sentinel to
|
---|
| 1064 | // separate multi-line table cells.
|
---|
| 1065 | $cell_content_lines = explode('\\\\', $cell_content);
|
---|
| 1066 | foreach ($cell_content_lines as $cell_content)
|
---|
| 1067 | {
|
---|
| 1068 | // - watch out, as the content may be an image
|
---|
| 1069 | if (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $cell_content))
|
---|
| 1070 | {
|
---|
| 1071 | processImage($xml_out, $cell_content);
|
---|
| 1072 | }
|
---|
| 1073 | elseif (preg_match('/\'\'(.*)\'\'/', $cell_content, $matches))
|
---|
| 1074 | {
|
---|
| 1075 | fwrite($xml_out, '<CodeLine>' . translateTableCodeline($matches[1]) . '</CodeLine>' . "\n");
|
---|
| 1076 | }
|
---|
| 1077 | // - anything else is text
|
---|
| 1078 | else
|
---|
| 1079 | {
|
---|
| 1080 | outputTextBlock($xml_out, $cell_content);
|
---|
| 1081 | }
|
---|
| 1082 | }
|
---|
| 1083 | fwrite($xml_out, '</th>' . "\n");
|
---|
| 1084 | }
|
---|
| 1085 | }
|
---|
| 1086 | fwrite($xml_out, '</tr>' . "\n");
|
---|
| 1087 | }
|
---|
| 1088 | // - links to image media in the wiki!
|
---|
| 1089 | elseif (preg_match('/\{\{.+?[^:?]+\?\d+x\d+(&direct)?\}\}/', $line))
|
---|
| 1090 | {
|
---|
| 1091 | processImage($xml_out, $line);
|
---|
| 1092 | // - if we were processing a figure, then now is a good time to close it
|
---|
| 1093 | if ($in_figure)
|
---|
| 1094 | {
|
---|
| 1095 | fwrite($xml_out, '</Figure>' . "\n");
|
---|
| 1096 | $in_figure = false;
|
---|
| 1097 | }
|
---|
| 1098 | }
|
---|
| 1099 | // - if the line starts with a <code> block, then we have a tag
|
---|
| 1100 | // for that (which is special in that it get a unique text id)
|
---|
| 1101 | elseif (preg_match('/^<code\s*\d*\s*>(.*?)(<\/code>)?$/', $line, $matches) || ($in_code_block && preg_match('/^(.*?)(<\/code>)?$/', $line, $matches)))
|
---|
| 1102 | {
|
---|
| 1103 | $payload = $matches[1];
|
---|
| 1104 | $found_end = (isset($matches[2]));
|
---|
| 1105 | $in_code_block = true;
|
---|
| 1106 | // - be careful with empty lines
|
---|
| 1107 | if (empty($payload))
|
---|
| 1108 | {
|
---|
| 1109 | // - as they may appear in the body of the code (in which case we need
|
---|
| 1110 | // to output them). The empty lines at the start or end of a code
|
---|
| 1111 | // block are just an unfortunate consequence of the support for code
|
---|
| 1112 | // line numbering.
|
---|
| 1113 | if (!$found_end && strpos($line, '<code') === false)
|
---|
| 1114 | {
|
---|
| 1115 | fwrite($xml_out, "<CodeLine/>\n");
|
---|
| 1116 | }
|
---|
| 1117 | }
|
---|
| 1118 | elseif (preg_match('/^<!-- id:([^\s]+) -->/', $payload, $matches))
|
---|
| 1119 | {
|
---|
| 1120 | $text_id = $matches[1];
|
---|
| 1121 | outputTextBlock($xml_out, $payload, 'code', true);
|
---|
| 1122 | // - record the id to prevent repeating
|
---|
| 1123 | $seen_ids[$text_id] = true;
|
---|
| 1124 | }
|
---|
| 1125 | else
|
---|
| 1126 | {
|
---|
| 1127 | fwrite($xml_out, '<CodeLine>' . translateText($payload, true) . '</CodeLine>' . "\n");
|
---|
| 1128 | }
|
---|
| 1129 | // - if we didn't find an endtag we have to keep doing code mode until
|
---|
| 1130 | // we do
|
---|
| 1131 | $in_code_block = (!$found_end);
|
---|
| 1132 | if ($found_end)
|
---|
| 1133 | {
|
---|
| 1134 | // - if we were processing a figure, then now is a good time to close it
|
---|
| 1135 | if ($in_figure)
|
---|
| 1136 | {
|
---|
| 1137 | fwrite($xml_out, '</Figure>' . "\n");
|
---|
| 1138 | $in_figure = false;
|
---|
| 1139 | }
|
---|
| 1140 | }
|
---|
| 1141 | }
|
---|
| 1142 | // - entities on a line by themselves (i.e. references to external files)
|
---|
| 1143 | // go through verbatim
|
---|
| 1144 | elseif (preg_match('/^\s*&[a-z0-9_-]+;\s*$/', $line))
|
---|
| 1145 | {
|
---|
| 1146 | fwrite($xml_out, $line . "\n");
|
---|
| 1147 | }
|
---|
| 1148 | // - lines starting with > are indented text blocks
|
---|
| 1149 | elseif (preg_match('/^>(.*)$/', $line, $matches))
|
---|
| 1150 | {
|
---|
| 1151 | $payload = $matches[1];
|
---|
| 1152 | fwrite($xml_out, "<Indented>\n");
|
---|
| 1153 | outputTextBlock($xml_out, $payload);
|
---|
| 1154 | fwrite($xml_out, "</Indented>\n");
|
---|
| 1155 | }
|
---|
| 1156 | // - everything else goes straight through as a text block
|
---|
| 1157 | // - note that for code blocks, even empty lines count
|
---|
| 1158 | elseif (!empty($line))
|
---|
| 1159 | {
|
---|
| 1160 | // - output the line of text having encoded entities etc
|
---|
| 1161 | outputTextBlock($xml_out, $line, '', $in_code_block);
|
---|
| 1162 | }
|
---|
| 1163 | }
|
---|
| 1164 | // Complete any open part
|
---|
| 1165 | if ($in_part)
|
---|
| 1166 | {
|
---|
| 1167 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 1168 | fwrite($xml_out, '</Part>' . "\n");
|
---|
| 1169 | $in_part = false;
|
---|
| 1170 | }
|
---|
| 1171 | // Complete any open subsection
|
---|
| 1172 | if ($in_subsection)
|
---|
| 1173 | {
|
---|
| 1174 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 1175 | fwrite($xml_out, '</Subsection>' . "\n");
|
---|
| 1176 | $in_subsection = false;
|
---|
| 1177 | }
|
---|
| 1178 | // Complete any open section
|
---|
| 1179 | if ($in_section)
|
---|
| 1180 | {
|
---|
| 1181 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 1182 | fwrite($xml_out, '</Section>' . "\n");
|
---|
| 1183 | $in_section = false;
|
---|
| 1184 | }
|
---|
| 1185 | // Complete any open chapter
|
---|
| 1186 | if ($in_chapter)
|
---|
| 1187 | {
|
---|
| 1188 | fwrite($xml_out, '</Content>' . "\n");
|
---|
| 1189 | fwrite($xml_out, '</Chapter>' . "\n");
|
---|
| 1190 | $in_chapter = false;
|
---|
| 1191 | }
|
---|
| 1192 | }
|
---|
| 1193 | /** processPage($xml_out, $page_name) **/
|
---|
| 1194 |
|
---|
/**
 * Handle a dokuwiki image media link of the form {{...name?WIDTHxHEIGHT}}
 * (optionally followed by &direct) found in $text.
 *
 * The referenced image is copied from the dokuwiki media directory for the
 * requested language ($_REQUEST['l']) into the 'images' directory below
 * $xml_source_path, and a <File width=".." height=".." url="images/.."/>
 * element is written to $xml_out. If $text contains no such link nothing is
 * written and nothing is copied.
 *
 * @param resource $xml_out the open handle the XML is written to (via fwrite)
 * @param string   $text    the wiki text containing the image link
 */
function processImage($xml_out, $text)
{
  global $dokuwiki_path;
  global $xml_source_path;
  if (preg_match('/\{\{.+?([^:?]+)\?(\d+)x(\d+)(&direct)?\}\}/', $text, $matches))
  {
    $filename = $matches[1];
    $width = $matches[2];
    $height = $matches[3];
    // - copy the file into place
    // - note the source lookup uses the lowercased filename while the
    //   destination (and the url attribute) keep the case used in the link
    //   (presumably dokuwiki stores media under lowercase names - confirm)
    $image_source_path = $dokuwiki_path . '/data/media/' . $_REQUEST['l'] . '/manuals/images/' . strtolower($filename);
    $image_destination_dir = $xml_source_path . '/' . $_REQUEST['l'] . '/images';
    mkAllDir($image_destination_dir, 0755);
    $image_destination_path = $image_destination_dir . '/' . $filename;
    // - only attempt the copy when the source image actually exists, so a
    //   missing image is reported through printError() rather than also
    //   triggering a raw PHP warning from copy()
    if (is_file($image_source_path) && copy($image_source_path, $image_destination_path))
    {
      echo '[copying file: ' . $filename . "] \n";
    }
    else
    {
      // - second argument is passed as false, as before (presumably marks the
      //   error as non-fatal - confirm against printError's definition)
      printError('Failed to copy image into place: ' . $filename, false);
    }
    // - spit out the XML element (this happens whether or not the copy
    //   succeeded, so the reference is kept in the generated XML)
    fwrite($xml_out, '<File width="' . $width . '" height="' . $height . '" url="images/' . $filename . '"/>' . "\n");
  }
}