[25026] | 1 | <?php
|
---|
| 2 |
|
---|
| 3 | require_once('common.php');
|
---|
| 4 |
|
---|
| 5 | /** @file gs-manual-import.php
|
---|
| 6 | * This script transforms the single XML manual file required by the rest of
|
---|
| 7 | * the Greenstone manual generation scripts into the series of dokuwiki pages
|
---|
| 8 | * that make up a certain manual (as specified by the 'm' argument) in a
|
---|
| 9 | * certain language ('l').
|
---|
| 10 | */
|
---|
| 11 |
|
---|
// parseCLIArguments() is defined outside this section; all that is visible
// here is that a falsy return means the arguments could not be understood.
// NOTE(review): presumably it copies command line options into $_REQUEST so
// the rest of the script can treat CLI and web invocations alike - confirm.
if (!parseCLIArguments())
{
    printError("Error! Failed to parse arguments...\nUsage: gs-manual-import.php -m [user|install|develop|paper]");
}

// 0. Initialization
// - fall back to the English user manual when an argument is missing or
//   empty (empty() already covers the "not set" case)
if (empty($_REQUEST['l']))
{
    $_REQUEST['l'] = 'en';
}
if (empty($_REQUEST['m']))
{
    $_REQUEST['m'] = 'user';
    // - alternative defaults, handy while testing:
    //$_REQUEST['m'] = 'install';
    //$_REQUEST['m'] = 'develop';
    //$_REQUEST['m'] = 'paper';
}

// - validate arguments before we use them (security): both values end up in
//   file system paths and in the HTML progress output further down.
// - the D (dollar-end-only) modifier matters: without it '$' also matches
//   just before a trailing newline, so "user\n" would slip through the
//   whitelist.
if (!preg_match('/^(develop|install|paper|user)$/D', $_REQUEST['m']))
{
    printError('Unknown manual type requested: ' . htmlspecialchars($_REQUEST['m']));
}

if (!preg_match('/^(ar|en|es|fr|pt-br|ru)$/D', $_REQUEST['l']))
{
    printError('Unknown language requested: ' . htmlspecialchars($_REQUEST['l']));
}
|
---|
| 40 |
|
---|
// Progress banner: heading, a summary of what is being imported, and the
// opening of the "Frontmatter" paragraph that later progress notes append to.
printf(
    "<h2>Importing Greenstone Manual XML</h2>\n"
    . "<p><b>XML Source Path:</b> %s<br/><b>Manual:</b> %s<br/><b>Language:</b> %s</p>\n<hr/>\n"
    . "<p><b>Frontmatter: </b><br/>\n",
    $xml_source_path,
    $_REQUEST['m'],
    $_REQUEST['l']
);
|
---|
// 1. By-and-large we're going to process all of this in a big state machine
// - the top level page, containing cover page and chapter order information,
//   needs to be created last, so we have to store its information
// - cover metadata store. NOTE(review): not written directly in this section;
//   presumably filled by addMetadata() - confirm.
$manual_metadata = array();
// - entity name => numeric character code, harvested from <!ENTITY ...> lines
$entity_replacements = array();
// - footnote id => translated footnote text, filled by the pre-processing pass
$footnotes = array();
// - "page_name|title" entries: the frontmatter page is unshifted onto the
//   front, chapters are pushed on the end as they are encountered
$page_order = array();
// - incremented each time a </Chapter> is seen
$page_count = 2;
// - name of the cover metadata element currently open ('' when none)
$looking_for_metadata = '';
// - file handle of the chapter page being written (false when none is open)
$chapter_txt_out = false;
// - accumulates output for everything that appears outside of a chapter
$frontmatter_text = '';
// - true while inside a <Section> that sits outside of any chapter
$in_section = false;
// - id of the first non-chapter section; names the frontmatter page
$sections_page_name = '';
// - true between <Chapter ...> and </Chapter>
$in_chapter = false;
$chapter_id = '';
// - nesting depth of open lists (bullet and numbered lists share the counter)
$bullet_depth = 0;
// - whether the most recently opened list is numbered; note this starts out
//   true and is only cleared when a list is opened/closed
$is_numbered_list = true;
// - NOTE(review): not referenced anywhere in this section of the script
$line_counter = 0;
// - true while a <code> block has been opened in the output but not closed
$in_code = false;
// - true between <FootnoteList> and </FootnoteList>
$in_footnotes = false;
// - 0 outside a <NumberedItem>, set to 1 when one opens and bumped for every
//   text block emitted inside it
$in_numbered_item = 0;
// - true between <Bullet> and </Bullet>
$in_bullet_item = false;
// - set when a code block closes inside a numbered list, so the following
//   text is run on rather than given a new list marker
$seen_code_in_item = false;
// - true between <Indented> and </Indented>
$in_indent = false;
// - set by a <Figure ... withLineNumber> so following code uses '<code 1>'
$is_code_linenumbered = false;
|
---|
// - the manual lives at <source>/<lang>/<Manual>_<lang>.xml, built from the
//   (already validated) request arguments
$xml_file_path = sprintf(
    '%s/%s/%s_%s.xml',
    $xml_source_path,
    $_REQUEST['l'],
    ucfirst($_REQUEST['m']),
    $_REQUEST['l']
);
$xml_in = fopen($xml_file_path, 'r');
if ($xml_in === false) {
    printError('Failed to locate top level page for manual');
}

// - pre-processing pass: harvest every footnote up front, since each one has
//   to be moved onto the page that references it. A footnote is expected to
//   span three lines: <Footnote id="N">, its <Text>, then </Footnote>.
while (($line = fgets($xml_in)) !== false) {
    if (!preg_match('/<Footnote id="(\d+)">/', $line, $matches)) {
        continue;
    }
    $footnote_id = $matches[1];
    $text_line = fgets($xml_in);
    if (preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $text_line, $matches)) {
        $footnotes[$footnote_id] = '<!-- id:' . $matches[1] . ' -->' . translateText($matches[2]);
    }
    // - the line after the text is the closing </Footnote>; discard it
    fgets($xml_in);
}
fclose($xml_in);

// - second pass: start again from the top of the file for the real parse
$xml_in = fopen($xml_file_path, 'r');
if ($xml_in === false) {
    printError('Failed to locate top level page for manual');
}
|
---|
| 99 | while (($line = getLine($xml_in)) !== false)
|
---|
| 100 | {
|
---|
| 101 | // - Special Case: lingering code blocks, continue if next line also
|
---|
| 102 | // contains code, otherwise we need an extra newline
|
---|
| 103 | if ($in_code)
|
---|
| 104 | {
|
---|
| 105 | $code_text = "\n";
|
---|
| 106 | if (strpos($line, '<CodeLine') === false && strpos($line, 'type="code"') === false)
|
---|
| 107 | {
|
---|
| 108 | $code_text .= '</code>';
|
---|
| 109 | // - codeblocks that appear inside numbered lists do not get their own
|
---|
| 110 | // newlines as that would split the item. Instead newlines will be
|
---|
| 111 | // added when </NumberedItem> encountered.
|
---|
| 112 | if (!$is_numbered_list)
|
---|
| 113 | {
|
---|
| 114 | $code_text .= "\n\n";
|
---|
| 115 | }
|
---|
| 116 | else
|
---|
| 117 | {
|
---|
| 118 | $seen_code_in_item = true;
|
---|
| 119 | }
|
---|
| 120 | $in_code = false;
|
---|
| 121 | }
|
---|
| 122 | if ($in_chapter)
|
---|
| 123 | {
|
---|
| 124 | fwrite($chapter_txt_out, $code_text);
|
---|
| 125 | }
|
---|
| 126 | else
|
---|
| 127 | {
|
---|
| 128 | $frontmatter_text .= $code_text;
|
---|
| 129 | }
|
---|
| 130 | }
|
---|
| 131 | // - some system metadata to watch for
|
---|
| 132 | if (preg_match('/<!ENTITY\s+([^>]+)>/', $line, $matches))
|
---|
| 133 | {
|
---|
| 134 | $entity = $matches[1];
|
---|
| 135 | addMetadata('ENTITY',$entity);
|
---|
| 136 | if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches))
|
---|
| 137 | {
|
---|
| 138 | $entity_replacements[$matches[1]] = $matches[2];
|
---|
| 139 | }
|
---|
| 140 | }
|
---|
| 141 | // - we have an explicit list of cover metadata to watch for
|
---|
| 142 | elseif (!$in_section && !$in_chapter && preg_match('/<(Author|Affiliation|Comment|Date|Heading|SupplementaryText|Title|Version)>/', $line, $matches))
|
---|
| 143 | {
|
---|
| 144 | $looking_for_metadata = $matches[1];
|
---|
| 145 | }
|
---|
| 146 | elseif (!$in_section && !$in_chapter && preg_match('/<\/(Author|Affiliation|Comment|Date|Heading|SupplementaryText|Title|Version)>/', $line, $matches))
|
---|
| 147 | {
|
---|
| 148 | $looking_for_metadata = '';
|
---|
| 149 | }
|
---|
| 150 | // - found metadata we have!
|
---|
| 151 | elseif (!empty($looking_for_metadata) && preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $line, $matches))
|
---|
| 152 | {
|
---|
| 153 | $text_id = $matches[1];
|
---|
| 154 | $text = '<!-- id:' . $text_id . ' -->' . translateText($matches[2]);
|
---|
| 155 | addMetadata($looking_for_metadata, $text);
|
---|
| 156 | }
|
---|
[25052] | 157 | // - bogus metadata found in French version
|
---|
| 158 | elseif (!empty($looking_for_metadata) && preg_match('/<Text id="([^"]+)"\/>/', $line, $matches))
|
---|
| 159 | {
|
---|
| 160 | }
|
---|
[25026] | 161 | // - any text we encounter outside of both sections and chapters also
|
---|
| 162 | // belongs on the cover
|
---|
| 163 | elseif (!$in_section && !$in_chapter && !$in_footnotes && preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $line, $matches))
|
---|
| 164 | {
|
---|
| 165 | // (for now I'll assume id's are persistent)
|
---|
| 166 | addMetadata('Text', '<!-- id:' . $matches[1] . ' -->' . translateText($matches[2]));
|
---|
| 167 | }
|
---|
| 168 | // - we will probably encounter the opening section (which is outside of a
|
---|
| 169 | // chapter) first, so we have a special case for it
|
---|
| 170 | elseif (!$in_chapter && preg_match('/<Section id="([^"]+)">/', $line, $matches))
|
---|
| 171 | {
|
---|
| 172 | $section_id = $matches[1];
|
---|
| 173 | // - if this is the first non-chapter section we have encountered then it
|
---|
| 174 | // gets the honor of having the page---that these sections will
|
---|
| 175 | // eventually be printed out on---named after it. Typically this should
|
---|
| 176 | // be "about_this_manual"
|
---|
| 177 | if (empty($frontmatter_text))
|
---|
| 178 | {
|
---|
| 179 | $sections_page_name = $section_id;
|
---|
| 180 | }
|
---|
| 181 | $in_section = true;
|
---|
| 182 | $title = getTitle($xml_in, 'section:' . $section_id);
|
---|
| 183 | if (empty($frontmatter_text))
|
---|
| 184 | {
|
---|
| 185 | array_unshift($page_order, $sections_page_name . '|' . noComments($title));
|
---|
| 186 | }
|
---|
| 187 | // - if the title, as is, wouldn't autogenerate the appropriate id, then
|
---|
| 188 | // we have to include the id explicitly (as another html comment block)
|
---|
| 189 | if ($section_id != generateID($title))
|
---|
| 190 | {
|
---|
| 191 | $title = '<!-- sid:' . $section_id . ' -->' . $title;
|
---|
| 192 | $seen_ids[$section_id] = 1;
|
---|
| 193 | }
|
---|
| 194 | $frontmatter_text .= '===== ' . $title . ' =====' . "\n\n";
|
---|
| 195 | // - whew. Chapter's going to be just as bad though.
|
---|
| 196 | }
|
---|
| 197 | elseif ($in_section && preg_match('/<\/Section>/', $line))
|
---|
| 198 | {
|
---|
| 199 | $in_section = false;
|
---|
| 200 | }
|
---|
| 201 | elseif (preg_match('/<Chapter id="([^"]+)">/', $line, $matches))
|
---|
| 202 | {
|
---|
| 203 | $chapter_id = $matches[1];
|
---|
| 204 | echo "</p>\n<p><b>Import Chapter:</b>" . $chapter_id . '<br/>' . "\n";
|
---|
| 205 | $chapter_page_name = $chapter_id;
|
---|
| 206 | // - create a new file to store this chapter
|
---|
| 207 | $chapter_file_dir = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'];
|
---|
| 208 | if (!file_exists($chapter_file_dir))
|
---|
| 209 | {
|
---|
| 210 | mkAllDir($chapter_file_dir, 0755);
|
---|
| 211 | }
|
---|
| 212 | $chapter_file_path = $chapter_file_dir . '/' . $chapter_page_name . '.txt';
|
---|
| 213 | // - backup existing file
|
---|
| 214 | if (file_exists($chapter_file_path))
|
---|
| 215 | {
|
---|
| 216 | $chapter_backup_file_path = $chapter_file_path . '.bak';
|
---|
| 217 | rename($chapter_file_path, $chapter_backup_file_path);
|
---|
| 218 | }
|
---|
| 219 | // - open new file for writing
|
---|
| 220 | $chapter_txt_out = fopen($chapter_file_path, 'w');
|
---|
| 221 | if (!$chapter_txt_out)
|
---|
| 222 | {
|
---|
| 223 | printError('Failed to open page file for writing: ' . $chapter_page_name);
|
---|
| 224 | }
|
---|
| 225 | $in_chapter = true;
|
---|
| 226 | $title = getTitle($xml_in, 'chapter: ' . $chapter_id);
|
---|
| 227 | fwrite($chapter_txt_out, '====== ' . $title . ' ======' . "\n\n");
|
---|
| 228 | array_push($page_order, $chapter_page_name . '|' . noComments($title));
|
---|
| 229 | }
|
---|
| 230 | elseif ($in_chapter && preg_match('/<\/Chapter>/', $line))
|
---|
| 231 | {
|
---|
| 232 | fclose($chapter_txt_out);
|
---|
| 233 | $chapter_txt_out = false;
|
---|
| 234 | $in_chapter = false;
|
---|
| 235 | $page_count++;
|
---|
| 236 | }
|
---|
| 237 | // - section, subsection and part titles within chapter
|
---|
[25052] | 238 | elseif ($in_chapter && preg_match('/<(Section|Subsection|Part)\sid="([^"]*)">/', $line, $matches))
|
---|
[25026] | 239 | {
|
---|
| 240 | $title_type = $matches[1];
|
---|
| 241 | $section_id = $matches[2];
|
---|
[25052] | 242 | if (empty($section_id))
|
---|
| 243 | {
|
---|
| 244 | $section_id = generateID(strtolower($title_type));
|
---|
| 245 | }
|
---|
| 246 | echo '[adding ' . strtolower($title_type) . ': ' . $section_id . '] ';
|
---|
[25026] | 247 | $header_fix = '';
|
---|
| 248 | $title = getTitle($xml_in, 'heading: ' . $title_type);
|
---|
| 249 | if ($title_type == 'Section')
|
---|
| 250 | {
|
---|
| 251 | $header_fix = '=====';
|
---|
| 252 | }
|
---|
| 253 | if ($title_type == 'Subsection')
|
---|
| 254 | {
|
---|
| 255 | $header_fix = '====';
|
---|
| 256 | }
|
---|
| 257 | if ($title_type == 'Part')
|
---|
| 258 | {
|
---|
| 259 | $header_fix = '===';
|
---|
| 260 | // - remove b's and i's
|
---|
| 261 | $title = preg_replace('/<\/?(B|I)>/i', '', $title);
|
---|
| 262 | }
|
---|
| 263 | // - if the title, as is, wouldn't autogenerate the appropriate id, then
|
---|
| 264 | // we have to include the id explicitly (as another html comment block)
|
---|
| 265 | if ($section_id != generateID($title))
|
---|
| 266 | {
|
---|
| 267 | $title = '<!-- sid:' . $section_id . ' -->' . $title;
|
---|
| 268 | $seen_ids[$section_id] = 1;
|
---|
| 269 | }
|
---|
| 270 | fwrite($chapter_txt_out, $header_fix . ' ' . $title . ' ' . $header_fix . "\n\n");
|
---|
| 271 | }
|
---|
| 272 | elseif ($in_chapter && (strpos($line, '</Section') !== false || strpos($line, '</Subsection') !== false || strpos($line, '</Part') !== false))
|
---|
| 273 | {
|
---|
| 274 | // do nothing for now
|
---|
| 275 | }
|
---|
| 276 | // - figures (and their titles/captions)
|
---|
| 277 | elseif (preg_match('/<Figure id="([^"]+)"(.*?)>/', $line, $matches))
|
---|
| 278 | {
|
---|
| 279 | $figure_id = $matches[1];
|
---|
| 280 | $other_attributes = $matches[2];
|
---|
| 281 | echo '[adding figure: ' . $figure_id . "] \n";
|
---|
| 282 | // We need the title too
|
---|
| 283 | $caption = getTitle($xml_in, 'figure:' . $figure_id);
|
---|
| 284 | $caption = translateText(alternateComments($caption));
|
---|
| 285 | $txt = "<imgcaption figure_" . $figure_id . '|' . $caption . ' ';
|
---|
| 286 | // - we also check the other attributes to see if the XML has requested
|
---|
| 287 | // any following codeblock be linenumbered
|
---|
| 288 | if (strpos($other_attributes, 'withLineNumber') !== false)
|
---|
| 289 | {
|
---|
| 290 | $is_code_linenumbered = true;
|
---|
| 291 | $txt .= '%!-- withLineNumber --%';
|
---|
| 292 | }
|
---|
| 293 | $txt .= '></imgcaption>' . "\n";
|
---|
| 294 | if ($in_chapter)
|
---|
| 295 | {
|
---|
| 296 | fwrite($chapter_txt_out, $txt);
|
---|
| 297 | }
|
---|
| 298 | else
|
---|
| 299 | {
|
---|
| 300 | $frontmatter_text .= $txt;
|
---|
| 301 | }
|
---|
| 302 | }
|
---|
| 303 | elseif (strpos($line, '</Figure>') !== false)
|
---|
| 304 | {
|
---|
| 305 | if ($in_chapter)
|
---|
| 306 | {
|
---|
| 307 | fwrite($chapter_txt_out, "\n\n");
|
---|
| 308 | }
|
---|
| 309 | else
|
---|
| 310 | {
|
---|
| 311 | $frontmatter_text .= "\n\n";
|
---|
| 312 | }
|
---|
| 313 | // - no longer required
|
---|
| 314 | $is_code_linenumbered = false;
|
---|
| 315 | }
|
---|
| 316 | elseif (preg_match('/<Table([^>]*).*?.*?>/', $line, $matches))
|
---|
| 317 | {
|
---|
| 318 | $attributes = $matches[1];
|
---|
| 319 | $table_txt = '';
|
---|
| 320 | $table_id = '';
|
---|
| 321 | if (preg_match('/id="([^"]+)"/', $attributes, $matches))
|
---|
| 322 | {
|
---|
| 323 | $table_id = $matches[1];
|
---|
| 324 | }
|
---|
| 325 | else
|
---|
| 326 | {
|
---|
| 327 | $table_id = generateID('table');
|
---|
| 328 | }
|
---|
| 329 | $hidden = false;
|
---|
| 330 | if (strpos($attributes, 'class="hidden"') !== false)
|
---|
| 331 | {
|
---|
| 332 | $hidden = true;
|
---|
| 333 | }
|
---|
| 334 | echo '[adding table: ' . $table_id . "] \n";
|
---|
| 335 | $table_caption = getTitle($xml_in, 'table: ' . $table_id);
|
---|
| 336 | if ($hidden)
|
---|
| 337 | {
|
---|
| 338 | $table_txt .= '<tblcaption table_' . $table_id . '|##HIDDEN##></tblcaption>' . "\n";
|
---|
| 339 | }
|
---|
| 340 | elseif (empty($table_caption))
|
---|
| 341 | {
|
---|
| 342 | $table_txt .= '<tblcaption table_' . $table_id . '|##NOCAPTION##></tblcaption>' . "\n";
|
---|
| 343 | }
|
---|
| 344 | else
|
---|
| 345 | {
|
---|
| 346 | $table_txt .= '<tblcaption table_' . $table_id . '|' . noComments($table_caption) . '></tblcaption>' . "\n";
|
---|
| 347 | }
|
---|
| 348 | // - in order to properly capture the table we're going to have to read in
|
---|
| 349 | // the whole thing here, and take note of column widths
|
---|
| 350 | $have_output_widths = false;
|
---|
| 351 | $column_widths = array();
|
---|
| 352 | while (strpos($line, '</Table>') === false)
|
---|
| 353 | {
|
---|
| 354 | // - find the start of a row
|
---|
| 355 | while(!empty($line) && strpos($line, '<tr>') === false && strpos($line, '</Table>') === false)
|
---|
| 356 | {
|
---|
| 357 | $line = getLine($xml_in);
|
---|
| 358 | }
|
---|
| 359 | if (strpos($line, '<tr>') !== false)
|
---|
| 360 | {
|
---|
| 361 | $row_txt = '|';
|
---|
| 362 | $line = getLine($xml_in);
|
---|
[25052] | 363 | // - now we read in multiple cells (line starting <th
|
---|
| 364 | while (strpos($line, '<th') === 0)
|
---|
[25026] | 365 | {
|
---|
[25052] | 366 | if (preg_match('/<th width="(\d+)"\/?>/', $line, $matches))
|
---|
[25026] | 367 | {
|
---|
[25052] | 368 | $cell_width = $matches[1];
|
---|
| 369 | if (!$have_output_widths)
|
---|
| 370 | {
|
---|
| 371 | array_push($column_widths, $cell_width);
|
---|
| 372 | }
|
---|
[25026] | 373 | }
|
---|
| 374 | // Ignore empty cells
|
---|
| 375 | // - adding another case for empty header cells (turned up in es
|
---|
| 376 | // version of "From Paper")
|
---|
| 377 | if (preg_match('/<th width="\d+"\/>/', $line) || preg_match('/<th width="\d+">.*<\/th>/', $line))
|
---|
| 378 | {
|
---|
| 379 | $row_txt .= ' |';
|
---|
| 380 | }
|
---|
| 381 | else
|
---|
| 382 | {
|
---|
| 383 | $line = getLine($xml_in);
|
---|
| 384 | $first = true;
|
---|
| 385 | while (strpos($line, '</th>') === false)
|
---|
| 386 | {
|
---|
| 387 | if (!$first)
|
---|
| 388 | {
|
---|
| 389 | $row_txt .= '\\\\';
|
---|
| 390 | }
|
---|
| 391 | // - we can have images or text in our tables
|
---|
[25052] | 392 | if (preg_match('/<File.*url="images\/([^"]+)".*\/>/', $line, $matches))
|
---|
[25026] | 393 | {
|
---|
[25052] | 394 | $payload = $matches[0];
|
---|
| 395 | $filename = $matches[1];
|
---|
| 396 | $width = 0;
|
---|
| 397 | if (preg_match('/width="(\d+)"/', $payload, $matches))
|
---|
| 398 | {
|
---|
| 399 | $width = $matches[1];
|
---|
| 400 | }
|
---|
| 401 | $height = 0;
|
---|
| 402 | if (preg_match('/height="(\d+)"/', $payload, $matches))
|
---|
| 403 | {
|
---|
| 404 | $height = $matches[1];
|
---|
| 405 | }
|
---|
| 406 | $image_txt = handleImage($filename, $width, $height);
|
---|
[25026] | 407 | $row_txt .= ' ' . $image_txt . ' ';
|
---|
| 408 | }
|
---|
[25052] | 409 | elseif (preg_match('/<Text id="([^"]+)">(.*)/', $line, $matches))
|
---|
[25026] | 410 | {
|
---|
[25052] | 411 | $tid = $matches[1];
|
---|
| 412 | $txt = $matches[2];
|
---|
// - multiple line text block: keep appending lines until the closing
//   </Text> turns up. We also stop when getLine() reports end-of-file
//   (false), the same guard the main text handler uses; without it a
//   missing </Text> would leave this loop spinning forever.
while (strpos($txt, '</Text>') === false && ($another_line = getLine($xml_in)) !== false)
{
    $txt .= $another_line;
}
|
---|
| 418 | $txt = str_replace('</Text>','',$txt);
|
---|
| 419 | $row_txt .= ' <!-- id:' . $tid . ' -->' . translateText($txt) . ' ';
|
---|
[25026] | 420 | }
|
---|
| 421 | elseif (preg_match('/<CodeLine>(.*?)<\/CodeLine>/',$line,$matches))
|
---|
| 422 | {
|
---|
| 423 | $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]) . '\'\' ';
|
---|
| 424 | }
|
---|
[25052] | 425 | elseif (preg_match('/<CodeLine>(.*)/',$line,$matches))
|
---|
| 426 | {
|
---|
| 427 | $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]) . '\'\' ';
|
---|
| 428 | }
|
---|
| 429 | elseif (preg_match('/(.*)<\/CodeLine>/',$line,$matches))
|
---|
| 430 | {
|
---|
| 431 | if (!empty($matches[1]))
|
---|
| 432 | {
|
---|
| 433 | $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]). '\'\' ';
|
---|
| 434 | }
|
---|
| 435 | else
|
---|
| 436 | {
|
---|
| 437 | $row_txt .= ' ';
|
---|
| 438 | }
|
---|
| 439 | }
|
---|
// we'll add (bogus) linebreaks
// - a bare <br/> line inside a cell. The '\\' linebreak marker has already
//   been appended above for any non-first line, so only a space is added
//   here. This must append: assigning would throw away the leading '|' and
//   every cell already collected for this row.
elseif (preg_match('/^\s*<br\s*\/?>\s*$/', $line))
{
    $row_txt .= ' ';
}
|
---|
[25026] | 445 | else
|
---|
| 446 | {
|
---|
[25052] | 447 | printError('Warning! Unrecognized element in table: ' . htmlspecialchars($line));
|
---|
[25026] | 448 | }
|
---|
| 449 | $first = false;
|
---|
| 450 | // - next line
|
---|
| 451 | $line = getLine($xml_in);
|
---|
| 452 | }
|
---|
| 453 | // - close the cell
|
---|
| 454 | $row_txt .= '|';
|
---|
| 455 | }
|
---|
| 456 | // next!
|
---|
| 457 | $line = getLine($xml_in);
|
---|
| 458 | }
|
---|
| 459 | // - if we haven't already, output the width command
|
---|
| 460 | if (!$have_output_widths)
|
---|
| 461 | {
|
---|
| 462 | $table_txt .= '|< - ' . implode(' ', $column_widths) . ' >|' . "\n";
|
---|
| 463 | $have_output_widths = true;
|
---|
| 464 | }
|
---|
| 465 | $table_txt .= $row_txt . "\n";
|
---|
| 466 | // - throw away the closing </tr>
|
---|
| 467 | $line = getLine($xml_in);
|
---|
| 468 | }
|
---|
| 469 | }
|
---|
| 470 | $table_txt .= "\n";
|
---|
| 471 | if ($in_chapter)
|
---|
| 472 | {
|
---|
| 473 | fwrite($chapter_txt_out, $table_txt);
|
---|
| 474 | }
|
---|
| 475 | else
|
---|
| 476 | {
|
---|
| 477 | $frontmatter_text .= $table_txt;
|
---|
| 478 | }
|
---|
| 479 | }
|
---|
| 480 | // - copy and insert images
|
---|
| 481 | elseif (preg_match('/<File width="(\d+)" height="(\d+)" url="images\/([^"]+)"\/>/', $line, $matches))
|
---|
| 482 | {
|
---|
| 483 | $image_txt = handleImage($matches[3], $matches[1], $matches[2]);
|
---|
| 484 | if ($in_chapter)
|
---|
| 485 | {
|
---|
| 486 | fwrite($chapter_txt_out, $image_txt);
|
---|
| 487 | }
|
---|
| 488 | else
|
---|
| 489 | {
|
---|
| 490 | $frontmatter_text .= $image_txt;
|
---|
| 491 | }
|
---|
| 492 | }
|
---|
| 493 | // - bullet lists
|
---|
| 494 | elseif (preg_match('/<BulletList>/', $line))
|
---|
| 495 | {
|
---|
| 496 | echo "[adding bulletlist] \n";
|
---|
| 497 | if ($in_bullet_item || $in_numbered_item)
|
---|
| 498 | {
|
---|
| 499 | if ($in_chapter)
|
---|
| 500 | {
|
---|
| 501 | fwrite($chapter_txt_out, "\n");
|
---|
| 502 | }
|
---|
| 503 | elseif ($in_section)
|
---|
| 504 | {
|
---|
| 505 | $frontmatter_text .= "\n";
|
---|
| 506 | }
|
---|
| 507 | }
|
---|
| 508 | $bullet_depth++;
|
---|
| 509 | $is_numbered_list = false;
|
---|
| 510 | }
|
---|
| 511 | // - numbered lists
|
---|
| 512 | elseif (preg_match('/<NumberedList>/', $line))
|
---|
| 513 | {
|
---|
| 514 | echo "[adding numbered list] \n";
|
---|
| 515 | $bullet_depth++;
|
---|
| 516 | $is_numbered_list = true;
|
---|
| 517 | // - reset this flag that keeps track of whether an item (numbered or
|
---|
| 518 | // otherwise) is legitimately split by a code block
|
---|
| 519 | $seen_code_in_item = false;
|
---|
| 520 | }
|
---|
| 521 | elseif (preg_match('/<\/BulletList>/', $line))
|
---|
| 522 | {
|
---|
| 523 | $bullet_depth--;
|
---|
| 524 | if ($bullet_depth == 0)
|
---|
| 525 | {
|
---|
| 526 | if ($in_chapter)
|
---|
| 527 | {
|
---|
| 528 | fwrite($chapter_txt_out, "\n");
|
---|
| 529 | }
|
---|
| 530 | elseif ($in_section)
|
---|
| 531 | {
|
---|
| 532 | $frontmatter_text .= "\n";
|
---|
| 533 | }
|
---|
| 534 | $is_numbered_list = false;
|
---|
| 535 | }
|
---|
| 536 | ///cho "[finished bulletlist] ";
|
---|
| 537 | }
|
---|
| 538 | elseif (preg_match('/<\/NumberedList>/', $line))
|
---|
| 539 | {
|
---|
| 540 | $bullet_depth--;
|
---|
| 541 | if ($bullet_depth == 0)
|
---|
| 542 | {
|
---|
| 543 | if ($in_chapter)
|
---|
| 544 | {
|
---|
| 545 | fwrite($chapter_txt_out, "\n");
|
---|
| 546 | }
|
---|
| 547 | elseif ($in_section)
|
---|
| 548 | {
|
---|
| 549 | $frontmatter_text .= "\n";
|
---|
| 550 | }
|
---|
| 551 | $is_numbered_list = false;
|
---|
| 552 | }
|
---|
| 553 | ///cho "[finished numbered list] ";
|
---|
| 554 | }
|
---|
| 555 | elseif (preg_match('/<NumberedItem>/', $line))
|
---|
| 556 | {
|
---|
| 557 | $in_numbered_item = 1;
|
---|
| 558 | }
|
---|
| 559 | elseif (preg_match('/<\/NumberedItem>/', $line))
|
---|
| 560 | {
|
---|
| 561 | $in_numbered_item = 0;
|
---|
| 562 | if ($in_chapter)
|
---|
| 563 | {
|
---|
| 564 | fwrite($chapter_txt_out, "\n");
|
---|
| 565 | }
|
---|
| 566 | else
|
---|
| 567 | {
|
---|
| 568 | $frontmatter_text .= "\n";
|
---|
| 569 | }
|
---|
| 570 | }
|
---|
| 571 | elseif (preg_match('/<Bullet>/', $line))
|
---|
| 572 | {
|
---|
| 573 | $in_bullet_item = true;
|
---|
| 574 | }
|
---|
| 575 | elseif (preg_match('/<\/Bullet>/', $line))
|
---|
| 576 | {
|
---|
| 577 | if ($in_chapter)
|
---|
| 578 | {
|
---|
| 579 | fwrite($chapter_txt_out, "\n");
|
---|
| 580 | }
|
---|
| 581 | else
|
---|
| 582 | {
|
---|
| 583 | $frontmatter_text .= "\n";
|
---|
| 584 | }
|
---|
| 585 | $in_bullet_item = false;
|
---|
| 586 | }
|
---|
| 587 | // TEXT HANDLING - this is the main case, but has disappeared into the mire
|
---|
| 588 | // of other cases.
|
---|
| 589 | elseif (!$in_footnotes && preg_match('/<Text id="([^"]+)">(.+?)$/', $line, $matches))
|
---|
| 590 | {
|
---|
| 591 | $id = $matches[1];
|
---|
| 592 | $str = $matches[2];
|
---|
| 593 | // - special case for those text elements split over multiple lines. We
|
---|
| 594 | // keep concatenating lines until we find the closing text element or we
|
---|
| 595 | // run out of lines!
|
---|
| 596 | $another_line = '';
|
---|
| 597 | while (strpos($str, '</Text>') === false && ($another_line = getLine($xml_in)) !== false)
|
---|
| 598 | {
|
---|
| 599 | $str .= ' ' . $another_line;
|
---|
| 600 | }
|
---|
| 601 | // - note that if we ran out of lines (eof) then we'll break out of this
|
---|
| 602 | // block anyway, it's just there won't be a </Text> at the end of this
|
---|
| 603 | // block... despite this being a major validation issue in the XML it
|
---|
| 604 | // shouldn't result in this script being vladed
|
---|
| 605 | // - now remove the </Text> from the end (hopefully) of str
|
---|
| 606 | $str = preg_replace('/<\/Text>\s*/', '', $str);
|
---|
| 607 | // - and prepend the id while translating the str into Dokuwiki format
|
---|
| 608 | $str = '<!-- id:' . $id . ' -->' . translateText($str);
|
---|
| 609 | if ($bullet_depth > 0)
|
---|
| 610 | {
|
---|
| 611 | if ($is_numbered_list)
|
---|
| 612 | {
|
---|
| 613 | // - special case for those text elements legitimately split in two by
|
---|
| 614 | // code blocks. They get no bullet of either type and are 'run-on'
|
---|
| 615 | // immediately to the end of the code element in order to prevent
|
---|
| 616 | // dokuwiki restarting numbering etc
|
---|
| 617 | if ($seen_code_in_item)
|
---|
| 618 | {
|
---|
| 619 | // - leave str as it is
|
---|
| 620 | // - reset flag just in case the item happens to contain another
|
---|
| 621 | // code block
|
---|
| 622 | $seen_code_in_item = false;
|
---|
| 623 | }
|
---|
| 624 | else if ($in_numbered_item == 1)
|
---|
| 625 | {
|
---|
| 626 | $str = '- ' . $str;
|
---|
| 627 | }
|
---|
| 628 | // - superspecial case for the poorly formatted numberlists that
|
---|
| 629 | // contain more than one text block per point. We'll nest them
|
---|
| 630 | // as a bullet list as that preserves order, formatting and (I
|
---|
| 631 | // hope) meaning.
|
---|
| 632 | else
|
---|
| 633 | {
|
---|
| 634 | if ($in_chapter)
|
---|
| 635 | {
|
---|
| 636 | fwrite($chapter_txt_out, "\n");
|
---|
| 637 | }
|
---|
| 638 | else
|
---|
| 639 | {
|
---|
| 640 | $frontmatter_text .= "\n";
|
---|
| 641 | }
|
---|
| 642 | $str = ' * ' . $str;
|
---|
| 643 | }
|
---|
| 644 | $in_numbered_item++;
|
---|
| 645 | }
|
---|
| 646 | else
|
---|
| 647 | {
|
---|
| 648 | $str = '* ' . $str;
|
---|
| 649 | }
|
---|
| 650 | for ($i = 0; $i < $bullet_depth; $i++)
|
---|
| 651 | {
|
---|
| 652 | $str = ' ' . $str;
|
---|
| 653 | }
|
---|
| 654 | }
|
---|
| 655 | else
|
---|
| 656 | {
|
---|
| 657 | // Indented text is preceded by a >
|
---|
| 658 | if ($in_indent)
|
---|
| 659 | {
|
---|
| 660 | $str = '> ' . $str . "\n";
|
---|
| 661 | }
|
---|
| 662 | else
|
---|
| 663 | {
|
---|
| 664 | $str .= "\n";
|
---|
| 665 | }
|
---|
| 666 | }
|
---|
| 667 | if ($bullet_depth == 0)
|
---|
| 668 | {
|
---|
| 669 | $str .= "\n";
|
---|
| 670 | }
|
---|
| 671 | if ($in_chapter)
|
---|
| 672 | {
|
---|
| 673 | fwrite($chapter_txt_out, $str);
|
---|
| 674 | }
|
---|
| 675 | else
|
---|
| 676 | {
|
---|
| 677 | $frontmatter_text .= $str;
|
---|
| 678 | }
|
---|
| 679 | }
|
---|
| 680 | // - codified text blocks
|
---|
| 681 | elseif (preg_match('/<Text\s+type="code"\s+id="([^"]+)"\s*>(.+?)<\/Text>/', $line, $matches))
|
---|
| 682 | {
|
---|
| 683 | $code_id = $matches[1];
|
---|
| 684 | // - determine the appropriate code block prefix
|
---|
| 685 | $code_prefix = '';
|
---|
| 686 | if (!$in_code)
|
---|
| 687 | {
|
---|
| 688 | if ($is_code_linenumbered)
|
---|
| 689 | {
|
---|
| 690 | $code_prefix = '<code 1>';
|
---|
| 691 | }
|
---|
| 692 | else
|
---|
| 693 | {
|
---|
| 694 | $code_prefix = '<code>';
|
---|
| 695 | }
|
---|
| 696 | $in_code = true;
|
---|
| 697 | }
|
---|
| 698 | $code_txt = $code_prefix . '<!-- id:' . $matches[1] . ' -->' . translateText($matches[2], true);
|
---|
| 699 | if ($in_chapter)
|
---|
| 700 | {
|
---|
| 701 | fwrite($chapter_txt_out, $code_txt);
|
---|
| 702 | }
|
---|
| 703 | else
|
---|
| 704 | {
|
---|
| 705 | $frontmatter_text .= $code_txt;
|
---|
| 706 | }
|
---|
| 707 | }
|
---|
| 708 | elseif (preg_match('/<Text\s+id="([^"]+)"\s+type="code"\s*>(.+?)<\/Text>/', $line, $matches))
|
---|
| 709 | {
|
---|
| 710 | $code_id = $matches[1];
|
---|
| 711 | // - determine the appropriate code block prefix
|
---|
| 712 | $code_prefix = '';
|
---|
| 713 | if (!$in_code)
|
---|
| 714 | {
|
---|
| 715 | if ($is_code_linenumbered)
|
---|
| 716 | {
|
---|
| 717 | $code_prefix = '<code 1>';
|
---|
| 718 | }
|
---|
| 719 | else
|
---|
| 720 | {
|
---|
| 721 | $code_prefix = '<code>';
|
---|
| 722 | }
|
---|
| 723 | $in_code = true;
|
---|
| 724 | }
|
---|
| 725 | $code_txt = $code_prefix . '<!-- id:' . $matches[1] . ' -->' . translateText($matches[2], true);
|
---|
| 726 | if ($in_chapter)
|
---|
| 727 | {
|
---|
| 728 | fwrite($chapter_txt_out, $code_txt);
|
---|
| 729 | }
|
---|
| 730 | else
|
---|
| 731 | {
|
---|
| 732 | $frontmatter_text .= $code_txt;
|
---|
| 733 | }
|
---|
| 734 | }
|
---|
| 735 | elseif (preg_match('/<CodeLine>(.*?)$/', $line, $matches))
|
---|
| 736 | {
|
---|
| 737 | $code_txt = $matches[1];
|
---|
| 738 | // - determine the appropriate code block prefix
|
---|
| 739 | $code_prefix = '';
|
---|
| 740 | if (!$in_code)
|
---|
| 741 | {
|
---|
| 742 | if ($is_code_linenumbered)
|
---|
| 743 | {
|
---|
| 744 | $code_prefix = "<code 1>\n";
|
---|
| 745 | }
|
---|
| 746 | else
|
---|
| 747 | {
|
---|
| 748 | $code_prefix = "<code>\n";
|
---|
| 749 | }
|
---|
| 750 | $in_code = true;
|
---|
| 751 | }
|
---|
| 752 | // - arg. another special case for codelines that span more than one line
|
---|
| 753 | // (but I guess <CodeLineButSometimesMoreThanOneLine> is a bit cumbersome
|
---|
| 754 | // for an element name, eh?)
|
---|
| 755 | $another_line = '';
|
---|
| 756 | while (strpos($code_txt, '</CodeLine>') === false && ($another_line = getLine($xml_in)) !== false)
|
---|
| 757 | {
|
---|
| 758 | $code_txt .= ' ' . $another_line;
|
---|
| 759 | }
|
---|
| 760 | $code_txt = preg_replace('/<\/CodeLine>\s*/', '', $code_txt);
|
---|
| 761 | $code_txt = $code_prefix . translateText($code_txt, true);
|
---|
| 762 | if ($in_chapter)
|
---|
| 763 | {
|
---|
| 764 | fwrite($chapter_txt_out, $code_txt);
|
---|
| 765 | }
|
---|
| 766 | else
|
---|
| 767 | {
|
---|
| 768 | $frontmatter_text .= $code_txt;
|
---|
| 769 | }
|
---|
| 770 | }
|
---|
| 771 | // - there are also sometimes empty codelines - which indicate a newline in
|
---|
| 772 | // the code listing
|
---|
| 773 | elseif (preg_match('/<CodeLine\s*\/>/', $line, $matches))
|
---|
| 774 | {
|
---|
| 775 | $code_txt = '';
|
---|
| 776 | if (!$in_code)
|
---|
| 777 | {
|
---|
| 778 | $code_txt = "\n";
|
---|
| 779 | if ($is_code_linenumbered)
|
---|
| 780 | {
|
---|
| 781 | $code_txt = "<code 1>\n" . $code_txt;
|
---|
| 782 | }
|
---|
| 783 | else
|
---|
| 784 | {
|
---|
| 785 | $code_txt = "<code>\n" . $code_txt;
|
---|
| 786 | }
|
---|
| 787 | $in_code = true;
|
---|
| 788 | }
|
---|
| 789 | if ($in_chapter)
|
---|
| 790 | {
|
---|
| 791 | fwrite($chapter_txt_out, $code_txt);
|
---|
| 792 | }
|
---|
| 793 | else
|
---|
| 794 | {
|
---|
| 795 | $frontmatter_text .= $code_txt;
|
---|
| 796 | }
|
---|
| 797 | }
|
---|
| 798 | // - reference to an external XML file
|
---|
| 799 | elseif (preg_match('/^\s*&[a-z0-9_]+;\s+$/is', $line))
|
---|
| 800 | {
|
---|
| 801 | if ($in_chapter)
|
---|
| 802 | {
|
---|
| 803 | fwrite($chapter_txt_out, $line);
|
---|
| 804 | }
|
---|
| 805 | else
|
---|
| 806 | {
|
---|
| 807 | $frontmatter_text .= $line;
|
---|
| 808 | }
|
---|
| 809 | }
|
---|
| 810 | elseif (strpos($line, '<FootnoteList>') !== false)
|
---|
| 811 | {
|
---|
| 812 | $in_footnotes = true;
|
---|
| 813 | }
|
---|
| 814 | elseif ($in_footnotes && strpos($line, '</FootnoteList>') !== false)
|
---|
| 815 | {
|
---|
| 816 | $in_footnotes = false;
|
---|
| 817 | }
|
---|
| 818 | // Indentation - the closest thing we have is quoting, so we'll use that
|
---|
| 819 | elseif (strpos($line, '<Indented>') !== false)
|
---|
| 820 | {
|
---|
| 821 | $in_indent = true;
|
---|
| 822 | }
|
---|
| 823 | elseif (strpos($line, '</Indented>') !== false)
|
---|
| 824 | {
|
---|
| 825 | $in_indent = false;
|
---|
| 826 | }
|
---|
| 827 | // - pattern of lines to ignore
|
---|
[25052] | 828 | else if (preg_match('/^(<\?xml version="1.0" encoding="UTF-8"\?>|<\!DOCTYPE Manual \[|\]>|<Bullet>|<\/?Content>|<\/?Footnote|<Manual id=".+?" lang=".+?">|<\/Manual>)/', $line))
|
---|
[25026] | 829 | {
|
---|
| 830 | }
|
---|
| 831 | // - we ignore anything else in footnotes too, as they were handled in the
|
---|
| 832 | // preprocessing pass
|
---|
[25052] | 833 | else if ($in_footnotes)
|
---|
[25026] | 834 | {
|
---|
| 835 | }
|
---|
[25052] | 836 | // - ignore empty lines
|
---|
| 837 | else if (preg_match('/^\s*$/', $line))
|
---|
| 838 | {
|
---|
| 839 | }
|
---|
| 840 | // - meh. French versions have random, non-text element, linebreaks floating
|
---|
| 841 | // around. Guess I'll honor their formatting even though it's bogus
|
---|
| 842 | else if (preg_match('/^\s*<br\s*\/?>\s*$/', $line))
|
---|
| 843 | {
|
---|
| 844 | if ($in_chapter)
|
---|
| 845 | {
|
---|
| 846 | fwrite($chapter_txt_out, ' \\\\');
|
---|
| 847 | }
|
---|
| 848 | else
|
---|
| 849 | {
|
---|
| 850 | $frontmatter_text .= ' \\\\';
|
---|
| 851 | }
|
---|
| 852 | }
|
---|
[25026] | 853 | // - danger Will Robinson!
|
---|
| 854 | else
|
---|
| 855 | {
|
---|
| 856 | echo '<div style="background-color:yellow;"><hr /><b>Warning!</b> Failed to parse line ' . $line_counter . ': |' . htmlspecialchars($line) . "|<hr /></div>\n";
|
---|
| 857 | }
|
---|
| 858 | }
|
---|
| 859 |
|
---|
// 2. We should now have enough metadata to export the cover page
$top_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt';
// - backup any existing file (a previous backup at the same path is replaced)
if (file_exists($top_page_path))
{
  $top_page_backup_path = $top_page_path . '.bak';
  if (!rename($top_page_path, $top_page_backup_path))
  {
    printError('Failed to rename existing top page for backup');
  }
}
// - and create a handle to the new file; bail out now rather than pushing
//   every following fwrite() at an invalid handle
$txt_out = fopen($top_page_path, 'w');
if ($txt_out === false)
{
  printError('Failed to open top page for writing');
}
// - write the page (including the tables)
fwrite($txt_out, '====== ' . noComments(ucfirst(getFirstMetadata('Heading'))) . ': ' . noComments(ucfirst(getFirstMetadata('Title'))) . ' (' . strtoupper($_REQUEST['l']) . ') ======' . "\n");
fwrite($txt_out, "\n");

// - *NEW* ability to request imports and exports from within the page
fwrite($txt_out, "<ifauth @admin>\n\n");
fwrite($txt_out, '**Administrator Commands:**' . "\n");
// On second thoughts we probably never want to do this casually, as it boguses
// all history/approval/edit information. Instead I'll leave this as a manual
// process.
fwrite($txt_out, '<!-- Import available at this link - but be warned all current wiki data for this manual will become bogus: http://~~baseurl~~/../../php/gs-manual-import.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . " -->\n");
fwrite($txt_out, ' * Export manual: [[http://~~baseurl~~/../../php/gs-manual-export.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . '&v=draft&a=download|draft version]] [[http://~~baseurl~~/../../php/gs-manual-export.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . '&a=download|approved version]]' . "\n");
fwrite($txt_out, "</ifauth>\n\n");

// - regular metadata: one table row per stored value, so a field that was
//   recorded several times produces several rows
fwrite($txt_out, '<!-- Note: cover page information -->' . "\n");
fwrite($txt_out, '^ Metadata ^ Value ^' . "\n");
$fields = array('Heading','Title','Author','Affiliation','Text','Comment','Version','Date');
foreach ($fields as $field)
{
  $values = getMetadata($field);
  foreach ($values as $value)
  {
    fwrite($txt_out, '^ ' . $field . ' | ' . $value . ' |' . "\n");
  }
}
fwrite($txt_out, "\n");
// - contents (which also provides order information for exporting)
fwrite($txt_out, '===== Contents =====' . "\n");
fwrite($txt_out, "\n");
fwrite($txt_out, '<!-- Note: The ordering of pages here is used when creating the HTML and PDF versions of the manual -->' . "\n");
foreach ($page_order as $page_info)
{
  fwrite($txt_out, ' * [[.:' . $_REQUEST['m'] . ':' . $page_info . ']]' . "\n");
}
// - system metadata (only shown to administrators)
fwrite($txt_out, "<ifauth @admin>\n\n");
fwrite($txt_out, '===== System Metadata =====' . "\n");
fwrite($txt_out, '<!-- Note: configuration options for the manual -->' . "\n");
fwrite($txt_out, '^ Metadata ^ Value ^' . "\n");
$fields = array('ENTITY','SupplementaryText');
foreach ($fields as $field)
{
  $values = getMetadata($field);
  foreach ($values as $value)
  {
    fwrite($txt_out, '^ ' . $field . ' | ' . $value . ' |' . "\n");
  }
}
fwrite($txt_out, "</ifauth>\n\n");
fwrite($txt_out, "\n");
// - done!
fclose($txt_out);
$page_count++;

// 3. And the 'sections' page, grouping together all the loose sections as
//    frontmatter
$frontmatter_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $sections_page_name . '.txt';
// - backup any existing file
if (file_exists($frontmatter_page_path))
{
  $frontmatter_page_backup_path = $frontmatter_page_path . '.bak';
  if (!rename($frontmatter_page_path, $frontmatter_page_backup_path))
  {
    printError('Failed to rename existing frontmatter page for backup');
  }
}
// - populate the new frontmatter file, reporting a failed write instead of
//   announcing a successful import
if (file_put_contents($frontmatter_page_path, $frontmatter_text) === false)
{
  printError('Failed to write frontmatter page');
}

echo "</p>\n<p><b>Complete!</b> Imported " . $page_count . " pages</p><hr/>\n";
echo '<p>Click <a href="' . $dokuwiki_url . '/doku.php?id=' . $_REQUEST['l'] . ':manuals:' . $_REQUEST['m'] . '">here</a> to return to wiki page</p>' . "\n";
exit(0);
|
---|
| 946 |
|
---|
/**
 * Record one more value for a manual metadata field.
 *
 * Values accumulate in the global $manual_metadata array, keyed by field
 * name, in the order they were added. A progress note naming the field is
 * echoed on every call.
 *
 * @param string $field the metadata field name
 * @param mixed  $value the value to append to that field's list
 */
function addMetadata($field, $value)
{
  global $manual_metadata;
  echo '[adding metadata: ' . $field . "] \n";
  // - start from whatever has already been stored for this field
  $collected = isset($manual_metadata[$field]) ? $manual_metadata[$field] : array();
  $collected[] = $value;
  $manual_metadata[$field] = $collected;
}
|
---|
| 961 | /** addMetadata() **/
|
---|
| 962 |
|
---|
/**
 * Fetch the first value stored for a manual metadata field.
 *
 * @param string $field the metadata field name
 * @return mixed the value at index 0 of the field's list, or an empty string
 *               when the field is unknown or its list is empty
 */
function getFirstMetadata($field)
{
  global $manual_metadata;
  // - unknown field: nothing to report
  if (!isset($manual_metadata[$field]))
  {
    return '';
  }
  $stored = $manual_metadata[$field];
  if (empty($stored))
  {
    return '';
  }
  return $stored[0];
}
|
---|
| 977 | /** getFirstMetadata() **/
|
---|
| 978 |
|
---|
/**
 * Fetch every value stored for a manual metadata field.
 *
 * @param string $field the metadata field name
 * @return mixed whatever is stored under the field in the global
 *               $manual_metadata, or an empty array when the field is unset
 */
function getMetadata($field)
{
  global $manual_metadata;
  return isset($manual_metadata[$field]) ? $manual_metadata[$field] : array();
}
|
---|
| 989 | /** getMetadata() **/
|
---|
| 990 |
|
---|
/**
 * Read the title that opens an element and return it as wiki text.
 *
 * Lines are consumed from the XML stream through getLine(). The title is
 * normally a Title element wrapping a Text element, optionally with a
 * SubTitle either before or after the main text; a bare Text element with no
 * Title wrapper is also accepted. Every text id encountered is preserved in
 * the result as a '<!-- id:... -->' comment placed before its text, and any
 * subtitle text is placed in front of the main title text.
 *
 * @param resource $xml_in  the open XML input stream
 * @param string   $element name of the element being titled; only used in
 *                          error messages
 * @return string the assembled title, or '' for an empty '<Title/>' element
 */
function getTitle($xml_in, $element)
{
  $subtitle_pattern = '/<Text id="([^"]+)">(.+?)<\/Text>/';
  $title = '';
  // - the first thing in the element will be its title
  $line = getLine($xml_in);
  // - super special case: a table with an empty title
  if (strpos($line, '<Title/>') !== false)
  {
    return '';
  }
  // - remember whether there is a wrapping Title element, as only then is
  //   there a closing tag (and possibly a trailing subtitle) to consume
  $wrapped = (strpos($line, '<Title>') !== false);
  if ($wrapped)
  {
    $line = getLine($xml_in);
  }
  // - some horribly formed entries have the subtitle first within the title
  //   element
  if (strpos($line, '<SubTitle>') !== false)
  {
    if (preg_match($subtitle_pattern, getLine($xml_in), $found))
    {
      $title = '<!-- id:' . $found[1] . ' -->' . $found[2] . ' ' . $title;
    }
    if (strpos(getLine($xml_in), '</SubTitle>') === false)
    {
      printError('Failed to find closing title for: ' . $element);
    }
    $line = getLine($xml_in);
  }
  // - grab the title text itself
  if (preg_match('/<Text id="([^"]+)">(.*?)$/', $line, $found))
  {
    $text_id = $found[1];
    $body = $found[2];
    // - text blocks may span multiple lines, so keep appending lines until
    //   the closing tag shows up or the input runs out
    while (strpos($body, '</Text>') === false)
    {
      $extra = getLine($xml_in);
      if ($extra === false)
      {
        break;
      }
      $body .= ' ' . $extra;
    }
    // - now remove </Text>
    $body = preg_replace('/<\/Text>\s*/', '', $body);
    $title = '<!-- id:' . $text_id . ' -->' . $body . $title;
  }
  // - special case for empty titles that still use up a text id
  elseif (preg_match('/<Text id="([^"]+)"\s*\/>/', $line, $found))
  {
    $title = '<!-- id:' . $found[1] . ' -->' . $title;
  }
  else
  {
    printError('Failed to find title text for: ' . $element);
  }
  // - nothing further to consume when there was no wrapping Title element
  if (!$wrapped)
  {
    return $title;
  }
  // - watch for a subtitle following the main text
  $line = getLine($xml_in);
  if (strpos($line, '<SubTitle>') !== false)
  {
    if (preg_match($subtitle_pattern, getLine($xml_in), $found))
    {
      $title = '<!-- id:' . $found[1] . ' -->' . $found[2] . ' ' . $title;
    }
    if (strpos(getLine($xml_in), '</SubTitle>') === false)
    {
      printError('Failed to find closing title for: ' . $element);
    }
    $line = getLine($xml_in);
  }
  if (strpos($line, '</Title>') === false)
  {
    printError('Failed to find closing title for: ' . $element);
  }
  return $title;
}
|
---|
| 1084 | /** getTitle() **/
|
---|
| 1085 |
|
---|
/**
 * Rewrite HTML comment delimiters into the '%!--' / '--%' alternate form and
 * collapse any CrossRef element down to the value of its ref attribute.
 *
 * @param string $text the text to rewrite
 * @return string the rewritten text
 */
function alternateComments($text)
{
  // - opening delimiters are swapped first, then closing ones
  $swapped = str_replace(array('<!--', '-->'), array('%!--', '--%'), $text);
  // - remove any lurking crossrefs while we are at it
  return preg_replace('/<CrossRef.*?ref="([^"]+)".*?>/', '\\1', $swapped);
}
|
---|
| 1094 |
|
---|
/**
 * Turn <i>...</i> spans into //...// and strip HTML comments from the text.
 *
 * @param string $text the text to clean
 * @return string the text with italics converted and comments removed
 */
function noComments($text)
{
  // - the two rewrites are applied in order: italics first, then comments
  $patterns = array('/<i>(.*?)<\/i>/', '/<!--[^>]+-->/');
  $replacements = array('//\1//', '');
  return preg_replace($patterns, $replacements, $text);
}
|
---|
| 1100 |
|
---|
/**
 * Translate a code line that sits inside a table, decoding entities only
 * while remembering where italics were.
 *
 * The italic tags are swapped for placeholders before translateText() runs
 * and are turned into '<!--i-->' / '<!--/i-->' comments afterwards, so they
 * stay recorded in the text without being rendered.
 *
 * @param string $text the raw code line
 * @return string the translated code line
 */
function translateTableCodeline($text)
{
  // - escape the italic tags so the translation pass leaves them alone
  $escaped = str_replace(array('<i>', '</i>'), array('%!--i--%', '%/i%'), $text);
  // - translate the text, just decoding the entities
  $translated = translateText($escaped, true);
  // - now turn the escaped italic tags into HTML comments
  return str_replace(array('%!--i--%', '%/i%'), array('<!--i-->', '<!--/i-->'), $translated);
}
|
---|
| 1116 |
|
---|
/**
 * Convert a fragment of manual XML text into dokuwiki markup.
 *
 * Unless the global $in_code flag is set, HTML comment delimiters are first
 * rewritten to the '%!--' / '--%' form so the later transforms leave them
 * alone. In full mode the cross references, links, footnote references and
 * the b/i/u formatting tags are then replaced by their dokuwiki equivalents.
 * Finally entities are decoded: the entities listed in the global
 * $entity_replacements first, then the standard '&gt;', '&lt;' and '&amp;'.
 *
 * @param string $text          the XML text to translate
 * @param bool   $entities_only when true, skip the markup conversion and
 *                              only protect comments and decode entities
 * @return string the translated text
 */
function translateText($text, $entities_only=false)
{
  global $entity_replacements;
  global $footnotes;
  global $in_code;

  // - immediately find and protect any legitimate HTML comments in the text
  //   (so already using encoded entities), otherwise they would be mangled by
  //   the following transforms. This has to be matched with changes to the
  //   HTMLComments plugin in Dokuwiki to allow the correct thing to be
  //   displayed to the user.
  if (!$in_code)
  {
    $text = str_replace('<!--', '%!--', $text);
    $text = str_replace('-->', '--%', $text);
  }

  if (!$entities_only)
  {
    // - replace linking constructs with dokuwiki ones
    // - external chapter section crossrefs are easily the worst of all...
    while (preg_match('/<CrossRef\s[^>]*external[^>]*\/>/', $text) && preg_match('/<CrossRef\s[^>]*target="Chapter"[^>]*\/>/', $text) && preg_match('/<CrossRef\s+(.*?)\/>/', $text, $matches))
    {
      $pattern = $matches[0];
      $attributes = $matches[1];
      $manual_name = '';
      if (preg_match('/external="([^"]+)"/', $attributes, $matches))
      {
        $manual_name = $matches[1];
      }
      $language = '';
      if (preg_match('/lang="([^"]+)"/', $attributes, $matches))
      {
        $language = $matches[1];
      }
      $page_id = '';
      if (preg_match('/ref="([^"]+)"/', $attributes, $matches))
      {
        $page_id = $matches[1];
      }
      if (empty($manual_name) || empty($language) || empty($page_id))
      {
        printError('Failed to parse external reference: ' . $pattern);
      }
      // - best we can do is a search within a restricted namespace
      $reference = '[[?do=search&id=' . $page_id . ' @' . $language . ':manuals:' . $manual_name . '|' . $page_id . ']]';
      $text = str_replace($pattern, $reference, $text);
    }
    // - chapter crossrefs link to the page named after the chapter id. The
    //   matched element is replaced as a plain string: building a second
    //   regex from the id would stop matching (and so loop forever) as soon
    //   as the id held a regex metacharacter
    while (preg_match('/<CrossRef target="Chapter" ref="([^"]+)"\/>/', $text, $matches))
    {
      $chapter_id = $matches[1];
      $page_name = $chapter_id;
      $text = str_replace($matches[0], '[[.:' . $page_name . '|' . $chapter_id . ']]', $text);
    }
    // - internal figure and table references
    $text = preg_replace('/<CrossRef target="Figure" ref="([^"]+)"\/>/','<imgref figure_\1>', $text);
    $text = preg_replace('/<CrossRef target="Table" ref="([^"]+)"\/>/','<tblref table_\1>', $text);
    // - simple internal reference
    $text = preg_replace('/<CrossRef target="Section" ref="([^"]+)"\/>/', '[[#\1|\1]]', $text);
    $text = preg_replace('/<CrossRef target="Subsection" ref="([^"]+)"\/>/', '[[##\1|\1]]', $text);
    $text = preg_replace('/<CrossRef target="Part" ref="([^"]+)"\/>/', '[[###\1|\1]]', $text);
    // - simple external url
    $text = preg_replace('/<Link url="([^"]+)">(.+?)<\/Link>/', '[[\1|\2]]', $text);
    // - footnote references are looked up in the global $footnotes table.
    //   The footnote text is inserted with a plain string replace so that
    //   any '$1' or '\1' inside it is kept literally rather than being
    //   treated as a regex backreference
    while (preg_match('/<FootnoteRef id="(\d+)"\/>/', $text, $matches))
    {
      $footnote_id = $matches[1];
      if (!isset($footnotes[$footnote_id]))
      {
        printError('Unknown footnote referenced: ' . $footnote_id);
      }
      $footnote = $footnotes[$footnote_id];
      $text = str_replace($matches[0], '((' . $footnote . '))', $text);
    }
    // - detect and handle URLs surrounded by <i> tags very carefully (as doku
    //   will less than helpfully turn them into an external link and screw up
    //   everything that follows them on the page).
    //   example: <i>www.microsoft.com</i>
    //   example: <i>http://nzdl.org/cgi-bin/library</i>
    //   example: <i>www.yourserver.com</i>
    //   example: <i>http://www.yourserver.com</i>
    //   example: <i>http://www.yourserver.com/greenstone</i>
    $text = preg_replace('/<i>((?:http\:\/\/)?[a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)<\/i>/i','// \1 //', $text);
    // - superspecial case for two-part URLs ending in .org (like nzdl.org)
    $text = preg_replace('/<i>((?:http\:\/\/)?[a-z0-9\-]+\.org(?:\/.*?)?)<\/i>/i','// \1 //', $text);
    // - another superspecial case, this time for URLs on localhost
    $text = preg_replace('/<i>((?:http\:\/\/)?localhost(?:\/.*?)?)<\/i>/i','// \1 //', $text);
    // - underlines have the same issue around URLs.
    $text = preg_replace('/<u>((?:http\:\/\/)?[a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)<\/u>/i','__ \1 __', $text);
    $text = preg_replace('/<u>((?:http\:\/\/)?[a-z0-9\-]+\.org(?:\/.*?)?)<\/u>/i','__ \1 __', $text);
    $text = preg_replace('/<u>((?:http\:\/\/)?localhost(?:\/.*?)?)<\/u>/i','__ \1 __', $text);

    // - protect the double slash of a URL scheme, which would otherwise be
    //   read as the dokuwiki italics marker
    $text = preg_replace('/(file|ftp|http):\/\//','\1:%%//%%', $text);
    // - restore the double slashes in dokuwiki links
    while (preg_match('/\[\[[^\]]*%%[^\]]*\]\]/', $text))
    {
      $text = preg_replace('/(\[\[[^\]]*)%%([^\]]*\]\])/', '\1\2', $text);
    }
    // - replace HTML elements with the dokuwiki style equivalents
    $text = str_replace('<b>', '**', $text);
    $text = str_replace('</b>', '**', $text);
    $text = str_replace('<i>', '//', $text);
    $text = str_replace('</i>', '//', $text);
    $text = str_replace('<u>', '__', $text);
    $text = str_replace('</u>', '__', $text);
  }
  // Decode entities
  // - user defined entities (in the manual metadata); skipped when no
  //   replacement table has been set up
  if (is_array($entity_replacements))
  {
    foreach ($entity_replacements as $entity=>$code)
    {
      $text = str_replace('&' . $entity . ';', html_entity_decode('&#'.$code.';',ENT_NOQUOTES,'UTF-8'), $text);
    }
  }
  // - standard entities. '&amp;' goes last so that an escaped entity such as
  //   '&amp;lt;' ends up as the literal text '&lt;' instead of being decoded
  //   twice
  $text = str_replace(array('&gt;', '&lt;', '&amp;'), array('>', '<', '&'), $text);
  return $text;
}
|
---|
| 1240 | /** translateText() **/
|
---|
| 1241 |
|
---|
/**
 * Copy a manual image into the wiki media directory and return the dokuwiki
 * markup that embeds it.
 *
 * The image is read from the language's images directory under the global
 * $xml_source_path and stored, under a lowercased file name, in the
 * language's manuals/images media directory under the global $dokuwiki_path.
 * The destination directory is created first when it does not exist. A
 * progress note naming the file is echoed on every call.
 *
 * @param string $filename file name of the image within the images directory
 * @param mixed  $width    display width to request in the markup
 * @param mixed  $height   display height to request in the markup
 * @return string the dokuwiki image markup
 */
function handleImage($filename, $width, $height)
{
  global $dokuwiki_path;
  global $xml_source_path;
  echo '[copying image: ' . $filename . "] \n";
  // - copy file into place
  $source_path = $xml_source_path . '/' . $_REQUEST['l'] . '/images/' . $filename;
  $destination_dir = $dokuwiki_path . '/data/media/' . $_REQUEST['l'] . '/manuals/images/';
  if (!file_exists($destination_dir))
  {
    mkAllDir($destination_dir, 0755);
  }
  $destination_path = $destination_dir . strtolower($filename);
  // - the result of copy() is what tells us whether this copy worked; the
  //   destination existing is not enough on its own, as an image left over
  //   from an earlier import would hide a failed copy
  if (!copy($source_path, $destination_path) || !file_exists($destination_path))
  {
    printError('Failed to copy image file: ' . $filename);
  }
  // - create the string
  $image_txt = '{{..:images:' . strtolower($filename) . '?' . $width . 'x' . $height . '&direct}}';
  return $image_txt;
}
|
---|
| 1264 |
|
---|
/**
 * Read the next line from the input stream, counting it in the global
 * $line_counter.
 *
 * The counter is advanced on every call, including the one that hits the end
 * of the input.
 *
 * @param resource $in the open input stream
 * @return string|false the line as returned by fgets(), or false when no
 *                      more input could be read
 */
function getLine($in)
{
  global $line_counter;
  ++$line_counter;
  $next_line = fgets($in);
  return $next_line;
}
|
---|
| 1271 |
|
---|
/**
 * Strip the bold, italic and line break tags out of a piece of text.
 *
 * Only the exact tags '<b>', '</b>', '<i>', '</i>' and '<br/>' are removed;
 * the text between them is kept.
 *
 * @param string $text the text to strip
 * @return string the text without those tags
 */
function noFormatting($text)
{
  $unwanted_tags = array('<b>', '</b>', '<i>', '</i>', '<br/>');
  return str_replace($unwanted_tags, '', $text);
}
|
---|
| 1281 |
|
---|
| 1282 | ?> |
---|