Importing Greenstone Manual XML' . "\n";
// ---------------------------------------------------------------------------
// Import driver. Reads one Greenstone manual XML file, selected by the 'm'
// (manual) and 'l' (language) request parameters, and writes DokuWiki page
// files for it: one page per chapter, one "frontmatter" page collecting the
// sections found outside any chapter, and a top-level cover/contents page.
//
// NOTE(review): many string and regex literals below are empty or start
// mid-pattern (for example preg_match('//', ...) and strpos($line, '')), and
// a few statements are cut off part-way. They look as if embedded markup has
// been lost from them. They are reproduced unchanged here; confirm each one
// against a pristine copy of this script before relying on it.
// NOTE(review): $_REQUEST['l'] and $_REQUEST['m'] are used to build
// filesystem paths without validation; restrict them to known values before
// this point.
// ---------------------------------------------------------------------------
// - request parameters are escaped before being echoed back into the page
echo '

XML Source Path: ' . $xml_source_path . '
Manual: ' . htmlspecialchars($_REQUEST['m']) . '
Language: ' . htmlspecialchars($_REQUEST['l']) . "

\n
\n";
echo "

Frontmatter:
\n";

// 1. By-and-large we're going to process all of this in a big state machine
// - the top level page, containing cover page and chapter order information,
//   needs to be created last, so we have to store its information
$manual_metadata = array();      // field name => list of values (see addMetadata)
$entity_replacements = array();  // entity name => numeric character code
$footnotes = array();            // footnote id => translated footnote text
$page_order = array();           // "page_name|title" entries for the contents list
$page_count = 2;
$looking_for_metadata = '';      // name of the cover metadata element we are inside
$chapter_txt_out = false;        // handle of the chapter page being written
$frontmatter_text = '';          // accumulated text of the frontmatter page
$in_section = false;
$sections_page_name = '';
$in_chapter = false;
$chapter_id = '';
$bullet_depth = 0;               // nesting depth of bullet/numbered lists
$is_numbered_list = true;
$line_counter = 0;               // incremented by getLine()
$in_code = false;
$in_footnotes = false;
$in_numbered_item = 0;
$in_bullet_item = false;
$seen_code_in_item = false;
$in_indent = false;
$is_code_linenumbered = false;

// - construct the path using the information we've been provided as arguments
$xml_file_path = $xml_source_path . '/' . $_REQUEST['l'] . '/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml';
$xml_in = fopen($xml_file_path, 'r');
if (!$xml_in) {
  printError('Failed to locate top level page for manual');
}

// - we also use this opportunity to read in any footnotes as we'll need to
//   move them onto their appropriate page
while (($line = fgets($xml_in)) !== false) {
  if (preg_match('//', $line, $matches)) {
    $footnote_id = $matches[1];
    $text_line = fgets($xml_in);
    if (preg_match('/(.+?)<\/Text>/', $text_line, $matches)) {
      $footnotes[$footnote_id] = '' . translateText($matches[2]);
    }
    // - throw away the line that follows the footnote text
    fgets($xml_in);
  }
}
fclose($xml_in);

// - now reopen to parse it
$xml_in = fopen($xml_file_path, 'r');
if (!$xml_in) {
  printError('Failed to locate top level page for manual');
}

// Main pass: each iteration reads one line through getLine() and handles it
// in the first branch of the if/elseif chain whose condition matches.
while (($line = getLine($xml_in)) !== false) {
  // - Special Case: lingering code blocks, continue if next line also
  //   contains code, otherwise we need an extra newline
  if ($in_code) {
    $code_text = "\n";
    // NOTE(review): the statement below is cut off inside its string literal.
    if (strpos($line, ' encountered. 
    if (!$is_numbered_list) {
      $code_text .= "\n\n";
    } else {
      $seen_code_in_item = true;
    }
    $in_code = false;
    }
    if ($in_chapter) {
      fwrite($chapter_txt_out, $code_text);
    } else {
      $frontmatter_text .= $code_text;
    }
  }

  // - some system metadata to watch for
  if (preg_match('/]+)>/', $line, $matches)) {
    $entity = $matches[1];
    addMetadata('ENTITY',$entity);
    // - remember name => numeric code so translateText() can decode it later
    if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches)) {
      $entity_replacements[$matches[1]] = $matches[2];
    }
  }
  // - we have an explicit list of cover metadata to watch for
  elseif (!$in_section && !$in_chapter && preg_match('/<(Author|Affiliation|Comment|Date|Heading|SupplementaryText|Title|Version)>/', $line, $matches)) {
    $looking_for_metadata = $matches[1];
  } elseif (!$in_section && !$in_chapter && preg_match('/<\/(Author|Affiliation|Comment|Date|Heading|SupplementaryText|Title|Version)>/', $line, $matches)) {
    $looking_for_metadata = '';
  }
  // - found metadata we have!
  elseif (!empty($looking_for_metadata) && preg_match('/(.+?)<\/Text>/', $line, $matches)) {
    $text_id = $matches[1];
    $text = '' . translateText($matches[2]);
    addMetadata($looking_for_metadata, $text);
  }
  // - bogus metadata found in French version
  elseif (!empty($looking_for_metadata) && preg_match('//', $line, $matches)) {
  }
  // - any text we encounter outside of both sections and chapters also
  //   belongs on the cover
  elseif (!$in_section && !$in_chapter && !$in_footnotes && preg_match('/(.+?)<\/Text>/', $line, $matches)) {
    // (for now I'll assume id's are persistent)
    addMetadata('Text', '' . translateText($matches[2]));
  }
  // - we will probably encounter the opening section (which is outside of a
  //   chapter) first, so we have a special case for it
  elseif (!$in_chapter && preg_match('/

/', $line, $matches)) {
    $section_id = $matches[1];
    // - if this is the first non-chapter section we have encountered then it
    //   gets the honor of having the page---that these sections will
    //   eventually be printed out on---named after it. Typically this should
    //   be "about_this_manual"
    if (empty($frontmatter_text)) {
      $sections_page_name = $section_id;
    }
    $in_section = true;
    $title = getTitle($xml_in, 'section:' . $section_id);
    if (empty($frontmatter_text)) {
      array_unshift($page_order, $sections_page_name . '|' . noComments($title));
    }
    // - if the title, as is, wouldn't autogenerate the appropriate id, then
    //   we have to include the id explicitly (as another html comment block)
    if ($section_id != generateID($title)) {
      $title = '' . $title;
      $seen_ids[$section_id] = 1;
    }
    $frontmatter_text .= '===== ' . $title . ' =====' . "\n\n";
    // - whew. Chapter's going to be just as bad though.
  } elseif ($in_section && preg_match('/<\/Section>/', $line)) {
    $in_section = false;
  } elseif (preg_match('//', $line, $matches)) {
    $chapter_id = $matches[1];
    echo "

\n

Import Chapter:" . $chapter_id . '
' . "\n";
    $chapter_page_name = $chapter_id;
    // - create a new file to store this chapter
    $chapter_file_dir = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'];
    if (!file_exists($chapter_file_dir)) {
      mkAllDir($chapter_file_dir, 0755);
    }
    $chapter_file_path = $chapter_file_dir . '/' . $chapter_page_name . '.txt';
    // - backup existing file (checked, like the other backups in this script)
    if (file_exists($chapter_file_path)) {
      $chapter_backup_file_path = $chapter_file_path . '.bak';
      if (!rename($chapter_file_path, $chapter_backup_file_path)) {
        printError('Failed to rename existing chapter page for backup: ' . $chapter_page_name);
      }
    }
    // - open new file for writing
    $chapter_txt_out = fopen($chapter_file_path, 'w');
    if (!$chapter_txt_out) {
      printError('Failed to open page file for writing: ' . $chapter_page_name);
    }
    $in_chapter = true;
    $title = getTitle($xml_in, 'chapter: ' . $chapter_id);
    fwrite($chapter_txt_out, '====== ' . $title . ' ======' . "\n\n");
    array_push($page_order, $chapter_page_name . '|' . noComments($title));
  } elseif ($in_chapter && preg_match('/<\/Chapter>/', $line)) {
    fclose($chapter_txt_out);
    $chapter_txt_out = false;
    $in_chapter = false;
    $page_count++;
  }
  // - section, subsection and part titles within chapter
  elseif ($in_chapter && preg_match('/<(Section|Subsection|Part)\sid="([^"]*)">/', $line, $matches)) {
    $title_type = $matches[1];
    $section_id = $matches[2];
    if (empty($section_id)) {
      $section_id = generateID(strtolower($title_type));
    }
    echo '[adding ' . strtolower($title_type) . ': ' . $section_id . '] ';
    $header_fix = '';
    $title = getTitle($xml_in, 'heading: ' . $title_type);
    // - heading markers get shorter as the heading level gets deeper
    if ($title_type == 'Section') {
      $header_fix = '=====';
    }
    if ($title_type == 'Subsection') {
      $header_fix = '====';
    }
    if ($title_type == 'Part') {
      $header_fix = '===';
      // - remove b's and i's
      $title = preg_replace('/<\/?(B|I)>/i', '', $title);
    }
    // - if the title, as is, wouldn't autogenerate the appropriate id, then
    //   we have to include the id explicitly (as another html comment block)
    if ($section_id != generateID($title)) {
      $title = '' . $title;
      $seen_ids[$section_id] = 1;
    }
    fwrite($chapter_txt_out, $header_fix . ' ' . $title . ' ' . $header_fix . "\n\n");
  }
  // NOTE(review): the figure branch below is cut off in two places (inside
  // its condition and inside the "$txt = ..." literal); reproduced unchanged.
  elseif ($in_chapter && (strpos($line, '/', $line, $matches)) {
    $figure_id = $matches[1];
    $other_attributes = $matches[2];
    echo '[adding figure: ' . $figure_id . "] \n";
    // We need the title too
    $caption = getTitle($xml_in, 'figure:' . $figure_id);
    $caption = translateText(alternateComments($caption));
    $txt = "') !== false) {
    if ($in_chapter) {
      fwrite($chapter_txt_out, "\n\n");
    } else {
      $frontmatter_text .= "\n\n";
    }
    // - no longer required
    $is_code_linenumbered = false;
  } elseif (preg_match('/]*).*?.*?>/', $line, $matches)) {
    $attributes = $matches[1];
    $table_txt = '';
    $table_id = '';
    if (preg_match('/id="([^"]+)"/', $attributes, $matches)) {
      $table_id = $matches[1];
    } else {
      $table_id = generateID('table');
    }
    $hidden = false;
    if (strpos($attributes, 'class="hidden"') !== false) {
      $hidden = true;
    }
    echo '[adding table: ' . $table_id . "] \n";
    $table_caption = getTitle($xml_in, 'table: ' . $table_id);
    if ($hidden) {
      $table_txt .= '' . "\n";
    } elseif (empty($table_caption)) {
      $table_txt .= '' . "\n";
    } else {
      $table_txt .= '' . "\n";
    }
    // - in order to properly capture the table we're going to have to read in
    //   the whole thing here, and take note of column widths
    $have_output_widths = false;
    $column_widths = array();
    while (strpos($line, '') === false) {
      // - find the start of a row
      while(!empty($line) && strpos($line, '') === false && strpos($line, '') === false) {
        $line = getLine($xml_in);
      }
      if (strpos($line, '') !== false) {
        $row_txt = '|';
        $line = getLine($xml_in);
        // - now we read in multiple cells (line starting
        // NOTE(review): the rest of the comment above and the start of the
        // cell loop / width match are missing; the fragment is kept as found.
        /', $line, $matches)) {
            $cell_width = $matches[1];
            if (!$have_output_widths) {
              array_push($column_widths, $cell_width);
            }
          }
          // Ignore empty cells
          // - adding another case for empty header cells (turned up in es
          //   version of "From Paper")
          if (preg_match('//', $line) || preg_match('/.*<\/th>/', $line)) {
            $row_txt .= ' |';
          } else {
            $line = getLine($xml_in);
            $first = true;
            while (strpos($line, '') === false) {
              // - lines after the first within a cell are joined with a
              //   DokuWiki forced line break
              if (!$first) {
                $row_txt .= '\\\\';
              }
              // - we can have images or text in our tables
              if (preg_match('//', $line, $matches)) {
                $payload = $matches[0];
                $filename = $matches[1];
                $width = 0;
                if (preg_match('/width="(\d+)"/', $payload, $matches)) {
                  $width = $matches[1];
                }
                $height = 0;
                if (preg_match('/height="(\d+)"/', $payload, $matches)) {
                  $height = $matches[1];
                }
                $image_txt = handleImage($filename, $width, $height);
                $row_txt .= ' ' . $image_txt . ' ';
              } elseif (preg_match('/(.*)/', $line, $matches)) {
                $tid = $matches[1];
                $txt = $matches[2];
                // - multiple line text block
                while (strpos($txt, '') === false) {
                  $txt .= getLine($xml_in);
                }
                $txt = str_replace('','',$txt);
                $row_txt .= ' ' . translateText($txt) . ' ';
              } elseif (preg_match('/(.*?)<\/CodeLine>/',$line,$matches)) {
                $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]) . '\'\' ';
              } elseif (preg_match('/(.*)/',$line,$matches)) {
                $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]) . '\'\' ';
              } elseif (preg_match('/(.*)<\/CodeLine>/',$line,$matches)) {
                if (!empty($matches[1])) {
                  $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]). '\'\' ';
                } else {
                  $row_txt .= ' ';
                }
              }
              // we'll add (bogus) linebreaks
              elseif (preg_match('/^\s*\s*$/', $line)) {
                // - append rather than assign: assigning here discarded the
                //   leading '|' and every cell already collected for this row
                $row_txt .= ' ';
              } else {
                printError('Warning! Unrecognized element in table: ' . htmlspecialchars($line));
              }
              $first = false;
              // - next line
              $line = getLine($xml_in);
            }
            // - close the cell
            $row_txt .= '|';
          }
          // next!
          $line = getLine($xml_in);
        }
        // - if we haven't already, output the width command
        if (!$have_output_widths) {
          $table_txt .= '|< - ' . implode(' ', $column_widths) . ' >|' . "\n";
          $have_output_widths = true;
        }
        $table_txt .= $row_txt . "\n";
        // - throw away the line that closes the row
        $line = getLine($xml_in);
      }
    }
    $table_txt .= "\n";
    if ($in_chapter) {
      fwrite($chapter_txt_out, $table_txt);
    } else {
      $frontmatter_text .= $table_txt;
    }
  }
  // - copy and insert images
  elseif (preg_match('//', $line, $matches)) {
    $image_txt = handleImage($matches[3], $matches[1], $matches[2]);
    if ($in_chapter) {
      fwrite($chapter_txt_out, $image_txt);
    } else {
      $frontmatter_text .= $image_txt;
    }
  }
  // - bullet lists
  elseif (preg_match('//', $line)) {
    echo "[adding bulletlist] \n";
    if ($in_bullet_item || $in_numbered_item) {
      if ($in_chapter) {
        fwrite($chapter_txt_out, "\n");
      } elseif ($in_section) {
        $frontmatter_text .= "\n";
      }
    }
    $bullet_depth++;
    $is_numbered_list = false;
  }
  // - numbered lists
  elseif (preg_match('//', $line)) {
    echo "[adding numbered list] \n";
    $bullet_depth++;
    $is_numbered_list = true;
    // - reset this flag that keeps track of whether an item (numbered or
    //   otherwise) is legitimately split by a code block
    $seen_code_in_item = false;
  } elseif (preg_match('/<\/BulletList>/', $line)) {
    $bullet_depth--;
    // - leaving the outermost list: end it with a blank line
    if ($bullet_depth == 0) {
      if ($in_chapter) {
        fwrite($chapter_txt_out, "\n");
      } elseif ($in_section) {
        $frontmatter_text .= "\n";
      }
      $is_numbered_list = false;
    }
  } elseif (preg_match('/<\/NumberedList>/', $line)) {
    $bullet_depth--;
    if ($bullet_depth == 0) {
      if ($in_chapter) {
        fwrite($chapter_txt_out, "\n");
      } elseif ($in_section) {
        $frontmatter_text .= "\n";
      }
      $is_numbered_list = false;
    }
  } elseif (preg_match('//', $line)) {
    $in_numbered_item = 1;
  } elseif (preg_match('/<\/NumberedItem>/', $line)) {
    $in_numbered_item = 0;
    if ($in_chapter) {
      fwrite($chapter_txt_out, "\n");
    } else {
      $frontmatter_text .= "\n";
    }
  } elseif (preg_match('//', $line)) {
    $in_bullet_item = true;
  } elseif (preg_match('/<\/Bullet>/', $line)) {
    if ($in_chapter) {
      fwrite($chapter_txt_out, "\n");
    } else {
      $frontmatter_text .= "\n";
    }
    $in_bullet_item = false;
  }
  // TEXT HANDLING - this is the main case, but has disappeared into the mire
  // of other cases.
  elseif (!$in_footnotes && preg_match('/(.+?)$/', $line, $matches)) {
    $id = $matches[1];
    $str = $matches[2];
    // - special case for those text elements split over multiple lines. We
    //   keep concatenating lines until we find the closing text element or we
    //   run out of lines!
    $another_line = '';
    while (strpos($str, '') === false && ($another_line = getLine($xml_in)) !== false) {
      $str .= ' ' . $another_line;
    }
    // - note that if we ran out of lines (eof) then we'll break out of this
    //   block anyway, it's just there won't be a <\Text> at the end of this
    //   block... despite this being a major validation issue in the XML it
    //   shouldn't break this script
    // - now remove the closing Text tag from the end (hopefully) of str
    $str = preg_replace('/<\/Text>\s*/', '', $str);
    // - and prepend the id while translating the str into Dokuwiki format
    $str = '' . translateText($str);
    if ($bullet_depth > 0) {
      if ($is_numbered_list) {
        // - special case for those text elements legitimately split in two by
        //   code blocks. They get no bullet of either type and are 'run-on'
        //   immediately to the end of the code element in order to prevent
        //   dokuwiki restarting numbering etc
        if ($seen_code_in_item) {
          // - leave str as it is
          // - reset flag just in case the item happens to contain another
          //   code block
          $seen_code_in_item = false;
        } else if ($in_numbered_item == 1) {
          $str = '- ' . $str;
        }
        // - superspecial case for the poorly formatted numberlists that
        //   contain more than one text block per point. We'll nest them
        //   as a bullet list as that preserves order, formatting and (I
        //   hope) meaning.
        else {
          if ($in_chapter) {
            fwrite($chapter_txt_out, "\n");
          } else {
            $frontmatter_text .= "\n";
          }
          $str = ' * ' . $str;
        }
        $in_numbered_item++;
      } else {
        $str = '* ' . $str;
      }
      // - one indent prefix per level of list nesting
      for ($i = 0; $i < $bullet_depth; $i++) {
        $str = ' ' . $str;
      }
    } else {
      // Indented text is preceded by a >
      if ($in_indent) {
        $str = '> ' . $str . "\n";
      } else {
        $str .= "\n";
      }
    }
    if ($bullet_depth == 0) {
      $str .= "\n";
    }
    if ($in_chapter) {
      fwrite($chapter_txt_out, $str);
    } else {
      $frontmatter_text .= $str;
    }
  }
  // - codified text blocks
  elseif (preg_match('/(.+?)<\/Text>/', $line, $matches)) {
    $code_id = $matches[1];
    // - determine the appropriate code block prefix
    $code_prefix = '';
    if (!$in_code) {
      if ($is_code_linenumbered) {
        $code_prefix = '';
      } else {
        $code_prefix = '';
      }
      $in_code = true;
    }
    $code_txt = $code_prefix . '' . translateText($matches[2], true);
    if ($in_chapter) {
      fwrite($chapter_txt_out, $code_txt);
    } else {
      $frontmatter_text .= $code_txt;
    }
  } elseif (preg_match('/(.+?)<\/Text>/', $line, $matches)) {
    $code_id = $matches[1];
    // - determine the appropriate code block prefix
    $code_prefix = '';
    if (!$in_code) {
      if ($is_code_linenumbered) {
        $code_prefix = '';
      } else {
        $code_prefix = '';
      }
      $in_code = true;
    }
    $code_txt = $code_prefix . '' . translateText($matches[2], true);
    if ($in_chapter) {
      fwrite($chapter_txt_out, $code_txt);
    } else {
      $frontmatter_text .= $code_txt;
    }
  } elseif (preg_match('/(.*?)$/', $line, $matches)) {
    $code_txt = $matches[1];
    // - determine the appropriate code block prefix
    $code_prefix = '';
    if (!$in_code) {
      if ($is_code_linenumbered) {
        $code_prefix = "\n";
      } else {
        $code_prefix = "\n";
      }
      $in_code = true;
    }
    // - arg. another special case for codelines that span more than one line
    //   (but I guess is a bit cumbersome
    //   for an element name, eh?)
    $another_line = '';
    while (strpos($code_txt, '') === false && ($another_line = getLine($xml_in)) !== false) {
      $code_txt .= ' ' . $another_line;
    }
    $code_txt = preg_replace('/<\/CodeLine>\s*/', '', $code_txt);
    $code_txt = $code_prefix . translateText($code_txt, true);
    if ($in_chapter) {
      fwrite($chapter_txt_out, $code_txt);
    } else {
      $frontmatter_text .= $code_txt;
    }
  }
  // - there are also sometimes empty codelines - which indicate a newline in
  //   the code listing
  elseif (preg_match('//', $line, $matches)) {
    $code_txt = '';
    if (!$in_code) {
      $code_txt = "\n";
      if ($is_code_linenumbered) {
        $code_txt = "\n" . $code_txt;
      } else {
        $code_txt = "\n" . $code_txt;
      }
      $in_code = true;
    }
    if ($in_chapter) {
      fwrite($chapter_txt_out, $code_txt);
    } else {
      $frontmatter_text .= $code_txt;
    }
  }
  // - reference to an external XML file
  elseif (preg_match('/^\s*&[a-z0-9_]+;\s+$/is', $line)) {
    if ($in_chapter) {
      fwrite($chapter_txt_out, $line);
    } else {
      $frontmatter_text .= $line;
    }
  } elseif (strpos($line, '') !== false) {
    $in_footnotes = true;
  } elseif ($in_footnotes && strpos($line, '') !== false) {
    $in_footnotes = false;
  }
  // Indentation - the closest thing we have is quoting, so we'll use that
  elseif (strpos($line, '') !== false) {
    $in_indent = true;
  } elseif (strpos($line, '') !== false) {
    $in_indent = false;
  }
  // - pattern of lines to ignore
  else if (preg_match('/^(<\?xml version="1.0" encoding="UTF-8"\?>|<\!DOCTYPE Manual \[|\]>||<\/?Content>|<\/?Footnote||<\/Manual>)/', $line)) {
  }
  // - we ignore anything else in footnotes too, as they were handled in the
  //   preprocessing pass
  else if ($in_footnotes) {
  }
  // - ignore empty lines
  else if (preg_match('/^\s*$/', $line)) {
  }
  // - meh. French versions have random, non-text element, linebreaks floating
  //   around. Guess I'll honor their formatting even though it's bogus
  else if (preg_match('/^\s*\s*$/', $line)) {
    if ($in_chapter) {
      fwrite($chapter_txt_out, ' \\\\');
    } else {
      $frontmatter_text .= ' \\\\';
    }
  }
  // - danger Will Robinson!
  else {
    echo '


Warning! Failed to parse line ' . $line_counter . ': |' . htmlspecialchars($line) . "|
\n";
  }
}

// 2. We should now have enough metadata to export the cover page
$top_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt';
// - backup any existing file
if (file_exists($top_page_path)) {
  $top_page_backup_path = $top_page_path . '.bak';
  if(!rename($top_page_path, $top_page_backup_path)) {
    printError('Failed to rename existing top page for backup');
  }
}
// - and create a handle to the new file (checked like every other fopen in
//   this script, so a failure is reported instead of passing false to fwrite)
$txt_out = fopen($top_page_path, 'w');
if (!$txt_out) {
  printError('Failed to open top page file for writing');
}
// - write the page (including the tables)
fwrite($txt_out, '====== ' . noComments(ucfirst(getFirstMetadata('Heading'))) . ': ' . noComments(ucfirst(getFirstMetadata('Title'))) . ' (' . strtoupper($_REQUEST['l']) . ') ======' . "\n");
fwrite($txt_out, "\n");
// - *NEW* ability to request imports and exports from within the page
fwrite($txt_out, "\n\n");
fwrite($txt_out, '**Administrator Commands:**' . "\n");
// On second thoughts we probably never want to do this casually, as it boguses
// all history/approval/edit information. Instead I'll leave this as a manual
// process.
// NOTE(review): the next statement is cut off (its string literal never
// closes); reproduced unchanged.
fwrite($txt_out, '\n");
fwrite($txt_out, ' * Export manual: [[http://~~baseurl~~/../../php/gs-manual-export.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . '&v=draft&a=download|draft version]] [[http://~~baseurl~~/../../php/gs-manual-export.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . '&a=download|approved version]]' . "\n");
fwrite($txt_out, "\n\n");
// - regular metadata
fwrite($txt_out, '' . "\n");
fwrite($txt_out, '^ Metadata ^ Value ^' . "\n");
$fields = array('Heading','Title','Author','Affiliation','Text','Comment','Version','Date');
foreach ($fields as $field) {
  $values = getMetadata($field);
  foreach ($values as $value) {
    fwrite($txt_out, '^ ' . $field . ' | ' . $value . ' |' . "\n");
  }
}
fwrite($txt_out, "\n");
// - contents (which also provides order information for exporting)
fwrite($txt_out, '===== Contents =====' . "\n");
fwrite($txt_out, "\n");
fwrite($txt_out, '' . "\n");
foreach ($page_order as $page_info) {
  fwrite($txt_out, ' * [[.:' . $_REQUEST['m'] . ':' . $page_info . ']]' . "\n");
}
// - system metadata
fwrite($txt_out, "\n\n");
fwrite($txt_out, '===== System Metadata =====' . "\n");
fwrite($txt_out, '' . "\n");
fwrite($txt_out, '^ Metadata ^ Value ^' . "\n");
$fields = array('ENTITY','SupplementaryText');
foreach ($fields as $field) {
  $values = getMetadata($field);
  foreach ($values as $value) {
    fwrite($txt_out, '^ ' . $field . ' | ' . $value . ' |' . "\n");
  }
}
fwrite($txt_out, "\n\n");
fwrite($txt_out, "\n");
// - done!
fclose($txt_out);
$page_count++;

// 3. And the 'sections' page, grouping together all the loose sections as
//    frontmatter
$frontmatter_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $sections_page_name . '.txt';
// - backup any existing file
if (file_exists($frontmatter_page_path)) {
  $frontmatter_page_backup_path = $frontmatter_page_path . '.bak';
  if(!rename($frontmatter_page_path, $frontmatter_page_backup_path)) {
    printError('Failed to rename existing frontmatter page for backup');
  }
}
// - populate the new frontmatter file, reporting a failed write
if (file_put_contents($frontmatter_page_path, $frontmatter_text) === false) {
  printError('Failed to write frontmatter page: ' . $sections_page_name);
}
echo "

\n

Complete! Imported " . $page_count . " pages


\n";
echo '

Click here to return to wiki page

' . "\n";
exit(0);

// ---------------------------------------------------------------------------
// Helper functions used by the import driver above.
//
// NOTE(review): several string and regex literals in these helpers are empty
// or start mid-pattern; they look as if embedded markup has been lost from
// them. Unless a comment says otherwise they are reproduced unchanged;
// confirm them against a pristine copy of this script.
// ---------------------------------------------------------------------------

/**
 * Append a value to the list of values recorded for a metadata field, and
 * echo a progress marker naming the field.
 *
 * @param string $field metadata field name (key into $manual_metadata)
 * @param mixed  $value value to append
 */
function addMetadata($field, $value) {
  global $manual_metadata;
  echo '[adding metadata: ' . $field . "] \n";
  if (!isset($manual_metadata[$field])) {
    $manual_metadata[$field] = array();
  }
  $manual_metadata[$field][] = $value;
}
/** addMetadata() **/

/**
 * Return the first value recorded for a metadata field.
 *
 * @param string $field metadata field name
 * @return mixed the first recorded value, or '' when the field has none
 */
function getFirstMetadata($field) {
  global $manual_metadata;
  if (!empty($manual_metadata[$field])) {
    return $manual_metadata[$field][0];
  }
  return '';
}
/** getFirstMetadata() **/

/**
 * Return every value recorded for a metadata field.
 *
 * @param string $field metadata field name
 * @return array the recorded values, or an empty array when the field is unset
 */
function getMetadata($field) {
  global $manual_metadata;
  return isset($manual_metadata[$field]) ? $manual_metadata[$field] : array();
}
/** getMetadata() **/

/**
 * Read in the next title element with nested text element and extract the
 * title.
 *
 * Lines are consumed from $xml_in through getLine(). The returned title is
 * prefixed with an HTML comment carrying the text id ("<!-- id:N -->"); a
 * subtitle, when present, is placed in front of the main title text.
 *
 * @param resource $xml_in  open handle on the manual XML
 * @param string   $element label used only in error messages
 * @return string the title text; '' when the first check below matches
 */
function getTitle($xml_in, $element) {
  $title = '';
  $in_title_element = false;
  // - the first thing in a chapter will be its title
  $title_line = getLine($xml_in);
  // - super special case: some language versions don't wrap titles in title
  //   element, so if the first thing we see is a text, we treat that as the
  //   title
  // NOTE(review): the needles in the next two checks, and in the closing
  // check near the end of this function, are empty strings. Since PHP 8.0
  // strpos() with an empty needle returns 0, so the first check always
  // returns ''. Presumably these tested for title tags - confirm and restore.
  if (strpos($title_line, '') !== false) {
    return '';
  }
  if (strpos($title_line, '') !== false) {
    $in_title_element = true;
    $title_line = getLine($xml_in);
  }
  // - some horribly formed entries have the subtitle first within the title
  //   element
  if (strpos($title_line, '<SubTitle>') !== false) {
    $title_line = getLine($xml_in);
    if (preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $title_line, $matches)) {
      $title = '<!-- id:' . $matches[1] . ' -->' . $matches[2] . ' ' . $title;
    }
    $title_line = getLine($xml_in);
    if (strpos($title_line, '</SubTitle>') === false) {
      printError('Failed to find closing title for: ' . $element);
    }
    $title_line = getLine($xml_in);
  }
  // - grab the chapter title now so we can store it in the page ordering
  if (preg_match('/<Text id="([^"]+)">(.*?)$/', $title_line, $matches)) {
    $id = $matches[1];
    $str = $matches[2];
    // - special case for text blocks that span multiple lines (as discovered
    //   in the russian "From Paper")
    $another_line = '';
    while (strpos($str, '</Text>') === false && ($another_line = getLine($xml_in)) !== false) {
      $str .= ' ' . $another_line;
    }
    // - now remove </Text>
    $str = preg_replace('/<\/Text>\s*/', '', $str);
    $title = '<!-- id:' . $id . ' -->' . $str . $title;
  }
  // - special case for (stoopid) empty titles that use up a text id
  elseif (preg_match('/<Text id="([^"]+)"\s*\/>/', $title_line, $matches)) {
    $title = '<!-- id:' . $matches[1] . ' -->' . $title;
  } else {
    printError('Failed to find title text for: ' . $element);
  }
  // - watch for subtitle elements
  if ($in_title_element) {
    $title_line = getLine($xml_in);
    if (strpos($title_line, '<SubTitle>') !== false) {
      $title_line = getLine($xml_in);
      if (preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $title_line, $matches)) {
        $title = '<!-- id:' . $matches[1] . ' -->' . $matches[2] . ' ' . $title;
      }
      $title_line = getLine($xml_in);
      if (strpos($title_line, '</SubTitle>') === false) {
        printError('Failed to find closing title for: ' . $element);
      }
      $title_line = getLine($xml_in);
    }
    if (strpos($title_line, '') === false) {
      printError('Failed to find closing title for: ' . $element);
    }
  }
  return $title;
}
/** getTitle() **/

/**
 * Rewrite comment markers in a caption and strip cross references from it.
 *
 * NOTE(review): the first search string and the regex are empty as found, so
 * as written neither call changes the text in the intended way - confirm the
 * original literals.
 *
 * @param string $text caption text
 * @return string the rewritten text
 */
function alternateComments($text) {
  $text = str_replace('', '--%', $text);
  // remove any lurking crossrefs while we are at it
  $text = preg_replace('//', '\\1', $text);
  return $text;
}

/**
 * Prepare a title for the contents list: text in front of a closing italic
 * tag is wrapped in DokuWiki italics (//...//), then the second pattern is
 * removed.
 *
 * NOTE(review): the first pattern has no opening tag and the second pattern
 * is empty as found; judging by the function name the second one presumably
 * removed HTML comments - confirm.
 *
 * @param string $text title text
 * @return string the cleaned title
 */
function noComments($text) {
  $text = preg_replace('/(.*?)<\/i>/','//\1//',$text);
  return preg_replace('//', '', $text);
}

/**
 * Translate one code line appearing inside a table cell: italic markers are
 * protected, entities are decoded through translateText($text, true), and the
 * protected markers are then replaced again.
 *
 * NOTE(review): the search strings of the first two replacements and the
 * replacement strings of the last two are empty as found - confirm.
 *
 * @param string $text raw code line text
 * @return string the translated text
 */
function translateTableCodeline($text) {
  // Escape the current italics tags to prevent the translate destroying them
  $text = str_replace('', '%!--i--%', $text);
  $text = str_replace('', '%/i%', $text);
  // Translate the text, just decoding the entities
  $text = translateText($text, true);
  // Now turn the italic tags (escaped) into HTML comments so we remember them
  // but they are hidden in the text
  $text = str_replace('%!--i--%', '', $text);
  $text = str_replace('%/i%', '', $text);
  return $text;
}

/**
 * Translate a fragment of manual text into DokuWiki syntax.
 *
 * Unless $entities_only is set, linking constructs, footnote references,
 * URLs wrapped in italic/underline tags and inline formatting are rewritten
 * first. User-defined entities (from $entity_replacements) and the standard
 * &gt; &lt; &amp; entities are always decoded last.
 *
 * @param string $text          text to translate
 * @param bool   $entities_only when true, skip everything except comment
 *                              protection and entity decoding
 * @return string the translated text
 */
function translateText($text, $entities_only=false) {
  global $entity_replacements;
  global $footnotes;
  global $in_code;
  // - immediately find and protect any legitimate HTML comments in the text
  //   (so already using encoded entities), otherwise they'll be thoroughly
  //   mangled during the following transforms. This has to be matched with
  //   changes to the HTMLComments plugin in Dokuwiki to allow the correct
  //   thing to be displayed to the user.
  if (!$in_code) {
    $text = str_replace('<!--', '%!--', $text);
    $text = str_replace('-->', '--%', $text);
  }
  if (!$entities_only) {
    // - replace linking constructs with dokuwiki ones
    // - external chapter section crossrefs are easily the worst of all...
    while (preg_match('/]*external[^>]*\/>/', $text) && preg_match('/]*target="Chapter"[^>]*\/>/', $text) && preg_match('//', $text, $matches)) {
      $pattern = $matches[0];
      $attributes = $matches[1];
      $manual_name = '';
      if (preg_match('/external="([^"]+)"/', $attributes, $matches)) {
        $manual_name = $matches[1];
      }
      $language = '';
      if (preg_match('/lang="([^"]+)"/', $attributes, $matches)) {
        $language = $matches[1];
      }
      $page_id = '';
      if (preg_match('/ref="([^"]+)"/', $attributes, $matches)) {
        $page_id = $matches[1];
      }
      if (empty($manual_name) || empty($language) || empty($page_id)) {
        printError('Failed to parse external reference: ' . $pattern);
      }
      // - best we can do is a search within a restricted namespace
      $reference = '[[?do=search&id=' . $page_id . ' @' . $language . ':manuals:' . $manual_name . '|' . $page_id . ']]';
      $text = str_replace($pattern, $reference, $text);
    }
    // - chapter crossrefs are tricksie due to needing to know ordering numbers
    // NOTE(review): the pattern is empty as found; an empty pattern always
    // matches, so as written this loop cannot terminate - restore the pattern.
    while (preg_match('//', $text, $matches)) {
      $chapter_id = $matches[1];
      $page_name = $chapter_id;
      $text = preg_replace('//', '[[.:' . $page_name . '|' . $chapter_id . ']]', $text);
    }
    // - internal figure and table references
    $text = preg_replace('//','', $text);
    $text = preg_replace('//','', $text);
    // - simple internal reference
    $text = preg_replace('//', '[[#\1|\1]]', $text);
    $text = preg_replace('//', '[[##\1|\1]]', $text);
    $text = preg_replace('//', '[[###\1|\1]]', $text);
    // - simple external url
    $text = preg_replace('/(.+?)<\/Link>/', '[[\1|\2]]', $text);
    // - footnote references are also tricksie as we've had to extract the
    //   footnotes earlier (during chapter counting)
    // NOTE(review): same empty-pattern problem as the chapter crossref loop.
    while (preg_match('//', $text, $matches)) {
      $footnote_id = $matches[1];
      if (!isset($footnotes[$footnote_id])) {
        printError('Unknown footnote referenced: ' . $footnote_id);
      }
      $footnote = $footnotes[$footnote_id];
      $text = preg_replace('//', '((' . $footnote . '))', $text);
    }
    // - detect and handle URLs surrounded by tags very carefully (as doku
    //   will less than helpfully turn them into an external link and screw up
    //   everything that follows them on the page).
    //   example: www.microsoft.com
    //   example: http://nzdl.org/cgi-bin/library
    //   example: www.yourserver.com
    //   example: http://www.yourserver.com
    //   example: http://www.yourserver.com/greenstone
    $text = preg_replace('/((?:http\:\/\/)?[a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)<\/i>/i','// \1 //', $text);
    // - superspecial case for two-part URLs ending in .org (like nzdl.org)
    $text = preg_replace('/((?:http\:\/\/)?[a-z0-9\-]+\.org(?:\/.*?)?)<\/i>/i','// \1 //', $text);
    // - another superspecial case, this time for URLs on localhost
    $text = preg_replace('/((?:http\:\/\/)?localhost(?:\/.*?)?)<\/i>/i','// \1 //', $text);
    // - underlines have the same issue around URLs.
    $text = preg_replace('/((?:http\:\/\/)?[a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)<\/u>/i','__ \1 __', $text);
    $text = preg_replace('/((?:http\:\/\/)?[a-z0-9\-]+\.org(?:\/.*?)?)<\/u>/i','__ \1 __', $text);
    $text = preg_replace('/((?:http\:\/\/)?localhost(?:\/.*?)?)<\/u>/i','__ \1 __', $text);
    // - replace HTML elements with the dokuwiki style equivalents
    // - protect "//" after a URL scheme so it is not read as italics
    $text = preg_replace('/(file|ftp|http):\/\//','\1:%%//%%', $text);
    // - restore the double slashes in dokuwiki links
    while (preg_match('/\[\[[^\]]*%%[^\]]*\]\]/', $text)) {
      $text = preg_replace('/(\[\[[^\]]*)%%([^\]]*\]\])/', '\1\2', $text);
    }
    // NOTE(review): the six search strings below are empty as found, so these
    // calls currently change nothing; presumably they mapped bold, italic and
    // underline tags to ** // and __ - confirm and restore.
    $text = str_replace('', '**', $text);
    $text = str_replace('', '**', $text);
    $text = str_replace('', '//', $text);
    $text = str_replace('', '//', $text);
    $text = str_replace('', '__', $text);
    $text = str_replace('', '__', $text);
  }
  // Decode entities
  // - user defined entities (in the manual metadata)
  foreach ($entity_replacements as $entity=>$code) {
    $text = str_replace('&' . $entity . ';', html_entity_decode('&#'.$code.';',ENT_NOQUOTES,'UTF-8'), $text);
  }
  // - standard entities. The search strings had degraded to the decoded
  //   characters themselves (replacing '>' with '>' and so on), which made
  //   this step a no-op. &amp; is decoded last so that "&amp;gt;" becomes
  //   "&gt;" rather than ">".
  $text = str_replace('&gt;','>', $text);
  $text = str_replace('&lt;','<', $text);
  $text = str_replace('&amp;','&', $text);
  return $text;
}
/** translateText() **/

/**
 * Copy an image from the XML source tree into the DokuWiki media directory
 * and return the DokuWiki image markup that references it.
 *
 * The destination file name is the lower-cased source file name.
 *
 * NOTE(review): $_REQUEST['l'] and $filename are used in filesystem paths
 * without validation - confirm they are restricted upstream.
 *
 * @param string     $filename image file name as given in the XML
 * @param int|string $width    display width placed in the markup
 * @param int|string $height   display height placed in the markup
 * @return string DokuWiki image markup
 */
function handleImage($filename, $width, $height) {
  global $dokuwiki_path;
  global $xml_source_path;
  echo '[copying image: ' . $filename . "] \n";
  // - copy file into place
  $source_path = $xml_source_path . '/' . $_REQUEST['l'] . '/images/' . $filename;
  $destination_dir = $dokuwiki_path . '/data/media/' . $_REQUEST['l'] . '/manuals/images/';
  if (!file_exists($destination_dir)) {
    mkAllDir($destination_dir, 0755);
  }
  $destination_path = $destination_dir . strtolower($filename);
  // - check copy() itself: testing only for the destination file would let
  //   an image left over from an earlier import hide a failed copy
  if (!copy($source_path, $destination_path) || !file_exists($destination_path)) {
    printError('Failed to copy image file: ' . $filename);
  }
  // - create the string
  $image_txt = '{{..:images:' . strtolower($filename) . '?' . $width . 'x' . $height . '&direct}}';
  return $image_txt;
}

/**
 * Read the next line from a stream and count it in the global $line_counter.
 *
 * @param resource $in open stream handle
 * @return string|false the line as returned by fgets(), false at end of file
 */
function getLine($in) {
  global $line_counter;
  $line_counter++;
  return fgets($in);
}

/**
 * Strip inline formatting from a piece of text.
 *
 * NOTE(review): the first four search strings are empty as found (so those
 * calls change nothing) and the fifth is a bare newline; presumably they
 * were formatting and line-break tags - confirm and restore.
 *
 * @param string $text text to clean
 * @return string the cleaned text
 */
function noFormatting($text) {
  $text = str_replace( '', '', $text);
  $text = str_replace('', '', $text);
  $text = str_replace( '', '', $text);
  $text = str_replace('', '', $text);
  $text = str_replace('
', '', $text);
  return $text;
}
?>