Frontmatter: \n";
// 1. By-and-large we're going to process all of this in a big state machine
// - the top level page, containing cover page and chapter order information,
// needs to be created last, so we have to store its information
// - collections filled in while parsing
//   $footnotes:           footnote id => translated text (built in the pre-pass)
//   $entity_replacements: entity name => number, taken from ENTITY declarations
//   $page_order:          'page_name|title' entries, later listed under Contents
//   $manual_metadata:     not referenced again in this part of the script;
//                         presumably the store behind addMetadata()/getMetadata()
//                         - TODO confirm
$manual_metadata = $entity_replacements = $footnotes = $page_order = array();

// - text state
//   $looking_for_metadata: name of the cover element currently open, else ''
//   $frontmatter_text:     accumulated text for everything outside chapters
//   $sections_page_name:   id of the first section found outside a chapter
//   $chapter_id:           id of the chapter most recently opened
$looking_for_metadata = $frontmatter_text = $sections_page_name = $chapter_id = '';

// - counters
//   $page_count:       bumped each time a chapter closes and once for the cover
//   $bullet_depth:     current list nesting depth (0 = not inside a list)
//   $in_numbered_item: 0 outside a numbered item, 1 on entering one, then
//                      incremented per text block seen inside it
//   $line_counter:     only read (in the parse warning) within this part of
//                      the script
$page_count = 2;
$bullet_depth = $line_counter = $in_numbered_item = 0;

// - handle of the chapter file being written (false while no chapter is open)
$chapter_txt_out = false;

// - "where are we" flags for the state machine
$in_section = $in_chapter = $in_code = $in_footnotes = false;
$in_bullet_item = $in_indent = false;

// - list / code formatting flags
//   $is_numbered_list:     type of the list most recently opened
//   $seen_code_in_item:    a code block has just ended inside a numbered list
//   $is_code_linenumbered: the current code block wants line numbers
$is_numbered_list = true;
$seen_code_in_item = $is_code_linenumbered = false;
// - the language ('l') and manual ('m') request parameters are spliced into
//   filesystem paths here and again when the chapter, cover and frontmatter
//   pages are written, so refuse anything that is not a plain name before it
//   can be used to step outside the expected directories (e.g. '../')
// NOTE(review): assumes printError() halts the script, as the existing fopen
//   check below already relies on; if the parameters are validated earlier
//   in the script this check is merely redundant - confirm
foreach (array('l', 'm') as $request_key)
{
  if (!isset($_REQUEST[$request_key])
      || !is_string($_REQUEST[$request_key])
      || !preg_match('/^[A-Za-z0-9_-]+\z/', $_REQUEST[$request_key]))
  {
    printError('Invalid or missing request parameter: ' . $request_key);
  }
}
// - construct the path using the information we've been provided as arguments
$xml_file_path = $xml_source_path . '/' . $_REQUEST['l'] . '/' . ucfirst($_REQUEST['m']) . '_' . $_REQUEST['l'] . '.xml';
$xml_in = fopen($xml_file_path, 'r');
if (!$xml_in)
{
  printError('Failed to locate top level page for manual');
}
// - we also use this opportunity to read in any footnotes as we'll need to
// move them onto their appropriate page
// - first pass over the XML: fills $footnotes (footnote id => translated
// text) and then closes the handle; nothing else is parsed here
while (($line = fgets($xml_in)) !== false)
{
// NOTE(review): this pattern has no capture group, yet $matches[1] is read
// just below - the pattern text looks truncated in this copy of the file;
// confirm against the original source before relying on it
if (preg_match('//', $line, $matches))
{
$footnote_id = $matches[1];
// - the footnote's text is expected on the line that follows
$text_line = fgets($xml_in);
// NOTE(review): only one capture group is visible here but $matches[2] is
// used below - same suspicion of truncated pattern text as above
if (preg_match('/(.+?)<\/Text>/', $text_line, $matches))
{
$footnotes[$footnote_id] = '' . translateText($matches[2]);
}
// - throw away
// (one more line is consumed and discarded after the footnote text)
fgets($xml_in);
}
}
fclose($xml_in);
// - second pass: open the same file again, from the top, for the real parse
$xml_in = fopen($xml_file_path, 'r');
if ($xml_in === false)
{
  printError('Failed to locate top level page for manual');
}
while (($line = getLine($xml_in)) !== false)
{
// - Special Case: lingering code blocks, continue if next line also
// contains code, otherwise we need an extra newline
if ($in_code)
{
$code_text = "\n";
if (strpos($line, ' encountered.
if (!$is_numbered_list)
{
$code_text .= "\n\n";
}
else
{
$seen_code_in_item = true;
}
$in_code = false;
}
if ($in_chapter)
{
fwrite($chapter_txt_out, $code_text);
}
else
{
$frontmatter_text .= $code_text;
}
}
// - some system metadata to watch for
if (preg_match('/]+)>/', $line, $matches))
{
$entity = $matches[1];
addMetadata('ENTITY',$entity);
if (preg_match('/([a-z]+)\s+"(\d+);"/', $entity, $matches))
{
$entity_replacements[$matches[1]] = $matches[2];
}
}
// - we have an explicit list of cover metadata to watch for
elseif (!$in_section && !$in_chapter && preg_match('/<(Author|Affiliation|Comment|Date|Heading|SupplementaryText|Title|Version)>/', $line, $matches))
{
$looking_for_metadata = $matches[1];
}
elseif (!$in_section && !$in_chapter && preg_match('/<\/(Author|Affiliation|Comment|Date|Heading|SupplementaryText|Title|Version)>/', $line, $matches))
{
$looking_for_metadata = '';
}
// - found metadata we have!
elseif (!empty($looking_for_metadata) && preg_match('/(.+?)<\/Text>/', $line, $matches))
{
$text_id = $matches[1];
$text = '' . translateText($matches[2]);
addMetadata($looking_for_metadata, $text);
}
// - bogus metadata found in French version
elseif (!empty($looking_for_metadata) && preg_match('//', $line, $matches))
{
}
// - any text we encounter outside of both sections and chapters also
// belongs on the cover
elseif (!$in_section && !$in_chapter && !$in_footnotes && preg_match('/(.+?)<\/Text>/', $line, $matches))
{
// (for now I'll assume id's are persistent)
addMetadata('Text', '' . translateText($matches[2]));
}
// - we will probably encounter the opening section (which is outside of a
// chapter) first, so we have a special case for it
elseif (!$in_chapter && preg_match('//', $line, $matches))
{
$section_id = $matches[1];
// - if this is the first non-chapter section we have encountered then it
// gets the honor of having the page---that these sections will
// eventually be printed out on---named after it. Typically this should
// be "about_this_manual"
if (empty($frontmatter_text))
{
$sections_page_name = $section_id;
}
$in_section = true;
$title = getTitle($xml_in, 'section:' . $section_id);
if (empty($frontmatter_text))
{
array_unshift($page_order, $sections_page_name . '|' . noComments($title));
}
// - if the title, as is, wouldn't autogenerate the appropriate id, then
// we have to include the id explicitly (as another html comment block)
if ($section_id != generateID($title))
{
$title = '' . $title;
$seen_ids[$section_id] = 1;
}
$frontmatter_text .= '===== ' . $title . ' =====' . "\n\n";
// - whew. Chapter's going to be just as bad though.
}
elseif ($in_section && preg_match('/<\/Section>/', $line))
{
$in_section = false;
}
// - start of a chapter: open (after backing up) the chapter's page file,
//   write its top level heading and register the page in $page_order
// NOTE(review): the pattern below has no capture group although $matches[1]
//   is read - the pattern text looks truncated in this copy; confirm
elseif (preg_match('//', $line, $matches))
{
  $chapter_id = $matches[1];
  echo "
\n
Import Chapter:" . $chapter_id . ' ' . "\n";
  $chapter_page_name = $chapter_id;
  // - create a new file to store this chapter
  $chapter_file_dir = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'];
  if (!file_exists($chapter_file_dir))
  {
    mkAllDir($chapter_file_dir, 0755);
  }
  $chapter_file_path = $chapter_file_dir . '/' . $chapter_page_name . '.txt';
  // - backup existing file. The result is checked (as it is for the cover and
  //   frontmatter pages) because the fopen(..., 'w') below truncates the
  //   page: carrying on after a failed backup would destroy the only copy
  if (file_exists($chapter_file_path))
  {
    $chapter_backup_file_path = $chapter_file_path . '.bak';
    if (!rename($chapter_file_path, $chapter_backup_file_path))
    {
      printError('Failed to rename existing chapter page for backup: ' . $chapter_page_name);
    }
  }
  // - open new file for writing
  $chapter_txt_out = fopen($chapter_file_path, 'w');
  if (!$chapter_txt_out)
  {
    printError('Failed to open page file for writing: ' . $chapter_page_name);
  }
  $in_chapter = true;
  // - the chapter title becomes the page's top level heading and, stripped
  //   of comments, its label in the contents list
  $title = getTitle($xml_in, 'chapter: ' . $chapter_id);
  fwrite($chapter_txt_out, '====== ' . $title . ' ======' . "\n\n");
  array_push($page_order, $chapter_page_name . '|' . noComments($title));
}
elseif ($in_chapter && preg_match('/<\/Chapter>/', $line))
{
fclose($chapter_txt_out);
$chapter_txt_out = false;
$in_chapter = false;
$page_count++;
}
// - headings inside a chapter: Section, Subsection and Part each map onto a
//   progressively smaller Dokuwiki heading
elseif ($in_chapter && preg_match('/<(Section|Subsection|Part)\sid="([^"]*)">/', $line, $matches))
{
  $title_type = $matches[1];
  $section_id = $matches[2];
  // - elements without an id get one generated from their (lowercased) type
  if (empty($section_id))
  {
    $section_id = generateID(strtolower($title_type));
  }
  echo '[adding ' . strtolower($title_type) . ': ' . $section_id . '] ';
  $title = getTitle($xml_in, 'heading: ' . $title_type);
  // - pick the heading marker for this element type
  switch ($title_type)
  {
    case 'Section':
      $header_fix = '=====';
      break;
    case 'Subsection':
      $header_fix = '====';
      break;
    case 'Part':
      $header_fix = '===';
      // - part titles additionally lose any bold/italic markup
      $title = preg_replace('/<\/?(B|I)>/i', '', $title);
      break;
    default:
      $header_fix = '';
  }
  // - when the id that would be autogenerated from the title differs from
  //   the element's id, the id is carried explicitly in front of the title
  //   and recorded as seen
  if ($section_id != generateID($title))
  {
    $title = '' . $title;
    $seen_ids[$section_id] = 1;
  }
  fwrite($chapter_txt_out, $header_fix . ' ' . $title . ' ' . $header_fix . "\n\n");
}
elseif ($in_chapter && (strpos($line, '/', $line, $matches))
{
$figure_id = $matches[1];
$other_attributes = $matches[2];
echo '[adding figure: ' . $figure_id . "] \n";
// We need the title too
$caption = getTitle($xml_in, 'figure:' . $figure_id);
$caption = translateText(alternateComments($caption));
$txt = "') !== false)
{
if ($in_chapter)
{
fwrite($chapter_txt_out, "\n\n");
}
else
{
$frontmatter_text .= "\n\n";
}
// - no longer required
$is_code_linenumbered = false;
}
elseif (preg_match('/
]*).*?.*?>/', $line, $matches))
{
$attributes = $matches[1];
$table_txt = '';
$table_id = '';
if (preg_match('/id="([^"]+)"/', $attributes, $matches))
{
$table_id = $matches[1];
}
else
{
$table_id = generateID('table');
}
$hidden = false;
if (strpos($attributes, 'class="hidden"') !== false)
{
$hidden = true;
}
echo '[adding table: ' . $table_id . "] \n";
$table_caption = getTitle($xml_in, 'table: ' . $table_id);
if ($hidden)
{
$table_txt .= '' . "\n";
}
elseif (empty($table_caption))
{
$table_txt .= '' . "\n";
}
else
{
$table_txt .= '' . "\n";
}
// - in order to properly capture the table we're going to have to read in
// the whole thing here, and take note of column widths
$have_output_widths = false;
$column_widths = array();
while (strpos($line, '
') === false)
{
// - find the start of a row
while(!empty($line) && strpos($line, '
') !== false)
{
$row_txt = '|';
$line = getLine($xml_in);
// - now we read in multiple cells (line starting
/', $line, $matches))
{
$cell_width = $matches[1];
if (!$have_output_widths)
{
array_push($column_widths, $cell_width);
}
}
// Ignore empty cells
// - adding another case for empty header cells (turned up in es
// version of "From Paper")
if (preg_match('/
$line = getLine($xml_in);
}
}
$table_txt .= "\n";
if ($in_chapter)
{
fwrite($chapter_txt_out, $table_txt);
}
else
{
$frontmatter_text .= $table_txt;
}
}
// - copy and insert images
elseif (preg_match('//', $line, $matches))
{
$image_txt = handleImage($matches[3], $matches[1], $matches[2]);
if ($in_chapter)
{
fwrite($chapter_txt_out, $image_txt);
}
else
{
$frontmatter_text .= $image_txt;
}
}
// - bullet lists
// NOTE(review): the opening-tag patterns in the next two branches are empty
//   in this copy of the file (they look truncated); they are kept untouched
elseif (preg_match('//', $line))
{
  echo "[adding bulletlist] \n";
  // - a list opening inside an item starts on a fresh line
  if ($in_bullet_item || $in_numbered_item)
  {
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, "\n");
    }
    elseif ($in_section)
    {
      $frontmatter_text .= "\n";
    }
  }
  $bullet_depth++;
  // - remember what kind of list we are nested in, so that it can be
  //   restored when this list closes; without this the remaining items of
  //   an enclosing numbered list would be written as bullets
  if (!isset($list_type_stack))
  {
    $list_type_stack = array();
  }
  $list_type_stack[] = $is_numbered_list;
  $is_numbered_list = false;
}
// - numbered lists
elseif (preg_match('//', $line))
{
  echo "[adding numbered list] \n";
  $bullet_depth++;
  // - as above: keep the enclosing list's type for when this one closes
  if (!isset($list_type_stack))
  {
    $list_type_stack = array();
  }
  $list_type_stack[] = $is_numbered_list;
  $is_numbered_list = true;
  // - reset this flag that keeps track of whether an item (numbered or
  //   otherwise) is legitimately split by a code block
  $seen_code_in_item = false;
}
elseif (preg_match('/<\/BulletList>/', $line))
{
  $bullet_depth--;
  // - type of the list we are returning to (false when nothing was recorded)
  $enclosing_is_numbered = empty($list_type_stack) ? false : array_pop($list_type_stack);
  if ($bullet_depth == 0)
  {
    // - outermost list finished: terminate it with a blank line
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, "\n");
    }
    elseif ($in_section)
    {
      $frontmatter_text .= "\n";
    }
    $is_numbered_list = false;
    $list_type_stack = array();
  }
  else
  {
    // - still inside an outer list: carry on with that list's own type
    $is_numbered_list = $enclosing_is_numbered;
  }
}
elseif (preg_match('/<\/NumberedList>/', $line))
{
  $bullet_depth--;
  // - type of the list we are returning to (false when nothing was recorded)
  $enclosing_is_numbered = empty($list_type_stack) ? false : array_pop($list_type_stack);
  if ($bullet_depth == 0)
  {
    // - outermost list finished: terminate it with a blank line
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, "\n");
    }
    elseif ($in_section)
    {
      $frontmatter_text .= "\n";
    }
    $is_numbered_list = false;
    $list_type_stack = array();
  }
  else
  {
    // - still inside an outer list: carry on with that list's own type
    $is_numbered_list = $enclosing_is_numbered;
  }
}
elseif (preg_match('//', $line))
{
$in_numbered_item = 1;
}
elseif (preg_match('/<\/NumberedItem>/', $line))
{
$in_numbered_item = 0;
if ($in_chapter)
{
fwrite($chapter_txt_out, "\n");
}
else
{
$frontmatter_text .= "\n";
}
}
elseif (preg_match('//', $line))
{
$in_bullet_item = true;
}
elseif (preg_match('/<\/Bullet>/', $line))
{
if ($in_chapter)
{
fwrite($chapter_txt_out, "\n");
}
else
{
$frontmatter_text .= "\n";
}
$in_bullet_item = false;
}
// TEXT HANDLING - this is the main case, but has disappeared into the mire
// of other cases.
elseif (!$in_footnotes && preg_match('/(.+?)$/', $line, $matches))
{
$id = $matches[1];
$str = $matches[2];
// - special case for those text elements split over multiple lines. We
// keep concatenating lines until we find the closing text element or we
// run out of lines!
$another_line = '';
while (strpos($str, '') === false && ($another_line = getLine($xml_in)) !== false)
{
$str .= ' ' . $another_line;
}
// - note that if we ran out of lines (eof) then we'll break out of this
// block anyway, it's just there won't be a </Text> at the end of this
// block... despite this being a major validation issue in the XML it
// shouldn't cause this script to fail
// - now remove the </Text> from the end (hopefully) of str
$str = preg_replace('/<\/Text>\s*/', '', $str);
// - and prepend the id while translating the str into Dokuwiki format
$str = '' . translateText($str);
if ($bullet_depth > 0)
{
if ($is_numbered_list)
{
// - special case for those text elements legitimately split in two by
// code blocks. They get no bullet of either type and are 'run-on'
// immediately to the end of the code element in order to prevent
// dokuwiki restarting numbering etc
if ($seen_code_in_item)
{
// - leave str as it is
// - reset flag just in case the item happens to contain another
// code block
$seen_code_in_item = false;
}
else if ($in_numbered_item == 1)
{
$str = '- ' . $str;
}
// - superspecial case for the poorly formatted numberlists that
// contain more than one text block per point. We'll nest them
// as a bullet list as that preserves order, formatting and (I
// hope) meaning.
else
{
if ($in_chapter)
{
fwrite($chapter_txt_out, "\n");
}
else
{
$frontmatter_text .= "\n";
}
$str = ' * ' . $str;
}
$in_numbered_item++;
}
else
{
$str = '* ' . $str;
}
for ($i = 0; $i < $bullet_depth; $i++)
{
$str = ' ' . $str;
}
}
else
{
// Indented text is preceded by a >
if ($in_indent)
{
$str = '> ' . $str . "\n";
}
else
{
$str .= "\n";
}
}
if ($bullet_depth == 0)
{
$str .= "\n";
}
if ($in_chapter)
{
fwrite($chapter_txt_out, $str);
}
else
{
$frontmatter_text .= $str;
}
}
// - codified text blocks
elseif (preg_match('/(.+?)<\/Text>/', $line, $matches))
{
$code_id = $matches[1];
// - determine the appropriate code block prefix
$code_prefix = '';
if (!$in_code)
{
if ($is_code_linenumbered)
{
$code_prefix = '';
}
else
{
$code_prefix = '';
}
$in_code = true;
}
$code_txt = $code_prefix . '' . translateText($matches[2], true);
if ($in_chapter)
{
fwrite($chapter_txt_out, $code_txt);
}
else
{
$frontmatter_text .= $code_txt;
}
}
elseif (preg_match('/(.+?)<\/Text>/', $line, $matches))
{
$code_id = $matches[1];
// - determine the appropriate code block prefix
$code_prefix = '';
if (!$in_code)
{
if ($is_code_linenumbered)
{
$code_prefix = '';
}
else
{
$code_prefix = '';
}
$in_code = true;
}
$code_txt = $code_prefix . '' . translateText($matches[2], true);
if ($in_chapter)
{
fwrite($chapter_txt_out, $code_txt);
}
else
{
$frontmatter_text .= $code_txt;
}
}
elseif (preg_match('/(.*?)$/', $line, $matches))
{
$code_txt = $matches[1];
// - determine the appropriate code block prefix
$code_prefix = '';
if (!$in_code)
{
if ($is_code_linenumbered)
{
$code_prefix = "\n";
}
else
{
$code_prefix = "\n";
}
$in_code = true;
}
// - arg. another special case for codelines that span more than one line
// (but I guess is a bit cumbersome
// for an element name, eh?)
$another_line = '';
while (strpos($code_txt, '') === false && ($another_line = getLine($xml_in)) !== false)
{
$code_txt .= ' ' . $another_line;
}
$code_txt = preg_replace('/<\/CodeLine>\s*/', '', $code_txt);
$code_txt = $code_prefix . translateText($code_txt, true);
if ($in_chapter)
{
fwrite($chapter_txt_out, $code_txt);
}
else
{
$frontmatter_text .= $code_txt;
}
}
// - there are also sometimes empty codelines - which indicate a newline in
// the code listing
elseif (preg_match('//', $line, $matches))
{
$code_txt = '';
if (!$in_code)
{
$code_txt = "\n";
if ($is_code_linenumbered)
{
$code_txt = "\n" . $code_txt;
}
else
{
$code_txt = "\n" . $code_txt;
}
$in_code = true;
}
if ($in_chapter)
{
fwrite($chapter_txt_out, $code_txt);
}
else
{
$frontmatter_text .= $code_txt;
}
}
// - reference to an external XML file
elseif (preg_match('/^\s*&[a-z0-9_]+;\s+$/is', $line))
{
if ($in_chapter)
{
fwrite($chapter_txt_out, $line);
}
else
{
$frontmatter_text .= $line;
}
}
elseif (strpos($line, '') !== false)
{
$in_footnotes = true;
}
elseif ($in_footnotes && strpos($line, '') !== false)
{
$in_footnotes = false;
}
// Indentation - the closest thing we have is quoting, so we'll use that
elseif (strpos($line, '') !== false)
{
$in_indent = true;
}
elseif (strpos($line, '') !== false)
{
$in_indent = false;
}
// - pattern of lines to ignore
else if (preg_match('/^(<\?xml version="1.0" encoding="UTF-8"\?>|<\!DOCTYPE Manual \[|\]>||<\/?Content>|<\/?Footnote||<\/Manual>)/', $line))
{
}
// - we ignore anything else in footnotes too, as they were handled in the
// preprocessing pass
else if ($in_footnotes)
{
}
// - ignore empty lines
else if (preg_match('/^\s*$/', $line))
{
}
// - meh. French versions have random, non-text element, linebreaks floating
// around. Guess I'll honor their formatting even though it's bogus
else if (preg_match('/^\s* \s*$/', $line))
{
if ($in_chapter)
{
fwrite($chapter_txt_out, ' \\\\');
}
else
{
$frontmatter_text .= ' \\\\';
}
}
// - danger Will Robinson!
else
{
echo '
Warning! Failed to parse line ' . $line_counter . ': |' . htmlspecialchars($line) . "|
\n";
}
}
// 2. We should now have enough metadata to export the cover page
$top_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt';
// - backup any existing file
if (file_exists($top_page_path))
{
  $top_page_backup_path = $top_page_path . '.bak';
  if (!rename($top_page_path, $top_page_backup_path))
  {
    printError('Failed to rename existing top page for backup');
  }
}
// - and create a handle to the new file, reporting a failure the same way
//   the chapter pages do rather than letting every following fwrite() be
//   handed an invalid handle
$txt_out = fopen($top_page_path, 'w');
if (!$txt_out)
{
  printError('Failed to open top page file for writing');
}
// - write the page (including the tables): the heading is built from the
//   first Heading and Title metadata values plus the upper-cased language
fwrite($txt_out, '====== ' . noComments(ucfirst(getFirstMetadata('Heading'))) . ': ' . noComments(ucfirst(getFirstMetadata('Title'))) . ' (' . strtoupper($_REQUEST['l']) . ') ======' . "\n");
fwrite($txt_out, "\n");
// - *NEW* ability to request imports and exports from within the page
fwrite($txt_out, "\n\n");
fwrite($txt_out, '**Administrator Commands:**' . "\n");
// On second thoughts we probably never want to do this casually, as it boguses
// all history/approval/edit information. Instead I'll leave this as a manual
// process.
fwrite($txt_out, '\n");
fwrite($txt_out, ' * Export manual: [[http://~~baseurl~~/../../php/gs-manual-export.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . '&v=draft&a=download|draft version]] [[http://~~baseurl~~/../../php/gs-manual-export.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . '&a=download|approved version]]' . "\n");
fwrite($txt_out, "\n\n");
// - regular metadata: a header row followed by one row per stored value,
//   assembled first and then written out in one go
$cover_table_txt = '' . "\n";
$cover_table_txt .= '^ Metadata ^ Value ^' . "\n";
$fields = array('Heading', 'Title', 'Author', 'Affiliation', 'Text', 'Comment', 'Version', 'Date');
foreach ($fields as $field)
{
  $values = getMetadata($field);
  foreach ($values as $value)
  {
    $cover_table_txt .= '^ ' . $field . ' | ' . $value . ' |' . "\n";
  }
}
fwrite($txt_out, $cover_table_txt);
// - contents (which also provides order information for exporting): one
//   bullet per 'page_name|title' entry collected in $page_order, each linking
//   into this manual's namespace
$contents_txt = "\n";
$contents_txt .= '===== Contents =====' . "\n";
$contents_txt .= "\n";
$contents_txt .= '' . "\n";
foreach ($page_order as $page_info)
{
  $contents_txt .= ' * [[.:' . $_REQUEST['m'] . ':' . $page_info . ']]' . "\n";
}
fwrite($txt_out, $contents_txt);
// - system metadata: same two-column layout as the regular metadata table,
//   this time for the ENTITY and SupplementaryText values
$system_txt = "\n\n";
$system_txt .= '===== System Metadata =====' . "\n";
$system_txt .= '' . "\n";
$system_txt .= '^ Metadata ^ Value ^' . "\n";
$fields = array('ENTITY', 'SupplementaryText');
foreach ($fields as $field)
{
  $values = getMetadata($field);
  foreach ($values as $value)
  {
    $system_txt .= '^ ' . $field . ' | ' . $value . ' |' . "\n";
  }
}
$system_txt .= "\n\n";
$system_txt .= "\n";
fwrite($txt_out, $system_txt);
// - done! the cover page is complete and counts as one more page
fclose($txt_out);
$page_count += 1;
// 3. And the 'sections' page, grouping together all the loose sections as
// frontmatter
$frontmatter_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $sections_page_name . '.txt';
// - the manual's page directory is otherwise only created when a chapter is
//   imported, so make sure it exists for manuals without any chapters
$frontmatter_page_dir = dirname($frontmatter_page_path);
if (!file_exists($frontmatter_page_dir))
{
  mkAllDir($frontmatter_page_dir, 0755);
}
// - backup any existing file
if (file_exists($frontmatter_page_path))
{
  $frontmatter_page_backup_path = $frontmatter_page_path . '.bak';
  if (!rename($frontmatter_page_path, $frontmatter_page_backup_path))
  {
    printError('Failed to rename existing frontmatter page for backup');
  }
}
// - populate the new frontmatter file. file_put_contents() returns false on
//   failure (0 is a legitimate result for empty text), so compare strictly
//   and report the problem instead of silently losing the page
if (file_put_contents($frontmatter_page_path, $frontmatter_text) === false)
{
  printError('Failed to write frontmatter page');
}
echo "