1 | <?php
|
---|
2 |
|
---|
3 | require_once('common.php');
|
---|
4 |
|
---|
/** @file gs-manual-import.php
 * This script transforms the single XML manual file required by the rest of
 * the Greenstone manual generation scripts into the series of dokuwiki pages
 * that make up a certain manual (as specified by the 'm' argument) in a
 * certain language ('l').
 */
11 |
|
---|
// Read the command line first; the usage text is reported through
// printError() (defined outside this file) when parsing fails.
if (!parseCLIArguments())
{
  printError("Error! Failed to parse arguments...\nUsage: gs-manual-import.php -m [user|install|develop|paper]");
}

// 0. Initialization
// - apply defaults for any argument that is missing or empty: English ('en')
//   and the 'user' manual. The other manuals accepted below are 'install',
//   'develop' and 'paper'. empty() already covers the "not set" case.
foreach (array('l' => 'en', 'm' => 'user') as $argument_name => $argument_default)
{
  if (empty($_REQUEST[$argument_name]))
  {
    $_REQUEST[$argument_name] = $argument_default;
  }
}

// - validate arguments before we use them (security): both values end up in
//   file system paths and in the HTML written further down
if (preg_match('/^(develop|install|paper|user)$/', $_REQUEST['m']) !== 1)
{
  printError('Unknown manual type requested: ' . htmlspecialchars($_REQUEST['m']));
}

if (preg_match('/^(ar|en|es|fr|pt-br|ru)$/', $_REQUEST['l']) !== 1)
{
  printError('Unknown language requested: ' . htmlspecialchars($_REQUEST['l']));
}
40 |
|
---|
// Progress banner: where the XML comes from and which manual/language is
// being imported. The final echo opens the paragraph that collects the
// "[adding ...]" progress notes printed while the frontmatter is parsed.
echo '<h2>Importing Greenstone Manual XML</h2>', "\n";
echo '<p><b>XML Source Path:</b> ', $xml_source_path,
     '<br/><b>Manual:</b> ', $_REQUEST['m'],
     '<br/><b>Language:</b> ', $_REQUEST['l'],
     "</p>\n<hr/>\n";
echo "<p><b>Frontmatter: </b><br/>\n";
// 1. By-and-large we're going to process all of this in a big state machine
//    - the top level page, containing cover page and chapter order
//      information, needs to be created last, so we have to store its
//      information until the whole file has been read

// - data collected while parsing
$manual_metadata = array();
$entity_replacements = array();   // entity name => numeric character code
$footnotes = array();             // footnote id => translated footnote text
$page_order = array();            // entries of the form "page_name|title"
$frontmatter_text = '';           // everything that is not inside a chapter
$sections_page_name = '';         // id of the first section outside a chapter
$page_count = 2;                  // incremented once per closed chapter
$line_counter = 0;                // reported in the "Failed to parse line" warning

// - where we currently are in the document
$looking_for_metadata = '';       // name of the cover metadata element being read
$in_section = false;
$in_chapter = false;
$chapter_id = '';
$chapter_txt_out = false;         // handle of the chapter page being written
$in_footnotes = false;
$in_indent = false;

// - list handling
$bullet_depth = 0;                // nesting depth over bullet and numbered lists
$is_numbered_list = true;
$in_numbered_item = 0;            // 0 outside an item, else a count of text blocks seen
$in_bullet_item = false;
$seen_code_in_item = false;       // a code block has just split a numbered item

// - code block handling
$in_code = false;
$is_code_linenumbered = false;    // set by a figure carrying withLineNumber
// - construct the path using the information we've been provided as
//   arguments: <source>/<lang>/<Manual>_<lang>.xml
$xml_file_path = sprintf('%s/%s/%s_%s.xml', $xml_source_path, $_REQUEST['l'], ucfirst($_REQUEST['m']), $_REQUEST['l']);
$xml_in = fopen($xml_file_path, 'r');
if (!$xml_in)
{
  printError('Failed to locate top level page for manual');
}

// - first pass: collect the footnotes only, as they have to be moved onto
//   the page that references them. Each footnote is expected on three
//   consecutive lines: <Footnote id="N">, its <Text> element, </Footnote>.
while (($line = fgets($xml_in)) !== false)
{
  if (!preg_match('/<Footnote id="(\d+)">/', $line, $footnote_match))
  {
    continue;
  }
  $footnote_id = $footnote_match[1];
  $text_line = fgets($xml_in);
  if (preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $text_line, $text_match))
  {
    $footnotes[$footnote_id] = '<!-- id:' . $text_match[1] . ' -->' . translateText($text_match[2]);
  }
  // - throw away </Footnote>
  fgets($xml_in);
}
fclose($xml_in);

// - second pass: reopen the same file so the main parse starts from the top
$xml_in = fopen($xml_file_path, 'r');
if (!$xml_in)
{
  printError('Failed to locate top level page for manual');
}
// Main parsing pass. Each line delivered by getLine() is matched against an
// ordered chain of cases (first match wins). Output goes either to the open
// chapter file ($chapter_txt_out, while $in_chapter is set) or is appended to
// $frontmatter_text. getLine(), getTitle(), translateText(), addMetadata(),
// generateID(), handleImage() and friends are defined outside this file;
// getLine() is taken to return false at end of file, as the loop condition
// below already relies on that.
while (($line = getLine($xml_in)) !== false)
{
  // - Special Case: lingering code blocks, continue if next line also
  //   contains code, otherwise we need an extra newline
  if ($in_code)
  {
    $code_text = "\n";
    if (strpos($line, '<CodeLine') === false && strpos($line, 'type="code"') === false)
    {
      $code_text .= '</code>';
      // - codeblocks that appear inside numbered lists do not get their own
      //   newlines as that would split the item. Instead newlines will be
      //   added when </NumberedItem> encountered.
      if (!$is_numbered_list)
      {
        $code_text .= "\n\n";
      }
      else
      {
        $seen_code_in_item = true;
      }
      $in_code = false;
    }
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $code_text);
    }
    else
    {
      $frontmatter_text .= $code_text;
    }
  }

  // - some system metadata to watch for
  if (preg_match('/<!ENTITY\s+([^>]+)>/', $line, $matches))
  {
    $entity = $matches[1];
    addMetadata('ENTITY',$entity);
    // - numeric character entities are remembered as name => code
    if (preg_match('/([a-z]+)\s+"&#(\d+);"/', $entity, $matches))
    {
      $entity_replacements[$matches[1]] = $matches[2];
    }
  }
  // - we have an explicit list of cover metadata to watch for
  elseif (!$in_section && !$in_chapter && preg_match('/<(Author|Affiliation|Comment|Date|Heading|SupplementaryText|Title|Version)>/', $line, $matches))
  {
    $looking_for_metadata = $matches[1];
  }
  elseif (!$in_section && !$in_chapter && preg_match('/<\/(Author|Affiliation|Comment|Date|Heading|SupplementaryText|Title|Version)>/', $line, $matches))
  {
    $looking_for_metadata = '';
  }
  // - found metadata we have!
  elseif (!empty($looking_for_metadata) && preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $line, $matches))
  {
    $text_id = $matches[1];
    $text = '<!-- id:' . $text_id . ' -->' . translateText($matches[2]);
    addMetadata($looking_for_metadata, $text);
  }
  // - bogus metadata found in French version (self-closed, empty Text):
  //   deliberately ignored
  elseif (!empty($looking_for_metadata) && preg_match('/<Text id="([^"]+)"\/>/', $line, $matches))
  {
  }
  // - any text we encounter outside of both sections and chapters also
  //   belongs on the cover
  elseif (!$in_section && !$in_chapter && !$in_footnotes && preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $line, $matches))
  {
    // (for now I'll assume id's are persistent)
    addMetadata('Text', '<!-- id:' . $matches[1] . ' -->' . translateText($matches[2]));
  }
  // - we will probably encounter the opening section (which is outside of a
  //   chapter) first, so we have a special case for it
  elseif (!$in_chapter && preg_match('/<Section id="([^"]+)">/', $line, $matches))
  {
    $section_id = $matches[1];
    // - if this is the first non-chapter section we have encountered then it
    //   gets the honor of having the page---that these sections will
    //   eventually be printed out on---named after it. Typically this should
    //   be "about_this_manual"
    if (empty($frontmatter_text))
    {
      $sections_page_name = $section_id;
    }
    $in_section = true;
    $title = getTitle($xml_in, 'section:' . $section_id);
    if (empty($frontmatter_text))
    {
      array_unshift($page_order, $sections_page_name . '|' . noComments($title));
    }
    // - if the title, as is, wouldn't autogenerate the appropriate id, then
    //   we have to include the id explicitly (as another html comment block)
    if ($section_id != generateID($title))
    {
      $title = '<!-- sid:' . $section_id . ' -->' . $title;
      $seen_ids[$section_id] = 1;
    }
    $frontmatter_text .= '===== ' . $title . ' =====' . "\n\n";
  }
  elseif ($in_section && preg_match('/<\/Section>/', $line))
  {
    $in_section = false;
  }
  // - a chapter starts a new wiki page of its own
  elseif (preg_match('/<Chapter id="([^"]+)">/', $line, $matches))
  {
    $chapter_id = $matches[1];
    echo "</p>\n<p><b>Import Chapter:</b>" . $chapter_id . '<br/>' . "\n";
    $chapter_page_name = $chapter_id;
    // - create a new file to store this chapter
    $chapter_file_dir = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'];
    if (!file_exists($chapter_file_dir))
    {
      mkAllDir($chapter_file_dir, 0755);
    }
    $chapter_file_path = $chapter_file_dir . '/' . $chapter_page_name . '.txt';
    // - backup existing file
    if (file_exists($chapter_file_path))
    {
      $chapter_backup_file_path = $chapter_file_path . '.bak';
      rename($chapter_file_path, $chapter_backup_file_path);
    }
    // - open new file for writing
    $chapter_txt_out = fopen($chapter_file_path, 'w');
    if (!$chapter_txt_out)
    {
      printError('Failed to open page file for writing: ' . $chapter_page_name);
    }
    $in_chapter = true;
    $title = getTitle($xml_in, 'chapter: ' . $chapter_id);
    fwrite($chapter_txt_out, '====== ' . $title . ' ======' . "\n\n");
    array_push($page_order, $chapter_page_name . '|' . noComments($title));
  }
  elseif ($in_chapter && preg_match('/<\/Chapter>/', $line))
  {
    fclose($chapter_txt_out);
    $chapter_txt_out = false;
    $in_chapter = false;
    $page_count++;
  }
  // - section, subsection and part titles within chapter
  elseif ($in_chapter && preg_match('/<(Section|Subsection|Part)\sid="([^"]*)">/', $line, $matches))
  {
    $title_type = $matches[1];
    $section_id = $matches[2];
    if (empty($section_id))
    {
      $section_id = generateID(strtolower($title_type));
    }
    echo '[adding ' . strtolower($title_type) . ': ' . $section_id . '] ';
    $header_fix = '';
    $title = getTitle($xml_in, 'heading: ' . $title_type);
    // - the heading level shrinks from section to subsection to part
    if ($title_type == 'Section')
    {
      $header_fix = '=====';
    }
    if ($title_type == 'Subsection')
    {
      $header_fix = '====';
    }
    if ($title_type == 'Part')
    {
      $header_fix = '===';
      // - remove b's and i's
      $title = preg_replace('/<\/?(B|I)>/i', '', $title);
    }
    // - if the title, as is, wouldn't autogenerate the appropriate id, then
    //   we have to include the id explicitly (as another html comment block)
    if ($section_id != generateID($title))
    {
      $title = '<!-- sid:' . $section_id . ' -->' . $title;
      $seen_ids[$section_id] = 1;
    }
    fwrite($chapter_txt_out, $header_fix . ' ' . $title . ' ' . $header_fix . "\n\n");
  }
  elseif ($in_chapter && (strpos($line, '</Section') !== false || strpos($line, '</Subsection') !== false || strpos($line, '</Part') !== false))
  {
    // do nothing for now
  }
  // - figures (and their titles/captions)
  elseif (preg_match('/<Figure id="([^"]+)"(.*?)>/', $line, $matches))
  {
    $figure_id = $matches[1];
    $other_attributes = $matches[2];
    echo '[adding figure: ' . $figure_id . "] \n";
    // We need the title too
    $caption = getTitle($xml_in, 'figure:' . $figure_id);
    $caption = translateText(alternateComments($caption));
    $txt = "<imgcaption figure_" . $figure_id . '|' . $caption . ' ';
    // - we also check the other attributes to see if the XML has requested
    //   any following codeblock be linenumbered
    if (strpos($other_attributes, 'withLineNumber') !== false)
    {
      $is_code_linenumbered = true;
      $txt .= '%!-- withLineNumber --%';
    }
    $txt .= '></imgcaption>' . "\n";
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $txt);
    }
    else
    {
      $frontmatter_text .= $txt;
    }
  }
  elseif (strpos($line, '</Figure>') !== false)
  {
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, "\n\n");
    }
    else
    {
      $frontmatter_text .= "\n\n";
    }
    // - no longer required
    $is_code_linenumbered = false;
  }
  // - tables: the whole table is consumed here, row by row
  elseif (preg_match('/<Table([^>]*).*?.*?>/', $line, $matches))
  {
    $attributes = $matches[1];
    $table_txt = '';
    $table_id = '';
    if (preg_match('/id="([^"]+)"/', $attributes, $matches))
    {
      $table_id = $matches[1];
    }
    else
    {
      $table_id = generateID('table');
    }
    $hidden = false;
    if (strpos($attributes, 'class="hidden"') !== false)
    {
      $hidden = true;
    }
    echo '[adding table: ' . $table_id . "] \n";
    $table_caption = getTitle($xml_in, 'table: ' . $table_id);
    if ($hidden)
    {
      $table_txt .= '<tblcaption table_' . $table_id . '|##HIDDEN##></tblcaption>' . "\n";
    }
    elseif (empty($table_caption))
    {
      $table_txt .= '<tblcaption table_' . $table_id . '|##NOCAPTION##></tblcaption>' . "\n";
    }
    else
    {
      $table_txt .= '<tblcaption table_' . $table_id . '|' . noComments($table_caption) . '></tblcaption>' . "\n";
    }
    // - in order to properly capture the table we're going to have to read in
    //   the whole thing here, and take note of column widths
    $have_output_widths = false;
    $column_widths = array();
    // - every loop below also stops when getLine() reports end of file
    //   (false); a truncated table must not hang the import
    while ($line !== false && strpos($line, '</Table>') === false)
    {
      // - find the start of a row (empty lines are simply skipped)
      while ($line !== false && strpos($line, '<tr>') === false && strpos($line, '</Table>') === false)
      {
        $line = getLine($xml_in);
      }
      if ($line !== false && strpos($line, '<tr>') !== false)
      {
        $row_txt = '|';
        $line = getLine($xml_in);
        // - now we read in multiple cells (line starting <th
        while ($line !== false && strpos($line, '<th') === 0)
        {
          if (preg_match('/<th width="(\d+)"\/?>/', $line, $matches))
          {
            $cell_width = $matches[1];
            // - widths are only collected from the first row
            if (!$have_output_widths)
            {
              array_push($column_widths, $cell_width);
            }
          }
          // Ignore empty cells
          // - adding another case for empty header cells (turned up in es
          //   version of "From Paper")
          if (preg_match('/<th width="\d+"\/>/', $line) || preg_match('/<th width="\d+">.*<\/th>/', $line))
          {
            $row_txt .= ' |';
          }
          else
          {
            $line = getLine($xml_in);
            $first = true;
            while ($line !== false && strpos($line, '</th>') === false)
            {
              // - successive elements within one cell are separated by a
              //   dokuwiki forced linebreak
              if (!$first)
              {
                $row_txt .= '\\\\';
              }
              // - we can have images or text in our tables
              if (preg_match('/<File.*url="images\/([^"]+)".*\/>/', $line, $matches))
              {
                $payload = $matches[0];
                $filename = $matches[1];
                $width = 0;
                if (preg_match('/width="(\d+)"/', $payload, $matches))
                {
                  $width = $matches[1];
                }
                $height = 0;
                if (preg_match('/height="(\d+)"/', $payload, $matches))
                {
                  $height = $matches[1];
                }
                $image_txt = handleImage($filename, $width, $height);
                $row_txt .= ' ' . $image_txt . ' ';
              }
              elseif (preg_match('/<Text id="([^"]+)">(.*)/', $line, $matches))
              {
                $tid = $matches[1];
                $txt = $matches[2];
                // - multiple line text block: keep reading until the closing
                //   element turns up or we run out of lines
                while (strpos($txt, '</Text>') === false && ($more_text = getLine($xml_in)) !== false)
                {
                  $txt .= $more_text;
                }
                $txt = str_replace('</Text>','',$txt);
                $row_txt .= ' <!-- id:' . $tid . ' -->' . translateText($txt) . ' ';
              }
              elseif (preg_match('/<CodeLine>(.*?)<\/CodeLine>/',$line,$matches))
              {
                $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]) . '\'\' ';
              }
              elseif (preg_match('/<CodeLine>(.*)/',$line,$matches))
              {
                $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]) . '\'\' ';
              }
              elseif (preg_match('/(.*)<\/CodeLine>/',$line,$matches))
              {
                if (!empty($matches[1]))
                {
                  $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]). '\'\' ';
                }
                else
                {
                  $row_txt .= ' ';
                }
              }
              // we'll add (bogus) linebreaks
              // - append (not assign), otherwise every cell already
              //   collected for this row would be thrown away
              elseif (preg_match('/^\s*<br\s*\/?>\s*$/', $line))
              {
                $row_txt .= ' ';
              }
              else
              {
                printError('Warning! Unrecognized element in table: ' . htmlspecialchars($line));
              }
              $first = false;
              // - next line
              $line = getLine($xml_in);
            }
            // - close the cell
            $row_txt .= '|';
          }
          // next!
          $line = getLine($xml_in);
        }
        // - if we haven't already, output the width command
        if (!$have_output_widths)
        {
          $table_txt .= '|< - ' . implode(' ', $column_widths) . ' >|' . "\n";
          $have_output_widths = true;
        }
        $table_txt .= $row_txt . "\n";
        // - throw away the closing </tr>
        $line = getLine($xml_in);
      }
    }
    $table_txt .= "\n";
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $table_txt);
    }
    else
    {
      $frontmatter_text .= $table_txt;
    }
  }
  // - copy and insert images
  elseif (preg_match('/<File width="(\d+)" height="(\d+)" url="images\/([^"]+)"\/>/', $line, $matches))
  {
    $image_txt = handleImage($matches[3], $matches[1], $matches[2]);
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $image_txt);
    }
    else
    {
      $frontmatter_text .= $image_txt;
    }
  }
  // - bullet lists
  elseif (preg_match('/<BulletList>/', $line))
  {
    echo "[adding bulletlist] \n";
    // - a list nested inside an item starts on a fresh line
    if ($in_bullet_item || $in_numbered_item)
    {
      if ($in_chapter)
      {
        fwrite($chapter_txt_out, "\n");
      }
      elseif ($in_section)
      {
        $frontmatter_text .= "\n";
      }
    }
    $bullet_depth++;
    $is_numbered_list = false;
  }
  // - numbered lists
  elseif (preg_match('/<NumberedList>/', $line))
  {
    echo "[adding numbered list] \n";
    $bullet_depth++;
    $is_numbered_list = true;
    // - reset this flag that keeps track of whether an item (numbered or
    //   otherwise) is legitimately split by a code block
    $seen_code_in_item = false;
  }
  elseif (preg_match('/<\/BulletList>/', $line))
  {
    $bullet_depth--;
    // - leaving the outermost list: blank line after it
    if ($bullet_depth == 0)
    {
      if ($in_chapter)
      {
        fwrite($chapter_txt_out, "\n");
      }
      elseif ($in_section)
      {
        $frontmatter_text .= "\n";
      }
      $is_numbered_list = false;
    }
  }
  elseif (preg_match('/<\/NumberedList>/', $line))
  {
    $bullet_depth--;
    if ($bullet_depth == 0)
    {
      if ($in_chapter)
      {
        fwrite($chapter_txt_out, "\n");
      }
      elseif ($in_section)
      {
        $frontmatter_text .= "\n";
      }
      $is_numbered_list = false;
    }
  }
  elseif (preg_match('/<NumberedItem>/', $line))
  {
    // - 1 means "inside an item, no text block written yet"
    $in_numbered_item = 1;
  }
  elseif (preg_match('/<\/NumberedItem>/', $line))
  {
    $in_numbered_item = 0;
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, "\n");
    }
    else
    {
      $frontmatter_text .= "\n";
    }
  }
  elseif (preg_match('/<Bullet>/', $line))
  {
    $in_bullet_item = true;
  }
  elseif (preg_match('/<\/Bullet>/', $line))
  {
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, "\n");
    }
    else
    {
      $frontmatter_text .= "\n";
    }
    $in_bullet_item = false;
  }
  // TEXT HANDLING - this is the main case, but has disappeared into the mire
  // of other cases.
  elseif (!$in_footnotes && preg_match('/<Text id="([^"]+)">(.+?)$/', $line, $matches))
  {
    $id = $matches[1];
    $str = $matches[2];
    // - special case for those text elements split over multiple lines. We
    //   keep concatenating lines until we find the closing text element or we
    //   run out of lines!
    $another_line = '';
    while (strpos($str, '</Text>') === false && ($another_line = getLine($xml_in)) !== false)
    {
      $str .= ' ' . $another_line;
    }
    // - note that if we ran out of lines (eof) then we'll break out of this
    //   block anyway, it's just there won't be a </Text> at the end of this
    //   block... despite this being a major validation issue in the XML it
    //   shouldn't result in this script failing
    // - now remove the </Text> from the end (hopefully) of str
    $str = preg_replace('/<\/Text>\s*/', '', $str);
    // - and prepend the id while translating the str into Dokuwiki format
    $str = '<!-- id:' . $id . ' -->' . translateText($str);
    if ($bullet_depth > 0)
    {
      if ($is_numbered_list)
      {
        // - special case for those text elements legitimately split in two by
        //   code blocks. They get no bullet of either type and are 'run-on'
        //   immediately to the end of the code element in order to prevent
        //   dokuwiki restarting numbering etc
        if ($seen_code_in_item)
        {
          // - leave str as it is
          // - reset flag just in case the item happens to contain another
          //   code block
          $seen_code_in_item = false;
        }
        else if ($in_numbered_item == 1)
        {
          $str = '- ' . $str;
        }
        // - superspecial case for the poorly formatted numberlists that
        //   contain more than one text block per point. We'll nest them
        //   as a bullet list as that preserves order, formatting and (I
        //   hope) meaning.
        else
        {
          if ($in_chapter)
          {
            fwrite($chapter_txt_out, "\n");
          }
          else
          {
            $frontmatter_text .= "\n";
          }
          // - one extra indent level (two spaces) so the bullet nests
          //   beneath the numbered item it belongs to
          $str = '  * ' . $str;
        }
        $in_numbered_item++;
      }
      else
      {
        $str = '* ' . $str;
      }
      // - dokuwiki only recognises a list item when it is indented by at
      //   least two spaces, so each nesting level contributes two
      $str = str_repeat('  ', $bullet_depth) . $str;
    }
    else
    {
      // Indented text is preceded by a >
      if ($in_indent)
      {
        $str = '> ' . $str . "\n";
      }
      else
      {
        $str .= "\n";
      }
    }
    // - text outside any list is followed by a blank line
    if ($bullet_depth == 0)
    {
      $str .= "\n";
    }
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $str);
    }
    else
    {
      $frontmatter_text .= $str;
    }
  }
  // - codified text blocks (type attribute before id)
  elseif (preg_match('/<Text\s+type="code"\s+id="([^"]+)"\s*>(.+?)<\/Text>/', $line, $matches))
  {
    $code_id = $matches[1];
    // - determine the appropriate code block prefix; the block is closed
    //   again by the "lingering code" case at the top of the loop
    $code_prefix = '';
    if (!$in_code)
    {
      if ($is_code_linenumbered)
      {
        $code_prefix = '<code 1>';
      }
      else
      {
        $code_prefix = '<code>';
      }
      $in_code = true;
    }
    $code_txt = $code_prefix . '<!-- id:' . $matches[1] . ' -->' . translateText($matches[2], true);
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $code_txt);
    }
    else
    {
      $frontmatter_text .= $code_txt;
    }
  }
  // - codified text blocks (id attribute before type)
  elseif (preg_match('/<Text\s+id="([^"]+)"\s+type="code"\s*>(.+?)<\/Text>/', $line, $matches))
  {
    $code_id = $matches[1];
    // - determine the appropriate code block prefix
    $code_prefix = '';
    if (!$in_code)
    {
      if ($is_code_linenumbered)
      {
        $code_prefix = '<code 1>';
      }
      else
      {
        $code_prefix = '<code>';
      }
      $in_code = true;
    }
    $code_txt = $code_prefix . '<!-- id:' . $matches[1] . ' -->' . translateText($matches[2], true);
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $code_txt);
    }
    else
    {
      $frontmatter_text .= $code_txt;
    }
  }
  elseif (preg_match('/<CodeLine>(.*?)$/', $line, $matches))
  {
    $code_txt = $matches[1];
    // - determine the appropriate code block prefix
    $code_prefix = '';
    if (!$in_code)
    {
      if ($is_code_linenumbered)
      {
        $code_prefix = "<code 1>\n";
      }
      else
      {
        $code_prefix = "<code>\n";
      }
      $in_code = true;
    }
    // - arg. another special case for codelines that span more than one line
    //   (but I guess <CodeLineButSometimesMoreThanOneLine> is a bit cumbersome
    //   for an element name, eh?)
    $another_line = '';
    while (strpos($code_txt, '</CodeLine>') === false && ($another_line = getLine($xml_in)) !== false)
    {
      $code_txt .= ' ' . $another_line;
    }
    $code_txt = preg_replace('/<\/CodeLine>\s*/', '', $code_txt);
    $code_txt = $code_prefix . translateText($code_txt, true);
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $code_txt);
    }
    else
    {
      $frontmatter_text .= $code_txt;
    }
  }
  // - there are also sometimes empty codelines - which indicate a newline in
  //   the code listing
  elseif (preg_match('/<CodeLine\s*\/>/', $line, $matches))
  {
    $code_txt = '';
    if (!$in_code)
    {
      $code_txt = "\n";
      if ($is_code_linenumbered)
      {
        $code_txt = "<code 1>\n" . $code_txt;
      }
      else
      {
        $code_txt = "<code>\n" . $code_txt;
      }
      $in_code = true;
    }
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $code_txt);
    }
    else
    {
      $frontmatter_text .= $code_txt;
    }
  }
  // - reference to an external XML file: passed through untouched
  elseif (preg_match('/^\s*&[a-z0-9_]+;\s+$/is', $line))
  {
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, $line);
    }
    else
    {
      $frontmatter_text .= $line;
    }
  }
  elseif (strpos($line, '<FootnoteList>') !== false)
  {
    $in_footnotes = true;
  }
  elseif ($in_footnotes && strpos($line, '</FootnoteList>') !== false)
  {
    $in_footnotes = false;
  }
  // Indentation - the closest thing we have is quoting, so we'll use that
  elseif (strpos($line, '<Indented>') !== false)
  {
    $in_indent = true;
  }
  elseif (strpos($line, '</Indented>') !== false)
  {
    $in_indent = false;
  }
  // - pattern of lines to ignore
  else if (preg_match('/^(<\?xml version="1.0" encoding="UTF-8"\?>|<\!DOCTYPE Manual \[|\]>|<Bullet>|<\/?Content>|<\/?Footnote|<Manual id=".+?" lang=".+?">|<\/Manual>)/', $line))
  {
  }
  // - we ignore anything else in footnotes too, as they were handled in the
  //   preprocessing pass
  else if ($in_footnotes)
  {
  }
  // - ignore empty lines
  else if (preg_match('/^\s*$/', $line))
  {
  }
  // - meh. French versions have random, non-text element, linebreaks floating
  //   around. Guess I'll honor their formatting even though it's bogus
  else if (preg_match('/^\s*<br\s*\/?>\s*$/', $line))
  {
    if ($in_chapter)
    {
      fwrite($chapter_txt_out, ' \\\\');
    }
    else
    {
      $frontmatter_text .= ' \\\\';
    }
  }
  // - danger Will Robinson!
  // NOTE(review): $line_counter is never changed inside this loop; presumably
  // getLine() maintains it - confirm, otherwise the warning always reports 0.
  else
  {
    echo '<div style="background-color:yellow;"><hr /><b>Warning!</b> Failed to parse line ' . $line_counter . ': |' . htmlspecialchars($line) . "|<hr /></div>\n";
  }
}
859 |
|
---|
// 2. We should now have enough metadata to export the cover page
$top_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '.txt';
// - backup any existing file
if (file_exists($top_page_path))
{
  $top_page_backup_path = $top_page_path . '.bak';
  if (!rename($top_page_path, $top_page_backup_path))
  {
    printError('Failed to rename existing top page for backup');
  }
}
// - and create a handle to the new file, refusing to carry on without one
//   (every fwrite() below would otherwise be handed false)
//   NOTE(review): printError() is defined outside this chunk; presumably it
//   aborts the script - confirm
$txt_out = fopen($top_page_path, 'w');
if ($txt_out === false)
{
  printError('Failed to open top page for writing: ' . $top_page_path);
}
// - write the page (including the tables)
//   hidden comments are stripped *before* capitalising, as a value that
//   starts with a '<!-- ... -->' marker would otherwise make ucfirst() a
//   no-op
fwrite($txt_out, '====== ' . ucfirst(noComments(getFirstMetadata('Heading'))) . ': ' . ucfirst(noComments(getFirstMetadata('Title'))) . ' (' . strtoupper($_REQUEST['l']) . ') ======' . "\n");
fwrite($txt_out, "\n");

// - *NEW* ability to request imports and exports from within the page
fwrite($txt_out, "<ifauth @admin>\n\n");
fwrite($txt_out, '**Administrator Commands:**' . "\n");
// On second thoughts we probably never want to do this casually, as it boguses
// all history/approval/edit information. Instead I'll leave this as a manual
// process.
fwrite($txt_out, '<!-- Import available at this link - but be warned all current wiki data for this manual will become bogus: http://~~baseurl~~/../../php/gs-manual-import.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . " -->\n");
fwrite($txt_out, ' * Export manual: [[http://~~baseurl~~/../../php/gs-manual-export.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . '&v=draft&a=download|draft version]] [[http://~~baseurl~~/../../php/gs-manual-export.php?m=' . $_REQUEST['m'] . '&l=' . $_REQUEST['l'] . '&a=download|approved version]]' . "\n");
fwrite($txt_out, "</ifauth>\n\n");

// - regular metadata, one table row per stored value
fwrite($txt_out, '<!-- Note: cover page information -->' . "\n");
fwrite($txt_out, '^ Metadata ^ Value ^' . "\n");
$fields = array('Heading','Title','Author','Affiliation','Text','Comment','Version','Date');
foreach ($fields as $field)
{
  $values = getMetadata($field);
  foreach ($values as $value)
  {
    fwrite($txt_out, '^ ' . $field . ' | ' . $value . ' |' . "\n");
  }
}
fwrite($txt_out, "\n");
// - contents (which also provides order information for exporting)
fwrite($txt_out, '===== Contents =====' . "\n");
fwrite($txt_out, "\n");
fwrite($txt_out, '<!-- Note: The ordering of pages here is used when creating the HTML and PDF versions of the manual -->' . "\n");
foreach ($page_order as $page_info)
{
  fwrite($txt_out, ' * [[.:' . $_REQUEST['m'] . ':' . $page_info . ']]' . "\n");
}
// - system metadata (only shown to administrators)
fwrite($txt_out, "<ifauth @admin>\n\n");
fwrite($txt_out, '===== System Metadata =====' . "\n");
fwrite($txt_out, '<!-- Note: configuration options for the manual -->' . "\n");
fwrite($txt_out, '^ Metadata ^ Value ^' . "\n");
$fields = array('ENTITY','SupplementaryText');
foreach ($fields as $field)
{
  $values = getMetadata($field);
  foreach ($values as $value)
  {
    fwrite($txt_out, '^ ' . $field . ' | ' . $value . ' |' . "\n");
  }
}
fwrite($txt_out, "</ifauth>\n\n");
fwrite($txt_out, "\n");
// - done!
fclose($txt_out);
$page_count++;
927 |
|
---|
// 3. And the 'sections' page, grouping together all the loose sections as
//    frontmatter
$frontmatter_page_path = $dokuwiki_path . '/data/pages/' . $_REQUEST['l'] . '/manuals/' . $_REQUEST['m'] . '/' . $sections_page_name . '.txt';
// - backup any existing file
if (file_exists($frontmatter_page_path))
{
  $frontmatter_page_backup_path = $frontmatter_page_path . '.bak';
  if (!rename($frontmatter_page_path, $frontmatter_page_backup_path))
  {
    printError('Failed to rename existing frontmatter page for backup');
  }
}
// - populate the new frontmatter file, reporting a failed write rather than
//   announcing a successful import
//   NOTE(review): printError() is defined outside this chunk; presumably it
//   aborts the script - confirm
if (file_put_contents($frontmatter_page_path, $frontmatter_text) === false)
{
  printError('Failed to write frontmatter page: ' . $frontmatter_page_path);
}

echo "</p>\n<p><b>Complete!</b> Imported " . $page_count . " pages</p><hr/>\n";
echo '<p>Click <a href="' . $dokuwiki_url . '/doku.php?id=' . $_REQUEST['l'] . ':manuals:' . $_REQUEST['m'] . '">here</a> to return to wiki page</p>' . "\n";
exit(0);
946 |
|
---|
/**
 * Record one more value for a metadata field of the manual.
 *
 * Values accumulate in the global $manual_metadata array, which maps each
 * field name to the list of values added so far (in insertion order). A
 * progress note is echoed for every call.
 *
 * @param $field the metadata field name
 * @param $value the value to append to that field's list
 */
function addMetadata($field, $value)
{
  global $manual_metadata;
  echo '[adding metadata: ' . $field . "] \n";
  // - appending with [] creates the list on first use
  $manual_metadata[$field][] = $value;
}
961 | /** addMetadata() **/
|
---|
962 |
|
---|
/**
 * Retrieve the first value stored for a metadata field.
 *
 * @param $field the metadata field name
 * @return the first entry of the field's value list in the global
 *         $manual_metadata, or an empty string when the field is unknown
 *         or has no values
 */
function getFirstMetadata($field)
{
  global $manual_metadata;
  // - empty() covers both "never set" and "set but holding nothing"
  if (empty($manual_metadata[$field]))
  {
    return '';
  }
  return $manual_metadata[$field][0];
}
977 | /** getFirstMetadata() **/
|
---|
978 |
|
---|
/**
 * Retrieve every value stored for a metadata field.
 *
 * @param $field the metadata field name
 * @return whatever the global $manual_metadata holds for the field, or an
 *         empty array when the field has never been set
 */
function getMetadata($field)
{
  global $manual_metadata;
  return isset($manual_metadata[$field]) ? $manual_metadata[$field] : array();
}
989 | /** getMetadata() **/
|
---|
990 |
|
---|
/**
 * Read in the next title element with nested text element and extract the
 * title.
 *
 * Lines are consumed from the XML input through getLine(). The title comes
 * back as '<!-- id:ID -->text', where ID is the id attribute of the Text
 * element; when a SubTitle is present its own '<!-- id:ID -->text ' is
 * placed in front of whatever title has been gathered at that point.
 *
 * @param $xml_in  open handle on the XML being imported
 * @param $element name of the enclosing element, used in error messages only
 * @return the title string, or '' for an empty <Title/> element
 */
function getTitle($xml_in, $element)
{
  $title = '';
  $in_title_element = false;
  // - the first thing in a chapter will be its title. Some language versions
  //   don't wrap the title in a Title element; such a bare Text line needs no
  //   special handling as it simply falls through to the Text matching below
  $title_line = getLine($xml_in);
  // - super special case: a table with an empty title
  if (strpos($title_line, '<Title/>') !== false)
  {
    return '';
  }
  if (strpos($title_line, '<Title>') !== false)
  {
    $in_title_element = true;
    $title_line = getLine($xml_in);
  }
  // - some horribly formed entries have the subtitle first within the title
  //   element
  if (strpos($title_line, '<SubTitle>') !== false)
  {
    list($title, $title_line) = _getTitleReadSubTitle($xml_in, $element, $title);
  }
  // - grab the chapter title now so we can store it in the page ordering
  if (preg_match('/<Text id="([^"]+)">(.*?)$/', $title_line, $matches))
  {
    $id = $matches[1];
    $str = $matches[2];
    // - special case for text blocks that span multiple lines (as discovered
    //   in the russian "From Paper"): keep appending lines until the closing
    //   tag turns up or the input runs out
    $another_line = '';
    while (strpos($str, '</Text>') === false && ($another_line = getLine($xml_in)) !== false)
    {
      $str .= ' ' . $another_line;
    }
    // - now remove </Text>
    $str = preg_replace('/<\/Text>\s*/', '', $str);
    $title = '<!-- id:' . $id . ' -->' . $str . $title;
  }
  // - special case for (stoopid) empty titles that use up a text id
  elseif (preg_match('/<Text id="([^"]+)"\s*\/>/', $title_line, $matches))
  {
    $title = '<!-- id:' . $matches[1] . ' -->' . $title;
  }
  else
  {
    printError('Failed to find title text for: ' . $element);
  }
  // - watch for subtitle elements, then make sure the title is closed
  if ($in_title_element)
  {
    $title_line = getLine($xml_in);
    if (strpos($title_line, '<SubTitle>') !== false)
    {
      list($title, $title_line) = _getTitleReadSubTitle($xml_in, $element, $title);
    }
    if (strpos($title_line, '</Title>') === false)
    {
      printError('Failed to find closing title for: ' . $element);
    }
  }
  return $title;
}

/**
 * Helper for getTitle(): consume the body of a SubTitle element whose opening
 * line has just been read, prepend its text to the title gathered so far and
 * read the line following the closing tag.
 *
 * @param $xml_in  open handle on the XML being imported
 * @param $element name of the enclosing element, used in error messages only
 * @param $title   the title gathered so far
 * @return array(updated title, first line after </SubTitle>)
 */
function _getTitleReadSubTitle($xml_in, $element, $title)
{
  $subtitle_line = getLine($xml_in);
  if (preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $subtitle_line, $matches))
  {
    $title = '<!-- id:' . $matches[1] . ' -->' . $matches[2] . ' ' . $title;
  }
  $closing_line = getLine($xml_in);
  if (strpos($closing_line, '</SubTitle>') === false)
  {
    printError('Failed to find closing title for: ' . $element);
  }
  return array($title, getLine($xml_in));
}
1084 | /** getTitle() **/
|
---|
1085 |
|
---|
/**
 * Rewrite HTML comment delimiters into their '%!--' / '--%' stand-ins and
 * collapse any CrossRef element down to the bare value of its ref attribute.
 *
 * @param $text the text to rewrite
 * @return the rewritten text
 */
function alternateComments($text)
{
  // - both delimiters in one call; the pairs are applied in the order listed
  $escaped = str_replace(array('<!--', '-->'), array('%!--', '--%'), $text);
  // - remove any lurking crossrefs while we are at it
  return preg_replace('/<CrossRef.*?ref="([^"]+)".*?>/', '\\1', $escaped);
}
1094 |
|
---|
/**
 * Turn <i>...</i> spans into dokuwiki italics (//...//) and strip HTML
 * comments from the text.
 *
 * @param $text the text to clean
 * @return the text with italics converted and comments removed
 */
function noComments($text)
{
  $with_wiki_italics = preg_replace('/<i>(.*?)<\/i>/', '//\1//', $text);
  $without_comments = preg_replace('/<!--[^>]+-->/', '', $with_wiki_italics);
  return $without_comments;
}
1100 |
|
---|
/**
 * Translate a line of code appearing inside a table: entities are decoded
 * via translateText() in its entities-only mode, while italic tags are
 * carried through as hidden HTML comments (<!--i--> and <!--/i-->).
 *
 * @param $text the raw line
 * @return the translated line
 */
function translateTableCodeline($text)
{
  // - shield the italic tags behind placeholders so the translate step
  //   cannot destroy them
  $shielded = str_replace(array('<i>', '</i>'), array('%!--i--%', '%/i%'), $text);
  // - translate the text, just decoding the entities
  $decoded = translateText($shielded, true);
  // - swap the placeholders for HTML comments so the italics are remembered
  //   but stay hidden in the text
  return str_replace(array('%!--i--%', '%/i%'), array('<!--i-->', '<!--/i-->'), $decoded);
}
1116 |
|
---|
/**
 * Convert a fragment of manual XML text into dokuwiki markup.
 *
 * Unless $entities_only is set, cross references, links, footnote
 * references and the b/i/u formatting tags are rewritten into their dokuwiki
 * equivalents. Afterwards the user defined entities (global
 * $entity_replacements, entity name => numeric character code) and the
 * standard &gt; &lt; &amp; entities are decoded.
 *
 * Reads the globals $entity_replacements, $footnotes (footnote id => text)
 * and $in_code (HTML comments are left untouched while inside a code block).
 *
 * @param $text          the text to translate
 * @param $entities_only when true, skip all markup rewriting and only
 *                       protect comments and decode entities
 * @return the translated text
 */
function translateText($text, $entities_only=false)
{
  global $entity_replacements;
  global $footnotes;
  global $in_code;

  // - immediately find and protect any legitimate HTML comments in the text
  //   (so already using encoded entities), otherwise they'll be thoroughly
  //   mangled during the following transforms. This has to be matched with
  //   changes to the HTMLComments plugin in Dokuwiki to allow the correct
  //   thing to be displayed to the user.
  if (!$in_code)
  {
    $text = str_replace('<!--', '%!--', $text);
    $text = str_replace('-->', '--%', $text);
  }

  if (!$entities_only)
  {
    // - replace linking constructs with dokuwiki ones
    // - external chapter section crossrefs are easily the worst of all...
    //   every CrossRef element is inspected on its own, so that an ordinary
    //   crossref earlier in the text is never mistaken for the external one
    if (preg_match_all('/<CrossRef\s+(.*?)\/>/', $text, $crossrefs, PREG_SET_ORDER))
    {
      foreach ($crossrefs as $crossref)
      {
        $pattern = $crossref[0];
        $attributes = $crossref[1];
        if (!preg_match('/<CrossRef\s[^>]*external[^>]*\/>/', $pattern) || !preg_match('/<CrossRef\s[^>]*target="Chapter"[^>]*\/>/', $pattern))
        {
          continue;
        }
        $manual_name = '';
        if (preg_match('/external="([^"]+)"/', $attributes, $matches))
        {
          $manual_name = $matches[1];
        }
        $language = '';
        if (preg_match('/lang="([^"]+)"/', $attributes, $matches))
        {
          $language = $matches[1];
        }
        $page_id = '';
        if (preg_match('/ref="([^"]+)"/', $attributes, $matches))
        {
          $page_id = $matches[1];
        }
        if (empty($manual_name) || empty($language) || empty($page_id))
        {
          printError('Failed to parse external reference: ' . $pattern);
        }
        // - best we can do is a search within a restricted namespace
        $reference = '[[?do=search&id=' . $page_id . ' @' . $language . ':manuals:' . $manual_name . '|' . $page_id . ']]';
        $text = str_replace($pattern, $reference, $text);
      }
    }
    // - chapter crossrefs link to a page named after the chapter id. A single
    //   backreferencing replace is used so that ids containing regex
    //   metacharacters cannot break (or endlessly repeat) the substitution
    $text = preg_replace('/<CrossRef target="Chapter" ref="([^"]+)"\/>/', '[[.:\1|\1]]', $text);
    // - internal figure and table references
    $text = preg_replace('/<CrossRef target="Figure" ref="([^"]+)"\/>/','<imgref figure_\1>', $text);
    $text = preg_replace('/<CrossRef target="Table" ref="([^"]+)"\/>/','<tblref table_\1>', $text);
    // - simple internal reference
    $text = preg_replace('/<CrossRef target="Section" ref="([^"]+)"\/>/', '[[#\1|\1]]', $text);
    $text = preg_replace('/<CrossRef target="Subsection" ref="([^"]+)"\/>/', '[[##\1|\1]]', $text);
    $text = preg_replace('/<CrossRef target="Part" ref="([^"]+)"\/>/', '[[###\1|\1]]', $text);
    // - simple external url
    $text = preg_replace('/<Link url="([^"]+)">(.+?)<\/Link>/', '[[\1|\2]]', $text);
    // - footnote references are also tricksie as we've had to extract the
    //   footnotes earlier (during chapter counting). The footnote is spliced
    //   in with a plain string replace, so any '$' or '\' in its text is
    //   kept verbatim rather than being read as a regex backreference
    while (preg_match('/<FootnoteRef id="(\d+)"\/>/', $text, $matches))
    {
      $footnote_id = $matches[1];
      if (!isset($footnotes[$footnote_id]))
      {
        printError('Unknown footnote referenced: ' . $footnote_id);
      }
      $footnote = isset($footnotes[$footnote_id]) ? $footnotes[$footnote_id] : '';
      $text = str_replace($matches[0], '((' . $footnote . '))', $text);
    }
    // - detect and handle URLs surrounded by <i> tags very carefully (as doku
    //   will less than helpfully turn them into an external link and screw up
    //   everything that follows them on the page).
    //   example: <i>www.microsoft.com</i>
    //   example: <i>http://nzdl.org/cgi-bin/library</i>
    //   example: <i>www.yourserver.com</i>
    //   example: <i>http://www.yourserver.com</i>
    //   example: <i>http://www.yourserver.com/greenstone</i>
    $text = preg_replace('/<i>((?:http\:\/\/)?[a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)<\/i>/i','// \1 //', $text);
    // - superspecial case for two-part URLs ending in .org (like nzdl.org)
    $text = preg_replace('/<i>((?:http\:\/\/)?[a-z0-9\-]+\.org(?:\/.*?)?)<\/i>/i','// \1 //', $text);
    // - another superspecial case, this time for URLs on localhost
    $text = preg_replace('/<i>((?:http\:\/\/)?localhost(?:\/.*?)?)<\/i>/i','// \1 //', $text);
    // - underlines have the same issue around URLs.
    $text = preg_replace('/<u>((?:http\:\/\/)?[a-z]+\.[a-z0-9\-]+\.[a-z0-9\.\-]+(?:\/.*?)?)<\/u>/i','__ \1 __', $text);
    $text = preg_replace('/<u>((?:http\:\/\/)?[a-z0-9\-]+\.org(?:\/.*?)?)<\/u>/i','__ \1 __', $text);
    $text = preg_replace('/<u>((?:http\:\/\/)?localhost(?:\/.*?)?)<\/u>/i','__ \1 __', $text);

    // - replace HTML elements with the dokuwiki style equivalents
    //   the double slash of a protocol is wrapped in %% so that it is not
    //   read as the start of italics...
    $text = preg_replace('/(file|ftp|http):\/\//','\1:%%//%%', $text);
    // - ...but restore the double slashes in dokuwiki links
    while (preg_match('/\[\[[^\]]*%%[^\]]*\]\]/', $text))
    {
      $text = preg_replace('/(\[\[[^\]]*)%%([^\]]*\]\])/', '\1\2', $text);
    }
    $text = str_replace('<b>', '**', $text);
    $text = str_replace('</b>', '**', $text);
    $text = str_replace('<i>', '//', $text);
    $text = str_replace('</i>', '//', $text);
    $text = str_replace('<u>', '__', $text);
    $text = str_replace('</u>', '__', $text);
  }
  // Decode entities
  // - user defined entities (in the manual metadata)
  foreach ($entity_replacements as $entity=>$code)
  {
    $text = str_replace('&' . $entity . ';', html_entity_decode('&#'.$code.';',ENT_NOQUOTES,'UTF-8'), $text);
  }
  // - standard entities; &amp; goes last so that an escaped entity such as
  //   '&amp;lt;' is decoded exactly once (to '&lt;')
  $text = str_replace('&gt;', '>', $text);
  $text = str_replace('&lt;', '<', $text);
  $text = str_replace('&amp;', '&', $text);
  return $text;
}
1240 | /** translateText() **/
|
---|
1241 |
|
---|
/**
 * Copy an image of the manual into the dokuwiki media tree and build the
 * dokuwiki markup that embeds it.
 *
 * The image is read from the 'images' directory of the requested language
 * under $xml_source_path and stored, under a lowercased filename, in
 * data/media/<language>/manuals/images/ below $dokuwiki_path.
 *
 * @param $filename name of the image file
 * @param $width    width to request in the image markup
 * @param $height   height to request in the image markup
 * @return the dokuwiki image markup string
 */
function handleImage($filename, $width, $height)
{
  global $dokuwiki_path;
  global $xml_source_path;
  echo '[copying image: ' . $filename . "] \n";
  // - copy file into place
  $source_path = $xml_source_path . '/' . $_REQUEST['l'] . '/images/' . $filename;
  $destination_dir = $dokuwiki_path . '/data/media/' . $_REQUEST['l'] . '/manuals/images/';
  if (!file_exists($destination_dir))
  {
    mkAllDir($destination_dir, 0755);
  }
  $destination_path = $destination_dir . strtolower($filename);
  // - the result of copy() itself is checked, as a stale image left behind by
  //   an earlier import would satisfy a bare file_exists() test
  if (!copy($source_path, $destination_path) || !file_exists($destination_path))
  {
    printError('Failed to copy image file: ' . $filename);
  }
  // - create the string
  $image_txt = '{{..:images:' . strtolower($filename) . '?' . $width . 'x' . $height . '&direct}}';
  return $image_txt;
}
1264 |
|
---|
/**
 * Read the next line from the input handle, bumping the global line counter
 * on every call.
 *
 * @param $in open file handle to read from
 * @return the line as returned by fgets() (false once nothing is left)
 */
function getLine($in)
{
  global $line_counter;
  $next_line = fgets($in);
  ++$line_counter;
  return $next_line;
}
1271 |
|
---|
/**
 * Strip the bold, italic and line break tags from the text.
 *
 * @param $text the text to strip
 * @return the text without <b>, </b>, <i>, </i> and <br/> tags
 */
function noFormatting($text)
{
  $formatting_tags = array('<b>', '</b>', '<i>', '</i>', '<br/>');
  return str_replace($formatting_tags, '', $text);
}
1281 |
|
---|
1282 | ?> |
---|