Changeset 25052
- Timestamp:
- 2012-02-07T11:08:26+13:00 (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
documentation/trunk/php/gs-manual-import.php
r25026 r25052 155 155 addMetadata($looking_for_metadata, $text); 156 156 } 157 // - bogus metadata found in French version 158 elseif (!empty($looking_for_metadata) && preg_match('/<Text id="([^"]+)"\/>/', $line, $matches)) 159 { 160 } 157 161 // - any text we encounter outside of both sections and chapters also 158 162 // belongs on the cover … … 232 236 } 233 237 // - section, subsection and part titles within chapter 234 elseif ($in_chapter && preg_match('/<(Section|Subsection|Part)\sid="([^"] +)">/', $line, $matches))238 elseif ($in_chapter && preg_match('/<(Section|Subsection|Part)\sid="([^"]*)">/', $line, $matches)) 235 239 { 236 240 $title_type = $matches[1]; 237 241 $section_id = $matches[2]; 242 if (empty($section_id)) 243 { 244 $section_id = generateID(strtolower($title_type)); 245 } 246 echo '[adding ' . strtolower($title_type) . ': ' . $section_id . '] '; 238 247 $header_fix = ''; 239 248 $title = getTitle($xml_in, 'heading: ' . $title_type); … … 352 361 $row_txt = '|'; 353 362 $line = getLine($xml_in); 354 // - now we read in multiple cells 355 while ( preg_match('/<th width="(\d+)"\/?>/', $line, $matches))363 // - now we read in multiple cells (line starting <th 364 while (strpos($line, '<th') === 0) 356 365 { 357 $cell_width = $matches[1]; 358 if (!$have_output_widths) 366 if (preg_match('/<th width="(\d+)"\/?>/', $line, $matches)) 359 367 { 360 array_push($column_widths, $cell_width); 368 $cell_width = $matches[1]; 369 if (!$have_output_widths) 370 { 371 array_push($column_widths, $cell_width); 372 } 361 373 } 362 374 // Ignore empty cells … … 378 390 } 379 391 // - we can have images or text in our tables 380 if (preg_match('/<File width="(\d+)" height="(\d+)" url="images\/([^"]+)"\/>/', $line, $matches))392 if (preg_match('/<File.*url="images\/([^"]+)".*\/>/', $line, $matches)) 381 393 { 382 $image_txt = handleImage($matches[3], $matches[1], $matches[2]); 394 $payload = $matches[0]; 395 $filename = $matches[1]; 396 $width = 0; 397 if (preg_match('/width="(\d+)"/', $payload, $matches)) 398 { 399 $width = $matches[1]; 400 } 401 $height = 0; 402 if (preg_match('/height="(\d+)"/', $payload, $matches)) 403 { 404 $height = $matches[1]; 405 } 406 $image_txt = handleImage($filename, $width, $height); 383 407 $row_txt .= ' ' . $image_txt . ' '; 384 408 } 385 elseif (preg_match('/<Text id="([^"]+)">(. +?)<\/Text>/', $line, $matches))409 elseif (preg_match('/<Text id="([^"]+)">(.*)/', $line, $matches)) 386 410 { 387 $row_txt .= ' <!-- id:' . $matches[1] . ' -->' . translateText($matches[2]) . ' '; 411 $tid = $matches[1]; 412 $txt = $matches[2]; 413 // - multiple line text block 414 while (strpos($txt, '</Text>') === false) 415 { 416 $txt .= getLine($xml_in); 417 } 418 $txt = str_replace('</Text>','',$txt); 419 $row_txt .= ' <!-- id:' . $tid . ' -->' . translateText($txt) . ' '; 388 420 } 389 421 elseif (preg_match('/<CodeLine>(.*?)<\/CodeLine>/',$line,$matches)) … … 391 423 $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]) . '\'\' '; 392 424 } 425 elseif (preg_match('/<CodeLine>(.*)/',$line,$matches)) 426 { 427 $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]) . '\'\' '; 428 } 429 elseif (preg_match('/(.*)<\/CodeLine>/',$line,$matches)) 430 { 431 if (!empty($matches[1])) 432 { 433 $row_txt .= ' \'\'' . translateTableCodeLine($matches[1]). '\'\' '; 434 } 435 else 436 { 437 $row_txt .= ' '; 438 } 439 } 440 // we'll add (bogus) linebreaks 441 elseif (preg_match('/^\s*<br\s*\/?>\s*$/', $line)) 442 { 443 $row_txt = ' '; 444 } 393 445 else 394 446 { 395 printError('Warning! Unrecognized element in table: ' . $line);447 printError('Warning! Unrecognized element in table: ' . htmlspecialchars($line)); 396 448 } 397 449 $first = false; … … 774 826 } 775 827 // - pattern of lines to ignore 776 else if (preg_match('/^(<\?xml version="1.0" encoding="UTF-8"\?>|<\!DOCTYPE Manual \[|\]>|<Bullet>|<\/?Content>|<\/?Footnote|<Manual id=".+?" lang=".+?">|<\/Manual>)/', $line))828 else if (preg_match('/^(<\?xml version="1.0" encoding="UTF-8"\?>|<\!DOCTYPE Manual \[|\]>|<Bullet>|<\/?Content>|<\/?Footnote|<Manual id=".+?" lang=".+?">|<\/Manual>)/', $line)) 777 829 { 778 830 } 779 831 // - we ignore anything else in footnotes too, as they were handled in the 780 832 // preprocessing pass 781 elseif ($in_footnotes) 782 { 833 else if ($in_footnotes) 834 { 835 } 836 // - ignore empty lines 837 else if (preg_match('/^\s*$/', $line)) 838 { 839 } 840 // - meh. French versions have random, non-text element, linebreaks floating 841 // around. Guess I'll honor their formatting even though it's bogus 842 else if (preg_match('/^\s*<br\s*\/?>\s*$/', $line)) 843 { 844 if ($in_chapter) 845 { 846 fwrite($chapter_txt_out, ' \\\\'); 847 } 848 else 849 { 850 $frontmatter_text .= ' \\\\'; 851 } 783 852 } 784 853 // - danger Will Robinson! … … 927 996 { 928 997 $title = ''; 998 $in_title_element = false; 929 999 // - the first thing in a chapter will be it's title 930 1000 $title_line = getLine($xml_in); 1001 // - super special case: some language versions don't wrap titles in title 1002 // element, so if the first thing we see is a text, we treat that as the 1003 // title 1004 if (strpos($title_line, '<text') !== false) 1005 { 1006 1007 } 931 1008 // - super special case: a table with an empty title 932 1009 if (strpos($title_line, '<Title/>') !== false) … … 934 1011 return ''; 935 1012 } 936 if (strpos($title_line, '<Title>') === false) 937 { 938 printError('Failed to find opening title for: ' . $element); 939 } 940 $title_line = getLine($xml_in); 1013 if (strpos($title_line, '<Title>') !== false) 1014 { 1015 $in_title_element = true; 1016 $title_line = getLine($xml_in); 1017 } 1018 // - some horribly formed entries have the subtitle first within the title 1019 // element 1020 if (strpos($title_line, '<SubTitle>') !== false) 1021 { 1022 $title_line = getLine($xml_in); 1023 if (preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $title_line, $matches)) 1024 { 1025 $title = '<!-- id:' . $matches[1] . ' -->' . $matches[2] . ' ' . $title; 1026 } 1027 $title_line = getLine($xml_in); 1028 if (strpos($title_line, '</SubTitle>') === false) 1029 { 1030 printError('Failed to find closing title for: ' . $element); 1031 } 1032 $title_line = getLine($xml_in); 1033 } 941 1034 // - grab the chapter title now so we can store it in the page ordering 942 1035 if (preg_match('/<Text id="([^"]+)">(.*?)$/', $title_line, $matches)) … … 953 1046 // - now remove </Text> 954 1047 $str = preg_replace('/<\/Text>\s*/', '', $str); 955 $title = '<!-- id:' . $id . ' -->' . $str ;1048 $title = '<!-- id:' . $id . ' -->' . $str . $title; 956 1049 } 957 1050 // - special case for (stoopid) empty titles that use up a text id 958 1051 elseif (preg_match('/<Text id="([^"]+)"\s*\/>/', $title_line, $matches)) 959 1052 { 960 $title = '<!-- id:' . $matches[1] . ' -->' ;1053 $title = '<!-- id:' . $matches[1] . ' -->' . $title; 961 1054 } 962 1055 else … … 965 1058 } 966 1059 // - watch for subtitle elements 967 $title_line = getLine($xml_in); 968 if (strpos($title_line, '<SubTitle>') !== false) 1060 if ($in_title_element) 969 1061 { 970 1062 $title_line = getLine($xml_in); 971 if ( preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $title_line, $matches))1063 if (strpos($title_line, '<SubTitle>') !== false) 972 1064 { 973 $title = '<!-- id:' . $matches[1] . ' -->' . $matches[2] . ' ' . $title; 1065 $title_line = getLine($xml_in); 1066 if (preg_match('/<Text id="([^"]+)">(.+?)<\/Text>/', $title_line, $matches)) 1067 { 1068 $title = '<!-- id:' . $matches[1] . ' -->' . $matches[2] . ' ' . $title; 1069 } 1070 $title_line = getLine($xml_in); 1071 if (strpos($title_line, '</SubTitle>') === false) 1072 { 1073 printError('Failed to find closing title for: ' . $element); 1074 } 1075 $title_line = getLine($xml_in); 974 1076 } 975 $title_line = getLine($xml_in); 976 if (strpos($title_line, '</SubTitle>') === false) 1077 if (strpos($title_line, '</Title>') === false) 977 1078 { 978 1079 printError('Failed to find closing title for: ' . $element); 979 1080 } 980 $title_line = getLine($xml_in);981 }982 if (strpos($title_line, '</Title>') === false)983 {984 printError('Failed to find closing title for: ' . $element);985 1081 } 986 1082 return $title; … … 992 1088 $text = str_replace('<!--', '%!--', $text); 993 1089 $text = str_replace('-->', '--%', $text); 1090 // remove any lurking crossrefs while we are at it 1091 $text = preg_replace('/<CrossRef.*?ref="([^"]+)".*?>/', '\\1', $text); 994 1092 return $text; 995 1093 } … … 1149 1247 // - copy file into place 1150 1248 $source_path = $xml_source_path . '/' . $_REQUEST['l'] . '/images/' . $filename; 1151 $destination_path = $dokuwiki_path . '/data/media/' . $_REQUEST['l'] . '/manuals/images/' . strtolower($filename); 1249 $destination_dir = $dokuwiki_path . '/data/media/' . $_REQUEST['l'] . '/manuals/images/'; 1250 if (!file_exists($destination_dir)) 1251 { 1252 mkAllDir($destination_dir, 0755); 1253 } 1254 $destination_path = $destination_dir . strtolower($filename); 1152 1255 copy($source_path, $destination_path); 1153 1256 if (!file_exists($destination_path))
Note:
See TracChangeset
for help on using the changeset viewer.