Changeset 1654
- Timestamp:
- 2000-11-03T15:22:00+13:00 (23 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/gsConvert.pl
r1578 r1654 137 137 ($input_filename, $output_filestem, $output_type) = @_; 138 138 139 # Many .doc files are not in fact word documents! 140 my $realtype = &find_docfile_type($input_filename); 141 142 if ($realtype eq "word678") { 143 return &convertWord678($input_filename, $output_filestem, $output_type); 144 } elsif ($realtype eq "rtf") { 145 return &convertRTF($input_filename, $output_filestem, $output_type); 146 } else { 147 return &convertAnything($input_filename, $output_filestem, $output_type); 148 } 149 } 150 151 # Convert a Microsoft word 6/7/8 document 152 153 sub convertWord678 { 154 ($input_filename, $output_filestem, $output_type) = @_; 155 139 156 my $success = 0; 140 157 … … 147 164 } 148 165 166 return &convertAnything($input_filename, $output_filestem, $output_type); 167 } 168 169 170 # Convert a Rich Text Format (RTF) file 171 172 sub convertRTF { 173 ($input_filename, $output_filestem, $output_type) = @_; 174 175 my $success = 0; 176 177 # Attempt specialised conversion to HTML 178 if (!$output_type || ($output_type =~ /html/i)) { 179 $success = &rtf_to_html($input_filename, $output_filestem); 180 if ($success) { 181 return "html"; 182 } 183 } 184 185 return &convertAnything($input_filename, $output_filestem, $output_type); 186 } 187 188 189 # Convert an unidentified file 190 191 sub convertAnything { 192 ($input_filename, $output_filestem, $output_type) = @_; 193 194 my $success = 0; 195 149 196 # Attempt simple conversion to HTML 150 197 if (!$output_type || ($output_type =~ /html/i)) { … … 162 209 } 163 210 } 164 165 211 return "fail"; 166 167 } 212 } 213 168 214 169 215 … … 213 259 return "fail"; 214 260 261 } 262 263 264 # Find the real type of a .doc file 265 # 266 # We seem to have a lot of files with a .doc extension that are .rtf 267 # files or Word 5 files. This function attempts to tell the difference. 
268 269 sub find_docfile_type { 270 ($input_filename) = @_; 271 272 open(CHK, "<$input_filename"); 273 my $line = ""; 274 my $first = 1; 275 276 while (<CHK>) { 277 278 $line = $_; 279 280 if ($first) { 281 # check to see if this is an rtf file 282 if ($line =~ /^\{\\rtf/) { 283 close(CHK); 284 return "rtf"; 285 } 286 } 287 288 # is this a word 6/7/8 document? 289 if ($line =~ /Word\.Document\.[678]/) { 290 close(CHK); 291 return "word678"; 292 } 293 294 $first = 0; 295 296 } 297 298 return "unknown"; 215 299 } 216 300 … … 235 319 my $wvWare = &util::filename_cat($wv_home, "bin", "wvWare"); 236 320 return 0 unless (-e "$wvWare"); 237 $cmd = "$wvWare --charset utf-8 --config $wv_conf"; 321 $cmd = "ulimit -t 20;"; 322 $cmd .= "$wvWare --charset utf-8 --config $wv_conf"; 238 323 $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\""; 239 324 240 325 # execute the command 241 326 if (system($cmd)>0) … … 262 347 263 348 349 # Attempt to convert an RTF document to html with rtftohtml 350 # 351 # rtf2html isn't distributed with Greenstone because it is not 352 # distributed under the GPL. If you know of a better solution, 353 # please let me know. 354 355 sub rtf_to_html { 356 ($input_filename, $output_filestem) = @_; 357 358 # formulate the command 359 my $r_cmd = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix", 360 "rtf2html", "rtf2html", "rtf2html"); 361 $r_cmd = "rtf2html" unless (-e "$r_cmd"); 362 return 0 unless (-e "$r_cmd"); 363 $cmd = "ulimit -t 20;"; 364 $cmd .= "$r_cmd"; 365 $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\""; 366 367 # execute the command 368 if (system($cmd)>0) 369 { 370 print STDERR "Error executing rtf converter: $!. Continuing...\n"; 371 } 372 373 # Was the conversion successful? 
374 if (-e "$output_filestem.html") { 375 open(TMP, "$output_filestem.html"); 376 $line = <TMP>; 377 close(TMP); 378 if ($line && $line =~ /DOCTYPE HTML/) { 379 &util::rm("$output_filestem.err"); 380 return 1; 381 } else { 382 # An error of some sort occurred 383 &util::rm("$output_filestem.html"); 384 &util::rm("$output_filestem.err"); 385 } 386 } 387 return 0; 388 } 389 390 264 391 # Convert a pdf file to html with the pdftohtml command 265 392
Note:
See TracChangeset
for help on using the changeset viewer.