Changeset 2241 for trunk/gsdl/bin
- Timestamp:
- 2001-04-01T21:19:25+12:00 (23 years ago)
- Location:
- trunk/gsdl/bin/script
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/gsConvert.pl
r2117 r2241 91 91 # Deduce filenames 92 92 my ($tailname,$dirname,$suffix) 93 = File::Basename::fileparse($input_filename, '\..+');94 my $output_filestem = &util::filename_cat($dirname, "$tailname");93 = File::Basename::fileparse($input_filename, "\\.[^\\.]+\$"); 94 my $output_filestem = &util::filename_cat($dirname, "$tailname"); 95 95 96 96 if ($input_type eq "") 97 97 { 98 $input_type = substr($suffix,1,length($suffix)-1);98 $input_type = lc (substr($suffix,1,length($suffix)-1)); 99 99 } 100 100 … … 138 138 139 139 140 # Document-type conversion fu cntions140 # Document-type conversion functions 141 141 # 142 142 # The following functions attempt to convert documents from their … … 219 219 # Convert to text 220 220 if (!$output_type || ($output_type =~ /text/i)) { 221 $success = any_to_text($input_filename, $output_filestem);221 $success = &any_to_text($input_filename, $output_filestem); 222 222 if ($success) { 223 223 return "text"; … … 332 332 my $wvWare = &util::filename_cat($ENV{'GSDLHOME'}, "bin", 333 333 $ENV{'GSDLOS'}, "wvWare"); 334 $wvWare .= ".exe" if ($ENV{'GSDLOS'} =~ /^windows$/i); 335 return 0 unless (-e "$wvWare"); 334 335 # don't include path on windows (to avoid having to play about 336 # with quoting when GSDLHOME might contain spaces) but assume 337 # that the PATH is set up correctly 338 $wvWare = "wvWare" if ($ENV{'GSDLOS'} =~ /^windows$/i); 336 339 337 340 my $wv_conf = &util::filename_cat($ENV{'GSDLHOME'}, "packages", 338 341 "wv", "wvHtml.xml"); 339 342 343 my $cmd = ""; 344 if ($timeout) {$cmd = "ulimit -t $timeout;";} 345 $cmd .= "$wvWare --charset utf-8 --config \"$wv_conf\""; 346 $cmd .= " \"$input_filename\" > \"$output_filestem.html\""; 347 348 # redirecting STDERR is a bad idea on windows 95/98 349 $cmd .= " 2> \"$output_filestem.err\"" 350 if $ENV{'GSDLOS'} !~ /^windows$/i; 351 352 # execute the command 353 if (system($cmd)!=0) 354 { 355 print STDERR "Error executing wv converter: $!. Continuing...\n"; 356 } 357 358 # Was the conversion successful? 359 360 if (-e "$output_filestem.html") { 361 open(TMP, "$output_filestem.html"); 362 $line = <TMP>; 363 close(TMP); 364 if ($line && $line =~ /DOCTYPE HTML/) { 365 &util::rm("$output_filestem.err") if -e "$output_filestem.err"; 366 return 1; 367 } else { 368 # An error of some sort occurred 369 &util::rm("$output_filestem.html"); 370 &util::rm("$output_filestem.err") if -e "$output_filestem.err"; 371 } 372 } 373 374 return 0; 375 } 376 377 378 # Attempt to convert an RTF document to html with rtftohtml 379 # 380 # rtf2html isn't distributed with Greenstone because it is not 381 # distributed under the GPL. If you know of a better solution, 382 # please let me know. 383 384 sub rtf_to_html { 385 my ($input_filename, $output_filestem) = @_; 386 387 # we'll give up already if using Windows 388 return 0 if $ENV{'GSDLOS'} =~ /^windows$/i; 389 390 # formulate the command 391 my $r_cmd = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix", 392 "rtf2html", "rtf2html", "rtf2html"); 393 $r_cmd = "rtf2html" unless (-e "$r_cmd"); 394 return 0 unless (-e "$r_cmd"); 340 395 $cmd = ""; 341 396 if ($timeout) {$cmd = "ulimit -t $timeout;";} 342 $cmd .= "$ wvWare --charset utf-8 --config $wv_conf";397 $cmd .= "$r_cmd"; 343 398 $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\""; 344 399 … … 346 401 if (system($cmd)!=0) 347 402 { 348 print STDERR "Error executing wvconverter: $!. Continuing...\n";403 print STDERR "Error executing rtf converter: $!. Continuing...\n"; 349 404 } 350 405 … … 363 418 } 364 419 } 365 366 420 return 0; 367 421 } 368 422 369 423 370 # Attempt to convert an RTF document to html with rtftohtml 371 # 372 # rtf2html isn't distributed with Greenstone because it is not 373 # distributed under teh GPL. If you know of a better solution, 374 # please let me know. 375 376 sub rtf_to_html { 377 ($input_filename, $output_filestem) = @_; 378 379 # formulate the command 380 my $r_cmd = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix", 381 "rtf2html", "rtf2html", "rtf2html"); 382 $r_cmd = "rtf2html" unless (-e "$r_cmd"); 383 return 0 unless (-e "$r_cmd"); 424 # Convert a pdf file to html with the pdftohtml command 425 426 sub pdf_to_html { 427 ($dirname, $input_filename, $output_filestem) = @_; 428 384 429 $cmd = ""; 385 430 if ($timeout) {$cmd = "ulimit -t $timeout;";} 386 $cmd .= "$r_cmd"; 387 $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\""; 388 389 # execute the command 390 if (system($cmd)!=0) 391 { 392 print STDERR "Error executing rtf converter: $!. Continuing...\n"; 393 } 394 395 # Was the conversion successful? 396 if (-e "$output_filestem.html") { 397 open(TMP, "$output_filestem.html"); 398 $line = <TMP>; 399 close(TMP); 400 if ($line && $line =~ /DOCTYPE HTML/) { 401 &util::rm("$output_filestem.err"); 402 return 1; 403 } else { 404 # An error of some sort occurred 405 &util::rm("$output_filestem.html"); 406 &util::rm("$output_filestem.err"); 407 } 408 } 409 return 0; 410 } 411 412 413 # Convert a pdf file to html with the pdftohtml command 414 415 sub pdf_to_html { 416 ($dirname, $input_filename, $output_filestem) = @_; 417 418 $cmd = ""; 419 if ($timeout) {$cmd = "ulimit -t $timeout;";} 420 $cmd .= "pdftohtml.pl -F "; 431 $cmd .= "perl -S pdftohtml.pl -F "; 421 432 $cmd .= " \"$input_filename\" \"$output_filestem\""; 422 433 $!=0; 434 423 435 if (system($cmd)!=0) 424 436 { … … 491 503 492 504 sub ps_to_text { 493 ($input_filename, $output_filestem) = @_; 494 495 my $cmd = "gs -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -c save "; 496 $cmd .= "-f ps2ascii.ps \"$input_filename\" -c quit > \"$output_filestem.text\""; 497 $cmd .= " 2> $output_filestem.err"; 498 $!=0; 499 my $retcode=system($cmd); 500 $retcode = $? >> 8; # see man perlfunc - system for this... 501 # if system returns -1 | 127 (couldn't start program), look at $! for message 502 my $error=""; 503 if ($retcode!=0) {if ($!) {$error=$!;} else {$error="couldn't run.\n";}} 504 elsif (! -e "$output_filestem.text") { 505 $error="did not create output file.\n"; 506 } 507 else 508 { # make sure the interpreter didn't get an error. It is technically 509 # possible for the actual text to start with this, but.... 510 open PSOUT, "$output_filestem.text"; 511 if (<PSOUT> =~ /^Error: (.*)/) { 512 $error="interpreter error - \"$1\""; 513 } 514 close PSOUT; 515 } 505 my ($input_filename, $output_filestem) = @_; 506 507 my $error = ""; 508 509 # if we're on windows we'll fall straight through without attempting 510 # to use gs 511 if ($ENV{'GSDLOS'} =~ /^windows$/i) { 512 $error = "Windows does not support gs"; 513 514 } else { 515 my $cmd = "gs -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -c save "; 516 $cmd .= "-f ps2ascii.ps \"$input_filename\" -c quit > \"$output_filestem.text\""; 517 $cmd .= " 2> $output_filestem.err"; 518 $!=0; 519 my $retcode=system($cmd); 520 $retcode = $? >> 8; # see man perlfunc - system for this... 521 # if system returns -1 | 127 (couldn't start program), look at $! for message 522 523 if ($retcode!=0) {if ($!) {$error=$!;} else {$error="couldn't run.\n";}} 524 elsif (! -e "$output_filestem.text") { 525 $error="did not create output file.\n"; 526 } 527 else 528 { # make sure the interpreter didn't get an error. It is technically 529 # possible for the actual text to start with this, but.... 530 open PSOUT, "$output_filestem.text"; 531 if (<PSOUT> =~ /^Error: (.*)/) { 532 $error="interpreter error - \"$1\""; 533 } 534 close PSOUT; 535 } 536 } 537 516 538 if ($error ne "") 517 539 { … … 611 633 open(HTML, ">$output_filestem.html"); 612 634 613 print HTML '<html><head> 614 <META HTTP-EQUIV="Content-Type" CONTENT="text/html"> 615 <META NAME="GENERATOR" CONTENT="Greenstone any_to_html"> 616 </head><body>'; 617 print HTML "\n\n"; 635 print HTML "<html><head>\n"; 636 print HTML "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html\">\n"; 637 print HTML "<META NAME=\"GENERATOR\" CONTENT=\"Greenstone any_to_html\">\n"; 638 print HTML "</head><body>\n\n"; 618 639 619 640 while (<TEXT>) { 620 641 print HTML "<p> ", $_; 621 622 642 } 623 643 print HTML "\n</body></html>\n"; 644 645 close HTML; 646 close TEXT; 624 647 625 648 &util::rm("$output_filestem.text") if (-e "$output_filestem.text"); … … 661 684 } 662 685 } 686 687 close OUT; 688 close IN; 689 663 690 return 1; 664 691 } -
trunk/gsdl/bin/script/pdftohtml.pl
r2118 r2241 177 177 print STDERR "pdftohtml.pl: $input_filename appears to have no "; 178 178 print STDERR "textual data. Aborting.\n"; 179 print STDERR "num: $unenc_stream_objects and $non_text_objects from $num_objects\n";179 # print STDERR "num: $unenc_stream_objects and $non_text_objects from $num_objects\n"; 180 180 exit(1); 181 181 } 182 182 183 183 # formulate the command 184 my $pdftohtml = &util::filename_cat($ENV{'GSDLHOME'}, "bin", 185 $ENV{'GSDLOS'}, "pdftohtml.bin"); 186 return 0 unless (-e "$pdftohtml"); 187 188 $cmd = ""; 184 my $cmd = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "pdftohtml.bin"); 185 186 # don't include path on windows (to avoid having to play about 187 # with quoting when GSDLHOME might contain spaces) but assume 188 # that the PATH is set up correctly - note also that on windows 189 # we use pdftohtml.exe not pdftohtml.bin 190 $cmd = "pdftohtml" if ($ENV{'GSDLOS'} !~ /^windows$/) { 191 189 192 if ($timeout) {$cmd = "ulimit -t $timeout;";} 190 $cmd .= "$pdftohtml -noframes"; 191 $cmd .= " \"$input_filename\" \"$output_filestem.html\""; 192 $cmd .= " >\"$output_filestem.out\" 2>\"$output_filestem.err\""; 193 193 $cmd .= " -noframes \"$input_filename\" \"$output_filestem.html\""; 194 $cmd .= " > \"$output_filestem.out\""; 195 196 # attempting to redirect STDERR on windows 95/98 is a bad idea 197 $cmd .= " 2> \"$output_filestem.err\"" 198 if $ENV{'GSDLOS'} !~ /^windows$/i; 199 194 200 if (system($cmd)>0) { 195 201 print STDERR "Error executing $cmd: $!\n"; … … 201 207 # Need to convert images from PPM format to PNG format 202 208 my @images; 209 203 210 204 211 open (IMAGES, "images.log"); … … 212 219 my $cmd = ""; 213 220 if ($ENV{'GSDLOS'} =~ /^windows/i) { 214 $cmd = &util::filename_cat($ENV{'GSDLHOME'}, "bin", "windows", "pnmtopng.exe"); 215 $cmd .= " $image"; 221 $cmd = "pnmtopng $image"; 216 222 if (system($cmd)!=0) { 217 223 print STDERR "Error executing $cmd\n";
Note:
See TracChangeset
for help on using the changeset viewer.