Changeset 2012
- Timestamp: 2001-02-19T18:06:16+13:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/gsConvert.pl
r1997 r2012 276 276 # Find the real type of a .doc file 277 277 # 278 # We seem to have a lot of files with a .dcoextension that are .rtf278 # We seem to have a lot of files with a .doc extension that are .rtf 279 279 # files or Word 5 files. This function attempts to tell the difference. 280 280 … … 487 487 } 488 488 489 # Convert a PostScript document to text with ps2ascii 489 # Convert a PostScript document to text 490 # note - just using "ps2ascii" isn't good enough, as it 491 # returns 0 for a postscript interpreter error. ps2ascii is just 492 # a wrapper to "gs" anyway, so we use that cmd here. 490 493 491 494 sub ps_to_text { 492 495 ($input_filename, $output_filestem) = @_; 493 496 494 my $cmd = "ps2ascii \"$input_filename\" > \"$output_filestem.text\""; 497 my $cmd = "gs -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -c save "; 498 $cmd .= "-f ps2ascii \"$input_filename\" -c quit > \"$output_filestem.text\""; 495 499 $cmd .= " 2> $output_filestem.err"; 496 497 if (system($cmd)>0) 498 { 499 print STDERR "Error executing $cmd: $!\n"; 500 $!=0; 501 my $retcode=system($cmd); 502 $retcode = $? >> 8; # see man perlfunc - system for this... 503 # if system returns -1 | 127 (couldn't start program), look at $! for message 504 my $error=""; 505 if ($retcode!=0) {if ($!) {$error=$!;} else {$error="couldn't run.\n";}} 506 elsif (! -e "$output_filestem.text") { 507 $error="did not create output file.\n"; 508 } 509 else 510 { # make sure the interpreter didn't get an error. It is technically 511 # possible for the actual text to start with this, but.... 
512 open PSOUT, "$output_filestem.text"; 513 if (<PSOUT> =~ /^Error: (.*)/) { 514 $error="interpreter error - \"$1\""; 515 } 516 close PSOUT; 517 } 518 # print STDERR "retcode=$retcode, error=\"$!\"\n"; 519 if ($error ne "") 520 { 521 print STDERR "PSPLUG: WARNING: Error executing gs: $error\n"; 500 522 &util::rm("$output_filestem.text") if (-e "$output_filestem.text"); 501 523 &util::rm("$output_filestem.err") if (-e "$output_filestem.err"); 502 return 0; 524 525 # Fine then. We'll just do a lousy job by ourselves... 526 # Based on regexps nicked from: 527 # http://snark.ptc.spbu.ru/mail-archives/lout/brown/msg00003.html 528 # 529 print STDERR "PSPlug: Stripping text from postscript\n"; 530 my $errorcode=0; 531 open (IN, "$input_filename") 532 || ($errorcode=1, warn "Couldn't read file: $!"); 533 open (OUT, ">$output_filestem.text") 534 || ($errorcode=1, warn "Couldn't write file: $!"); 535 if ($errorcode) {print STDERR "errors\n";return 0;} 536 537 my $in_a_sentence=0; 538 while (<IN>) { 539 if (/^[^\(\)]+$/ && !$in_a_sentence) {next ;} # no brackets in line 540 # attempt to add whitespace between different lines... 541 s/F.?\(/\( /g; # this might break up some other words though... 542 ### remove all postscript control data 543 if (!$in_a_sentence) { 544 s/^[^\(\)]*?\(//; # rm start of line up to first open bracket 545 } 546 s/\\\(/\{/g;s/\\\)/\}/g ; # change quoted braces 547 s/\)([^\(\)])*?\(//g ; # close bracket up to next open unquoted bracket 548 if (s/\)[^\(\)]*?$//g) # last close bracket to end of line 549 {$in_a_sentence=0;chomp;} 550 if (s/\\$//) # if line is a continuation 551 {$in_a_sentence=1;chomp;} 552 s/^$//g ; # remove empty lines 553 ### ligatures have special characters... 554 s/\\214/fi/g; 555 s/\\215/fl/g; 556 print OUT "$_"; 557 } 558 close IN; close OUT; 503 559 } 504 560
Note: See TracChangeset for help on using the changeset viewer.