Changeset 2031
- Timestamp: 2001-02-20T18:54:46+13:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/gsConvert.pl
r2023 r2031 418 418 $cmd .= " \"$input_filename\" \"$output_filestem\""; 419 419 420 if (system($cmd) >0)420 if (system($cmd)!=0) 421 421 { 422 422 print STDERR "Error executing $cmd: $!\n"; … … 489 489 490 490 my $cmd = "gs -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -c save "; 491 $cmd .= "-f ps2ascii \"$input_filename\" -c quit > \"$output_filestem.text\"";491 $cmd .= "-f ps2ascii.ps \"$input_filename\" -c quit > \"$output_filestem.text\""; 492 492 $cmd .= " 2> $output_filestem.err"; 493 493 $!=0; … … 509 509 close PSOUT; 510 510 } 511 # print STDERR "retcode=$retcode, error=\"$!\"\n";512 511 if ($error ne "") 513 512 { … … 517 516 518 517 # Fine then. We'll just do a lousy job by ourselves... 519 # Based on regexps nicked from:518 # Based on 5-line regexp sed script found at: 520 519 # http://snark.ptc.spbu.ru/mail-archives/lout/brown/msg00003.html 521 520 # … … 528 527 if ($errorcode) {print STDERR "errors\n";return 0;} 529 528 530 my $ in_a_sentence=0;529 my $text=""; # this is for whole .ps file... 531 530 while (<IN>) { 532 if (/^[^\(\)]+$/ && !$in_a_sentence) {next ;} # no brackets in line 533 # attempt to add whitespace between different lines... 534 s/F.?\(/\( /g; # this might break up some other words though... 535 ### remove all postscript control data 536 if (!$in_a_sentence) { 537 s/^[^\(\)]*?\(//; # rm start of line up to first open bracket 538 } 539 s/\\\(/\{/g;s/\\\)/\}/g ; # change quoted braces 540 s/\)([^\(\)])*?\(//g ; # close bracket up to next open unquoted bracket 541 if (s/\)[^\(\)]*?$//g) # last close bracket to end of line 542 {$in_a_sentence=0;chomp;} 543 if (s/\\$//) # if line is a continuation 544 {$in_a_sentence=1;chomp;} 545 s/^$//g ; # remove empty lines 546 ### ligatures have special characters... 547 s/\\214/fi/g; 548 s/\\215/fl/g; 549 print OUT "$_"; 550 } 551 close IN; close OUT; 552 } 553 531 $text.=$_; 532 } 533 close IN; 534 535 # if ps has Page data, then use it to delete all stuff before it. 
536 $text =~ s/^.*?%%Page:.*?\n//s; # treat string as single line 537 538 # remove all leading non-data stuff 539 $text =~ s/^.*?\(//s; 540 541 # remove all newline chars for easier processing 542 $text =~ s/\n//g; 543 544 # Big assumption here - assume that if any co-ordinates are 545 # given, then we are at the end of a sentence. 546 $text =~ s/\)-?\d+\ -?\d+/\) \(\n\)/g; 547 548 # special characters-- 549 $text =~ s/\(\|\)/\(\ - \)/g; # j -> em-dash? 550 551 # ? ps text formatting (eg italics?) ? 552 $text =~ s/Fn\(f\)/\(\{\)/g; # f -> { 553 $text =~ s/Fn\(g\)/\(\}\)/g; # g -> } 554 $text =~ s/Fn\(j\)/\(\|\)/g; # j -> | 555 # default - remove the rest 556 $text =~ s/\ ?F.\((.+?)\)/\($1\)/g; 557 558 # attempt to add whitespace between words... 559 # this is based purely on observation, and may be completely wrong... 560 $text =~ s/([^F])[defghijkuy]\(/$1 \( /g; 561 # eg I notice "b(" is sometimes NOT a space if preceded by a 562 # negative number. 563 $text =~ s/\)\d+ ?b\(/\) \( /g; 564 565 # change quoted braces to brackets 566 $text =~ s/([^\\])\\\(/$1\{/g; 567 $text =~ s/([^\\])\\\)/$1\}/g ; 568 569 # remove everything that is not between braces 570 $text =~ s/\)([^\(\)])+?\(//sg ; 571 572 # remove any Trailer eof stuff. 573 $text =~ s/\)[^\)]*$//sg; 574 575 ### ligatures have special characters... 576 $text =~ s/\\013/ff/g; 577 $text =~ s/\\014/fi/g; 578 $text =~ s/\\015/fl/g; 579 $text =~ s/\\016/ffi/g; 580 $text =~ s/\\214/fi/g; 581 $text =~ s/\\215/fl/g; 582 $text =~ s/\\017/\n\* /g; # asterisk? 583 $text =~ s/\\023/\023/g; # e acute ('e) 584 $text =~ s/\\177/\252/g; # u" 585 # $text =~ s/ ?? /\344/g; # a" 586 587 print OUT "$text"; 588 close OUT; 589 } 554 590 &util::rm("$output_filestem.err") if (-e "$output_filestem.err"); 555 591 return 1;
Note: See TracChangeset for help on using the changeset viewer.