Changeset 1960 for trunk/gsdl
- Timestamp: 2001-02-13T11:48:10+13:00 (23 years ago)
- Location: trunk/gsdl/bin/script
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/bin/script/gsConvert.pl
r1928 r1960 60 60 my (@ARGV) = @_; 61 61 my ($input_type,$output_type,$verbose,$timeout); 62 62 63 63 $timeout = 0; 64 64 # read command-line arguments … … 167 167 # Attempt specialised conversion to HTML 168 168 if (!$output_type || ($output_type =~ /html/i)) { 169 print STDERR "I am about to call doc_to_html...\n";170 169 $success = &doc_to_html($input_filename, $output_filestem); 171 170 if ($success) { … … 280 279 ($input_filename) = @_; 281 280 282 open(TMP, ">temp.txt");283 binmode(TMP);284 281 open(CHK, "<$input_filename"); 285 282 binmode(CHK); … … 290 287 291 288 $line = $_; 292 print TMP "$line\n\n"; 289 293 290 if ($first) { 294 291 # check to see if this is an rtf file … … 494 491 my $cmd = "ps2ascii \"$input_filename\" > \"$output_filestem.text\""; 495 492 $cmd .= " 2> $output_filestem.err"; 493 496 494 if (system($cmd)>0) 497 495 { … … 499 497 &util::rm("$output_filestem.text") if (-e "$output_filestem.text"); 500 498 &util::rm("$output_filestem.err") if (-e "$output_filestem.err"); 501 502 # Fine then. We'll just do a lousy job by ourselves... 503 # Based on code nicked from: 504 # http://snark.ptc.spbu.ru/mail-archives/lout/brown/msg00003.html 505 # 506 print STDERR "Attempting to strip text from postscript.\n"; 507 my $errorcode=0; 508 open (IN, "$input_filename") 509 || ($errorcode=1, warn "Couldn't read file: $!"); 510 open (OUT, ">$output_filestem.text") 511 || ($errorcode=1, warn "Couldn't write file: $!"); 512 if ($errorcode) {print STDERR "errors\n";return 0;} 513 514 my $in_a_sentence=0; 515 while (<IN>) { 516 if (/^[^\(\)]+$/ && !$in_a_sentence) {next ;} # no brackets in line 517 # attempt to add whitespace between different lines... 518 s/F.?\(/\( /g; # this might break up some other words though... 
519 ### remove all postscript control data 520 if (!$in_a_sentence) { 521 s/^[^\(\)]*?\(//;} # rm start of line up to first open bracket 522 s/\\\(/\{/g;s/\\\)/\}/g ; # change quoted braces 523 s/\)([^\(\)])*?\(//g ; # close bracket up to next open unquoted bracket 524 if (s/\)[^\(\)]*?$//g) # last close bracket to end of line 525 {$in_a_sentence=0;chomp;} 526 if (s/\\$//) # if line is a continuation 527 {$in_a_sentence=1;chomp;} 528 s/^$//g ; # remove empty lines 529 ### ligatures have special characters... 530 s/\\214/fi/g; 531 s/\\215/fl/g; 532 print OUT "$_"; 533 } 534 close IN; close OUT; 535 } 499 return 0; 500 } 501 536 502 &util::rm("$output_filestem.err") if (-e "$output_filestem.err"); 537 503 return 1; … … 574 540 ($input_filename, $output_filestem) = @_; 575 541 576 #open(TEMP, ">temp.txt");577 542 open(IN, "<$input_filename"); 578 543 binmode(IN); … … 585 550 586 551 # delete anything that isn't a printable character 587 #print TEMP $line;588 552 $line =~ s/[^\040-\176]+/\n/sg; 589 553 -
trunk/gsdl/bin/script/pdftohtml.pl
r1928 r1960 110 110 else { 111 111 $p_home = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix", "pdftohtml"); 112 $pdftohtml = &util::filename_cat($p_home, " pdftohtml_0_22", "pdftohtml.bin");112 $pdftohtml = &util::filename_cat($p_home, "bin", "pdftohtml.bin"); 113 113 } 114 114 return 0 unless (-e "$pdftohtml");
Note: See TracChangeset for help on using the changeset viewer.