Changeset 1960

Show
Ignore:
Timestamp:
13.02.2001 11:48:10 (19 years ago)
Author:
dg5
Message:

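Modified pdftohtml.pl to reflect the new location of the pdftohtml.bin file (now under the package's "bin" directory instead of "pdftohtml_0_22"). Also removed leftover debugging output and the fallback PostScript text-stripping code from gsConvert.pl.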

Location:
trunk/gsdl/bin/script
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/gsConvert.pl

    r1928 r1960  
    6060    my (@ARGV) = @_; 
    6161    my ($input_type,$output_type,$verbose,$timeout); 
    62      
     62 
    6363    $timeout = 0; 
    6464    # read command-line arguments 
     
    167167    # Attempt specialised conversion to HTML 
    168168    if (!$output_type || ($output_type =~ /html/i)) { 
    169     print STDERR "I am about to call doc_to_html...\n"; 
    170169    $success = &doc_to_html($input_filename, $output_filestem); 
    171170    if ($success) { 
     
    280279    ($input_filename) = @_; 
    281280     
    282     open(TMP, ">temp.txt"); 
    283     binmode(TMP); 
    284281    open(CHK, "<$input_filename"); 
    285282    binmode(CHK); 
     
    290287     
    291288    $line = $_; 
    292     print TMP "$line\n\n"; 
     289 
    293290    if ($first) { 
    294291        # check to see if this is an rtf file 
     
    494491    my $cmd = "ps2ascii \"$input_filename\" > \"$output_filestem.text\""; 
    495492    $cmd .= " 2> $output_filestem.err"; 
     493     
    496494    if (system($cmd)>0) 
    497495    { 
     
    499497    &util::rm("$output_filestem.text") if (-e "$output_filestem.text"); 
    500498    &util::rm("$output_filestem.err") if (-e "$output_filestem.err"); 
    501  
    502     # Fine then. We'll just do a lousy job by ourselves... 
    503     # Based on code nicked from: 
    504     # http://snark.ptc.spbu.ru/mail-archives/lout/brown/msg00003.html 
    505     #  
    506     print STDERR "Attempting to strip text from postscript.\n"; 
    507     my $errorcode=0; 
    508     open (IN, "$input_filename")  
    509         ||  ($errorcode=1, warn "Couldn't read file: $!"); 
    510     open (OUT, ">$output_filestem.text")  
    511         ||  ($errorcode=1, warn "Couldn't write file: $!"); 
    512     if ($errorcode) {print STDERR "errors\n";return 0;} 
    513  
    514     my $in_a_sentence=0; 
    515     while (<IN>) { 
    516         if (/^[^\(\)]+$/ && !$in_a_sentence) {next ;} # no brackets in line 
    517         # attempt to add whitespace between different lines... 
    518         s/F.?\(/\( /g; # this might break up some other words though... 
    519         ### remove all postscript control data 
    520         if (!$in_a_sentence) { 
    521         s/^[^\(\)]*?\(//;}  # rm start of line up to first open bracket 
    522         s/\\\(/\{/g;s/\\\)/\}/g ; # change quoted braces 
    523         s/\)([^\(\)])*?\(//g ;   # close bracket up to next open unquoted bracket 
    524         if (s/\)[^\(\)]*?$//g)  # last close bracket to end of line 
    525         {$in_a_sentence=0;chomp;}      
    526         if (s/\\$//) # if line is a continuation 
    527         {$in_a_sentence=1;chomp;}  
    528         s/^$//g ;             # remove empty lines 
    529         ### ligatures have special characters... 
    530         s/\\214/fi/g; 
    531         s/\\215/fl/g; 
    532         print OUT "$_"; 
    533     } 
    534     close IN; close OUT; 
    535     } 
     499    return 0; 
     500    } 
     501 
    536502    &util::rm("$output_filestem.err") if (-e "$output_filestem.err"); 
    537503    return 1; 
     
    574540    ($input_filename, $output_filestem) = @_; 
    575541 
    576     #open(TEMP, ">temp.txt"); 
    577542    open(IN, "<$input_filename"); 
    578543    binmode(IN); 
     
    585550 
    586551    # delete anything that isn't a printable character 
    587     #print TEMP $line; 
    588552    $line =~ s/[^\040-\176]+/\n/sg; 
    589553 
  • trunk/gsdl/bin/script/pdftohtml.pl

    r1928 r1960  
    110110    else  { 
    111111    $p_home = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix", "pdftohtml"); 
    112     $pdftohtml = &util::filename_cat($p_home, "pdftohtml_0_22", "pdftohtml.bin"); 
     112    $pdftohtml = &util::filename_cat($p_home, "bin", "pdftohtml.bin"); 
    113113    } 
    114114    return 0 unless (-e "$pdftohtml");