Changeset 1928

Show
Ignore:
Timestamp:
08.02.2001 13:49:27 (19 years ago)
Author:
sjboddie
Message:

Added: pdftohtml.pl - Perl script that handles conversion of PDF documents into HTML. Called by gsConvert.pl in sub pdf_to_html.

Modified: gsConvert.pl - Perl script that converts various formats (MSWord, RTF, PDF, PS) into HTML when importing the collection.

Location:
trunk/gsdl/bin/script
Files:
1 added
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/gsConvert.pl

    r1734 r1928  
    6060    my (@ARGV) = @_; 
    6161    my ($input_type,$output_type,$verbose,$timeout); 
    62  
     62     
    6363    $timeout = 0; 
    6464    # read command-line arguments 
     
    7676    print_usage(); 
    7777    } 
     78 
    7879    my $input_filename = $ARGV[0]; 
    7980    if (!-r $input_filename) { 
     
    102103    }  
    103104    elsif ($input_type eq "doc") { 
    104     print STDERR "I recognise this to be a Word document...\n"; # remove 
    105105    print &convertDOC($input_filename, $output_filestem, $output_type); 
    106106    print "\n"; 
     
    149149    my $realtype = &find_docfile_type($input_filename); 
    150150 
    151     print STDERR "The real type of this Word document is $realtype\n"; # remove 
    152  
    153151    if ($realtype eq "word6" || $realtype eq "word7" || $realtype eq "word8") { 
    154     print STDERR "I recognise this to be a word678 document...\n"; # remove 
    155152    return &convertWord678($input_filename, $output_filestem, $output_type); 
    156153    } elsif ($realtype eq "rtf") { 
     
    328325 
    329326sub doc_to_html { 
    330     print STDERR "/;-DG I am in doc_to_html...\n"; # remove 
    331327    ($input_filename, $output_filestem) = @_; 
    332328 
    333329    my $wvWare = ""; 
    334330    my $wv_conf = ""; 
     331 
    335332    if ($ENV{'GSDLOS'} =~ /^windows$/i) { 
    336333    $wvWare = "$ENV{'GSDLHOME'}\\bin\\windows\\wvWare.exe"; 
     
    343340    $wvWare = &util::filename_cat($wv_home, "bin", "wvWare"); 
    344341    } 
    345     print STDERR "I am about to test if your file exists...\n"; 
    346342    return 0 unless (-e "$wvWare"); 
     343     
    347344    $cmd = ""; 
    348345    if ($timeout) {$cmd = "ulimit -t $timeout;";} 
     
    350347    $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\""; 
    351348 
    352     print STDERR "$cmd\n"; #remove 
    353  
    354349    # execute the command 
    355     print STDERR system($cmd); 
    356     print STDERR "\n"; 
    357350    if (system($cmd)>0) 
    358351    { 
     
    374367    } 
    375368    } 
    376     print STDERR "/;-DG I am leaving doc_to_html...\n"; 
     369 
    377370    return 0; 
    378371} 
     
    427420    ($dirname, $input_filename, $output_filestem) = @_; 
    428421 
    429     # formulate the command 
    430     my $p_home = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix", "pdftohtml"); 
    431     my $pdftohtml = &util::filename_cat($p_home, "pdftohtml_0_20", "pdftohtml.bin"); 
    432     return 0 unless (-e "$pdftohtml"); 
    433  
    434422    $cmd = ""; 
    435423    if ($timeout) {$cmd = "ulimit -t $timeout;";} 
    436     $cmd .= "$pdftohtml -noframes"; 
    437     $cmd .= " \"$input_filename\" \"$output_filestem.html\""; 
    438     $cmd .= " >\"$output_filestem.out\" 2>\"$output_filestem.err\""; 
     424    $cmd .= "pdftohtml.pl -F "; 
     425    $cmd .= " \"$input_filename\" \"$output_filestem\""; 
    439426 
    440427    if (system($cmd)>0) 
    441428    { 
    442429    print STDERR "Error executing $cmd: $!\n"; 
    443     &util::rm("$output_filestem.html") if (-e "$output_filestem.html"); 
    444     &util::rm("$output_filestem.out") if (-e "$output_filestem.out"); 
    445430    return 0; 
    446431    } 
     
    466451} 
    467452 
    468  
    469453# Convert a PDF file to text with the pdftotext command 
    470454 
     
    502486    return 1; 
    503487} 
    504  
    505488 
    506489# Convert a PostScript document to text with ps2ascii 
     
    560543 
    561544sub any_to_html { 
    562     print STDERR "/;-Dg I am in any_to_html!\n"; 
    563545    ($input_filename, $output_filestem) = @_; 
    564546 
     
    583565 
    584566    &util::rm("$output_filestem.text") if (-e "$output_filestem.text"); 
    585     print STDERR "/;-Dg I am getting out of  any_to_html!\n"; 
    586567    return 1; 
    587568} 
     
    625606    return 1; 
    626607} 
    627  
    628  
    629