Ignore:
Timestamp:
2001-02-08T13:49:27+13:00 (23 years ago)
Author:
sjboddie
Message:

Added: pdftohtml.pl - Perl script that handles conversion of PDF documents into HTML. Called by gsConvert.pl in sub pdf_to_html.

Modified: gsConvert.pl - Perl script that converts various formats (MSWord, RTF, PDF, PS) into HTML when importing the collection.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/gsConvert.pl

    r1734 r1928  
    6060    my (@ARGV) = @_;
    6161    my ($input_type,$output_type,$verbose,$timeout);
    62 
     62   
    6363    $timeout = 0;
    6464    # read command-line arguments
     
    7676    print_usage();
    7777    }
     78
    7879    my $input_filename = $ARGV[0];
    7980    if (!-r $input_filename) {
     
    102103    }
    103104    elsif ($input_type eq "doc") {
    104     print STDERR "I recognise this to be a Word document...\n"; # remove
    105105    print &convertDOC($input_filename, $output_filestem, $output_type);
    106106    print "\n";
     
    149149    my $realtype = &find_docfile_type($input_filename);
    150150
    151     print STDERR "The real type of this Word document is $realtype\n"; # remove
    152 
    153151    if ($realtype eq "word6" || $realtype eq "word7" || $realtype eq "word8") {
    154     print STDERR "I recognise this to be a word678 document...\n"; # remove
    155152    return &convertWord678($input_filename, $output_filestem, $output_type);
    156153    } elsif ($realtype eq "rtf") {
     
    328325
    329326sub doc_to_html {
    330     print STDERR "/;-DG I am in doc_to_html...\n"; # remove
    331327    ($input_filename, $output_filestem) = @_;
    332328
    333329    my $wvWare = "";
    334330    my $wv_conf = "";
     331
    335332    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
    336333    $wvWare = "$ENV{'GSDLHOME'}\\bin\\windows\\wvWare.exe";
     
    343340    $wvWare = &util::filename_cat($wv_home, "bin", "wvWare");
    344341    }
    345     print STDERR "I am about to test if your file exists...\n";
    346342    return 0 unless (-e "$wvWare");
     343   
    347344    $cmd = "";
    348345    if ($timeout) {$cmd = "ulimit -t $timeout;";}
     
    350347    $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\"";
    351348
    352     print STDERR "$cmd\n"; #remove
    353 
    354349    # execute the command
    355     print STDERR system($cmd);
    356     print STDERR "\n";
    357350    if (system($cmd)>0)
    358351    {
     
    374367    }
    375368    }
    376     print STDERR "/;-DG I am leaving doc_to_html...\n";
     369
    377370    return 0;
    378371}
     
    427420    ($dirname, $input_filename, $output_filestem) = @_;
    428421
    429     # formulate the command
    430     my $p_home = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix", "pdftohtml");
    431     my $pdftohtml = &util::filename_cat($p_home, "pdftohtml_0_20", "pdftohtml.bin");
    432     return 0 unless (-e "$pdftohtml");
    433 
    434422    $cmd = "";
    435423    if ($timeout) {$cmd = "ulimit -t $timeout;";}
    436     $cmd .= "$pdftohtml -noframes";
    437     $cmd .= " \"$input_filename\" \"$output_filestem.html\"";
    438     $cmd .= " >\"$output_filestem.out\" 2>\"$output_filestem.err\"";
     424    $cmd .= "pdftohtml.pl -F ";
     425    $cmd .= " \"$input_filename\" \"$output_filestem\"";
    439426
    440427    if (system($cmd)>0)
    441428    {
    442429    print STDERR "Error executing $cmd: $!\n";
    443     &util::rm("$output_filestem.html") if (-e "$output_filestem.html");
    444     &util::rm("$output_filestem.out") if (-e "$output_filestem.out");
    445430    return 0;
    446431    }
     
    466451}
    467452
    468 
    469453# Convert a PDF file to text with the pdftotext command
    470454
     
    502486    return 1;
    503487}
    504 
    505488
    506489# Convert a PostScript document to text with ps2ascii
     
    560543
    561544sub any_to_html {
    562     print STDERR "/;-Dg I am in any_to_html!\n";
    563545    ($input_filename, $output_filestem) = @_;
    564546
     
    583565
    584566    &util::rm("$output_filestem.text") if (-e "$output_filestem.text");
    585     print STDERR "/;-Dg I am getting out of  any_to_html!\n";
    586567    return 1;
    587568}
     
    625606    return 1;
    626607}
    627 
    628 
    629 
Note: See TracChangeset for help on using the changeset viewer.