Changeset 2241 for trunk/gsdl/bin


Ignore:
Timestamp:
2001-04-01T21:19:25+12:00 (23 years ago)
Author:
sjboddie
Message:

Tidied up the ConvertToPlug stuff to get it working on Windows 95/98

Location:
trunk/gsdl/bin/script
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/gsConvert.pl

    r2117 r2241  
    9191    # Deduce filenames
    9292    my ($tailname,$dirname,$suffix)
    93     = File::Basename::fileparse($input_filename,'\..+');
    94     my $output_filestem = &util::filename_cat($dirname,"$tailname");
     93    = File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
     94    my $output_filestem = &util::filename_cat($dirname, "$tailname");
    9595
    9696    if ($input_type eq "")
    9797    {
    98     $input_type = substr($suffix,1,length($suffix)-1);
     98    $input_type = lc (substr($suffix,1,length($suffix)-1));
    9999    }
    100100   
     
    138138
    139139
    140 # Document-type conversion fucntions
     140# Document-type conversion functions
    141141#
    142142# The following functions attempt to convert documents from their
     
    219219    # Convert to text
    220220    if (!$output_type || ($output_type =~ /text/i)) {
    221     $success = any_to_text($input_filename, $output_filestem);
     221    $success = &any_to_text($input_filename, $output_filestem);
    222222    if ($success) {
    223223        return "text";
     
    332332    my $wvWare = &util::filename_cat($ENV{'GSDLHOME'}, "bin",
    333333                     $ENV{'GSDLOS'}, "wvWare");
    334     $wvWare .= ".exe" if ($ENV{'GSDLOS'} =~ /^windows$/i);
    335     return 0 unless (-e "$wvWare");
     334
     335    # don't include path on windows (to avoid having to play about
     336    # with quoting when GSDLHOME might contain spaces) but assume
     337    # that the PATH is set up correctly
     338    $wvWare = "wvWare" if ($ENV{'GSDLOS'} =~ /^windows$/i);
    336339
    337340    my $wv_conf = &util::filename_cat($ENV{'GSDLHOME'}, "packages",
    338341                      "wv", "wvHtml.xml");
    339342   
     343    my $cmd = "";
     344    if ($timeout) {$cmd = "ulimit -t $timeout;";}
     345    $cmd .= "$wvWare --charset utf-8 --config \"$wv_conf\"";
     346    $cmd .= " \"$input_filename\" > \"$output_filestem.html\"";
     347   
     348    # redirecting STDERR is a bad idea on windows 95/98
     349    $cmd .= " 2> \"$output_filestem.err\""
     350    if $ENV{'GSDLOS'} !~ /^windows$/i;
     351
     352    # execute the command
     353    if (system($cmd)!=0)
     354    {
     355    print STDERR "Error executing wv converter: $!. Continuing...\n";
     356    }
     357
     358    # Was the conversion successful?
     359
     360    if (-e "$output_filestem.html") {
     361    open(TMP, "$output_filestem.html");
     362    $line = <TMP>;
     363    close(TMP);
     364    if ($line && $line =~ /DOCTYPE HTML/) {
     365        &util::rm("$output_filestem.err") if -e "$output_filestem.err";
     366        return 1;
     367    } else {
     368        # An error of some sort occurred
     369        &util::rm("$output_filestem.html");
     370        &util::rm("$output_filestem.err") if -e "$output_filestem.err";
     371    }
     372    }
     373
     374    return 0;
     375}
     376
     377
     378# Attempt to convert an RTF document to html with rtftohtml
     379#
     380# rtf2html isn't distributed with Greenstone because it is not
     381# distributed under the GPL.  If you know of a better solution,
     382# please let me know.
     383
     384sub rtf_to_html {
     385    my ($input_filename, $output_filestem) = @_;
     386
     387    # we'll give up already if using Windows
     388    return 0 if $ENV{'GSDLOS'} =~ /^windows$/i;
     389
     390    # formulate the command
     391    my $r_cmd = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix",
     392                    "rtf2html", "rtf2html", "rtf2html");
     393    $r_cmd = "rtf2html" unless (-e "$r_cmd");
     394    return 0 unless (-e "$r_cmd");
    340395    $cmd = "";
    341396    if ($timeout) {$cmd = "ulimit -t $timeout;";}
    342     $cmd .= "$wvWare --charset utf-8 --config $wv_conf";
     397    $cmd .= "$r_cmd";
    343398    $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\"";
    344399
     
    346401    if (system($cmd)!=0)
    347402    {
    348     print STDERR "Error executing wv converter: $!. Continuing...\n";
     403    print STDERR "Error executing rtf converter: $!. Continuing...\n";
    349404    }
    350405
     
    363418    }
    364419    }
    365 
    366420    return 0;
    367421}
    368422
    369423
    370 # Attempt to convert an RTF document to html with rtftohtml
    371 #
    372 # rtf2html isn't distributed with Greenstone because it is not
    373 # distributed under teh GPL.  If you know of a better solution,
    374 # please let me know.
    375 
    376 sub rtf_to_html {
    377     ($input_filename, $output_filestem) = @_;
    378 
    379     # formulate the command
    380     my $r_cmd = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix",
    381                     "rtf2html", "rtf2html", "rtf2html");
    382     $r_cmd = "rtf2html" unless (-e "$r_cmd");
    383     return 0 unless (-e "$r_cmd");
     424# Convert a pdf file to html with the pdftohtml command
     425
     426sub pdf_to_html {
     427    ($dirname, $input_filename, $output_filestem) = @_;
     428
    384429    $cmd = "";
    385430    if ($timeout) {$cmd = "ulimit -t $timeout;";}
    386     $cmd .= "$r_cmd";
    387     $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\"";
    388 
    389     # execute the command
    390     if (system($cmd)!=0)
    391     {
    392     print STDERR "Error executing rtf converter: $!. Continuing...\n";
    393     }
    394 
    395     # Was the conversion successful?
    396     if (-e "$output_filestem.html") {
    397     open(TMP, "$output_filestem.html");
    398     $line = <TMP>;
    399     close(TMP);
    400     if ($line && $line =~ /DOCTYPE HTML/) {
    401         &util::rm("$output_filestem.err");
    402         return 1;
    403     } else {
    404         # An error of some sort occurred
    405         &util::rm("$output_filestem.html");
    406         &util::rm("$output_filestem.err");
    407     }
    408     }
    409     return 0;
    410 }
    411 
    412 
    413 # Convert a pdf file to html with the pdftohtml command
    414 
    415 sub pdf_to_html {
    416     ($dirname, $input_filename, $output_filestem) = @_;
    417 
    418     $cmd = "";
    419     if ($timeout) {$cmd = "ulimit -t $timeout;";}
    420     $cmd .= "pdftohtml.pl -F ";
     431    $cmd .= "perl -S pdftohtml.pl -F ";
    421432    $cmd .= " \"$input_filename\" \"$output_filestem\"";
    422433    $!=0;
     434
    423435    if (system($cmd)!=0)
    424436    {
     
    491503
    492504sub ps_to_text {
    493     ($input_filename, $output_filestem) = @_;
    494 
    495     my $cmd = "gs -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -c save ";
    496     $cmd .= "-f ps2ascii.ps \"$input_filename\" -c quit > \"$output_filestem.text\"";
    497     $cmd .= " 2> $output_filestem.err";
    498     $!=0;
    499     my $retcode=system($cmd);
    500     $retcode = $? >> 8;  # see man perlfunc - system for this...
    501     # if system returns -1 | 127 (couldn't start program), look at $! for message
    502     my $error="";
    503     if ($retcode!=0) {if ($!) {$error=$!;} else {$error="couldn't run.\n";}}
    504     elsif (! -e "$output_filestem.text") {
    505     $error="did not create output file.\n";
    506     }
    507     else
    508     { # make sure the interpreter didn't get an error. It is technically
    509     # possible for the actual text to start with this, but....
    510     open PSOUT, "$output_filestem.text";
    511     if (<PSOUT> =~ /^Error: (.*)/) {
    512         $error="interpreter error - \"$1\"";
    513     }
    514     close PSOUT;
    515     }
     505    my ($input_filename, $output_filestem) = @_;
     506
     507    my $error = "";
     508
     509    # if we're on windows we'll fall straight through without attempting
     510    # to use gs
     511    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
     512    $error = "Windows does not support gs";
     513
     514    } else {
     515    my $cmd = "gs -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -c save ";
     516    $cmd .= "-f ps2ascii.ps \"$input_filename\" -c quit > \"$output_filestem.text\"";
     517    $cmd .= " 2> $output_filestem.err";
     518    $!=0;
     519    my $retcode=system($cmd);
     520    $retcode = $? >> 8;  # see man perlfunc - system for this...
     521    # if system returns -1 | 127 (couldn't start program), look at $! for message
     522
     523    if ($retcode!=0) {if ($!) {$error=$!;} else {$error="couldn't run.\n";}}
     524    elsif (! -e "$output_filestem.text") {
     525        $error="did not create output file.\n";
     526    }
     527    else
     528    {   # make sure the interpreter didn't get an error. It is technically
     529        # possible for the actual text to start with this, but....
     530        open PSOUT, "$output_filestem.text";
     531        if (<PSOUT> =~ /^Error: (.*)/) {
     532        $error="interpreter error - \"$1\"";
     533        }
     534        close PSOUT;
     535    }
     536    }
     537
    516538    if ($error ne "")
    517539    {
     
    611633    open(HTML, ">$output_filestem.html");
    612634
    613     print HTML '<html><head>
    614 <META HTTP-EQUIV="Content-Type" CONTENT="text/html">
    615 <META NAME="GENERATOR" CONTENT="Greenstone any_to_html">
    616 </head><body>';
    617     print HTML "\n\n";
     635    print HTML "<html><head>\n";
     636    print HTML "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html\">\n";
     637    print HTML "<META NAME=\"GENERATOR\" CONTENT=\"Greenstone any_to_html\">\n";
     638    print HTML "</head><body>\n\n";
    618639
    619640    while (<TEXT>) {
    620641    print HTML "<p> ", $_;
    621    
    622642    }
    623643    print HTML "\n</body></html>\n";
     644
     645    close HTML;
     646    close TEXT;
    624647
    625648    &util::rm("$output_filestem.text") if (-e "$output_filestem.text");
     
    661684    }
    662685    }
     686
     687    close OUT;
     688    close IN;
     689
    663690    return 1;
    664691}
  • trunk/gsdl/bin/script/pdftohtml.pl

    r2118 r2241  
    177177    print STDERR "pdftohtml.pl: $input_filename appears to have no ";
    178178    print STDERR "textual data. Aborting.\n";
    179     print STDERR "num: $unenc_stream_objects and $non_text_objects from $num_objects\n";
     179    # print STDERR "num: $unenc_stream_objects and $non_text_objects from $num_objects\n";
    180180    exit(1);
    181181    }
    182182
    183183    # formulate the command
    184     my $pdftohtml = &util::filename_cat($ENV{'GSDLHOME'}, "bin",
    185                     $ENV{'GSDLOS'}, "pdftohtml.bin");
    186     return 0 unless (-e "$pdftohtml");
    187    
    188     $cmd = "";
     184    my $cmd = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "pdftohtml.bin");
     185
     186    # don't include path on windows (to avoid having to play about
     187    # with quoting when GSDLHOME might contain spaces) but assume
     188    # that the PATH is set up correctly - note also that on windows
     189    # we use pdftohtml.exe not pdftohtml.bin
     190    $cmd = "pdftohtml" if ($ENV{'GSDLOS'} !~ /^windows$/) {
     191
    189192    if ($timeout) {$cmd = "ulimit -t $timeout;";}
    190     $cmd .= "$pdftohtml -noframes";
    191     $cmd .= " \"$input_filename\" \"$output_filestem.html\"";
    192     $cmd .= " >\"$output_filestem.out\" 2>\"$output_filestem.err\"";
    193    
     193    $cmd .= " -noframes \"$input_filename\" \"$output_filestem.html\"";
     194    $cmd .= " > \"$output_filestem.out\"";
     195
     196    # attempting to redirect STDERR on windows 95/98 is a bad idea
     197    $cmd .= " 2> \"$output_filestem.err\""
     198    if $ENV{'GSDLOS'} !~ /^windows$/i;
     199
    194200    if (system($cmd)>0) {
    195201    print STDERR "Error executing $cmd: $!\n";
     
    201207    # Need to convert images from PPM format to PNG format
    202208    my @images;
     209
    203210
    204211    open (IMAGES, "images.log");
     
    212219    my $cmd = "";
    213220    if ($ENV{'GSDLOS'} =~ /^windows/i) {
    214         $cmd = &util::filename_cat($ENV{'GSDLHOME'}, "bin", "windows", "pnmtopng.exe");
    215         $cmd .= " $image";
     221        $cmd = "pnmtopng $image";
    216222        if (system($cmd)!=0) {
    217223        print STDERR "Error executing $cmd\n";
Note: See TracChangeset for help on using the changeset viewer.