Changeset 2241


Ignore:
Timestamp:
2001-04-01T21:19:25+12:00 (23 years ago)
Author:
sjboddie
Message:

Tidied up the ConvertToPlug stuff to get it working on Windows 95/98

Location:
trunk/gsdl
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/gsConvert.pl

    r2117 r2241  
    9191    # Deduce filenames
    9292    my ($tailname,$dirname,$suffix)
    93     = File::Basename::fileparse($input_filename,'\..+');
    94     my $output_filestem = &util::filename_cat($dirname,"$tailname");
     93    = File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
     94    my $output_filestem = &util::filename_cat($dirname, "$tailname");
    9595
    9696    if ($input_type eq "")
    9797    {
    98     $input_type = substr($suffix,1,length($suffix)-1);
     98    $input_type = lc (substr($suffix,1,length($suffix)-1));
    9999    }
    100100   
     
    138138
    139139
    140 # Document-type conversion fucntions
     140# Document-type conversion functions
    141141#
    142142# The following functions attempt to convert documents from their
     
    219219    # Convert to text
    220220    if (!$output_type || ($output_type =~ /text/i)) {
    221     $success = any_to_text($input_filename, $output_filestem);
     221    $success = &any_to_text($input_filename, $output_filestem);
    222222    if ($success) {
    223223        return "text";
     
    332332    my $wvWare = &util::filename_cat($ENV{'GSDLHOME'}, "bin",
    333333                     $ENV{'GSDLOS'}, "wvWare");
    334     $wvWare .= ".exe" if ($ENV{'GSDLOS'} =~ /^windows$/i);
    335     return 0 unless (-e "$wvWare");
     334
     335    # don't include path on windows (to avoid having to play about
     336    # with quoting when GSDLHOME might contain spaces) but assume
     337    # that the PATH is set up correctly
     338    $wvWare = "wvWare" if ($ENV{'GSDLOS'} =~ /^windows$/i);
    336339
    337340    my $wv_conf = &util::filename_cat($ENV{'GSDLHOME'}, "packages",
    338341                      "wv", "wvHtml.xml");
    339342   
     343    my $cmd = "";
     344    if ($timeout) {$cmd = "ulimit -t $timeout;";}
     345    $cmd .= "$wvWare --charset utf-8 --config \"$wv_conf\"";
     346    $cmd .= " \"$input_filename\" > \"$output_filestem.html\"";
     347   
     348    # redirecting STDERR is a bad idea on windows 95/98
     349    $cmd .= " 2> \"$output_filestem.err\""
     350    if $ENV{'GSDLOS'} !~ /^windows$/i;
     351
     352    # execute the command
     353    if (system($cmd)!=0)
     354    {
     355    print STDERR "Error executing wv converter: $!. Continuing...\n";
     356    }
     357
     358    # Was the conversion successful?
     359
     360    if (-e "$output_filestem.html") {
     361    open(TMP, "$output_filestem.html");
     362    $line = <TMP>;
     363    close(TMP);
     364    if ($line && $line =~ /DOCTYPE HTML/) {
     365        &util::rm("$output_filestem.err") if -e "$output_filestem.err";
     366        return 1;
     367    } else {
     368        # An error of some sort occurred
     369        &util::rm("$output_filestem.html");
     370        &util::rm("$output_filestem.err") if -e "$output_filestem.err";
     371    }
     372    }
     373
     374    return 0;
     375}
     376
     377
     378# Attempt to convert an RTF document to html with rtftohtml
     379#
     380# rtf2html isn't distributed with Greenstone because it is not
     381# distributed under the GPL.  If you know of a better solution,
     382# please let me know.
     383
     384sub rtf_to_html {
     385    my ($input_filename, $output_filestem) = @_;
     386
     387    # we'll give up already if using Windows
     388    return 0 if $ENV{'GSDLOS'} =~ /^windows$/i;
     389
     390    # formulate the command
     391    my $r_cmd = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix",
     392                    "rtf2html", "rtf2html", "rtf2html");
     393    $r_cmd = "rtf2html" unless (-e "$r_cmd");
     394    return 0 unless (-e "$r_cmd");
    340395    $cmd = "";
    341396    if ($timeout) {$cmd = "ulimit -t $timeout;";}
    342     $cmd .= "$wvWare --charset utf-8 --config $wv_conf";
     397    $cmd .= "$r_cmd";
    343398    $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\"";
    344399
     
    346401    if (system($cmd)!=0)
    347402    {
    348     print STDERR "Error executing wv converter: $!. Continuing...\n";
     403    print STDERR "Error executing rtf converter: $!. Continuing...\n";
    349404    }
    350405
     
    363418    }
    364419    }
    365 
    366420    return 0;
    367421}
    368422
    369423
    370 # Attempt to convert an RTF document to html with rtftohtml
    371 #
    372 # rtf2html isn't distributed with Greenstone because it is not
    373 # distributed under teh GPL.  If you know of a better solution,
    374 # please let me know.
    375 
    376 sub rtf_to_html {
    377     ($input_filename, $output_filestem) = @_;
    378 
    379     # formulate the command
    380     my $r_cmd = &util::filename_cat($ENV{'GSDLHOME'}, "packages", "unix",
    381                     "rtf2html", "rtf2html", "rtf2html");
    382     $r_cmd = "rtf2html" unless (-e "$r_cmd");
    383     return 0 unless (-e "$r_cmd");
     424# Convert a pdf file to html with the pdftohtml command
     425
     426sub pdf_to_html {
     427    ($dirname, $input_filename, $output_filestem) = @_;
     428
    384429    $cmd = "";
    385430    if ($timeout) {$cmd = "ulimit -t $timeout;";}
    386     $cmd .= "$r_cmd";
    387     $cmd .= " \"$input_filename\" > \"$output_filestem.html\" 2>\"$output_filestem.err\"";
    388 
    389     # execute the command
    390     if (system($cmd)!=0)
    391     {
    392     print STDERR "Error executing rtf converter: $!. Continuing...\n";
    393     }
    394 
    395     # Was the conversion successful?
    396     if (-e "$output_filestem.html") {
    397     open(TMP, "$output_filestem.html");
    398     $line = <TMP>;
    399     close(TMP);
    400     if ($line && $line =~ /DOCTYPE HTML/) {
    401         &util::rm("$output_filestem.err");
    402         return 1;
    403     } else {
    404         # An error of some sort occurred
    405         &util::rm("$output_filestem.html");
    406         &util::rm("$output_filestem.err");
    407     }
    408     }
    409     return 0;
    410 }
    411 
    412 
    413 # Convert a pdf file to html with the pdftohtml command
    414 
    415 sub pdf_to_html {
    416     ($dirname, $input_filename, $output_filestem) = @_;
    417 
    418     $cmd = "";
    419     if ($timeout) {$cmd = "ulimit -t $timeout;";}
    420     $cmd .= "pdftohtml.pl -F ";
     431    $cmd .= "perl -S pdftohtml.pl -F ";
    421432    $cmd .= " \"$input_filename\" \"$output_filestem\"";
    422433    $!=0;
     434
    423435    if (system($cmd)!=0)
    424436    {
     
    491503
    492504sub ps_to_text {
    493     ($input_filename, $output_filestem) = @_;
    494 
    495     my $cmd = "gs -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -c save ";
    496     $cmd .= "-f ps2ascii.ps \"$input_filename\" -c quit > \"$output_filestem.text\"";
    497     $cmd .= " 2> $output_filestem.err";
    498     $!=0;
    499     my $retcode=system($cmd);
    500     $retcode = $? >> 8;  # see man perlfunc - system for this...
    501     # if system returns -1 | 127 (couldn't start program), look at $! for message
    502     my $error="";
    503     if ($retcode!=0) {if ($!) {$error=$!;} else {$error="couldn't run.\n";}}
    504     elsif (! -e "$output_filestem.text") {
    505     $error="did not create output file.\n";
    506     }
    507     else
    508     { # make sure the interpreter didn't get an error. It is technically
    509     # possible for the actual text to start with this, but....
    510     open PSOUT, "$output_filestem.text";
    511     if (<PSOUT> =~ /^Error: (.*)/) {
    512         $error="interpreter error - \"$1\"";
    513     }
    514     close PSOUT;
    515     }
     505    my ($input_filename, $output_filestem) = @_;
     506
     507    my $error = "";
     508
     509    # if we're on windows we'll fall straight through without attempting
     510    # to use gs
     511    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
     512    $error = "Windows does not support gs";
     513
     514    } else {
     515    my $cmd = "gs -q -dNODISPLAY -dNOBIND -dWRITESYSTEMDICT -dSIMPLE -c save ";
     516    $cmd .= "-f ps2ascii.ps \"$input_filename\" -c quit > \"$output_filestem.text\"";
     517    $cmd .= " 2> $output_filestem.err";
     518    $!=0;
     519    my $retcode=system($cmd);
     520    $retcode = $? >> 8;  # see man perlfunc - system for this...
     521    # if system returns -1 | 127 (couldn't start program), look at $! for message
     522
     523    if ($retcode!=0) {if ($!) {$error=$!;} else {$error="couldn't run.\n";}}
     524    elsif (! -e "$output_filestem.text") {
     525        $error="did not create output file.\n";
     526    }
     527    else
     528    {   # make sure the interpreter didn't get an error. It is technically
     529        # possible for the actual text to start with this, but....
     530        open PSOUT, "$output_filestem.text";
     531        if (<PSOUT> =~ /^Error: (.*)/) {
     532        $error="interpreter error - \"$1\"";
     533        }
     534        close PSOUT;
     535    }
     536    }
     537
    516538    if ($error ne "")
    517539    {
     
    611633    open(HTML, ">$output_filestem.html");
    612634
    613     print HTML '<html><head>
    614 <META HTTP-EQUIV="Content-Type" CONTENT="text/html">
    615 <META NAME="GENERATOR" CONTENT="Greenstone any_to_html">
    616 </head><body>';
    617     print HTML "\n\n";
     635    print HTML "<html><head>\n";
     636    print HTML "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html\">\n";
     637    print HTML "<META NAME=\"GENERATOR\" CONTENT=\"Greenstone any_to_html\">\n";
     638    print HTML "</head><body>\n\n";
    618639
    619640    while (<TEXT>) {
    620641    print HTML "<p> ", $_;
    621    
    622642    }
    623643    print HTML "\n</body></html>\n";
     644
     645    close HTML;
     646    close TEXT;
    624647
    625648    &util::rm("$output_filestem.text") if (-e "$output_filestem.text");
     
    661684    }
    662685    }
     686
     687    close OUT;
     688    close IN;
     689
    663690    return 1;
    664691}
  • trunk/gsdl/bin/script/pdftohtml.pl

    r2118 r2241  
    177177    print STDERR "pdftohtml.pl: $input_filename appears to have no ";
    178178    print STDERR "textual data. Aborting.\n";
    179     print STDERR "num: $unenc_stream_objects and $non_text_objects from $num_objects\n";
     179    # print STDERR "num: $unenc_stream_objects and $non_text_objects from $num_objects\n";
    180180    exit(1);
    181181    }
    182182
    183183    # formulate the command
    184     my $pdftohtml = &util::filename_cat($ENV{'GSDLHOME'}, "bin",
    185                     $ENV{'GSDLOS'}, "pdftohtml.bin");
    186     return 0 unless (-e "$pdftohtml");
    187    
    188     $cmd = "";
     184    my $cmd = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "pdftohtml.bin");
     185
     186    # don't include path on windows (to avoid having to play about
     187    # with quoting when GSDLHOME might contain spaces) but assume
     188    # that the PATH is set up correctly - note also that on windows
     189    # we use pdftohtml.exe not pdftohtml.bin
     190    $cmd = "pdftohtml" if ($ENV{'GSDLOS'} !~ /^windows$/) {
     191
    189192    if ($timeout) {$cmd = "ulimit -t $timeout;";}
    190     $cmd .= "$pdftohtml -noframes";
    191     $cmd .= " \"$input_filename\" \"$output_filestem.html\"";
    192     $cmd .= " >\"$output_filestem.out\" 2>\"$output_filestem.err\"";
    193    
     193    $cmd .= " -noframes \"$input_filename\" \"$output_filestem.html\"";
     194    $cmd .= " > \"$output_filestem.out\"";
     195
     196    # attempting to redirect STDERR on windows 95/98 is a bad idea
     197    $cmd .= " 2> \"$output_filestem.err\""
     198    if $ENV{'GSDLOS'} !~ /^windows$/i;
     199
    194200    if (system($cmd)>0) {
    195201    print STDERR "Error executing $cmd: $!\n";
     
    201207    # Need to convert images from PPM format to PNG format
    202208    my @images;
     209
    203210
    204211    open (IMAGES, "images.log");
     
    212219    my $cmd = "";
    213220    if ($ENV{'GSDLOS'} =~ /^windows/i) {
    214         $cmd = &util::filename_cat($ENV{'GSDLHOME'}, "bin", "windows", "pnmtopng.exe");
    215         $cmd .= " $image";
     221        $cmd = "pnmtopng $image";
    216222        if (system($cmd)!=0) {
    217223        print STDERR "Error executing $cmd\n";
  • trunk/gsdl/perllib/plugins/ConvertToPlug.pm

    r2086 r2241  
    9494    if ($class eq "ConvertToPlug") {$class = shift (@_);}
    9595    my $self;
    96 # parsargv::parse might modify the list, so we do this by creating a copy
    97 # of the argument list.
     96    # parsargv::parse might modify the list, so we do this by creating a copy
     97    # of the argument list.
    9898    my @arglist = @_;
    99     my ($plugin_name,$generate_format, $kea_arg) = $class->parse_args(\@_);
     99    my ($plugin_name, $generate_format, $kea_arg) = $class->parse_args(\@_);
     100
     101    if ($class eq "PDFPlug" && $generate_format eq "text" &&
     102    $ENV{'GSDLOS'} =~ /^windows$/i) {
     103    print STDERR "Windows does not support pdf to text. PDFs will be converted to HTML instead\n";
     104    $generate_format = "html";
     105    }
    100106
    101107    if ($generate_format eq "text")
     
    135141sub tmp_area_convert_file {
    136142    my $self = shift (@_);
    137     my ($output_ext,$input_filename, $textref) = @_;
     143    my ($output_ext, $input_filename, $textref) = @_;
    138144
    139145    my $convert_to = $self->{'convert_to'};
     
    142148    my $colname = &util::use_collection();
    143149    my $tmp_dirname
    144     = &util::filename_cat($ENV{'GSDLHOME'},"collect",$colname,"tmp");
     150    = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $colname, "tmp");
    145151    &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
    146152
    147153    # derive tmp filename from input filename
    148     my ($tailname,$dirname,$suffix)
    149     = File::Basename::fileparse($input_filename,'\.[^\.]+$');
     154    my ($tailname, $dirname, $suffix)
     155    = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
    150156
    151157    # Remove any white space from filename -- no risk of name collision, and
     
    153159    $tailname =~ s/\s+//g;
    154160
    155     my $tmp_filename = &util::filename_cat($tmp_dirname,"$tailname$suffix");
    156 
    157     &util::soft_link($input_filename,$tmp_filename);
     161    my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$suffix");
     162
     163    &util::soft_link($input_filename, $tmp_filename);
    158164
    159165    my $verbosity = $self->{'verbosity'};
    160     if ($verbosity>0)
    161     {
     166    if ($verbosity > 0) {
    162167    print STDERR "Converting $tailname$suffix to $convert_to format\n";
    163168    }
     
    166171    # making sure the converter gives us the appropriate output type
    167172    my $output_type = lc($convert_to);
    168     my $cmd = "gsConvert.pl -verbose $verbosity -output $output_type \"$tmp_filename\"";
     173    my $cmd = "perl -S gsConvert.pl -verbose $verbosity -output $output_type \"$tmp_filename\"";
    169174    $output_type = `$cmd`;
    170 
    171     # Check STDERR here
    172175
    173176    chomp $output_type;
     
    175178    print STDERR "Could not convert $tailname$suffix to $convert_to format\n";
    176179    return "";
    177 ### exit 1;
    178180    }
    179181
     
    184186    $self->{'convert_to_ext'} = $output_type;
    185187    my $output_filename = $tmp_filename;
     188
    186189    $output_filename =~ s/$suffix$/.$output_type/;
    187190
     
    197200    my $colname = &util::use_collection();
    198201    my $tmp_dirname
    199     = &util::filename_cat($ENV{'GSDLHOME'},"collect",$colname,"tmp");
     202    = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $colname, "tmp");
    200203    &util::rm_r($tmp_dirname);
    201204    &util::mk_dir($tmp_dirname);
     
    229232
    230233    my $output_ext = $self->{'convert_to_ext'};
    231     my $conv_filename = $self->tmp_area_convert_file($output_ext,$filename);
     234    my $conv_filename = $self->tmp_area_convert_file($output_ext, $filename);
     235
    232236    if ("$conv_filename" eq "") {return 0;} # allows continue on errors
    233237    if (! -e "$conv_filename") {return 0;} # allows continue on errors
    234238    $self->{'conv_filename'} = $conv_filename;
    235239
    236 # Do encoding stuff
     240    # Do encoding stuff
    237241    my ($language, $encoding);
    238242    if ($self->{'input_encoding'} eq "auto") {
     
    253257    }
    254258
    255     BasPlug::read_file($self,$conv_filename, $encoding, \$text);
     259    &BasPlug::read_file($self, $conv_filename, $encoding, \$text);
    256260    if (!length ($text)) {
    257261        print $outhandle "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'};
     
    301305    {
    302306
    303     $ret_val = TEXTPlug::process($self,$textref,$pluginfo,
    304                      $tmp_dirname,$tmp_tailname,
    305                      $metadata,$doc_obj);
     307    $ret_val = &TEXTPlug::process($self, $textref, $pluginfo,
     308                      $tmp_dirname, $tmp_tailname,
     309                      $metadata, $doc_obj);
    306310    }
    307311    else
    308312    {
    309     $ret_val = HTMLPlug::process($self,$textref,$pluginfo,
    310                      $tmp_dirname,$tmp_tailname,
    311                      $metadata,$doc_obj);
     313    $ret_val = &HTMLPlug::process($self, $textref, $pluginfo,
     314                      $tmp_dirname, $tmp_tailname,
     315                      $metadata, $doc_obj);
    312316    }
    313317
    314318    # associate original file with doc object
    315319    my $cursection = $doc_obj->get_top_section();
    316     my $filename = &util::filename_cat($base_dir,$file);
     320    my $filename = &util::filename_cat($base_dir, $file);
    317321    $doc_obj->associate_file($filename, "doc.$doc_ext", undef, $cursection);
    318322
Note: See TracChangeset for help on using the changeset viewer.