Ignore:
Timestamp:
2003-01-28T16:22:26+13:00 (21 years ago)
Author:
sjboddie
Message:

Added options to PDFPlug to take advantage of the improvements in
version 0.34 of pdftohtml. It now works much better for non-Latin
input documents (producing UTF-8-encoded HTML).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/PDFPlug.pm

    r3614 r3720  
    4040            'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
    4141            'type' => 'string',
    42             'deft' =>  q^^ }
     42            'deft' =>  q^^ },
     43          { 'name' => "noimages",
     44            'desc' =>  "Don't attempt to extract images from PDF.",
     45            'type' => "flag" },
     46          { 'name' => "complex",
     47            'desc' =>  "Create more complex output. With this option set the output html will look much more like the original PDF file. For this to function properly you Ghostscript installed (for *nix gs should be on your path while for windows you must have gswin32c.exe on your path).",
     48            'type' => "flag" },
     49          { 'name' => "zoom",
     50            'desc' =>  "The factor by which to zoomthe PDF for output (this is only useful if -complex is set).",
     51            'type' => "int" }
    4352          ];
    4453
     
    5059sub new {
    5160    my $class = shift (@_);
     61
     62    my ($noimages, $complex, $zoom);
     63   
     64    if (!parsargv::parse(\@_,
     65             q^noimages^, \$noimages,
     66             q^complex^, \$complex,
     67             q^zoom/\d+/2^, \$zoom,
     68             "allow_extra_options")) {
     69
     70    print STDERR "\nIncorrect options passed to PDFPlug, check your collect.cfg configuration file\n";
     71    &print_usage();
     72    die "\n";
     73    }
    5274
    5375    # following title_sub removes "Page 1" added by pdftohtml, and a leading
     
    81103    my $option_list = $self->{'option_list'};
    82104    push( @{$option_list}, $options );
     105
     106    # these are passed through to gsConvert.pl by ConvertToPlug.pm
     107    $self->{'convert_options'} = "-pdf_zoom $zoom";
     108    $self->{'convert_options'} .= " -pdf_complex" if $complex;
     109    $self->{'convert_options'} .= " -pdf_ignore_images" if $noimages;
     110
     111    # pdftohtml will always produce html files encoded as utf-8
     112    if ($self->{'input_encoding'} eq "auto") {
     113    $self->{'input_encoding'} = "utf8";
     114    $self->{'extract_language'} = 1;
     115    }
    83116   
    84117    return bless $self, $class;
     
    89122    print STDERR "\n  usage: plugin PDFPlug [options]\n\n";
    90123    print STDERR "  options:\n";
    91     print STDERR "   -convert_to (html|text) plugin converts to TEXT or HTML\n";
    92     print STDERR "                           (default html)\n";
    93     print STDERR "   -use_sections     create a separate section for each page of the PDF file.\n\n";
     124    print STDERR "   -convert_to (html|text) Convert to TEXT or HTML (default html)\n";
     125    print STDERR "   -use_sections     Create a separate section for each page\n";
     126    print STDERR "                     of the PDF file.\n";
     127    print STDERR "   -noimages         Don't attempt to extract images from PDF.\n";
     128    print STDERR "   -complex          Create more complex output. With this option\n";
     129    print STDERR "                     set the output html will look much more like\n";
     130    print STDERR "                     the original PDF file. For this to function\n";
     131    print STDERR "                     properly you Ghostscript installed (for *nix\n";
     132    print STDERR "                     gs should be on your path while for windows\n";
     133    print STDERR "                     you must have gswin32c.exe on your path).\n";
     134    print STDERR "   -zoom             The factor by which to zoomthe PDF for output\n";
     135    print STDERR "                     (this is only useful if -complex is set).\n\n";
    94136}
    95137
Note: See TracChangeset for help on using the changeset viewer.