Changeset 4103


Ignore:
Timestamp:
2003-04-08T12:25:08+12:00 (21 years ago)
Author:
sjboddie
Message:

Added a -nohidden option to PDFPlug. The -hidden option is now passed to pdftohtml
by default; specifying -nohidden suppresses it.

Location:
trunk/gsdl
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/gsConvert.pl

    r3933 r4103  
    6060my $use_strings;
    6161my $pdf_complex;
     62my $pdf_nohidden;
    6263my $pdf_zoom;
    6364my $pdf_ignore_images;
     
    7576    print STDERR "\t-use_strings\tuse strings to extract text if conversion fails\n";
    7677    print STDERR "\t-pdf_complex\tuse complex output when converting PDF to HTML\n";
     78    print STDERR "\t-pdf_nohidden\tDon't attempt to extract hidden text from PDF files\n";
    7779    print STDERR "\t-pdf_ignore_images\tdon't attempt to extract images when\n";
    7880    print STDERR "\t\tconverting PDF to HTML\n";
     
    99101             'use_strings', \$use_strings,
    100102             'pdf_complex', \$pdf_complex,
     103             'pdf_nohidden', \$pdf_nohidden,
    101104             'pdf_zoom/\d+/2', \$pdf_zoom
    102105             ))
     
    642645    $cmd .= " -c" if ($pdf_complex);
    643646    $cmd .= " -i" if ($pdf_ignore_images);
     647    $cmd .= " -hidden" unless ($pdf_nohidden);
    644648    $cmd .= " \"$input_filename\" \"$output_filestem\"";
    645649   
  • trunk/gsdl/bin/script/pdftohtml.pl

    r3720 r4103  
    5151     "\t-a\tallow images only (continue even if no text is present)\n",
    5252     "\t-c\tproduce complex output (requires ghostscript)\n",
     53     "\t-hidden\tExtract hidden text\n",
    5354     "\t-zoom\tfactor by which to zoom the PDF (only useful if -c is set)\n"
    5455     );
     
    5859sub main {
    5960    my (@ARGV) = @_;
    60     my ($allow_no_text, $ignore_images, $complex, $zoom);
     61    my ($allow_no_text, $ignore_images, $complex, $zoom, $hidden);
    6162   
    6263    # read command-line arguments so that
     
    6667             'i', \$ignore_images,
    6768             'c', \$complex,
     69             'hidden', \$hidden,
    6870             'zoom/\d+/2', \$zoom,
    6971             ))
     
    115117    $cmd .= " -i" if ($ignore_images);
    116118    $cmd .= " -c" if ($complex);
     119    $cmd .= " -hidden" if ($hidden);
    117120    $cmd .= " -zoom $zoom";
    118121    $cmd .= " -noframes -p -enc UTF-8 \"$input_filename\" \"$output_filestem.html\"";
  • trunk/gsdl/perllib/plugins/PDFPlug.pm

    r3833 r4103  
    4545            'type' => "flag" },
    4646          { 'name' => "complex",
    47             'desc' =>  "Create more complex output. With this option set the output html will look much more like the original PDF file. For this to function properly you Ghostscript installed (for *nix gs should be on your path while for windows you must have gswin32c.exe on your path).",
     47            'desc' => "Create more complex output. With this option set the output html will look much more like the original PDF file. For this to function properly you Ghostscript installed (for *nix gs should be on your path while for windows you must have gswin32c.exe on your path).",
     48            'type' => "flag" },
     49          { 'name' => "nohidden",
     50            'desc' => "Prevent pdftohtml from attempting to extract hidden text. This is only useful if the -complex option is also set.",
    4851            'type' => "flag" },
    4952          { 'name' => "zoom",
     
    6063    my $class = shift (@_);
    6164
    62     my ($noimages, $complex, $zoom, $use_sections);
     65    my ($noimages, $complex, $zoom, $use_sections, $nohidden);
    6366   
    6467    if (!parsargv::parse(\@_,
     
    6669             q^complex^, \$complex,
    6770             q^zoom/\d+/2^, \$zoom,
     71             q^nohidden^, \$nohidden,
    6872             q^use_sections/1?/^, \$use_sections,
    6973             "allow_extra_options")) {
     
    97101    $self->{'convert_options'} = "-pdf_zoom $zoom";
    98102    $self->{'convert_options'} .= " -pdf_complex" if $complex;
     103    $self->{'convert_options'} .= " -pdf_nohidden" if $nohidden;
    99104    $self->{'convert_options'} .= " -pdf_ignore_images" if $noimages;
    100105
     
    122127    print STDERR "                     gs should be on your path while for windows\n";
    123128    print STDERR "                     you must have gswin32c.exe on your path).\n";
     129    print STDERR "   -nohidden         Prevent pdftohtml from attempting to extract\n";
     130    print STDERR "                     hidden text. This is only useful if the -complex\n";
     131    print STDERR "                     option is also set.";
    124132    print STDERR "   -zoom             The factor by which to zoomthe PDF for output\n";
    125133    print STDERR "                     (this is only useful if -complex is set).\n\n";
Note: See TracChangeset for help on using the changeset viewer.