Changeset 32284 for main


Ignore:
Timestamp:
2018-07-18T18:45:52+12:00 (6 years ago)
Author:
ak19
Message:

PDFv2Plugin no longer offers a zoom flag; it has been replaced with a dpi flag that sets the resolution.

Location:
main/trunk/greenstone2
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gsConvert.pl

    r32277 r32284  
    6565my $pdf_nohidden;
    6666my $pdf_zoom;
     67my $pdf_dpi;
    6768my $pdf_ignore_images;
    6869my $pdf_allow_images_only;
     
    9091    print STDERR "\t-pdf_zoom\tfactor by which to zoom PDF (only useful if\n";
    9192    print STDERR "\t\t-pdf_complex is set\n";
     93    print STDERR "\t-pdf_dpi\tSet the resolution in DPI of background images produced by xpdf's pdftohtml\n";
    9294    exit(1);
    9395}
     
    132134             'pdf_allow_images_only', \$pdf_allow_images_only,
    133135             'pdf_nohidden', \$pdf_nohidden,
    134              'pdf_zoom/\d+/2', \$pdf_zoom
     136             'pdf_zoom/\d+/2', \$pdf_zoom,
     137             'pdf_dpi/\d+/96', \$pdf_dpi
    135138             ))
    136139    {
     
    899902    # xpdf's pdftohtml tool also takes a zoom factor, where a zoom of 1 is 100%
    900903    $cmd .= "\"$xpdf_pdftohtml\"";
    901     $cmd .= " -z $pdf_zoom" if ($pdf_zoom);
    902 #    $cmd .= " -c" if ($pdf_complex);
    903 #    $cmd .= " -i" if ($pdf_ignore_images);
    904 #    $cmd .= " -a" if ($pdf_allow_images_only);
    905 #    $cmd .= " -hidden" unless ($pdf_nohidden);   
     904    # -r sets the resolution, in DPI, of background images; see https://www.xpdfreader.com/pdftohtml-man.html
     905    $cmd .= " -r $pdf_dpi" if ($pdf_dpi);
    906906    $cmd .= " \"$input_filename\" \"$tmp_dirname\"";
    907907    #$cmd .= " \"$input_filename\" \"$output_filestem\"";
  • main/trunk/greenstone2/perllib/plugins/PDFv2Plugin.pm

    r32283 r32284  
    8989       'type' => "string",
    9090       'deft' => "Title,Author,Subject,Keywords" },
    91       { 'name' => "metadata_field_separator",
     91     { 'name' => "metadata_field_separator",
    9292    'desc' => "{HTMLPlugin.metadata_field_separator}",
    9393    'type' => "string",
    9494    'deft' => "" },
    95 #     { 'name' => "noimages",
    96 #       'desc' => "{PDFPlugin.noimages}",
    97 #       'type' => "flag" },
    98 #     { 'name' => "allowimagesonly",
    99 #       'desc' => "{PDFPlugin.allowimagesonly}",
    100 #       'type' => "flag" },
    101 #     { 'name' => "complex",
    102 #       'desc' => "{PDFPlugin.complex}",
    103 #       'type' => "flag" },
    104 #     { 'name' => "nohidden",
    105 #       'desc' => "{PDFPlugin.nohidden}",
    106 #       'type' => "flag" },
    107      { 'name' => "zoom",
    108        'desc' => "{PDFv2Plugin.zoom}",
    109        'deft' => "1",
    110        'type' => "string" }, # xpdftools' zoom takes fractions
     95     { 'name' => "dpi",
     96       'desc' => "{PDFv2Plugin.dpi}",
     97       'deft' => "96",
     98       'type' => "int" }, # 72DPI is the default of xpdf's pdftohtml. pdfbox's default is 96DPI in headless mode; otherwise it is detected from the screen resolution. See https://pdfbox.apache.org/2.0/commandline.html#pdftoimage
    11199#     { 'name' => "use_sections",
    112100#       'desc' => "{PDFPlugin.use_sections}",
     
    155143    $self->{'convert_options'} = "-pdf_tool xpdftools"; # default for PDFv2Plugin. If pdfbox_conversion is on, the pdfbox GS extension sets pdf_tool to pdfbox
    156144
    157     # pdf_zoom is supported by xpdftools' pdftohtml. So for pretty_html and paged_pretty_html
    158     my $zoom = $self->{"zoom"};
    159     $self->{'convert_options'} .= " -pdf_zoom $zoom";
     145    # Setting dpi has meaning for xpdftools pdftohtml (so paged_pretty_html and pretty_html)
     146    # and for when pdfbox outputs an image for each page (pagedimg, pagedimgtxt).
     147    # dpi has no effect on (paged_)text and html output modes.
     148    my $dpi = $self->{"dpi"};
     149    $self->{'convert_options'} .= " -pdf_dpi $dpi";
    160150
    161151    # PDFv2Plugin now supports PDF to txt conversion on Windows too:
Note: See TracChangeset for help on using the changeset viewer.