Changeset 32284 for main

Show
Ignore:
Timestamp:
18.07.2018 18:45:52 (13 months ago)
Author:
ak19
Message:

PDFv2Plugin doesn't offer a zoom flag anymore, replaced with a dpi flag to set the resolution.

Location:
main/trunk/greenstone2
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gsConvert.pl

    r32277 r32284  
    6565my $pdf_nohidden; 
    6666my $pdf_zoom; 
     67my $pdf_dpi; 
    6768my $pdf_ignore_images; 
    6869my $pdf_allow_images_only; 
     
    9091    print STDERR "\t-pdf_zoom\tfactor by which to zoom PDF (only useful if\n"; 
    9192    print STDERR "\t\t-pdf_complex is set\n"; 
     93    print STDERR "\t-pdf_dpi\tSet the resolution in DPI of background images produced by xpdf's pdftohtml\n"; 
    9294    exit(1); 
    9395} 
     
    132134             'pdf_allow_images_only', \$pdf_allow_images_only, 
    133135             'pdf_nohidden', \$pdf_nohidden, 
    134              'pdf_zoom/\d+/2', \$pdf_zoom 
     136             'pdf_zoom/\d+/2', \$pdf_zoom, 
     137             'pdf_dpi/\d+/96', \$pdf_dpi 
    135138             )) 
    136139    { 
     
    899902    # xpdf's pdftohtml tool also takes a zoom factor, where a zoom of 1 is 100% 
    900903    $cmd .= "\"$xpdf_pdftohtml\""; 
    901     $cmd .= " -z $pdf_zoom" if ($pdf_zoom); 
    902 #    $cmd .= " -c" if ($pdf_complex); 
    903 #    $cmd .= " -i" if ($pdf_ignore_images); 
    904 #    $cmd .= " -a" if ($pdf_allow_images_only); 
    905 #    $cmd .= " -hidden" unless ($pdf_nohidden);     
     904    # resolution, -r in DPI of background images, see https://www.xpdfreader.com/pdftohtml-man.html 
     905    $cmd .= " -r $pdf_dpi" if ($pdf_dpi); 
    906906    $cmd .= " \"$input_filename\" \"$tmp_dirname\""; 
    907907    #$cmd .= " \"$input_filename\" \"$output_filestem\""; 
  • main/trunk/greenstone2/perllib/plugins/PDFv2Plugin.pm

    r32283 r32284  
    8989       'type' => "string", 
    9090       'deft' => "Title,Author,Subject,Keywords" }, 
    91       { 'name' => "metadata_field_separator", 
     91     { 'name' => "metadata_field_separator", 
    9292    'desc' => "{HTMLPlugin.metadata_field_separator}", 
    9393    'type' => "string", 
    9494    'deft' => "" }, 
    95 #     { 'name' => "noimages", 
    96 #       'desc' => "{PDFPlugin.noimages}", 
    97 #       'type' => "flag" }, 
    98 #     { 'name' => "allowimagesonly", 
    99 #       'desc' => "{PDFPlugin.allowimagesonly}", 
    100 #       'type' => "flag" }, 
    101 #     { 'name' => "complex", 
    102 #       'desc' => "{PDFPlugin.complex}", 
    103 #       'type' => "flag" }, 
    104 #     { 'name' => "nohidden", 
    105 #       'desc' => "{PDFPlugin.nohidden}", 
    106 #       'type' => "flag" }, 
    107      { 'name' => "zoom", 
    108        'desc' => "{PDFv2Plugin.zoom}", 
    109        'deft' => "1", 
    110        'type' => "string" }, # xpdftools' zoom takes fractions 
     95     { 'name' => "dpi", 
     96       'desc' => "{PDFv2Plugin.dpi}", 
     97       'deft' => "96", 
     98       'type' => "int" }, # 72 DPI is the default of xpdf's pdftohtml. pdfbox's default is 96 DPI in headless mode; otherwise it is detected from the screen resolution, see https://pdfbox.apache.org/2.0/commandline.html#pdftoimage 
    11199#     { 'name' => "use_sections", 
    112100#       'desc' => "{PDFPlugin.use_sections}", 
     
    155143    $self->{'convert_options'} = "-pdf_tool xpdftools"; # default for PDFv2Plugin. If pdfbox_conversion is on, the pdfbox GS extension sets pdf_tool to pdfbox 
    156144 
    157     # pdf_zoom is supported by xpdftools' pdftohtml. So for pretty_html and paged_pretty_html 
    158     my $zoom = $self->{"zoom"}; 
    159     $self->{'convert_options'} .= " -pdf_zoom $zoom"; 
     145    # Setting dpi has meaning for xpdftools pdftohtml (so paged_pretty_html and pretty_html) 
     146    # and for when pdfbox outputs an image for each page (pagedimg, pagedimgtxt). 
     147    # dpi has no effect on (paged_)text and html output modes. 
     148    my $dpi = $self->{"dpi"}; 
     149    $self->{'convert_options'} .= " -pdf_dpi $dpi"; 
    160150 
    161151    # PDFv2Plugin now supports PDF to txt conversion on Windows too: