- Timestamp:
- 2018-07-18T18:45:52+12:00 (6 years ago)
- Location:
- main/trunk/greenstone2
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/bin/script/gsConvert.pl
r32277 r32284 65 65 my $pdf_nohidden; 66 66 my $pdf_zoom; 67 my $pdf_dpi; 67 68 my $pdf_ignore_images; 68 69 my $pdf_allow_images_only; … … 90 91 print STDERR "\t-pdf_zoom\tfactor by which to zoom PDF (only useful if\n"; 91 92 print STDERR "\t\t-pdf_complex is set\n"; 93 print STDERR "\t-pdf_dpi\tSet the resolution in DPI of background images produced by xpdf's pdftohtml\n"; 92 94 exit(1); 93 95 } … … 132 134 'pdf_allow_images_only', \$pdf_allow_images_only, 133 135 'pdf_nohidden', \$pdf_nohidden, 134 'pdf_zoom/\d+/2', \$pdf_zoom 136 'pdf_zoom/\d+/2', \$pdf_zoom, 137 'pdf_dpi/\d+/96', \$pdf_dpi 135 138 )) 136 139 { … … 899 902 # xpdf's pdftohtml tool also takes a zoom factor, where a zoom of 1 is 100% 900 903 $cmd .= "\"$xpdf_pdftohtml\""; 901 $cmd .= " -z $pdf_zoom" if ($pdf_zoom); 902 # $cmd .= " -c" if ($pdf_complex); 903 # $cmd .= " -i" if ($pdf_ignore_images); 904 # $cmd .= " -a" if ($pdf_allow_images_only); 905 # $cmd .= " -hidden" unless ($pdf_nohidden); 904 # resolution, -r in DPI of background images, see https://www.xpdfreader.com/pdftohtml-man.html 905 $cmd .= " -r $pdf_dpi" if ($pdf_dpi); 906 906 $cmd .= " \"$input_filename\" \"$tmp_dirname\""; 907 907 #$cmd .= " \"$input_filename\" \"$output_filestem\""; -
main/trunk/greenstone2/perllib/plugins/PDFv2Plugin.pm
r32283 r32284 89 89 'type' => "string", 90 90 'deft' => "Title,Author,Subject,Keywords" }, 91 91 { 'name' => "metadata_field_separator", 92 92 'desc' => "{HTMLPlugin.metadata_field_separator}", 93 93 'type' => "string", 94 94 'deft' => "" }, 95 # { 'name' => "noimages", 96 # 'desc' => "{PDFPlugin.noimages}", 97 # 'type' => "flag" }, 98 # { 'name' => "allowimagesonly", 99 # 'desc' => "{PDFPlugin.allowimagesonly}", 100 # 'type' => "flag" }, 101 # { 'name' => "complex", 102 # 'desc' => "{PDFPlugin.complex}", 103 # 'type' => "flag" }, 104 # { 'name' => "nohidden", 105 # 'desc' => "{PDFPlugin.nohidden}", 106 # 'type' => "flag" }, 107 { 'name' => "zoom", 108 'desc' => "{PDFv2Plugin.zoom}", 109 'deft' => "1", 110 'type' => "string" }, # xpdftools' zoom takes fractions 95 { 'name' => "dpi", 96 'desc' => "{PDFv2Plugin.dpi}", 97 'deft' => "96", 98 'type' => "int" }, # 72DPI is xpdf's pdftohtml's default. pdfbox' default is 96DPI in headless mode else detected from the screen resolution, see https://pdfbox.apache.org/2.0/commandline.html#pdftoimage 111 99 # { 'name' => "use_sections", 112 100 # 'desc' => "{PDFPlugin.use_sections}", … … 155 143 $self->{'convert_options'} = "-pdf_tool xpdftools"; # default for PDFv2Plugin. If pdfbox_conversion is on, the pdfbox GS extension sets pdf_tool to pdfbox 156 144 157 # pdf_zoom is supported by xpdftools' pdftohtml. So for pretty_html and paged_pretty_html 158 my $zoom = $self->{"zoom"}; 159 $self->{'convert_options'} .= " -pdf_zoom $zoom"; 145 # Setting dpi has meaning for xpdftools pdftohtml (so paged_pretty_html and pretty_html) 146 # and for when pdfbox outputs an image for each page (pagedimg, pagedimgtxt). 147 # dpi has no effect on (paged_)text and html output modes. 148 my $dpi = $self->{"dpi"}; 149 $self->{'convert_options'} .= " -pdf_dpi $dpi"; 160 150 161 151 # PDFv2Plugin now supports PDF to txt conversion on Windows too:
Note: See TracChangeset for help on using the changeset viewer.