- Timestamp: 2018-07-18T18:45:52+12:00 (6 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/pdf-box/trunk/java/perllib/plugins/PDFBoxConverter.pm
--- r32282
+++ r32284
@@ -129,5 +129,5 @@
     $self->{'pdfbox_txt_launch_cmd'} = "$java -cp \"$pbajar\" org.apache.pdfbox.tools.ExtractText";
     $self->{'pdfbox_html_launch_cmd'} = "$java -cp \"$pbajar\" -Dline.separator=\"<br />\" org.apache.pdfbox.tools.ExtractText";
-#   $self->{'pdfbox_img_launch_cmd'} = "java -cp \"$pbajar\" org.apache.pdfbox.tools.PDFToImage"; # pdfbox 2.09 cmd for converting each PDF page to an image (gif, jpg, png)
+#   $self->{'pdfbox_img_launch_cmd'} = "java -cp \"$pbajar\" org.apache.pdfbox.tools.PDFToImage"; # pdfbox 2.09 cmd for converting each PDF page to an image (jpg, png)

     # We use this next cmd to launch our new custom PDFBox class (PDFBoxToImagesAndText.java) to convert each PDF page into an image (gif, jpg, png)
@@ -179,5 +179,5 @@ (line 181: whitespace-only change)
     # and ends up going through gsConvert.pl
     $self->{'convert_options'} .= " -pdf_tool pdfbox";
-
+

     my $img_output_mode = 0;

@@ -192,5 +192,5 @@
     if ($target_file_type eq "html") {
         $self->{'converted_to'} = "HTML";
-    } elsif ($target_file_type eq "jpg" || $target_file_type eq "gif" || $target_file_type eq "png") {
+    } elsif ($target_file_type eq "jpg" || $target_file_type eq "png") { # || $target_file_type eq "gif"
         # GIF not supported by PDFBox at present, see https://pdfbox.apache.org/1.8/commandline.html#pdftoimage
         $self->{'converted_to'} = $target_file_type;
@@ -248,10 +248,15 @@
     if($img_output_mode || $paged_txt_output_mode) { # converting each page to image and/or text
         my $output_prefix = &FileUtils::filenameConcatenate($target_file_path, $tailname);

+        # Our custom class does renaming of the pages (simplified to just numbers) for PagedImagePlugin
         #$convert_cmd = $paged_txt_output_mode ? $self->{'pdfbox_imgtxt_launch_cmd'} : $self->{'pdfbox_img_launch_cmd'};
         $convert_cmd = $self->{'pdfbox_imgtxt_launch_cmd'};
-        $convert_cmd .= " -textOnly" unless($img_output_mode); # if paged txt only and no images
         $convert_cmd .= " -imagesOnly" unless($paged_txt_output_mode); # set to images only unless there's text too
-        $convert_cmd .= " -imageType $target_file_type" if($img_output_mode);
+        if($img_output_mode) { # whether images-only or images-and-text mode
+            $convert_cmd .= " -imageType $target_file_type";
+            $convert_cmd .= " -dpi ". $self->{"dpi"} if defined $self->{"dpi"};
+        } else { # img_output_mode off, so paged txt only and no images
+            $convert_cmd .= " -textOnly";
+        }
         $convert_cmd .= " -outputPrefix \"$output_prefix\"";
         $convert_cmd .= " \"$source_file_full_path\"";
Note: See TracChangeset for help on using the changeset viewer.