Changeset 38728 for gs2-extensions/pdf-box/trunk/java/perllib
- Timestamp: 2024-02-08T18:53:34+13:00 (4 months ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/pdf-box/trunk/java/perllib/plugins/PDFBoxConverter.pm
r37803 r38728 126 126 my $gextpb_home = $ENV{'GEXT_PDFBOX'}; 127 127 my $pbajar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","pdfbox-app.jar"); 128 my $pbjbigjar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jbig2-imageio-3.0.1.jar"); 128 my $pb_jbig2_jar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jbig2-imageio-3.0.1.jar"); 129 130 my $pb_jaicore_jar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jai-imageio-core-1.4.0.jar"); 131 my $pb_jaijpeg2000_jar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jai-imageio-jpeg2000-1.4.0.jar"); 132 129 133 # Not including the following JPEG2000 jar, as it is under commercial license: 130 134 # https://github.com/jai-imageio/jai-imageio-jpeg2000 leading to https://bintray.com/jai-imageio/maven/jai-imageio-jpeg2000# (Files tab) 131 135 # my $pbjp2jar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jai-imageio-jpeg2000-1.3.0.jar"); # jpeg2000 136 132 137 my $java = &util::get_java_command(); 133 138 $self->{'pdfbox_txt_launch_cmd'} = "$java -cp \"$pbajar\" org.apache.pdfbox.tools.ExtractText"; … … 141 146 # put the pdfbox jar, the jbig2-imageio library (Apache Software License 2.0) 142 147 # and our build folder containing our custom PDFBox class on the classpath 143 my $classpath = &util::pathname_cat($pbajar, $pbjbigjar, $pdfbox_build); 148 my $classpath = &util::pathname_cat($pbajar, $pb_jbig2_jar); 149 150 if(!&FileUtils::filenameExists($pb_jaicore_jar)) { 151 $classpath = &util::pathname_cat($classpath, $pb_jaicore_jar); 152 } 153 if(!&FileUtils::filenameExists($pb_jaijpeg2000_jar)) { 154 $classpath = &util::pathname_cat($classpath, $pb_jaijpeg2000_jar); 155 } 156 $classpath = &util::pathname_cat($classpath, $pdfbox_build); 157 144 158 # $self->{'pdfbox_img_launch_cmd'} = "java -cp \"$classpath\" org.apache.pdfbox.tools.PDFToImage"; # pdfbox 2.09 cmd for converting each PDF page to an image (jpg, png) 145 159 
$self->{'pdfbox_imgtxt_launch_cmd'} = "java -cp \"$classpath\" org.greenstone.pdfbox.PDFBoxToImagesAndText"; … … 212 226 # Determine the full name and path of the output file 213 227 my $target_file_path; 228 214 229 if ($self->{'enable_cache'}) { 215 230 $self->init_cache_for_file($source_file_full_path); … … 236 251 # this is in gsdl/tmp. get a tmp filename in collection instead??? 237 252 $target_file_path = &util::get_tmp_filename($target_file_type); 238 253 239 254 # for image files, remove the suffix, since we can have many output image files 240 255 # per input PDF (one img for each page of the PDF, for example) 241 256 if($img_output_mode || $paged_txt_output_mode) { 242 257 $target_file_path =~ s/\.[^.]*$//g; 258 243 259 if(!&FileUtils::directoryExists($target_file_path)) { 244 260 mkdir($target_file_path); … … 274 290 $convert_cmd .= " -textOnly"; 275 291 } 292 276 293 $convert_cmd .= " -outputPrefix \"$output_prefix\""; 277 294 $convert_cmd .= " \"$source_file_full_path\"";
Note: See TracChangeset for help on using the changeset viewer.