Changeset 38728


Ignore:
Timestamp:
2024-02-08T18:53:34+13:00 (3 months ago)
Author:
davidb
Message:

Updated to dynamically add in JPEG2000 jars, if present on the filesystem

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs2-extensions/pdf-box/trunk/java/perllib/plugins/PDFBoxConverter.pm

    r37803 r38728  
    126126    my $gextpb_home = $ENV{'GEXT_PDFBOX'};
    127127    my $pbajar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","pdfbox-app.jar");
    128     my $pbjbigjar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jbig2-imageio-3.0.1.jar");
     128    my $pb_jbig2_jar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jbig2-imageio-3.0.1.jar");
     129
     130    my $pb_jaicore_jar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jai-imageio-core-1.4.0.jar");
     131    my $pb_jaijpeg2000_jar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jai-imageio-jpeg2000-1.4.0.jar");
     132       
    129133    # Not including the following JPEG2000 jar, as it is under commercial license:
    130134    # https://github.com/jai-imageio/jai-imageio-jpeg2000 leading to https://bintray.com/jai-imageio/maven/jai-imageio-jpeg2000# (Files tab)
    131135    # my $pbjp2jar = &FileUtils::filenameConcatenate($gextpb_home,"lib","java","jai-imageio-jpeg2000-1.3.0.jar"); # jpeg2000
     136   
    132137    my $java = &util::get_java_command();
    133138    $self->{'pdfbox_txt_launch_cmd'} = "$java -cp \"$pbajar\" org.apache.pdfbox.tools.ExtractText";
     
    141146    # put the pdfbox jar, the jbig2-imageio library (Apache Software License 2.0)
    142147    # and our build folder containing our custom PDFBox class on the classpath
    143     my $classpath = &util::pathname_cat($pbajar, $pbjbigjar, $pdfbox_build);
     148    my $classpath = &util::pathname_cat($pbajar, $pb_jbig2_jar);
     149
     150    if(!&FileUtils::filenameExists($pb_jaicore_jar)) {
     151        $classpath = &util::pathname_cat($classpath, $pb_jaicore_jar);
     152    }
     153    if(!&FileUtils::filenameExists($pb_jaijpeg2000_jar)) {
     154        $classpath = &util::pathname_cat($classpath, $pb_jaijpeg2000_jar);
     155    }
     156    $classpath = &util::pathname_cat($classpath, $pdfbox_build);
     157   
    144158#   $self->{'pdfbox_img_launch_cmd'} = "java -cp \"$classpath\" org.apache.pdfbox.tools.PDFToImage"; # pdfbox 2.09 cmd for converting each PDF page to an image (jpg, png) 
    145159    $self->{'pdfbox_imgtxt_launch_cmd'} = "java -cp \"$classpath\" org.greenstone.pdfbox.PDFBoxToImagesAndText";
     
    212226    # Determine the full name and path of the output file
    213227    my $target_file_path;
     228   
    214229    if ($self->{'enable_cache'}) {
    215230    $self->init_cache_for_file($source_file_full_path);
     
    236251    # this is in gsdl/tmp. get a tmp filename in collection instead???
    237252    $target_file_path = &util::get_tmp_filename($target_file_type);
    238 
     253   
    239254    # for image files, remove the suffix, since we can have many output image files
    240255    # per input PDF (one img for each page of the PDF, for example)
    241256    if($img_output_mode || $paged_txt_output_mode) {
    242257        $target_file_path =~ s/\.[^.]*$//g;
     258       
    243259        if(!&FileUtils::directoryExists($target_file_path)) {       
    244260        mkdir($target_file_path);
     
    274290        $convert_cmd .= " -textOnly";       
    275291    }
     292   
    276293    $convert_cmd .= " -outputPrefix \"$output_prefix\"";
    277294    $convert_cmd .= " \"$source_file_full_path\"";
Note: See TracChangeset for help on using the changeset viewer.