Changeset 25995
- Timestamp:
- 2012-07-19T18:51:55+12:00 (11 years ago)
- File:
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/pdf-box/trunk/java/perllib/plugins/PDFBoxConverter.pm
r25513 r25995 127 127 128 128 $self->{'pdfbox_launch_cmd'} = $launch_cmd; 129 $self->{'pdfbox_img_launch_cmd'} = "java -cp \"$pbajar\" org.apache.pdfbox.PDFToImage"; # cmd for converting pages to images (gif, jpg, png) 129 130 } 130 131 else { … … 162 163 # check the filename 163 164 return 0 if ( !-f $source_file_full_path); 165 166 my $img_output_mode = 0; 164 167 165 168 # the following line is necessary to avoid 'uninitialised variable' error … … 170 173 if ($target_file_type eq "html") { 171 174 $self->{'converted_to'} = "HTML"; 175 } elsif ($target_file_type eq "jpg" || $target_file_type eq "gif" || $target_file_type eq "png") { 176 $self->{'converted_to'} = $target_file_type; 177 $img_output_mode = 1; 172 178 } else { 173 179 $self->{'converted_to'} = "text"; … … 185 191 my $file_root = $self->{'cached_file_root'}; 186 192 #$file_root .= "_$convert_id" if ($convert_id ne ""); 187 my $target_file = "$file_root.$target_file_type"; 193 194 # append the output filetype suffix only for non-image output formats, since for 195 # images we can be outputting multiple image files per single PDF input file 196 my $target_file = $img_output_mode ? "$file_root" : "$file_root.$target_file_type"; 197 188 198 $target_file_path = &util::filename_cat($cache_dir,$target_file); 189 199 } … … 191 201 # this is in gsdl/tmp. get a tmp filename in collection instead??? 
192 202 $target_file_path = &util::get_tmp_filename($target_file_type); 203 204 # for image files, remove the suffix, since we can have many output image files 205 # per input PDF (one img for each page of the PDF, for example) 206 if($img_output_mode) { 207 $target_file_path =~ s/\.[^.]*$//g; 208 if(!&util::dir_exists($target_file_path)) { 209 mkdir($target_file_path); 210 } 211 212 # once the item file for the imgs has been created, need to adjust target_file_path 213 214 # below, we'll store the dir just created to pbtmp_file_paths, so all imgs and the 215 # item file generated in it can be deleted in one go on clean_up 216 } 217 193 218 push(@{$self->{'pbtmp_file_paths'}}, $target_file_path); 194 219 } 195 220 196 221 # Generate and run the convert command 197 my $convert_cmd = $self->{'pdfbox_launch_cmd'}; 198 $convert_cmd .= " -html" if ($target_file_type eq "html"); 199 $convert_cmd .= " \"$source_file_full_path\" \"$target_file_path\""; 222 my $convert_cmd = ""; 223 224 # want the filename without extension, because any images 225 # are to be generated with the same filename as the PDF 226 my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($source_file_full_path, "\\.[^\\.]+\$"); 227 228 if($img_output_mode) { # converting to images 229 my $output_prefix = &util::filename_cat($target_file_path, $tailname); 230 231 $convert_cmd = $self->{'pdfbox_img_launch_cmd'}; 232 $convert_cmd .= " -imageType $target_file_type"; 233 $convert_cmd .= " -outputPrefix $output_prefix"; 234 $convert_cmd .= " \"$source_file_full_path\""; 235 236 } else { # html or text 237 $convert_cmd = $self->{'pdfbox_launch_cmd'}; 238 $convert_cmd .= " -html" if ($target_file_type eq "html"); 239 $convert_cmd .= " \"$source_file_full_path\" \"$target_file_path\""; 240 } 200 241 201 242 if ($verbosity>2) { … … 209 250 my ($regenerated,$result,$had_error) 210 251 = $self->autorun_general_cmd($convert_cmd,$source_file_full_path, $target_file_path,$print_info); 252 253 
if($img_output_mode) { 254 # now the images have been generated, generate the "$target_file_path/tailname.item" 255 # item file for them, which is also the target_file_path that needs to be returned 256 $target_file_path = &util::create_itemfile($target_file_path, $tailname, $target_file_type); 257 #print STDERR "**** item file: $target_file_path\n"; 258 } 259 211 260 if ($had_error) { 212 261 return (0, $result,$target_file_path); … … 231 280 232 281 foreach my $pbtmp_file_path (@{$self->{'pbtmp_file_paths'}}) { 233 if (-e $pbtmp_file_path) { 282 if (-d $pbtmp_file_path) { 283 #print STDERR "@@@@@@ cleanup called on $pbtmp_file_path\n"; 284 &util::rm_r($pbtmp_file_path); 285 } 286 elsif (-e $pbtmp_file_path) { 234 287 &util::rm($pbtmp_file_path); 235 288 }
Note: See TracChangeset for help on using the changeset viewer.