Changeset 25993

Show
Ignore:
Timestamp:
19.07.2012 18:10:15 (7 years ago)
Author:
ak19
Message:

Generalised the create_itemfile() subroutine so that PDFBoxConverter can reuse it: PDFBoxConverter is currently being modified to convert a PDF to images when the -pagedimage_IMGTYPE flag is specified. After this commit, the updated create_itemfile() will be moved into util for common access.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/pdfpstoimg.pl

    r24600 r25993  
    118118    my ($output_dir, $convert_basename, $convert_to) = @_; 
    119119    opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!";   
    120     my $item_file = $output_dir."/".$convert_basename.".item"; 
    121     open(FILE,">$item_file"); 
    122      
    123     print FILE "<PagedDocument>\n"; 
    124120 
    125121    my $page_num = ""; 
    126     @dir_files = grep {-f "$output_dir/$_"} readdir(DIR); 
     122    my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR); 
    127123 
    128124    # Sort files in the directory by page_num     
    129125    sub page_number { 
    130126    my ($dir) = @_; 
    131     my ($pagenum) =($dir =~ m/^.*[-\.](\d+)(\.(jpg|gif|png))?$/i); 
    132      
     127    my ($pagenum) =($dir =~ m/^.*[-\.]?(\d+)(\.(jpg|gif|png))?$/i); 
     128 
    133129    $pagenum = 1 unless defined $pagenum; 
    134130    return $pagenum; 
     
    138134    @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files; 
    139135 
     136    # work out if the numbering of the now sorted image files starts at 0 or not 
     137    # by checking the number of the first _image_ file (skipping item files) 
     138    my $starts_at_0 = 0; 
     139    my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1]; 
     140    if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition 
     141    $starts_at_0 = 1; 
     142    } 
     143 
     144    my $item_file = &util::filename_cat($output_dir, $convert_basename.".item"); 
     145    open(FILE,">$item_file");     
     146    print FILE "<PagedDocument>\n"; 
     147 
    140148    foreach my $file (@dir_files){ 
    141     $page_num = page_number($file)+1; # image numbers start at 0, so add 1 
    142149    if ($file !~ /\.item/i){ 
     150        $page_num = page_number($file); 
     151        $page_num++ if $starts_at_0; # image numbers start at 0, so add 1 
    143152        print FILE "   <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n"; 
    144153    }  
     
    147156    print FILE "</PagedDocument>\n"; 
    148157    closedir DIR; 
    149     return ""; 
     158    return $item_file; 
    150159} 
    151160