Ignore:
Timestamp:
2012-07-19T18:10:15+12:00 (12 years ago)
Author:
ak19
Message:

Generalised the code of the create_itemfile() subroutine so that PDFBoxConverter can reuse it: PDFBoxConverter is currently being modified to convert a PDF to images when the -pagedimage_IMGTYPE flag is specified. After this commit, the updated create_itemfile() will be moved into util for common access.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/pdfpstoimg.pl

    r24600 r25993  
    118118    my ($output_dir, $convert_basename, $convert_to) = @_;
    119119    opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!"; 
    120     my $item_file = $output_dir."/".$convert_basename.".item";
    121     open(FILE,">$item_file");
    122    
    123     print FILE "<PagedDocument>\n";
    124120
    125121    my $page_num = "";
    126     @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
     122    my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
    127123
    128124    # Sort files in the directory by page_num   
    129125    sub page_number {
    130126    my ($dir) = @_;
    131     my ($pagenum) =($dir =~ m/^.*[-\.](\d+)(\.(jpg|gif|png))?$/i);
    132    
     127    my ($pagenum) =($dir =~ m/^.*[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
     128
    133129    $pagenum = 1 unless defined $pagenum;
    134130    return $pagenum;
     
    138134    @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
    139135
     136    # work out if the numbering of the now sorted image files starts at 0 or not
     137    # by checking the number of the first _image_ file (skipping item files)
     138    my $starts_at_0 = 0;
     139    my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
     140    if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
     141    $starts_at_0 = 1;
     142    }
     143
     144    my $item_file = &util::filename_cat($output_dir, $convert_basename.".item");
     145    open(FILE,">$item_file");   
     146    print FILE "<PagedDocument>\n";
     147
    140148    foreach my $file (@dir_files){
    141     $page_num = page_number($file)+1; # image numbers start at 0, so add 1
    142149    if ($file !~ /\.item/i){
     150        $page_num = page_number($file);
     151        $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
    143152        print FILE "   <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
    144153    }
     
    147156    print FILE "</PagedDocument>\n";
    148157    closedir DIR;
    149     return "";
     158    return $item_file;
    150159}
    151160
Note: See TracChangeset for help on using the changeset viewer.