Ignore:
Timestamp:
2012-07-19T18:16:46+12:00 (12 years ago)
Author:
ak19
Message:

Moving the updated create_itemfile() subroutine from pdfpstoimg.pl to util so that PDFBoxConverter can easily reuse this method too, as the PDFBoxConverter is currently being modified to convert a PDF to images when the -pagedimage_IMGTYPE flag is specified.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/util.pm

    r25796 r25994  
    18491849}
    18501850
     1851# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from
     1852# a directory containing sequentially numbered images.
     1853sub create_itemfile
     1854{
     1855    my ($output_dir, $convert_basename, $convert_to) = @_;
     1856    opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!"; 
     1857
     1858    my $page_num = "";
     1859    my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
     1860
     1861    # Sort files in the directory by page_num   
     1862    sub page_number {
     1863    my ($dir) = @_;
     1864    my ($pagenum) =($dir =~ m/^.*[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
     1865
     1866    $pagenum = 1 unless defined $pagenum;
     1867    return $pagenum;
     1868    }
     1869
     1870    # sort the files in the directory in the order of page_num rather than lexically.
     1871    @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
     1872
     1873    # work out if the numbering of the now sorted image files starts at 0 or not
     1874    # by checking the number of the first _image_ file (skipping item files)
     1875    my $starts_at_0 = 0;
     1876    my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
     1877    if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
     1878    $starts_at_0 = 1;
     1879    }
     1880
     1881    my $item_file = &util::filename_cat($output_dir, $convert_basename.".item");
     1882    open(FILE,">$item_file");   
     1883    print FILE "<PagedDocument>\n";
     1884
     1885    foreach my $file (@dir_files){
     1886    if ($file !~ /\.item/i){
     1887        $page_num = page_number($file);
     1888        $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
     1889        print FILE "   <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
     1890    }
     1891    }
     1892
     1893    print FILE "</PagedDocument>\n";
     1894    closedir DIR;
     1895    return $item_file;
     1896}
     1897
    185118981;
Note: See TracChangeset for help on using the changeset viewer.