Ignore:
Timestamp:
2012-07-19T18:16:46+12:00 (12 years ago)
Author:
ak19
Message:

Moved the updated create_itemfile() subroutine from pdfpstoimg.pl into util.pm so that PDFBoxConverter can easily reuse it too. PDFBoxConverter is currently being modified to convert a PDF to images when the -pagedimage_IMGTYPE flag is specified.

Location:
main/trunk/greenstone2
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/pdfpstoimg.pl

    r25993 r25994  
    109109    } else {
    110110    # command execute successfully
    111     create_itemfile($output_filestem, $input_basename, $convert_to);
     111    &util::create_itemfile($output_filestem, $input_basename, $convert_to);
    112112    }
    113113    return 0;
    114 }
    115 
    116 sub create_itemfile
    117 {
    118     my ($output_dir, $convert_basename, $convert_to) = @_;
    119     opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!"; 
    120 
    121     my $page_num = "";
    122     my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
    123 
    124     # Sort files in the directory by page_num   
    125     sub page_number {
    126     my ($dir) = @_;
    127     my ($pagenum) =($dir =~ m/^.*[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
    128 
    129     $pagenum = 1 unless defined $pagenum;
    130     return $pagenum;
    131     }
    132 
    133     # sort the files in the directory in the order of page_num rather than lexically.
    134     @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
    135 
    136     # work out if the numbering of the now sorted image files starts at 0 or not
    137     # by checking the number of the first _image_ file (skipping item files)
    138     my $starts_at_0 = 0;
    139     my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
    140     if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
    141     $starts_at_0 = 1;
    142     }
    143 
    144     my $item_file = &util::filename_cat($output_dir, $convert_basename.".item");
    145     open(FILE,">$item_file");   
    146     print FILE "<PagedDocument>\n";
    147 
    148     foreach my $file (@dir_files){
    149     if ($file !~ /\.item/i){
    150         $page_num = page_number($file);
    151         $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
    152         print FILE "   <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
    153     }
    154     }
    155 
    156     print FILE "</PagedDocument>\n";
    157     closedir DIR;
    158     return $item_file;
    159114}
    160115
  • main/trunk/greenstone2/perllib/util.pm

    r25796 r25994  
    18491849}
    18501850
     1851# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from
     1852# a directory containing sequentially numbered images.
     1853sub create_itemfile
     1854{
     1855    my ($output_dir, $convert_basename, $convert_to) = @_;
     1856    opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!"; 
     1857
     1858    my $page_num = "";
     1859    my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
     1860
     1861    # Sort files in the directory by page_num   
     1862    sub page_number {
     1863    my ($dir) = @_;
     1864    my ($pagenum) =($dir =~ m/^.*[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
     1865
     1866    $pagenum = 1 unless defined $pagenum;
     1867    return $pagenum;
     1868    }
     1869
     1870    # sort the files in the directory in the order of page_num rather than lexically.
     1871    @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
     1872
     1873    # work out if the numbering of the now sorted image files starts at 0 or not
     1874    # by checking the number of the first _image_ file (skipping item files)
     1875    my $starts_at_0 = 0;
     1876    my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
     1877    if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
     1878    $starts_at_0 = 1;
     1879    }
     1880
     1881    my $item_file = &util::filename_cat($output_dir, $convert_basename.".item");
     1882    open(FILE,">$item_file");   
     1883    print FILE "<PagedDocument>\n";
     1884
     1885    foreach my $file (@dir_files){
     1886    if ($file !~ /\.item/i){
     1887        $page_num = page_number($file);
     1888        $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
     1889        print FILE "   <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
     1890    }
     1891    }
     1892
     1893    print FILE "</PagedDocument>\n";
     1894    closedir DIR;
     1895    return $item_file;
     1896}
     1897
    185118981;
Note: See TracChangeset for help on using the changeset viewer.