Changeset 25994

Show
Ignore:
Timestamp:
19.07.2012 18:16:46 (7 years ago)
Author:
ak19
Message:

Moved the updated create_itemfile() subroutine from pdfpstoimg.pl into util.pm so that PDFBoxConverter can reuse it as well. PDFBoxConverter is currently being modified to convert a PDF to images when the -pagedimage_IMGTYPE flag is specified.

Location:
main/trunk/greenstone2
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/pdfpstoimg.pl

    r25993 r25994  
    109109    } else { 
    110110    # command execute successfully 
    111     create_itemfile($output_filestem, $input_basename, $convert_to); 
     111    &util::create_itemfile($output_filestem, $input_basename, $convert_to); 
    112112    } 
    113113    return 0; 
    114 } 
    115  
    # Extract the page number from an image file name: the final run of
    # digits in the name, optionally followed by a .jpg/.gif/.png extension.
    # Names that carry no such number (or an undefined name) count as page 1.
    #
    # The pattern deliberately has no leading greedy ".*": that would swallow
    # every digit but the last one, turning "doc-12.png" into page 2.
    sub page_number {
        my ($dir) = @_;
        return 1 unless defined $dir;

        my ($pagenum) = ($dir =~ m/(\d+)(?:\.(?:jpg|gif|png))?$/i);

        $pagenum = 1 unless defined $pagenum;
        return $pagenum;
    }

    # Create a .item file describing the sequentially numbered page images
    # found in a directory.
    #
    # Arguments:
    #   $output_dir       - directory holding the page images; the item file
    #                       is written into this directory as well.
    #   $convert_basename - stem of the item file name (".item" is appended).
    #   $convert_to       - accepted so existing callers keep working; it is
    #                       not used by this routine.
    #
    # Returns the path of the item file that was written.
    # Dies if the directory cannot be read, or if the item file cannot be
    # opened or closed.
    sub create_itemfile
    {
        my ($output_dir, $convert_basename, $convert_to) = @_;

        opendir(my $dir_handle, $output_dir) || die "can't opendir $output_dir: $!";
        my @dir_files = grep { -f "$output_dir/$_" } readdir($dir_handle);
        closedir($dir_handle);

        # Item files (e.g. one left behind by an earlier run) are not pages,
        # so drop them once here instead of special-casing them later.
        my @image_files = grep { $_ !~ /\.item/i } @dir_files;

        # sort the images in the order of page number rather than lexically.
        @image_files = sort { page_number($a) <=> page_number($b) } @image_files;

        # work out if the numbering of the now sorted image files starts at 0
        # (00 will evaluate to 0 too in this condition). An empty directory
        # simply yields an item file without any pages.
        my $starts_at_0 = 0;
        if (@image_files && page_number($image_files[0]) == 0) {
            $starts_at_0 = 1;
        }

        my $item_file = &util::filename_cat($output_dir, $convert_basename.".item");
        open(my $item_fh, '>', $item_file) || die "can't open $item_file for writing: $!";
        print $item_fh "<PagedDocument>\n";

        foreach my $file (@image_files) {
            my $page_num = page_number($file);
            $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
            print $item_fh "   <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
        }

        print $item_fh "</PagedDocument>\n";

        # Close before returning so the content is flushed to disk by the
        # time the caller reads the file.
        close($item_fh) || die "can't close $item_file: $!";
        return $item_file;
    }
    160115 
  • main/trunk/greenstone2/perllib/util.pm

    r25796 r25994  
    18491849} 
    18501850 
     1851# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from 
     1852# a directory containing sequentially numbered images. 
     1853sub create_itemfile 
     1854{ 
     1855    my ($output_dir, $convert_basename, $convert_to) = @_; 
     1856    opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!";   
     1857 
     1858    my $page_num = ""; 
     1859    my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR); 
     1860 
     1861    # Sort files in the directory by page_num     
     1862    sub page_number { 
     1863    my ($dir) = @_; 
     1864    my ($pagenum) =($dir =~ m/^.*[-\.]?(\d+)(\.(jpg|gif|png))?$/i); 
     1865 
     1866    $pagenum = 1 unless defined $pagenum; 
     1867    return $pagenum; 
     1868    } 
     1869 
     1870    # sort the files in the directory in the order of page_num rather than lexically.  
     1871    @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files; 
     1872 
     1873    # work out if the numbering of the now sorted image files starts at 0 or not 
     1874    # by checking the number of the first _image_ file (skipping item files) 
     1875    my $starts_at_0 = 0; 
     1876    my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1]; 
     1877    if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition 
     1878    $starts_at_0 = 1; 
     1879    } 
     1880 
     1881    my $item_file = &util::filename_cat($output_dir, $convert_basename.".item"); 
     1882    open(FILE,">$item_file");     
     1883    print FILE "<PagedDocument>\n"; 
     1884 
     1885    foreach my $file (@dir_files){ 
     1886    if ($file !~ /\.item/i){ 
     1887        $page_num = page_number($file); 
     1888        $page_num++ if $starts_at_0; # image numbers start at 0, so add 1 
     1889        print FILE "   <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n"; 
     1890    }  
     1891    } 
     1892 
     1893    print FILE "</PagedDocument>\n"; 
     1894    closedir DIR; 
     1895    return $item_file; 
     1896} 
     1897 
    185118981;