Changeset 32280 for main/trunk/greenstone2/perllib/util.pm
- Timestamp:
- 2018-07-17T20:40:57+12:00 (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/util.pm
r32193 r32280
  1729   1729   sub page_number {
  1730   1730       my ($dir) = @_;
  1731        -     my ($pagenum) =($dir =~ m/^.*?[-\.]?(\d+)(\.(jpg|gif|png ))?$/i);
         1731 +     my ($pagenum) =($dir =~ m/^.*?[-\.]?(\d+)(\.(jpg|gif|png|txt))?$/i);
  1732   1732       # my ($pagenum) =($dir =~ m/(\d+)(\.(jpg|gif|png))?$/i); # this works but is not as safe/strict about input filepatterns as the above
  1733   1733
     …      …
  1763   1763       my $hasTxtFile = &FileUtils::fileExists($txtfilename);
  1764   1764
  1765        -     foreach my $file (@dir_files){
  1766        -         if ($file !~ /\.item/i && $file !~ /\.txt/i){
         1765 +     # Write out the elements of the item file.
         1766 +     # We could be dealing with 3 types of conversion output formats: txt only (paged_text),
         1767 +     # images only (pagedimg_) and images AND text (pagedimgtxt_).
         1768 +     foreach my $file (@dir_files) {
         1769 +         if ($file !~ /\.item/i) {
  1767   1770               $page_num = page_number($file);
  1768   1771               $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
  1769        -             if($hasTxtFile) {
  1770        -                 print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"$page_num.txt\"/>\n";
  1771        -             } else {
  1772        -                 print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
         1772 +
         1773 +             if ($convert_to eq "txt") { # output format is paged_text, which has no images
         1774 +                 if ($file =~ m/\.txt/i) { # check only txt files (should be all there is, besides the skipped .item file)
         1775 +                     print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"\" txtfile=\"$page_num.txt\"/>\n";
         1776 +                 } # else, some non-txt file ext, skip
  1773   1777               }
  1774        -         }
  1775        -     }
         1778 +             else { # either pagedimg or pagedimgtxt output mode
         1779 +                 if($file !~ /\.txt/i) { # check only img files, skip any matching txt files
         1780 +                     if($hasTxtFile) { # if every image has a matching txt file, output txtfile too
         1781 +                         print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"$page_num.txt\"/>\n";
         1782 +                     } else { # when its pagedimg only, txtfile is empty
         1783 +                         print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
         1784 +                     }
         1785 +                 }
         1786 +             }
         1787 +         }
         1788 +     }
         1789 +
  1776   1790
  1777   1791       print $item_fh "</PagedDocument>\n";
Note:
See TracChangeset
for help on using the changeset viewer.