source: gsdl/trunk/bin/script/pdftoimg.pl@ 14959

Last change on this file since 14959 was 13072, checked in by kjdon, 18 years ago

fixed a bug where the first page was given pagenumber of 2

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1#!/usr/bin/perl -w
2
3
4###########################################################################
5#
6# pdftoimg.pl -- convert PDF documents to various types of Image format
7#
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 2001 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29# pdftoimg.pl is a wrapper for running the convert utility, which converts
30# PDF documents to various types of image (e.g. PNG, GIF or JPEG format).
31# PagedImgPlug is then used to deal with the images.
32
# Compile-time setup: refuse to run without GSDLHOME, and put
# $GSDLHOME/perllib at the front of @INC before the "use" lines below
# are compiled.
BEGIN {
    my $gsdl_home = $ENV{'GSDLHOME'};
    if (!defined $gsdl_home) {
        die "GSDLHOME not set\n";
    }
    unshift(@INC, "$gsdl_home/perllib");
}
37
38use parsargv;
39use util;
40use Cwd;
41use File::Basename
42;
# Print a short usage summary to STDERR and terminate the script with
# exit status 1.  This sub never returns to its caller.
#
# The second positional argument is described as a directory because
# main() creates it with util::mk_dir and writes the images into it.
sub print_usage {
    print STDERR
        ("pdftoimg.pl wrapper for the convert utility.\n",
         "Usage: pdftoimg.pl [options] <PDF-file> <output-dir>\n",
         "Options:\n",
         "\t-convert_to\toutput image type for the PDF\n"
        );
    exit(1);
}
53
# Convert one PDF file into image file(s) by running the external
# "convert" command, then write a .item file listing the resulting pages.
#
# Arguments (normally the script's @ARGV):
#   -convert_to <type>   image type; used as the output file extension
#   <PDF-file>           the PDF to convert (must be readable)
#   <output-dir>         directory that receives the images; util::mk_dir
#                        is called on it when it does not exist yet
#
# Returns 0 on success and 1 when the convert command fails.  Bad usage
# or an unreadable input file ends the script with exit status 1.
sub main {
    my @args = @_;
    my $convert_to;

    # parsargv::parse is handed the argument array by reference; the code
    # below expects only the two positional arguments to be left in it.
    if (!parsargv::parse(\@args,
                         'convert_to/.*/^', \$convert_to,
                        )) {
        print_usage();
    }

    print_usage() if scalar(@args) != 2;

    # NOTE(review): what value the 'convert_to/.*/^' spec yields when the
    # option is omitted is decided inside parsargv, which is not visible
    # here.  Guard against an undefined or empty type rather than building
    # an output name with no extension.
    print_usage() if !defined $convert_to || $convert_to eq '';

    my ($input_filename, $output_filestem) = @args;

    # Make sure the output directory is there before anything is written.
    util::mk_dir($output_filestem) if !-e $output_filestem;

    # Strip any leading path (either kind of slash) and a trailing ".pdf".
    # The extension match is anchored so that a ".pdf" occurring earlier
    # in the name is left alone.
    (my $input_basename = $input_filename) =~ s{^.*[/\\]}{}s;
    $input_basename =~ s/\.pdf$//i;

    if (!-r $input_filename) {
        print STDERR "Error: unable to open $input_filename for reading\n";
        exit(1);
    }

    # For GIF a per-page "-%02d" counter is put into the output name; the
    # original note here says convert would otherwise produce a single
    # GIF animation.
    my $output_filename = util::filename_cat($output_filestem, $input_basename);
    my $output_target = ($convert_to eq "gif")
        ? "$output_filename-%02d.$convert_to"
        : "$output_filename.$convert_to";

    # "convert" is looked up on the PATH (no directory is given).  The
    # list form of system() passes the file names as separate arguments,
    # so spaces, quotes or shell metacharacters in them need no quoting.
    my $status = system("convert", $input_filename, $output_target);
    if ($status != 0) {
        # $! is only meaningful when the command could not be started.
        my $reason;
        if ($status == -1) {
            $reason = "(could not run convert: $!)";
        }
        elsif ($status & 127) {
            $reason = "(convert died with signal " . ($status & 127) . ")";
        }
        else {
            $reason = "(convert exited with status " . ($status >> 8) . ")";
        }
        print STDERR "Convert error for $input_filename $reason\n";
        return 1;
    }

    # The command ran successfully: describe the pages it produced.
    create_itemfile($output_filestem, $input_basename, $convert_to);
    return 0;
}
115
# Return the numeric page index embedded in an image file name, e.g.
# "doc-03.gif" gives 3.  The index is the last run of digits that is
# preceded by "-" or "." and optionally followed by a .jpg/.gif/.png
# extension.  Returns 0 when the name carries no such index, so that a
# lone un-numbered image counts as the first page.
sub page_number {
    my ($filename) = @_;
    my ($index) = ($filename =~ m/^.*[-\.](\d+)(?:\.(?:jpg|gif|png))?$/i);
    return defined $index ? $index : 0;
}

# Write "<output_dir>/<convert_basename>.item", a <PagedDocument> listing
# with one <Page> element per regular file found in $output_dir (files
# whose names end in ".item" are skipped).
#
# Parameters:
#   $output_dir        directory that holds the converted images
#   $convert_basename  stem used for the name of the .item file
#   $convert_to        image type; accepted for the caller's convenience
#                      but not used here
#
# Pages are ordered by the numeric index in the file name, with the file
# name itself as a tie-break.  The index is zero-based, so the pagenum
# written is index + 1.
#
# Returns the empty string.  Dies if the directory cannot be read or the
# .item file cannot be written.
sub create_itemfile {
    my ($output_dir, $convert_basename, $convert_to) = @_;

    opendir(my $dir_handle, $output_dir)
        or die "can't opendir $output_dir: $!";
    my @page_files = grep { -f "$output_dir/$_" && $_ !~ /\.item$/i }
                     readdir($dir_handle);
    closedir($dir_handle);

    # Numeric order rather than lexical; names break ties so the result
    # does not depend on the order readdir happened to return.
    @page_files = sort { page_number($a) <=> page_number($b) or $a cmp $b }
                  @page_files;

    my $item_file = $output_dir . "/" . $convert_basename . ".item";
    open(my $item_fh, '>', $item_file)
        or die "can't open $item_file for writing: $!";

    print $item_fh "<PagedDocument>\n";
    foreach my $file (@page_files) {
        my $page_num = page_number($file) + 1;
        print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
    }
    print $item_fh "</PagedDocument>\n";

    # Buffered write errors only show up when the handle is closed.
    close($item_fh) or die "can't close $item_file: $!";
    return "";
}
151
# Run the conversion and hand main()'s return value back to the caller
# as the process exit status (0 = success).
my $exit_status = main(@ARGV);
exit($exit_status);
154
155
156
Note: See TracBrowser for help on using the repository browser.