source: trunk/gsdl/bin/script/pdftoimg.pl@ 10493

Last change on this file since 10493 was 10402, checked in by chi, 19 years ago

Modifications to consider the different generated file names under windows and linux system. Also,
to convert the PDF documents to gif needs to be dealt with differently as the default conversion
converts the documents to GIF animation.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1#!/usr/bin/perl -w
2
3
4###########################################################################
5#
6# pdftoimg.pl -- convert PDF documents to various types of Image format
7#
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 2001 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29# pdftoimg.pl is a wrapper for running the convert utility, which converts
30# PDF documents to various image formats (e.g. PNG, GIF, JPEG).
31# PagedImgPlug is then used to process the resulting images.
32
# Compile-time setup: refuse to run without GSDLHOME, and put the Greenstone
# perllib directory at the front of the module search path.
BEGIN {
    if (!defined $ENV{'GSDLHOME'}) {
        die "GSDLHOME not set\n";
    }
    unshift(@INC, $ENV{'GSDLHOME'} . "/perllib");
}
37
38use parsargv;
39use util;
40use Cwd;
41use File::Basename
42;
# Print the command-line synopsis to STDERR and terminate the script.
#
# Takes no arguments and never returns: it always exits with status 1.
sub print_usage {
    print STDERR
        ("pdftoimg.pl wrapper for pdftoimg.\n",
         "Usage: pdftoimg.pl [options] <PDF-file> <output>\n",
         "Options:\n",
         "\t-convert_to <type>\toutput image type for the PDF\n"
        );
    exit(1);
}
53
# Convert a PDF document into image files with the "convert" utility and
# then write the matching .item file via create_itemfile().
#
# Arguments (normally @ARGV):
#   [-convert_to <type>] <PDF-file> <output-directory>
#
# Returns 0 on success and 1 when the convert command fails.  Exits with
# status 1 (directly or through print_usage) when the arguments are wrong
# or the input file cannot be read.
sub main {
    my (@args) = @_;
    my $convert_to;

    # read command-line arguments so that
    # you can change the command in this script
    # NOTE(review): the meaning of the "^" default in this parsargv spec is
    # not visible from this file -- confirm against parsargv.pm.
    if (!parsargv::parse(\@args,
                         'convert_to/.*/^', \$convert_to,
                         )) {
        print_usage();
    }

    # Exactly two positional arguments must remain: input file, output dir.
    if (scalar(@args) != 2) {
        print_usage();
    }

    my ($input_filename, $output_filestem) = @args;

    # Make sure the input file can be read before touching the file system,
    # so a failed run does not leave an empty output directory behind.
    if (!-r $input_filename) {
        print STDERR "Error: unable to open $input_filename for reading\n";
        exit(1);
    }

    # Make sure the directory that will hold the images exists.
    &util::mk_dir($output_filestem) if (!-e $output_filestem);

    # Last path component (either separator style) without its .pdf
    # extension.  The pattern is anchored so that only a trailing ".pdf"
    # is removed, not the first ".pdf" found anywhere in the name.
    my $input_basename = (split(/[\/\\]/, $input_filename))[-1];
    $input_basename =~ s/\.pdf$//i;

    my $on_windows = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ /^windows$/);
    my $image_type = defined $convert_to ? $convert_to : "";

    # By default convert turns a multi-page PDF into a single animated GIF,
    # so for GIF output ask for one numbered file per page instead.
    my $page_pattern = ($image_type eq "gif") ? "-%02d" : "";
    my $separator    = $on_windows ? "\\" : "/";
    my $output_filename = $output_filestem . $separator . $input_basename
        . $page_pattern . "." . $image_type;

    # Don't include the path to convert (assume that the PATH is set up
    # correctly).  The list form of system() bypasses the shell, so file
    # names containing spaces or shell metacharacters are passed through
    # unchanged and need no quoting.
    my @cmd = ("convert", $input_filename, $output_filename);

    # system() returns -1 if it can't run, otherwise it's the wait status
    # of the command; $! is only meaningful in the -1 case.
    my $status = system(@cmd);
    if ($status != 0) {
        my $reason;
        if ($status == -1) {
            $reason = "could not run convert: $!";
        }
        elsif ($status & 127) {
            $reason = "convert died with signal " . ($status & 127);
        }
        else {
            $reason = "convert exited with status " . ($status >> 8);
        }
        print STDERR "Convert error for $input_filename $reason\n";
        # leave any partial output for gsConvert.pl...
        return 1;
    }

    # command executed successfully
    create_itemfile($output_filestem, $input_basename, $convert_to);
    return 0;
}
122
# Extract the zero-based page index embedded in a generated image file name.
#
#   $file        file name (no directory part)
#   $convert_to  image type that was requested (may be undef)
#
# On Windows, for non-GIF output, the index is taken from a trailing
# ".<digits>"; in every other case it is taken from the last "-<digits>."
# in the name.  Returns undef when the name carries no index.
sub _page_index
{
    my ($file, $convert_to) = @_;

    my $on_windows = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ /^windows$/);
    my $is_gif     = (defined $convert_to && $convert_to eq "gif");

    my $index;
    if ($on_windows && !$is_gif) {
        ($index) = ($file =~ m/^.*\.(\d+)$/);
    }
    else {
        ($index) = ($file =~ m/^.*-(\d+)\./);
    }
    return $index;
}

# Write "<output_dir>/<convert_basename>.item", a <PagedDocument> listing
# with one <Page> element per plain file found in the output directory,
# ordered by page number.
#
#   $output_dir        directory holding the converted images
#   $convert_basename  stem used for the name of the .item file
#   $convert_to        image type that was requested
#
# Dies if the directory cannot be read or the item file cannot be written.
# Returns the empty string.
sub create_itemfile
{
    my ($output_dir, $convert_basename, $convert_to) = @_;

    opendir(my $dir_handle, $output_dir) || die "can't opendir $output_dir: $!";
    my @dir_files = grep { -f "$output_dir/$_" } readdir($dir_handle);
    closedir($dir_handle);

    # Pair every file with its 1-based page number.  The converter numbers
    # pages from 0, and a file without any index is treated as page 1.
    # The index is tested with defined() so that page index 0 is not
    # mistaken for "no index".
    my @pages;
    foreach my $file (@dir_files) {
        # item files (including the one written below) are not pages
        next if ($file =~ /\.item$/i);
        my $index = _page_index($file, $convert_to);
        my $page_num = defined $index ? $index + 1 : 1;
        push(@pages, [$page_num, $file]);
    }

    # Order by page number rather than lexically; ties fall back to the
    # file name so that the output is deterministic.
    @pages = sort { $a->[0] <=> $b->[0] || $a->[1] cmp $b->[1] } @pages;

    my $item_file = $output_dir."/".$convert_basename.".item";
    open(my $item_handle, '>', $item_file) || die "can't create $item_file: $!";

    print $item_handle "<PagedDocument>\n";
    foreach my $page (@pages) {
        my ($page_num, $file) = @$page;
        print $item_handle " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
    }
    print $item_handle "</PagedDocument>\n";

    # Buffered write errors only surface when the handle is closed.
    close($item_handle) || die "can't write $item_file: $!";
    return "";
}
169
# Run the conversion and hand its result to the caller as the process
# exit status (0 means success).
exit(main(@ARGV));
172
173
174
Note: See TracBrowser for help on using the repository browser.