source: gsdl/trunk/bin/script/pdfpstoimg.pl@ 19743

Last change on this file since 19743 was 17328, checked in by kjdon, 16 years ago

tidied up this file a bit, and changed all pdf help bits to pdf/ps help. This is a result of the merge of pdftoimg.pl and pstoimg.pl. Basically only the output statements have changed, the code was pretty much all the same

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1#!/usr/bin/perl -w
2
3
4###########################################################################
5#
6# pdfpstoimg.pl -- convert PDF or PS documents to various types of Image format
7#
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 2001 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
29# pdfpstoimg.pl is a wrapper for running the ImageMagick 'convert' utility
30# which converts PDF and PS documents to various types of image (e.g. PNG,
31# GIF, JPEG format). We then create an item file to join the images together
32# into a document. The item file will be processed by PagedImagePlugin
33
# Compile-time setup: refuse to run without GSDLHOME, then put the
# Greenstone perllib directory at the front of the module search path so
# the 'use' statements that follow can be resolved from it.
BEGIN {
    defined $ENV{'GSDLHOME'}
        or die "GSDLHOME not set\n";

    my $perllib_dir = "$ENV{'GSDLHOME'}/perllib";
    unshift @INC, $perllib_dir;
}
38
39use parsargv;
40use util;
41use Cwd;
42use File::Basename;
43
# Print the command-line help text to STDERR and terminate the script.
#
# Takes no arguments and never returns: the process exits with status 1.
sub print_usage {
    print STDERR
        "pdfpstoimg.pl wrapper for converting PDF or PS files to a series of images.\n",
        "Usage: pdfpstoimg.pl [options] <PDF/PS-file> <output-filestem>\n",
        "Options:\n",
        "\t-convert_to\toutput image type (gif, jpg, png) \n";
    exit(1);
}
53
# Script entry point: convert one PDF/PS file to page images with the
# ImageMagick 'convert' utility, then write the matching .item file.
#
# Arguments: the command-line argument list (normally @ARGV), i.e.
#            [-convert_to <type>] <PDF/PS-file> <output-filestem>
# Returns:   0 on success, 1 if 'convert' could not be run or failed.
# Exits (status 1) instead of returning when the arguments are malformed
# or the input file is not readable.
sub main {
    my @args = @_;
    my $convert_to;

    # Let parsargv strip the recognised options out of @args, leaving only
    # the positional arguments behind.
    # NOTE(review): the trailing '^' in the option spec is presumably
    # parsargv's default-value field -- confirm against parsargv.pm.
    if (!parsargv::parse(\@args,
                         'convert_to/.*/^', \$convert_to,
                         )) {
        print_usage();
    }

    # Make sure the user has specified both input and output files
    print_usage() if scalar(@args) != 2;

    # Without an image type we cannot build a meaningful output file name.
    print_usage() unless defined $convert_to && length $convert_to;

    my ($input_filename, $output_filestem) = @args;

    # Create the output directory if it is not there yet.
    util::mk_dir($output_filestem) if !-e $output_filestem;

    # Strip any leading directories (either '/' or '\' separated) and the
    # .pdf/.ps extension to get the stem used for the generated images.
    (my $input_basename = $input_filename) =~ s{^.*[/\\]}{}s;
    $input_basename =~ s/\.(pdf|ps)$//i;

    if (!-r $input_filename) {
        print STDERR "Error: unable to open $input_filename for reading\n";
        exit(1);
    }

    # Only GIF output gets an explicit zero-padded per-page counter in the
    # file name; for the other types the name is passed to convert as-is.
    my $output_filename = util::filename_cat($output_filestem, $input_basename);
    my $output_target = ($convert_to eq "gif")
        ? "$output_filename-%02d.$convert_to"
        : "$output_filename.$convert_to";

    # 'convert' is looked up on the PATH (no absolute path, so a GSDLHOME
    # containing spaces is not a problem).  The list form of system() hands
    # the file names over as separate arguments, so spaces, quotes, '$' or
    # backticks in a file name can neither break nor inject into the command.
    my @cmd = ("convert", $input_filename, $output_target);

    # system() returns -1 if it can't run, otherwise it's the command's
    # wait status.  $! is only meaningful in the -1 case.
    $! = 0;
    my $status = system(@cmd);
    if ($status != 0) {
        my $reason = ($status == -1) ? "$!" : "(exit status " . ($? >> 8) . ")";
        print STDERR "Convert error for $input_filename $reason\n";
        # Any partial output files are left in place for gsConvert.pl to
        # deal with.
        return 1;
    }

    # command executed successfully
    create_itemfile($output_filestem, $input_basename, $convert_to);
    return 0;
}
114
# Extract the page index embedded in a generated image file name.
#
# The index is the run of digits that follows the last '-' or '.' and is
# optionally followed by a .jpg/.gif/.png extension (e.g. "doc-07.gif"
# yields "07").  Names that do not match yield 1.
sub page_number {
    my ($dir) = @_;
    my ($pagenum) = ($dir =~ m/^.*[-\.](\d+)(\.(jpg|gif|png))?$/i);

    $pagenum = 1 unless defined $pagenum;
    return $pagenum;
}

# Write "<output_dir>/<convert_basename>.item", a <PagedDocument> listing
# with one <Page> element for every plain file found in $output_dir
# (anything whose name contains ".item" is skipped).
#
# Arguments:
#   $output_dir        directory holding the converted images
#   $convert_basename  stem of the item file name
#   $convert_to        image type; accepted for interface compatibility,
#                      not used here
# Returns: the empty string.
# Dies if the directory cannot be read or the item file cannot be written.
sub create_itemfile
{
    my ($output_dir, $convert_basename, $convert_to) = @_;

    opendir(my $dir_handle, $output_dir)
        or die "can't opendir $output_dir: $!";
    my @dir_files = grep { -f "$output_dir/$_" } readdir($dir_handle);
    closedir($dir_handle);

    # Work out each file's page index once, then order the files
    # numerically by it rather than lexically.  Ties fall back to the file
    # name so the listing does not depend on readdir() order.
    my %page_index_of = map { $_ => page_number($_) } @dir_files;
    my @sorted_files = sort {
        $page_index_of{$a} <=> $page_index_of{$b} or $a cmp $b
    } @dir_files;

    my $item_file = $output_dir."/".$convert_basename.".item";
    open(my $item_fh, '>', $item_file)
        or die "can't open $item_file for writing: $!";

    print $item_fh "<PagedDocument>\n";

    foreach my $file (@sorted_files) {
        next if $file =~ /\.item/i;

        # image numbers start at 0, so add 1
        # NOTE(review): a file with no recognisable number gets index 1 and
        # is therefore listed as page 2 -- kept as-is, confirm this is intended.
        my $page_num = $page_index_of{$file} + 1;
        print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
    }

    print $item_fh "</PagedDocument>\n";

    # Buffered write errors only surface at close, so check it.
    close($item_fh)
        or die "can't close $item_file: $!";

    return "";
}
150
# Run the conversion on the command-line arguments and hand main()'s
# result back to the caller as the process exit status (0 = success).
my $exit_status = main(@ARGV);
exit($exit_status);
153
154
155
Note: See TracBrowser for help on using the repository browser.