source: main/trunk/greenstone2/bin/script/pdfpstoimg.pl@ 25993

Last change on this file since 25993 was 25993, checked in by ak19, 12 years ago

Generalised the create_itemfile() subroutine's code so that PDFBoxConverter can reuse this, as the PDFBoxConverter is currently being modified to convert a PDF to images, when the -pagedimage_IMGTYPE flag is specified. After this commit, the updated create_itemfile() will be moved into util for common access.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.3 KB
RevLine 
[10358]1#!/usr/bin/perl -w
2
3
4###########################################################################
5#
[17328]6# pdfpstoimg.pl -- convert PDF or PS documents to various types of Image format
[10358]7#
8# A component of the Greenstone digital library software
9# from the New Zealand Digital Library Project at the
10# University of Waikato, New Zealand.
11#
12# Copyright (C) 2001 New Zealand Digital Library Project
13#
14# This program is free software; you can redistribute it and/or modify
15# it under the terms of the GNU General Public License as published by
16# the Free Software Foundation; either version 2 of the License, or
17# (at your option) any later version.
18#
19# This program is distributed in the hope that it will be useful,
20# but WITHOUT ANY WARRANTY; without even the implied warranty of
21# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22# GNU General Public License for more details.
23#
24# You should have received a copy of the GNU General Public License
25# along with this program; if not, write to the Free Software
26# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27#
28###########################################################################
[17328]29# pdfpstoimg.pl is a wrapper for running the ImageMagick 'convert' utility
30# which converts PDF and PS documents to various types of image (e.g. PNG,
31# GIF, JPEG format). We then create an item file to join the images together
32# into a document. The item file will be processed by PagedImagePlugin
[10358]33
# Compile-time setup: the Greenstone modules loaded by the 'use' lines
# that follow live under $GSDLHOME/perllib, so refuse to start without
# GSDLHOME and put that directory at the front of the module search path.
BEGIN {
    if (!defined $ENV{'GSDLHOME'}) {
        die "GSDLHOME not set\n";
    }
    unshift(@INC, "$ENV{'GSDLHOME'}/perllib");
}
38
39use parsargv;
40use util;
41use Cwd;
[17328]42use File::Basename;
43
# Print a short usage summary to STDERR and terminate the script.
#
# Never returns: it always exits with status 1, so callers invoke it as
# the last step when the command line cannot be understood.
sub print_usage {
    print STDERR
        ("pdfpstoimg.pl wrapper for converting PDF or PS files to a series of images.\n",
         "Usage: pdfpstoimg.pl [options] <PDF/PS-file> <output-filestem>\n",
         "Options:\n",
         "\t-convert_to\toutput image type (gif, jpg, png) \n"
        );
    exit (1);
}
53
# Convert one PDF or PS file into page images and write the matching
# .item file that joins those images into a single document.
#
# Arguments: the script's command-line arguments, i.e. an optional
#   "-convert_to <type>" option followed by the input file and the
#   output location (used as a directory; created if it does not exist).
#
# Returns 0 on success and 1 if the conversion command failed; the
# caller uses this value as the exit status of the script. Exits
# directly (via print_usage() or exit(1)) when the arguments are
# malformed or the input file cannot be read.
sub main {
    my (@args) = @_;
    my ($convert_to);

    # read command-line arguments so that
    # you can change the command in this script
    if (!parsargv::parse(\@args,
                         'convert_to/.*/^', \$convert_to,
                         )) {
        print_usage();
    }

    # Make sure the user has specified both input and output files
    if (scalar(@args) != 2) {
        print_usage();
    }

    my ($input_filename, $output_filestem) = @args;

    # Check the input before touching the file system, so that a bad
    # input file does not leave a stray output directory behind.
    if (!-r $input_filename) {
        print STDERR "Error: unable to open $input_filename for reading\n";
        exit(1);
    }

    # The images and the item file are written into this directory.
    &util::mk_dir($output_filestem) if (!-e $output_filestem);

    # Strip the leading directories (either separator style) and the
    # .pdf/.ps extension to get the stem used for the image file names.
    my @path_parts = split (/(\/|\\)/, $input_filename);
    my $input_basename = pop(@path_parts);
    $input_basename =~ s/\.(pdf|ps)$//i;

    # GIF output gets an explicit two-digit page counter in the file
    # name; for the other formats only the plain name is passed on.
    my $output_filename = &util::filename_cat($output_filestem, $input_basename);
    my $output_pattern = ($convert_to eq "gif")
        ? "$output_filename-%02d.$convert_to"
        : "$output_filename.$convert_to";

    # gs-magick.pl is given without a path and located through PATH by
    # "perl -S", so PATH is assumed to be set up correctly. The command
    # is run in list form: every argument reaches the program verbatim,
    # so spaces, quotes or shell metacharacters in file names need no
    # quoting and cannot be interpreted by a shell.
    my $perl_exec = &util::get_perl_exec();
    my @cmd = ($perl_exec, "-S", "gs-magick.pl", "convert",
               $input_filename, $output_pattern);

    # system() returns -1 if it can't run, otherwise it's the command's
    # wait status.
    $! = 0;
    my $status = system(@cmd);
    if ($status != 0) {
        # $! only describes the failure when the command could not be
        # started at all; otherwise report how the command ended.
        my $reason = ($status == -1)  ? $!
                   : ($status & 127)  ? "killed by signal " . ($status & 127)
                   :                    "exit status " . ($status >> 8);
        print STDERR "Convert error for $input_filename $reason\n";
        # leave these for gsConvert.pl...
        #&util::rm("$output_filestem.text") if (-e "$output_filestem.text");
        #&util::rm("$output_filestem.err") if (-e "$output_filestem.err");
        return 1;
    }

    # command executed successfully
    create_itemfile($output_filestem, $input_basename, $convert_to);
    return 0;
}
115
# Work out the page number encoded in an image file name.
#
# The page number is the complete run of digits at the end of the name,
# optionally followed by a .jpg/.gif/.png extension ("doc-07.gif" -> 7,
# "doc12.png" -> 12). Names without such a digit run count as page 1.
# The result is always a plain number, so leading zeros are dropped.
sub page_number {
    my ($filename) = @_;
    return 1 unless defined $filename;

    # No leading ".*": a greedy prefix would swallow all but the last
    # digit and turn "doc-10.gif" into page 0.
    my ($pagenum) = ($filename =~ m/(\d+)(?:\.(?:jpg|gif|png))?$/i);

    return defined $pagenum ? $pagenum + 0 : 1;
}

# Write "<convert_basename>.item" into $output_dir, listing every page
# image found in that directory in page order.
#
# Parameters:
#   $output_dir       - directory holding the generated page images
#   $convert_basename - name of the item file, without the .item suffix
#   $convert_to       - image type that was generated (currently unused,
#                       kept so existing callers keep working)
#
# Returns the path of the item file. Dies if the directory cannot be
# read or the item file cannot be written.
sub create_itemfile
{
    my ($output_dir, $convert_basename, $convert_to) = @_;

    opendir(my $dir_handle, $output_dir) || die "can't opendir $output_dir: $!";
    my @dir_files = grep {-f "$output_dir/$_"} readdir($dir_handle);
    closedir($dir_handle);

    # Item files (for instance one left behind by an earlier run) are
    # not pages. Work out each remaining file's page number once.
    my %page_of = map { $_ => page_number($_) }
                  grep { $_ !~ /\.item/i } @dir_files;

    # Sort in the order of page number rather than lexically; the name
    # is only a tie-breaker to keep the output deterministic.
    my @image_files = sort { $page_of{$a} <=> $page_of{$b} or $a cmp $b }
                      keys %page_of;

    # Work out if the numbering of the sorted image files starts at 0;
    # if it does, every page number written below is shifted up by one.
    my $starts_at_0 = (@image_files && $page_of{$image_files[0]} == 0) ? 1 : 0;

    my $item_file = &util::filename_cat($output_dir, $convert_basename.".item");
    open(my $item_fh, '>', $item_file)
        || die "can't open $item_file for writing: $!";

    print $item_fh "<PagedDocument>\n";
    foreach my $file (@image_files) {
        my $page_num = $page_of{$file};
        $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
        print $item_fh " <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
    }
    print $item_fh "</PagedDocument>\n";

    # Close before returning so the caller always sees the complete
    # file, and so that buffered write errors are not lost.
    close($item_fh) || die "can't close $item_file: $!";

    return $item_file;
}
160
# Run the conversion on the command-line arguments and report the result
# as the script's exit status, 0 = success.
my $exit_status = main(@ARGV);
exit($exit_status);
163
[10402]164
165
Note: See TracBrowser for help on using the repository browser.