source: trunk/gsdl/perllib/plugins/ImagePlug.pm@ 9960

Last change on this file since 9960 was 9960, checked in by davidb, 19 years ago

Minor refinement made to print statments the show how the plugin code is
progressing.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.2 KB
Line 
1###########################################################################
2#
3# ImagePlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ImagePlug;
27
28use BasPlug;
29
30sub BEGIN {
31 @ISA = ('BasPlug');
32}
33
34my $arguments =
35 [ { 'name' => "process_exp",
36 'desc' => "{BasPlug.process_exp}",
37 'type' => "regexp",
38 'deft' => &get_default_process_exp(),
39 'reqd' => "no" },
40 { 'name' => "noscaleup",
41 'desc' => "{ImagePlug.noscaleup}",
42 'type' => "flag",
43 'reqd' => "no" },
44 { 'name' => "thumbnailsize",
45 'desc' => "{ImagePlug.thumbnailsize}",
46 'type' => "int",
47 'deft' => "100",
48 'reqd' => "no" },
49 { 'name' => "thumbnailtype",
50 'desc' => "{ImagePlug.thumbnailtype}",
51 'type' => "string",
52 'deft' => "gif",
53 'reqd' => "no" },
54 { 'name' => "screenviewsize",
55 'desc' => "{ImagePlug.screenviewsize}",
56 'type' => "int",
57 'deft' => "0",
58 'reqd' => "no" },
59 { 'name' => "screenviewtype",
60 'desc' => "{ImagePlug.screenviewtype}",
61 'type' => "string",
62 'deft' => "jpg",
63 'reqd' => "no" },
64 { 'name' => "converttotype",
65 'desc' => "{ImagePlug.converttotype}",
66 'type' => "string",
67 'deft' => "",
68 'reqd' => "no" },
69 { 'name' => "minimumsize",
70 'desc' => "{ImagePlug.minimumsize}",
71 'type' => "int",
72 'deft' => "100",
73 'reqd' => "no" } ];
74
75my $options = { 'name' => "ImagePlug",
76 'desc' => "{ImagePlug.desc}",
77 'abstract' => "no",
78 'inherits' => "yes",
79 'args' => $arguments };
80
81
82
83sub new {
84 my ($class) = @_;
85 my $plugin_name = shift (@_);
86 my $self = new BasPlug ("ImagePlug", @_);
87 $self->{'plugin_type'} = "ImagePlug";
88 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
89 my $option_list = $self->{'option_list'};
90 push( @{$option_list}, $options );
91
92 if (!parsargv::parse(\@_,
93 q^noscaleup^, \$self->{'noscaleup'},
94 q^converttotype/.*/^, \$self->{'converttotype'},
95 q^minimumsize/[0-9]*/100^, \$self->{'minimumsize'},
96
97 q^thumbnailsize/[0-9]*/100^, \$self->{'thumbnailsize'},
98 q^thumbnailtype/.*/gif^, \$self->{'thumbnailtype'},
99 q^screenviewsize/[0-9]*/0^, \$self->{'screenviewsize'},
100 q^screenviewtype/.*/jpg^, \$self->{'screenviewtype'},
101 "allow_extra_options")) {
102
103 print STDERR "\nImagePlug uses an incorrect option.\n";
104 print STDERR "Check your collect.cfg configuration file.\n";
105 $self->print_txt_usage(""); # Use default resource bundle
106 die "\n";
107 }
108
109 # Check that ImageMagick is installed and available on the path (except for Windows 95/98)
110 if (!($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT())) {
111 my $result = `identify 2>&1`;
112 if ($? == -1 || $? == 256) { # Linux and Windows return different values for "program not found"
113 $self->{'imagemagick_not_installed'} = 1;
114 }
115 }
116
117 return bless $self, $class;
118}
119
120sub get_default_process_exp {
121 my $self = shift (@_);
122
123 return q^(?i)(\.jpe?g|\.gif|\.png|\.bmp|\.xbm|\.tif?f)$^;
124}
125
126# this makes no sense for images
127sub block_cover_image
128{
129 my $self =shift (@_);
130 my ($filename) = @_;
131
132 return;
133}
134# Create the thumbnail and screenview images, and discover the Image's
135# size, width, and height using the convert utility.
136
137sub run_convert {
138 my $self = shift (@_);
139 my $filename = shift (@_); # filename with full path
140 my $file = shift (@_); # filename without path
141 my $doc_obj = shift (@_);
142 my $section = $doc_obj->get_top_section();
143
144 my $verbosity = $self->{'verbosity'};
145 my $outhandle = $self->{'outhandle'};
146
147 # check the filename is okay
148 return 0 if ($file eq "" || $filename eq "");
149
150# Code now extended to quote filenames in 'convert' commnads
151# Allows spaces in filenames, but note needs spaces to be escaped in URL as well
152# if ($filename =~ m/ /) {
153# print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n";
154# return undef;
155# }
156
157 my $minimumsize = $self->{'minimumsize'};
158 if (defined $minimumsize && (-s $filename < $minimumsize)) {
159 print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"
160 if ($verbosity > 1);
161 }
162
163
164 # Convert the image to a new type (if required).
165 my $converttotype = $self->{'converttotype'};
166 my $originalfilename = ""; # only set if we do a conversion
167 my $type = "unknown";
168
169
170 if ($converttotype ne "" && $filename =~ m/$converttotype$/) {
171
172 $originalfilename = $filename;
173 $filename = &util::get_tmp_filename() . ".$converttotype";
174 $self->{'tmp_filename'} = $filename;
175
176 my $command = "convert -interlace plane -verbose \"$originalfilename\" \"$filename\"";
177 print $outhandle "$command\n" if ($verbosity > 2);
178 my $result = '';
179 $result = `$command`;
180 print $outhandle "RESULT = $result\n" if ($verbosity > 2);
181
182 $type = $converttotype;
183 }
184
185
186 # Add the image metadata
187 my $url = $file;
188 $url =~ s/ /%20/g;
189
190 $doc_obj->add_metadata ($section, "Image", $url);
191
192 # Also want to set filename as 'Source' metadata to be
193 # consistent with other plugins
194 $doc_obj->add_metadata ($section, "Source", $url);
195
196 my ($image_type, $image_width, $image_height, $image_size)
197 = &identify($filename, $outhandle, $verbosity);
198
199 if ($image_type ne " ") {
200 $type = $image_type;
201 }
202
203 $doc_obj->add_metadata ($section, "FileFormat", $type);
204 $doc_obj->add_metadata ($section, "FileSize", $image_size);
205
206 $doc_obj->add_metadata ($section, "ImageType", $image_type);
207 $doc_obj->add_metadata ($section, "ImageWidth", $image_width);
208 $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
209 $doc_obj->add_metadata ($section, "ImageSize", $image_size);
210
211 $doc_obj->add_metadata ($section, "srclink",
212 "<a href=\"_httpcollection_/index/assoc/[assocfilepath]/[Image]\">");
213 $doc_obj->add_metadata ($section, "/srclink", "</a>");
214
215 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpcollection_/index/assoc/[assocfilepath]/[Image]\" width=100>");
216
217
218 # Add the image as an associated file
219 $doc_obj->associate_file($filename,$file,"image/$type",$section);
220
221
222 # Make the thumbnail image
223 my $thumbnailsize = $self->{'thumbnailsize'} || 100;
224 my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
225
226 my $thumbnailfile = &util::get_tmp_filename() . ".$thumbnailtype";
227 $self->{'tmp_filename2'} = $thumbnailfile;
228
229 # Generate the thumbnail with convert
230 my $command = "convert -interlace plane -verbose -geometry $thumbnailsize"
231 . "x$thumbnailsize \"$filename\" \"$thumbnailfile\"";
232 print $outhandle "THUMBNAIL: $command\n" if ($verbosity > 2);
233 my $result = '';
234 $result = `$command 2>&1` ;
235 print $outhandle "THUMB RESULT: $result\n" if ($verbosity > 2);
236
237 # Add the thumbnail as an associated file ...
238 if (-e "$thumbnailfile") {
239 $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",
240 "image/$thumbnailtype",$section);
241 $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
242 $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");
243
244 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpcollection_/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
245 }
246
247 # Extract Thumnail metadata from convert output
248 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
249 $doc_obj->add_metadata ($section, "ThumbWidth", $1);
250 $doc_obj->add_metadata ($section, "ThumbHeight", $2);
251 }
252
253 # Make a screen-sized version of the picture if requested
254 if ($self->{'screenviewsize'}) {
255
256 # To do: if the actual image smaller than the screenview size,
257 # we should use the original !
258
259 my $screenviewsize = $self->{'screenviewsize'};
260 my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
261 my $screenviewfilename = &util::get_tmp_filename() . ".$screenviewtype";
262 $self->{'tmp_filename3'} = $screenviewfilename;
263
264 # make the screenview image
265 my $command = "convert -interlace plane -verbose -geometry $screenviewsize"
266 . "x$screenviewsize \"$filename\" \"$screenviewfilename\"";
267 print $outhandle "$command\n" if ($verbosity > 2);
268 my $result = "";
269 $result = `$command 2>&1` ;
270 print $outhandle "$result\n" if ($verbosity > 3);
271
272 # get screenview dimensions, size and type
273 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
274 $doc_obj->add_metadata ($section, "ScreenWidth", $1);
275 $doc_obj->add_metadata ($section, "ScreenHeight", $2);
276 }
277 else {
278 $doc_obj->add_metadata ($section, "ScreenWidth", $image_width);
279 $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);
280 }
281
282 #add the screenview as an associated file ...
283 if (-e "$screenviewfilename") {
284 $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",
285 "image/$screenviewtype",$section);
286 $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
287 $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");
288
289 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpcollection_/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
290 } else {
291 print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";
292 }
293 }
294
295 return $type;
296
297
298}
299
300
301
302# Discover the characteristics of an image file with the ImageMagick
303# "identify" command.
304
305sub identify {
306 my ($image, $outhandle, $verbosity) = @_;
307
308 # Use the ImageMagick "identify" command to get the file specs
309 my $command = "identify \"$image\" 2>&1";
310 print $outhandle "$command\n" if ($verbosity > 2);
311 my $result = '';
312 $result = `$command`;
313 print $outhandle "$result\n" if ($verbosity > 3);
314
315 # Read the type, width, and height
316 my $type = 'unknown';
317 my $width = 'unknown';
318 my $height = 'unknown';
319
320 my $image_safe = quotemeta $image;
321 if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
322 $type = $1;
323 $width = $2;
324 $height = $3;
325 }
326
327 # Read the size
328 my $size = "unknown";
329 if ($result =~ m/^.* ([0-9]+)b/) {
330 $size = $1;
331 }
332 elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) {
333 $size = 1024 * $1;
334 if (defined($2)) {
335 $size = $size + (1024 * $2);
336 # Truncate size (it isn't going to be very accurate anyway)
337 $size = int($size);
338 }
339 }
340
341 print $outhandle "file: $image:\t $type, $width, $height, $size\n"
342 if ($verbosity > 2);
343
344 # Return the specs
345 return ($type, $width, $height, $size);
346}
347
348
349# The ImagePlug read() function. This function does all the right things
350# to make general options work for a given plugin. It calls the process()
351# function which does all the work specific to a plugin (like the old
352# read functions used to do). Most plugins should define their own
353# process() function and let this read() function keep control.
354#
355# ImagePlug overrides read() because there is no need to read the actual
356# text of the file in, because the contents of the file is not text...
357#
358# Return number of files processed, undef if can't process
359# Note that $base_dir might be "" and that $file might
360# include directories
361
362sub read {
363 my $self = shift (@_);
364 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
365
366 my $outhandle = $self->{'outhandle'};
367
368 my $filename = &util::filename_cat($base_dir, $file);
369 return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
370 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
371 return undef;
372 }
373
374 print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
375 print $outhandle "ImagePlug processing $file\n"
376 if $self->{'verbosity'} > 1;
377
378 # None of this works very well on Windows 95/98...
379 if ($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT()) {
380 if ($gli) {
381 print STDERR "<ProcessingError n='$file' r='Windows 95/98 not supported'>\n";
382 }
383 print $outhandle "ImagePlug: Windows 95/98 not supported\n";
384 return -1;
385 }
386
387 # None of this is going to work very well without ImageMagick...
388 if ($self->{'imagemagick_not_installed'}) {
389 if ($gli) {
390 print STDERR "<ProcessingError n='$file' r='ImageMagick not installed'>\n";
391 }
392 print $outhandle "ImagePlug: ImageMagick not installed\n";
393 return -1;
394 }
395
396 #if there's a leading directory name, eat it...
397 $file =~ s/^.*[\/\\]//;
398
399 # create a new document
400 my $doc_obj = new doc ($filename, "indexed_doc");
401 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
402 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
403
404 #run convert to get the thumbnail and extract size and type info
405 my $result = run_convert($self, $filename, $file, $doc_obj);
406
407 if (!defined $result)
408 {
409 if ($gli) {
410 print STDERR "<ProcessingError n='$file'>\n";
411 }
412 print $outhandle "ImagePlug: couldn't process \"$filename\"\n";
413 return -1; # error during processing
414 }
415
416 #create an empty text string so we don't break downstream plugins
417 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}");
418 # include any metadata passed in from previous plugins
419 # note that this metadata is associated with the top level section
420 my $section = $doc_obj->get_top_section();
421 $self->extra_metadata ($doc_obj, $section, $metadata);
422
423 # do plugin specific processing of doc_obj
424 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
425 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
426 return -1;
427 }
428
429 # do any automatic metadata extraction
430 $self->auto_extract_metadata ($doc_obj);
431
432 # if we haven't found any Title so far, assign one
433 # this was shifted to here from inside read()
434 $self->title_fallback($doc_obj,$section,$file);
435 # add an OID
436 $doc_obj->set_OID();
437 $doc_obj->add_text($section, $text);
438
439 # process the document
440 $processor->process($doc_obj);
441
442 # clean up temporary files - we do this here instead of in
443 # run_convert becuase associated files aren't actually copied
444 # until after process has been run.
445 if (defined $self->{'tmp_filename'} &&
446 -e $self->{'tmp_filename'}) {
447 &util::rm($self->{'tmp_filename'})
448 }
449 if (defined $self->{'tmp_filename2'} &&
450 -e $self->{'tmp_filename2'}) {
451 &util::rm($self->{'tmp_filename2'})
452 }
453 if (defined $self->{'tmp_filename3'} &&
454 -e $self->{'tmp_filename3'}) {
455 &util::rm($self->{'tmp_filename3'})
456 }
457
458 $self->{'num_processed'}++;
459
460 return 1;
461}
462
463# do plugin specific processing of doc_obj
464sub process {
465 my $self = shift (@_);
466 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
467 my $outhandle = $self->{'outhandle'};
468
469 return 1;
470}
471
4721;
473
474
475
476
477
478
479
480
481
482
483
Note: See TracBrowser for help on using the repository browser.