source: gsdl/trunk/perllib/plugins/ImagePlug.pm@ 14954

Last change on this file since 14954 was 14954, checked in by mdewsnip, 16 years ago

New "-cached_generated_images" option for caching images in the Greenstone "tmp" directory, instead of giving them random names and regenerating them every import. Thanks to DL Consulting.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.8 KB
Line 
1###########################################################################
2#
3# ImagePlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ImagePlug;
27
28use BasPlug;
29
30use strict;
31no strict 'refs'; # allow filehandles to be variables and viceversa
32
33sub BEGIN {
34 @ImagePlug::ISA = ('BasPlug');
35}
36
37my $arguments =
38 [ { 'name' => "process_exp",
39 'desc' => "{BasPlug.process_exp}",
40 'type' => "regexp",
41 'deft' => &get_default_process_exp(),
42 'reqd' => "no" },
43 { 'name' => "cache_generated_images",
44 'desc' => "{ImagePlug.cache_generated_image}",
45 'type' => "flag",
46 'reqd' => "no" },
47 { 'name' => "noscaleup",
48 'desc' => "{ImagePlug.noscaleup}",
49 'type' => "flag",
50 'reqd' => "no" },
51 { 'name' => "thumbnailsize",
52 'desc' => "{ImagePlug.thumbnailsize}",
53 'type' => "int",
54 'deft' => "100",
55 'range' => "1,",
56 'reqd' => "no" },
57 { 'name' => "thumbnailtype",
58 'desc' => "{ImagePlug.thumbnailtype}",
59 'type' => "string",
60 'deft' => "gif",
61 'reqd' => "no" },
62 { 'name' => "screenviewsize",
63 'desc' => "{ImagePlug.screenviewsize}",
64 'type' => "int",
65 'deft' => "0",
66 'range' => "1,",
67 'reqd' => "no" },
68 { 'name' => "screenviewtype",
69 'desc' => "{ImagePlug.screenviewtype}",
70 'type' => "string",
71 'deft' => "jpg",
72 'reqd' => "no" },
73 { 'name' => "converttotype",
74 'desc' => "{ImagePlug.converttotype}",
75 'type' => "string",
76 'deft' => "",
77 'reqd' => "no" },
78 { 'name' => "minimumsize",
79 'desc' => "{ImagePlug.minimumsize}",
80 'type' => "int",
81 'deft' => "100",
82 'range' => "1,",
83 'reqd' => "no" } ];
84
85my $options = { 'name' => "ImagePlug",
86 'desc' => "{ImagePlug.desc}",
87 'abstract' => "no",
88 'inherits' => "yes",
89 'args' => $arguments };
90
91
92
93sub new {
94 my ($class) = shift (@_);
95 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
96 push(@$pluginlist, $class);
97
98 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
99 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
100
101 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
102 $self->{'tmp_file_paths'} = ();
103
104 # Check that ImageMagick is installed and available on the path (except for Windows 95/98)
105 if (!($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT())) {
106 my $result = `identify 2>&1`;
107 if ($? == -1 || $? == 256) { # Linux and Windows return different values for "program not found"
108 $self->{'imagemagick_not_installed'} = 1;
109 }
110 }
111
112
113 return bless $self, $class;
114}
115
116sub get_default_process_exp {
117 my $self = shift (@_);
118
119 return q^(?i)(\.jpe?g|\.gif|\.png|\.bmp|\.xbm|\.tif?f)$^;
120}
121
122# this makes no sense for images
123sub block_cover_image
124{
125 my $self =shift (@_);
126 my ($filename) = @_;
127
128 return;
129}
130# Create the thumbnail and screenview images, and discover the Image's
131# size, width, and height using the convert utility.
132
133sub generate_images
134{
135 my $self = shift (@_);
136 my $filename = shift (@_); # filename with full path
137 my $file = shift (@_); # filename without path
138 my $doc_obj = shift (@_);
139 my $section = $doc_obj->get_top_section();
140
141 my $verbosity = $self->{'verbosity'};
142 my $outhandle = $self->{'outhandle'};
143
144 # check the filename is okay
145 return 0 if ($file eq "" || $filename eq "");
146
147# Code now extended to quote filenames in 'convert' commnads
148# Allows spaces in filenames, but note needs spaces to be escaped in URL as well
149# if ($filename =~ m/ /) {
150# print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n";
151# return undef;
152# }
153
154 my $minimumsize = $self->{'minimumsize'};
155 if (defined $minimumsize && (-s $filename < $minimumsize)) {
156 print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"
157 if ($verbosity > 1);
158 }
159
160
161 # Convert the image to a new type (if required).
162 my $converttotype = $self->{'converttotype'};
163 my $originalfilename = ""; # only set if we do a conversion
164 my $type = "unknown";
165
166 if ($converttotype ne "" && $filename !~ m/$converttotype$/) {
167 $originalfilename = $filename;
168
169 my $result = $self->convert($originalfilename, $converttotype, "", "");
170 ($filename) = ($result =~ /=>(.*\.$converttotype)/);
171
172 $type = $converttotype;
173 $file =~ s/\..*$/\.$type/;
174 }
175
176
177 # Add the image metadata
178 my $url = $file;
179
180 ##not know why it is required at the first place, it seems all works fine without it, so I comment it out
181 ##$url =~ s/ /%20/g;
182
183 $doc_obj->add_metadata ($section, "Image", $url);
184
185 # Also want to set filename as 'Source' metadata to be
186 # consistent with other plugins
187 $doc_obj->add_metadata ($section, "Source", $url);
188
189 my ($image_type, $image_width, $image_height, $image_size)
190 = &identify($filename, $outhandle, $verbosity);
191
192 if ($image_type ne " ") {
193 $type = $image_type;
194 }
195
196 $doc_obj->add_metadata ($section, "FileFormat", $type);
197 $doc_obj->add_metadata ($section, "FileSize", $image_size);
198
199 $doc_obj->add_metadata ($section, "ImageType", $image_type);
200 $doc_obj->add_metadata ($section, "ImageWidth", $image_width);
201 $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
202 $doc_obj->add_metadata ($section, "ImageSize", $image_size);
203 $doc_obj->add_metadata ($section, "NoText", "1");
204
205 $doc_obj->add_metadata ($section, "srclink",
206 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">");
207 $doc_obj->add_metadata ($section, "/srclink", "</a>");
208
209 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\" width=100>");
210
211
212 # Add the image as an associated file
213 $doc_obj->associate_file($filename,$file,"image/$type",$section);
214
215
216 # Make the thumbnail image
217 my $thumbnailsize = $self->{'thumbnailsize'} || 100;
218 my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
219
220 # Generate the thumbnail with convert
221 my $result = $self->convert($filename, $thumbnailtype, "-geometry $thumbnailsize" . "x$thumbnailsize", "THUMB");
222 my ($thumbnailfile) = ($result =~ /=>(.*\.$thumbnailtype)/);
223
224 # Add the thumbnail as an associated file ...
225 if (-e "$thumbnailfile") {
226 $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",
227 "image/$thumbnailtype",$section);
228 $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
229 $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");
230
231 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
232 }
233
234 # Extract Thumnail metadata from convert output
235 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
236 $doc_obj->add_metadata ($section, "ThumbWidth", $1);
237 $doc_obj->add_metadata ($section, "ThumbHeight", $2);
238 }
239
240 # Make a screen-sized version of the picture if requested
241 if ($self->{'screenviewsize'}) {
242
243 # To do: if the actual image smaller than the screenview size,
244 # we should use the original !
245
246 my $screenviewsize = $self->{'screenviewsize'};
247 my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
248
249 # make the screenview image
250 my $result = $self->convert($filename, $screenviewtype, "-geometry $screenviewsize" . "x$screenviewsize", "SCREEN");
251 my ($screenviewfilename) = ($result =~ /=>(.*\.$screenviewtype)/);
252
253 # get screenview dimensions, size and type
254 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
255 $doc_obj->add_metadata ($section, "ScreenWidth", $1);
256 $doc_obj->add_metadata ($section, "ScreenHeight", $2);
257 }
258 else {
259 $doc_obj->add_metadata ($section, "ScreenWidth", $image_width);
260 $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);
261 }
262
263 #add the screenview as an associated file ...
264 if (-e "$screenviewfilename") {
265 $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",
266 "image/$screenviewtype",$section);
267 $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
268 $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");
269
270 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
271 } else {
272 print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";
273 }
274 }
275
276 return $type;
277
278
279}
280
281
282
283# Discover the characteristics of an image file with the ImageMagick
284# "identify" command.
285
286sub identify {
287 my ($image, $outhandle, $verbosity) = @_;
288
289 # Use the ImageMagick "identify" command to get the file specs
290 my $command = "identify \"$image\" 2>&1";
291 print $outhandle "$command\n" if ($verbosity > 2);
292 my $result = '';
293 $result = `$command`;
294 print $outhandle "$result\n" if ($verbosity > 3);
295
296 # Read the type, width, and height
297 my $type = 'unknown';
298 my $width = 'unknown';
299 my $height = 'unknown';
300
301 my $image_safe = quotemeta $image;
302 if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
303 $type = $1;
304 $width = $2;
305 $height = $3;
306 }
307
308 # Read the size
309 my $size = "unknown";
310 if ($result =~ m/^.* ([0-9]+)b/) {
311 $size = $1;
312 }
313 elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) {
314 $size = 1024 * $1;
315 if (defined($2)) {
316 $size = $size + (1024 * $2);
317 # Truncate size (it isn't going to be very accurate anyway)
318 $size = int($size);
319 }
320 }
321
322 print $outhandle "file: $image:\t $type, $width, $height, $size\n"
323 if ($verbosity > 2);
324
325 # Return the specs
326 return ($type, $width, $height, $size);
327}
328
329
330sub convert
331{
332 my $self = shift(@_);
333 my $source_file_path = shift(@_);
334 my $target_file_type = shift(@_);
335 my $convert_options = shift(@_) || "";
336 my $convert_type = shift(@_) || "";
337
338 my $outhandle = $self->{'outhandle'};
339 my $verbosity = $self->{'verbosity'};
340
341 # Determine the full name and path of the output file
342 my $target_file_path = &util::get_tmp_filename() . "." . $target_file_type;
343 push(@{$self->{'tmp_file_paths'}}, $target_file_path);
344
345 # Generate and run the convert command
346 my $convert_command = "convert -interlace plane -verbose $convert_options \"$source_file_path\" \"$target_file_path\"";
347 print $outhandle "$convert_type $convert_command\n" if ($verbosity > 2);
348 my $result = `$convert_command 2>&1`;
349 print $outhandle "$convert_type RESULT = $result\n" if ($verbosity > 2);
350
351 return $result;
352}
353
354
355# The ImagePlug read() function.
356# ImagePlug overrides read() because there is no need to read the actual
357# text of the file in, because the contents of the file is not text...
358#
359# Return number of files processed, undef if can't process
360# Note that $base_dir might be "" and that $file might
361# include directories
362
363sub read {
364 my $self = shift (@_);
365 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
366
367 my $outhandle = $self->{'outhandle'};
368
369 #check process and block exps, smart block, etc
370 my ($block_status,$filename) = $self->read_block(@_);
371 return $block_status if ((!defined $block_status) || ($block_status==0));
372
373 print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
374 print $outhandle "ImagePlug processing $file\n"
375 if $self->{'verbosity'} > 1;
376
377 # None of this works very well on Windows 95/98...
378 if ($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT()) {
379 if ($gli) {
380 print STDERR "<ProcessingError n='$file' r='Windows 95/98 not supported'>\n";
381 }
382 print $outhandle "ImagePlug: Windows 95/98 not supported\n";
383 return -1;
384 }
385
386 # None of this is going to work very well without ImageMagick...
387 if ($self->{'imagemagick_not_installed'}) {
388 if ($gli) {
389 print STDERR "<ProcessingError n='$file' r='ImageMagick not installed'>\n";
390 }
391 print $outhandle "ImagePlug: ImageMagick not installed\n";
392 return -1;
393 }
394
395 #if there's a leading directory name, eat it...
396 $file =~ s/^.*[\/\\]//;
397
398 # create a new document
399 my $doc_obj = new doc ($filename, "indexed_doc");
400 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
401 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
402
403 #run convert to get the thumbnail and extract size and type info
404 my $result = generate_images($self, $filename, $file, $doc_obj);
405
406 if (!defined $result)
407 {
408 if ($gli) {
409 print STDERR "<ProcessingError n='$file'>\n";
410 }
411 print $outhandle "ImagePlug: couldn't process \"$filename\"\n";
412 return -1; # error during processing
413 }
414
415
416 #create an empty text string so we don't break downstream plugins
417 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
418
419 # include any metadata passed in from previous plugins
420 # note that this metadata is associated with the top level section
421 my $section = $doc_obj->get_top_section();
422 $self->extra_metadata ($doc_obj, $section, $metadata);
423
424 # do plugin specific processing of doc_obj
425 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
426 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
427 return -1;
428 }
429
430 # do any automatic metadata extraction
431 $self->auto_extract_metadata ($doc_obj);
432
433 # if we haven't found any Title so far, assign one
434 # this was shifted to here from inside read()
435 $self->title_fallback($doc_obj,$section,$file);
436 # add an OID
437 $doc_obj->set_OID();
438 $doc_obj->add_utf8_text($section, $text);
439
440 # process the document
441 $processor->process($doc_obj);
442
443 # clean up temporary files - we do this here instead of in
444 # generate_images becuase associated files aren't actually copied
445 # until after process has been run.
446 foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}})
447 {
448 if (-e $tmp_file_path)
449 {
450 &util::rm($tmp_file_path);
451 }
452 }
453
454 $self->{'num_processed'}++;
455
456 return 1;
457}
458
459# do plugin specific processing of doc_obj
460sub process {
461 my $self = shift (@_);
462 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
463 my $outhandle = $self->{'outhandle'};
464
465 return 1;
466}
467
4681;
469
470
471
472
473
474
475
476
477
478
479
Note: See TracBrowser for help on using the repository browser.