source: gsdl/trunk/perllib/plugins/ImagePlug.pm@ 14952

Last change on this file since 14952 was 14952, checked in by mdewsnip, 16 years ago

Tidied up the code for cleaning up the temporary files, in preparation for adding image caching.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.6 KB
RevLine 
[1733]1###########################################################################
2#
3# ImagePlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ImagePlug;
27
28use BasPlug;
29
[10254]30use strict;
31no strict 'refs'; # allow filehandles to be variables and viceversa
32
[1733]33sub BEGIN {
[10254]34 @ImagePlug::ISA = ('BasPlug');
[1733]35}
36
[4744]37my $arguments =
38 [ { 'name' => "process_exp",
[4873]39 'desc' => "{BasPlug.process_exp}",
[6408]40 'type' => "regexp",
[4744]41 'deft' => &get_default_process_exp(),
42 'reqd' => "no" },
43 { 'name' => "noscaleup",
[4873]44 'desc' => "{ImagePlug.noscaleup}",
[4744]45 'type' => "flag",
46 'reqd' => "no" },
47 { 'name' => "thumbnailsize",
[4873]48 'desc' => "{ImagePlug.thumbnailsize}",
[4744]49 'type' => "int",
50 'deft' => "100",
[10218]51 'range' => "1,",
[4744]52 'reqd' => "no" },
53 { 'name' => "thumbnailtype",
[4873]54 'desc' => "{ImagePlug.thumbnailtype}",
[4744]55 'type' => "string",
56 'deft' => "gif",
57 'reqd' => "no" },
58 { 'name' => "screenviewsize",
[4873]59 'desc' => "{ImagePlug.screenviewsize}",
[4744]60 'type' => "int",
61 'deft' => "0",
[10218]62 'range' => "1,",
[4744]63 'reqd' => "no" },
64 { 'name' => "screenviewtype",
[4873]65 'desc' => "{ImagePlug.screenviewtype}",
[4744]66 'type' => "string",
67 'deft' => "jpg",
68 'reqd' => "no" },
69 { 'name' => "converttotype",
[4873]70 'desc' => "{ImagePlug.converttotype}",
[4744]71 'type' => "string",
72 'deft' => "",
73 'reqd' => "no" },
74 { 'name' => "minimumsize",
[4873]75 'desc' => "{ImagePlug.minimumsize}",
[4744]76 'type' => "int",
77 'deft' => "100",
[10218]78 'range' => "1,",
[4744]79 'reqd' => "no" } ];
[1758]80
[3540]81my $options = { 'name' => "ImagePlug",
[5680]82 'desc' => "{ImagePlug.desc}",
[6408]83 'abstract' => "no",
[3540]84 'inherits' => "yes",
85 'args' => $arguments };
86
87
[1744]88
[1733]89sub new {
[10218]90 my ($class) = shift (@_);
91 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
92 push(@$pluginlist, $class);
[4724]93
[10218]94 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
95 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
[2230]96
[12169]97 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
[14952]98 $self->{'tmp_file_paths'} = ();
[1744]99
[9574]100 # Check that ImageMagick is installed and available on the path (except for Windows 95/98)
101 if (!($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT())) {
102 my $result = `identify 2>&1`;
103 if ($? == -1 || $? == 256) { # Linux and Windows return different values for "program not found"
104 $self->{'imagemagick_not_installed'} = 1;
105 }
[8145]106 }
107
[13243]108
[1733]109 return bless $self, $class;
110}
111
112sub get_default_process_exp {
113 my $self = shift (@_);
114
[1758]115 return q^(?i)(\.jpe?g|\.gif|\.png|\.bmp|\.xbm|\.tif?f)$^;
[1733]116}
117
[9067]118# this makes no sense for images
119sub block_cover_image
120{
121 my $self =shift (@_);
122 my ($filename) = @_;
[2230]123
[9067]124 return;
125}
[2230]126# Create the thumbnail and screenview images, and discover the Image's
127# size, width, and height using the convert utility.
128
[14950]129sub generate_images
130{
[1733]131 my $self = shift (@_);
[2230]132 my $filename = shift (@_); # filename with full path
133 my $file = shift (@_); # filename without path
[1733]134 my $doc_obj = shift (@_);
[1744]135 my $section = $doc_obj->get_top_section();
[2230]136
137 my $verbosity = $self->{'verbosity'};
138 my $outhandle = $self->{'outhandle'};
[1733]139
[2230]140 # check the filename is okay
141 return 0 if ($file eq "" || $filename eq "");
142
[3307]143# Code now extended to quote filenames in 'convert' commnads
144# Allows spaces in filenames, but note needs spaces to be escaped in URL as well
145# if ($filename =~ m/ /) {
146# print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n";
147# return undef;
148# }
[1744]149
[1758]150 my $minimumsize = $self->{'minimumsize'};
[2230]151 if (defined $minimumsize && (-s $filename < $minimumsize)) {
152 print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"
153 if ($verbosity > 1);
154 }
155
[4724]156
[2230]157 # Convert the image to a new type (if required).
[1744]158 my $converttotype = $self->{'converttotype'};
[2230]159 my $originalfilename = ""; # only set if we do a conversion
[1733]160 my $type = "unknown";
[1758]161
[13879]162 if ($converttotype ne "" && $filename !~ m/$converttotype$/) {
[1744]163 $originalfilename = $filename;
[2230]164 $filename = &util::get_tmp_filename() . ".$converttotype";
165
[14951]166 my $result = $self->convert($originalfilename, $filename, "", "");
[2230]167
[1744]168 $type = $converttotype;
[13879]169 $file =~ s/\..*$/\.$type/;
[1733]170 }
[1744]171
[4724]172
[3137]173 # Add the image metadata
[3307]174 my $url = $file;
[13542]175
176 ##not know why it is required at the first place, it seems all works fine without it, so I comment it out
177 ##$url =~ s/ /%20/g;
[3307]178
179 $doc_obj->add_metadata ($section, "Image", $url);
[3517]180
181 # Also want to set filename as 'Source' metadata to be
182 # consistent with other plugins
183 $doc_obj->add_metadata ($section, "Source", $url);
184
[3137]185 my ($image_type, $image_width, $image_height, $image_size)
186 = &identify($filename, $outhandle, $verbosity);
[2230]187
[8121]188 if ($image_type ne " ") {
189 $type = $image_type;
190 }
191
192 $doc_obj->add_metadata ($section, "FileFormat", $type);
[8166]193 $doc_obj->add_metadata ($section, "FileSize", $image_size);
[8121]194
[3137]195 $doc_obj->add_metadata ($section, "ImageType", $image_type);
196 $doc_obj->add_metadata ($section, "ImageWidth", $image_width);
197 $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
198 $doc_obj->add_metadata ($section, "ImageSize", $image_size);
[14117]199 $doc_obj->add_metadata ($section, "NoText", "1");
[3137]200
[4724]201 $doc_obj->add_metadata ($section, "srclink",
[11834]202 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">");
[4724]203 $doc_obj->add_metadata ($section, "/srclink", "</a>");
204
[11834]205 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\" width=100>");
[4724]206
207
[3137]208 # Add the image as an associated file
209 $doc_obj->associate_file($filename,$file,"image/$type",$section);
210
[4724]211
[2230]212 # Make the thumbnail image
[3137]213 my $thumbnailsize = $self->{'thumbnailsize'} || 100;
[2230]214 my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
[4724]215
[3137]216 my $thumbnailfile = &util::get_tmp_filename() . ".$thumbnailtype";
[1733]217
[2230]218 # Generate the thumbnail with convert
[14951]219 my $result = $self->convert($filename, $thumbnailfile, "-geometry $thumbnailsize" . "x$thumbnailsize", "THUMB");
[2230]220
221 # Add the thumbnail as an associated file ...
[3137]222 if (-e "$thumbnailfile") {
223 $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",
[2230]224 "image/$thumbnailtype",$section);
225 $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
226 $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");
[4724]227
[11834]228 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
[2230]229 }
230
[3137]231 # Extract Thumnail metadata from convert output
232 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
233 $doc_obj->add_metadata ($section, "ThumbWidth", $1);
234 $doc_obj->add_metadata ($section, "ThumbHeight", $2);
[1733]235 }
[1758]236
[2230]237 # Make a screen-sized version of the picture if requested
238 if ($self->{'screenviewsize'}) {
239
[3137]240 # To do: if the actual image smaller than the screenview size,
241 # we should use the original !
242
[2230]243 my $screenviewsize = $self->{'screenviewsize'};
244 my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
245 my $screenviewfilename = &util::get_tmp_filename() . ".$screenviewtype";
246
247 # make the screenview image
[14951]248 my $result = $self->convert($filename, $screenviewfilename, "-geometry $screenviewsize" . "x$screenviewsize", "SCREEN");
[2230]249
250 # get screenview dimensions, size and type
251 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
252 $doc_obj->add_metadata ($section, "ScreenWidth", $1);
253 $doc_obj->add_metadata ($section, "ScreenHeight", $2);
254 }
[5078]255 else {
256 $doc_obj->add_metadata ($section, "ScreenWidth", $image_width);
257 $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);
258 }
259
[2230]260 #add the screenview as an associated file ...
261 if (-e "$screenviewfilename") {
262 $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",
263 "image/$screenviewtype",$section);
264 $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
265 $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");
[4724]266
[11834]267 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
[2230]268 } else {
269 print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";
270 }
271 }
272
[1733]273 return $type;
[4724]274
275
[3137]276}
[2230]277
278
[4724]279
[3137]280# Discover the characteristics of an image file with the ImageMagick
281# "identify" command.
282
283sub identify {
284 my ($image, $outhandle, $verbosity) = @_;
285
286 # Use the ImageMagick "identify" command to get the file specs
[5845]287 my $command = "identify \"$image\" 2>&1";
[3137]288 print $outhandle "$command\n" if ($verbosity > 2);
289 my $result = '';
290 $result = `$command`;
291 print $outhandle "$result\n" if ($verbosity > 3);
292
293 # Read the type, width, and height
294 my $type = 'unknown';
295 my $width = 'unknown';
296 my $height = 'unknown';
297
[4790]298 my $image_safe = quotemeta $image;
299 if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
[3137]300 $type = $1;
301 $width = $2;
302 $height = $3;
303 }
[5103]304
[3137]305 # Read the size
306 my $size = "unknown";
307 if ($result =~ m/^.* ([0-9]+)b/) {
308 $size = $1;
[5103]309 }
[5845]310 elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) {
[3137]311 $size = 1024 * $1;
[5103]312 if (defined($2)) {
313 $size = $size + (1024 * $2);
314 # Truncate size (it isn't going to be very accurate anyway)
315 $size = int($size);
316 }
[3137]317 }
318
319 print $outhandle "file: $image:\t $type, $width, $height, $size\n"
320 if ($verbosity > 2);
321
322 # Return the specs
323 return ($type, $width, $height, $size);
[1733]324}
325
326
[14951]327sub convert
328{
329 my $self = shift(@_);
330 my $source_file_path = shift(@_);
331 my $target_file_path = shift(@_);
332 my $convert_options = shift(@_) || "";
333 my $convert_type = shift(@_) || "";
334
335 my $outhandle = $self->{'outhandle'};
336 my $verbosity = $self->{'verbosity'};
337
338 my $convert_command = "convert -interlace plane -verbose $convert_options \"$source_file_path\" \"$target_file_path\"";
339 print $outhandle "$convert_type $convert_command\n" if ($verbosity > 2);
340 my $result = `$convert_command 2>&1`;
341 print $outhandle "$convert_type RESULT = $result\n" if ($verbosity > 2);
342
[14952]343 push(@{$self->{'tmp_file_paths'}}, $target_file_path);
344
[14951]345 return $result;
346}
347
348
[11090]349# The ImagePlug read() function.
[1733]350# ImagePlug overrides read() because there is no need to read the actual
351# text of the file in, because the contents of the file is not text...
352#
353# Return number of files processed, undef if can't process
354# Note that $base_dir might be "" and that $file might
355# include directories
356
357sub read {
358 my $self = shift (@_);
[9853]359 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
[1733]360
[2230]361 my $outhandle = $self->{'outhandle'};
362
[11090]363 #check process and block exps, smart block, etc
364 my ($block_status,$filename) = $self->read_block(@_);
365 return $block_status if ((!defined $block_status) || ($block_status==0));
[6332]366
367 print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
[9960]368 print $outhandle "ImagePlug processing $file\n"
[1758]369 if $self->{'verbosity'} > 1;
370
[9585]371 # None of this works very well on Windows 95/98...
372 if ($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT()) {
373 if ($gli) {
374 print STDERR "<ProcessingError n='$file' r='Windows 95/98 not supported'>\n";
375 }
[9703]376 print $outhandle "ImagePlug: Windows 95/98 not supported\n";
[9585]377 return -1;
378 }
379
380 # None of this is going to work very well without ImageMagick...
381 if ($self->{'imagemagick_not_installed'}) {
382 if ($gli) {
383 print STDERR "<ProcessingError n='$file' r='ImageMagick not installed'>\n";
384 }
[9703]385 print $outhandle "ImagePlug: ImageMagick not installed\n";
[9585]386 return -1;
387 }
388
[2230]389 #if there's a leading directory name, eat it...
390 $file =~ s/^.*[\/\\]//;
[1733]391
392 # create a new document
393 my $doc_obj = new doc ($filename, "indexed_doc");
[12270]394 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
[7508]395 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
[2327]396
[1733]397 #run convert to get the thumbnail and extract size and type info
[14950]398 my $result = generate_images($self, $filename, $file, $doc_obj);
[1744]399
[1758]400 if (!defined $result)
[1744]401 {
[9585]402 if ($gli) {
403 print STDERR "<ProcessingError n='$file'>\n";
404 }
[9703]405 print $outhandle "ImagePlug: couldn't process \"$filename\"\n";
[7362]406 return -1; # error during processing
[1733]407 }
408
[13243]409
[13269]410 #create an empty text string so we don't break downstream plugins
411 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
412
[1733]413 # include any metadata passed in from previous plugins
414 # note that this metadata is associated with the top level section
[1744]415 my $section = $doc_obj->get_top_section();
[1733]416 $self->extra_metadata ($doc_obj, $section, $metadata);
417
418 # do plugin specific processing of doc_obj
[9585]419 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
420 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
421 return -1;
422 }
[1733]423
424 # do any automatic metadata extraction
425 $self->auto_extract_metadata ($doc_obj);
426
[8350]427 # if we haven't found any Title so far, assign one
428 # this was shifted to here from inside read()
429 $self->title_fallback($doc_obj,$section,$file);
[1733]430 # add an OID
431 $doc_obj->set_OID();
[13243]432 $doc_obj->add_utf8_text($section, $text);
[1733]433
434 # process the document
435 $processor->process($doc_obj);
436
[2230]437 # clean up temporary files - we do this here instead of in
[14950]438 # generate_images becuase associated files aren't actually copied
[2230]439 # until after process has been run.
[14952]440 foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}})
441 {
442 if (-e $tmp_file_path)
443 {
444 &util::rm($tmp_file_path);
445 }
[1733]446 }
[13243]447
[3307]448 $self->{'num_processed'}++;
449
[2207]450 return 1;
[1733]451}
452
453# do plugin specific processing of doc_obj
454sub process {
455 my $self = shift (@_);
456 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
457 my $outhandle = $self->{'outhandle'};
458
459 return 1;
460}
461
4621;
[4724]463
464
465
466
467
468
469
470
471
472
473
Note: See TracBrowser for help on using the repository browser.