source: gsdl/trunk/perllib/plugins/ImagePlug.pm@ 14951

Last change on this file since 14951 was 14951, checked in by mdewsnip, 16 years ago

All calls to convert now go through one "convert()" function, in preparation for adding image caching.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.8 KB
Line 
1###########################################################################
2#
3# ImagePlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ImagePlug;
27
28use BasPlug;
29
30use strict;
31no strict 'refs'; # allow filehandles to be variables and viceversa
32
33sub BEGIN {
34 @ImagePlug::ISA = ('BasPlug');
35}
36
37my $arguments =
38 [ { 'name' => "process_exp",
39 'desc' => "{BasPlug.process_exp}",
40 'type' => "regexp",
41 'deft' => &get_default_process_exp(),
42 'reqd' => "no" },
43 { 'name' => "noscaleup",
44 'desc' => "{ImagePlug.noscaleup}",
45 'type' => "flag",
46 'reqd' => "no" },
47 { 'name' => "thumbnailsize",
48 'desc' => "{ImagePlug.thumbnailsize}",
49 'type' => "int",
50 'deft' => "100",
51 'range' => "1,",
52 'reqd' => "no" },
53 { 'name' => "thumbnailtype",
54 'desc' => "{ImagePlug.thumbnailtype}",
55 'type' => "string",
56 'deft' => "gif",
57 'reqd' => "no" },
58 { 'name' => "screenviewsize",
59 'desc' => "{ImagePlug.screenviewsize}",
60 'type' => "int",
61 'deft' => "0",
62 'range' => "1,",
63 'reqd' => "no" },
64 { 'name' => "screenviewtype",
65 'desc' => "{ImagePlug.screenviewtype}",
66 'type' => "string",
67 'deft' => "jpg",
68 'reqd' => "no" },
69 { 'name' => "converttotype",
70 'desc' => "{ImagePlug.converttotype}",
71 'type' => "string",
72 'deft' => "",
73 'reqd' => "no" },
74 { 'name' => "minimumsize",
75 'desc' => "{ImagePlug.minimumsize}",
76 'type' => "int",
77 'deft' => "100",
78 'range' => "1,",
79 'reqd' => "no" } ];
80
81my $options = { 'name' => "ImagePlug",
82 'desc' => "{ImagePlug.desc}",
83 'abstract' => "no",
84 'inherits' => "yes",
85 'args' => $arguments };
86
87
88
89sub new {
90 my ($class) = shift (@_);
91 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
92 push(@$pluginlist, $class);
93
94 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
95 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
96
97 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
98
99 # Check that ImageMagick is installed and available on the path (except for Windows 95/98)
100 if (!($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT())) {
101 my $result = `identify 2>&1`;
102 if ($? == -1 || $? == 256) { # Linux and Windows return different values for "program not found"
103 $self->{'imagemagick_not_installed'} = 1;
104 }
105 }
106
107
108 return bless $self, $class;
109}
110
111sub get_default_process_exp {
112 my $self = shift (@_);
113
114 return q^(?i)(\.jpe?g|\.gif|\.png|\.bmp|\.xbm|\.tif?f)$^;
115}
116
117# this makes no sense for images
118sub block_cover_image
119{
120 my $self =shift (@_);
121 my ($filename) = @_;
122
123 return;
124}
125# Create the thumbnail and screenview images, and discover the Image's
126# size, width, and height using the convert utility.
127
128sub generate_images
129{
130 my $self = shift (@_);
131 my $filename = shift (@_); # filename with full path
132 my $file = shift (@_); # filename without path
133 my $doc_obj = shift (@_);
134 my $section = $doc_obj->get_top_section();
135
136 my $verbosity = $self->{'verbosity'};
137 my $outhandle = $self->{'outhandle'};
138
139 # check the filename is okay
140 return 0 if ($file eq "" || $filename eq "");
141
142# Code now extended to quote filenames in 'convert' commnads
143# Allows spaces in filenames, but note needs spaces to be escaped in URL as well
144# if ($filename =~ m/ /) {
145# print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n";
146# return undef;
147# }
148
149 my $minimumsize = $self->{'minimumsize'};
150 if (defined $minimumsize && (-s $filename < $minimumsize)) {
151 print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"
152 if ($verbosity > 1);
153 }
154
155
156 # Convert the image to a new type (if required).
157 my $converttotype = $self->{'converttotype'};
158 my $originalfilename = ""; # only set if we do a conversion
159 my $type = "unknown";
160
161 if ($converttotype ne "" && $filename !~ m/$converttotype$/) {
162 $originalfilename = $filename;
163 $filename = &util::get_tmp_filename() . ".$converttotype";
164 $self->{'tmp_filename'} = $filename;
165
166 my $result = $self->convert($originalfilename, $filename, "", "");
167
168 $type = $converttotype;
169 $file =~ s/\..*$/\.$type/;
170 }
171
172
173 # Add the image metadata
174 my $url = $file;
175
176 ##not know why it is required at the first place, it seems all works fine without it, so I comment it out
177 ##$url =~ s/ /%20/g;
178
179 $doc_obj->add_metadata ($section, "Image", $url);
180
181 # Also want to set filename as 'Source' metadata to be
182 # consistent with other plugins
183 $doc_obj->add_metadata ($section, "Source", $url);
184
185 my ($image_type, $image_width, $image_height, $image_size)
186 = &identify($filename, $outhandle, $verbosity);
187
188 if ($image_type ne " ") {
189 $type = $image_type;
190 }
191
192 $doc_obj->add_metadata ($section, "FileFormat", $type);
193 $doc_obj->add_metadata ($section, "FileSize", $image_size);
194
195 $doc_obj->add_metadata ($section, "ImageType", $image_type);
196 $doc_obj->add_metadata ($section, "ImageWidth", $image_width);
197 $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
198 $doc_obj->add_metadata ($section, "ImageSize", $image_size);
199 $doc_obj->add_metadata ($section, "NoText", "1");
200
201 $doc_obj->add_metadata ($section, "srclink",
202 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">");
203 $doc_obj->add_metadata ($section, "/srclink", "</a>");
204
205 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\" width=100>");
206
207
208 # Add the image as an associated file
209 $doc_obj->associate_file($filename,$file,"image/$type",$section);
210
211
212 # Make the thumbnail image
213 my $thumbnailsize = $self->{'thumbnailsize'} || 100;
214 my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
215
216 my $thumbnailfile = &util::get_tmp_filename() . ".$thumbnailtype";
217 $self->{'tmp_filename2'} = $thumbnailfile;
218
219 # Generate the thumbnail with convert
220 my $result = $self->convert($filename, $thumbnailfile, "-geometry $thumbnailsize" . "x$thumbnailsize", "THUMB");
221
222 # Add the thumbnail as an associated file ...
223 if (-e "$thumbnailfile") {
224 $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",
225 "image/$thumbnailtype",$section);
226 $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
227 $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");
228
229 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
230 }
231
232 # Extract Thumnail metadata from convert output
233 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
234 $doc_obj->add_metadata ($section, "ThumbWidth", $1);
235 $doc_obj->add_metadata ($section, "ThumbHeight", $2);
236 }
237
238 # Make a screen-sized version of the picture if requested
239 if ($self->{'screenviewsize'}) {
240
241 # To do: if the actual image smaller than the screenview size,
242 # we should use the original !
243
244 my $screenviewsize = $self->{'screenviewsize'};
245 my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
246 my $screenviewfilename = &util::get_tmp_filename() . ".$screenviewtype";
247 $self->{'tmp_filename3'} = $screenviewfilename;
248
249 # make the screenview image
250 my $result = $self->convert($filename, $screenviewfilename, "-geometry $screenviewsize" . "x$screenviewsize", "SCREEN");
251
252 # get screenview dimensions, size and type
253 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
254 $doc_obj->add_metadata ($section, "ScreenWidth", $1);
255 $doc_obj->add_metadata ($section, "ScreenHeight", $2);
256 }
257 else {
258 $doc_obj->add_metadata ($section, "ScreenWidth", $image_width);
259 $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);
260 }
261
262 #add the screenview as an associated file ...
263 if (-e "$screenviewfilename") {
264 $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",
265 "image/$screenviewtype",$section);
266 $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
267 $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");
268
269 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
270 } else {
271 print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";
272 }
273 }
274
275 return $type;
276
277
278}
279
280
281
282# Discover the characteristics of an image file with the ImageMagick
283# "identify" command.
284
285sub identify {
286 my ($image, $outhandle, $verbosity) = @_;
287
288 # Use the ImageMagick "identify" command to get the file specs
289 my $command = "identify \"$image\" 2>&1";
290 print $outhandle "$command\n" if ($verbosity > 2);
291 my $result = '';
292 $result = `$command`;
293 print $outhandle "$result\n" if ($verbosity > 3);
294
295 # Read the type, width, and height
296 my $type = 'unknown';
297 my $width = 'unknown';
298 my $height = 'unknown';
299
300 my $image_safe = quotemeta $image;
301 if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
302 $type = $1;
303 $width = $2;
304 $height = $3;
305 }
306
307 # Read the size
308 my $size = "unknown";
309 if ($result =~ m/^.* ([0-9]+)b/) {
310 $size = $1;
311 }
312 elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) {
313 $size = 1024 * $1;
314 if (defined($2)) {
315 $size = $size + (1024 * $2);
316 # Truncate size (it isn't going to be very accurate anyway)
317 $size = int($size);
318 }
319 }
320
321 print $outhandle "file: $image:\t $type, $width, $height, $size\n"
322 if ($verbosity > 2);
323
324 # Return the specs
325 return ($type, $width, $height, $size);
326}
327
328
329sub convert
330{
331 my $self = shift(@_);
332 my $source_file_path = shift(@_);
333 my $target_file_path = shift(@_);
334 my $convert_options = shift(@_) || "";
335 my $convert_type = shift(@_) || "";
336
337 my $outhandle = $self->{'outhandle'};
338 my $verbosity = $self->{'verbosity'};
339
340 my $convert_command = "convert -interlace plane -verbose $convert_options \"$source_file_path\" \"$target_file_path\"";
341 print $outhandle "$convert_type $convert_command\n" if ($verbosity > 2);
342 my $result = `$convert_command 2>&1`;
343 print $outhandle "$convert_type RESULT = $result\n" if ($verbosity > 2);
344
345 return $result;
346}
347
348
349# The ImagePlug read() function.
350# ImagePlug overrides read() because there is no need to read the actual
351# text of the file in, because the contents of the file is not text...
352#
353# Return number of files processed, undef if can't process
354# Note that $base_dir might be "" and that $file might
355# include directories
356
357sub read {
358 my $self = shift (@_);
359 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
360
361 my $outhandle = $self->{'outhandle'};
362
363 #check process and block exps, smart block, etc
364 my ($block_status,$filename) = $self->read_block(@_);
365 return $block_status if ((!defined $block_status) || ($block_status==0));
366
367 print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
368 print $outhandle "ImagePlug processing $file\n"
369 if $self->{'verbosity'} > 1;
370
371 # None of this works very well on Windows 95/98...
372 if ($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT()) {
373 if ($gli) {
374 print STDERR "<ProcessingError n='$file' r='Windows 95/98 not supported'>\n";
375 }
376 print $outhandle "ImagePlug: Windows 95/98 not supported\n";
377 return -1;
378 }
379
380 # None of this is going to work very well without ImageMagick...
381 if ($self->{'imagemagick_not_installed'}) {
382 if ($gli) {
383 print STDERR "<ProcessingError n='$file' r='ImageMagick not installed'>\n";
384 }
385 print $outhandle "ImagePlug: ImageMagick not installed\n";
386 return -1;
387 }
388
389 #if there's a leading directory name, eat it...
390 $file =~ s/^.*[\/\\]//;
391
392 # create a new document
393 my $doc_obj = new doc ($filename, "indexed_doc");
394 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
395 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
396
397 #run convert to get the thumbnail and extract size and type info
398 my $result = generate_images($self, $filename, $file, $doc_obj);
399
400 if (!defined $result)
401 {
402 if ($gli) {
403 print STDERR "<ProcessingError n='$file'>\n";
404 }
405 print $outhandle "ImagePlug: couldn't process \"$filename\"\n";
406 return -1; # error during processing
407 }
408
409
410 #create an empty text string so we don't break downstream plugins
411 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
412
413 # include any metadata passed in from previous plugins
414 # note that this metadata is associated with the top level section
415 my $section = $doc_obj->get_top_section();
416 $self->extra_metadata ($doc_obj, $section, $metadata);
417
418 # do plugin specific processing of doc_obj
419 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
420 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
421 return -1;
422 }
423
424 # do any automatic metadata extraction
425 $self->auto_extract_metadata ($doc_obj);
426
427 # if we haven't found any Title so far, assign one
428 # this was shifted to here from inside read()
429 $self->title_fallback($doc_obj,$section,$file);
430 # add an OID
431 $doc_obj->set_OID();
432 $doc_obj->add_utf8_text($section, $text);
433
434 # process the document
435 $processor->process($doc_obj);
436
437 # clean up temporary files - we do this here instead of in
438 # generate_images becuase associated files aren't actually copied
439 # until after process has been run.
440 if (defined $self->{'tmp_filename'} &&
441 -e $self->{'tmp_filename'}) {
442 &util::rm($self->{'tmp_filename'})
443 }
444
445 if (defined $self->{'tmp_filename2'} &&
446 -e $self->{'tmp_filename2'}) {
447 &util::rm($self->{'tmp_filename2'})
448 }
449 if (defined $self->{'tmp_filename3'} &&
450 -e $self->{'tmp_filename3'}) {
451 &util::rm($self->{'tmp_filename3'})
452 }
453
454 $self->{'num_processed'}++;
455
456 return 1;
457}
458
459# do plugin specific processing of doc_obj
460sub process {
461 my $self = shift (@_);
462 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
463 my $outhandle = $self->{'outhandle'};
464
465 return 1;
466}
467
4681;
469
470
471
472
473
474
475
476
477
478
479
Note: See TracBrowser for help on using the repository browser.