source: gsdl/trunk/perllib/plugins/ImagePlug.pm@ 15843

Last change on this file since 15843 was 15843, checked in by ak19, 16 years ago

The file URL added to doc.xml as Image and Source metadata is first converted to utf8 using filename_to_metadata, and then subroutine add_utf8_metadata is used rather than add_metadata for adding this URL to the doc.xml. Now images whose filenames use special characters for foreign languages display in the Image view. Generated a GS2 image collection including imgs using special chars on Linux, and viewed it both in Windows and Linux.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.7 KB
Line 
1###########################################################################
2#
3# ImagePlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ImagePlug;
27
28use BasPlug;
29
30use strict;
31no strict 'refs'; # allow filehandles to be variables and viceversa
32
33sub BEGIN {
34 @ImagePlug::ISA = ('BasPlug');
35}
36
37my $arguments =
38 [ { 'name' => "process_exp",
39 'desc' => "{BasPlug.process_exp}",
40 'type' => "regexp",
41 'deft' => &get_default_process_exp(),
42 'reqd' => "no" },
43 { 'name' => "cache_generated_images",
44 'desc' => "{ImagePlug.cache_generated_image}",
45 'type' => "flag",
46 'reqd' => "no" },
47 { 'name' => "noscaleup",
48 'desc' => "{ImagePlug.noscaleup}",
49 'type' => "flag",
50 'reqd' => "no" },
51 { 'name' => "nothumbnail",
52 'desc' => "{ImagePlug.generatethumbnail}",
53 'type' => "flag",
54 'reqd' => "no" },
55 { 'name' => "thumbnailsize",
56 'desc' => "{ImagePlug.thumbnailsize}",
57 'type' => "int",
58 'deft' => "100",
59 'range' => "1,",
60 'reqd' => "no" },
61 { 'name' => "thumbnailtype",
62 'desc' => "{ImagePlug.thumbnailtype}",
63 'type' => "string",
64 'deft' => "gif",
65 'reqd' => "no" },
66 { 'name' => "noscreenview",
67 'desc' => "{ImagePlug.generatescreenview}",
68 'type' => "flag",
69 'reqd' => "no" },
70 { 'name' => "screenviewsize",
71 'desc' => "{ImagePlug.screenviewsize}",
72 'type' => "int",
73 'deft' => "0",
74 'range' => "1,",
75 'reqd' => "no" },
76 { 'name' => "screenviewtype",
77 'desc' => "{ImagePlug.screenviewtype}",
78 'type' => "string",
79 'deft' => "jpg",
80 'reqd' => "no" },
81 { 'name' => "converttotype",
82 'desc' => "{ImagePlug.converttotype}",
83 'type' => "string",
84 'deft' => "",
85 'reqd' => "no" },
86 { 'name' => "minimumsize",
87 'desc' => "{ImagePlug.minimumsize}",
88 'type' => "int",
89 'deft' => "100",
90 'range' => "1,",
91 'reqd' => "no" } ];
92
93my $options = { 'name' => "ImagePlug",
94 'desc' => "{ImagePlug.desc}",
95 'abstract' => "no",
96 'inherits' => "yes",
97 'args' => $arguments };
98
99
100
101sub new {
102 my ($class) = shift (@_);
103 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
104 push(@$pluginlist, $class);
105
106 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
107 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
108
109 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
110 $self->{'tmp_file_paths'} = ();
111
112 # Check that ImageMagick is installed and available on the path (except for Windows 95/98)
113 if (!($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT())) {
114 my $result = `identify 2>&1`;
115 if ($? == -1 || $? == 256) { # Linux and Windows return different values for "program not found"
116 $self->{'imagemagick_not_installed'} = 1;
117 }
118 }
119
120
121 return bless $self, $class;
122}
123
124sub get_default_process_exp {
125 my $self = shift (@_);
126
127 return q^(?i)(\.jpe?g|\.gif|\.png|\.bmp|\.xbm|\.tif?f)$^;
128}
129
130# this makes no sense for images
131sub block_cover_image
132{
133 my $self =shift (@_);
134 my ($filename) = @_;
135
136 return;
137}
138# Create the thumbnail and screenview images, and discover the Image's
139# size, width, and height using the convert utility.
140
141sub generate_images
142{
143 my $self = shift (@_);
144 my $filename = shift (@_); # filename with full path
145 my $file = shift (@_); # filename without path
146 my $doc_obj = shift (@_);
147 my $section = $doc_obj->get_top_section();
148
149 my $verbosity = $self->{'verbosity'};
150 my $outhandle = $self->{'outhandle'};
151
152 # check the filename is okay
153 return 0 if ($file eq "" || $filename eq "");
154
155# Code now extended to quote filenames in 'convert' commnads
156# Allows spaces in filenames, but note needs spaces to be escaped in URL as well
157# if ($filename =~ m/ /) {
158# print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n";
159# return undef;
160# }
161
162 my $minimumsize = $self->{'minimumsize'};
163 if (defined $minimumsize && (-s $filename < $minimumsize)) {
164 print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"
165 if ($verbosity > 1);
166 }
167
168
169 # Convert the image to a new type (if required).
170 my $converttotype = $self->{'converttotype'};
171 my $originalfilename = ""; # only set if we do a conversion
172 my $type = "unknown";
173
174 if ($converttotype ne "" && $filename !~ m/$converttotype$/) {
175 $originalfilename = $filename;
176
177 my $result = $self->convert($originalfilename, $converttotype, "", "");
178 ($filename) = ($result =~ /=>(.*\.$converttotype)/);
179
180 $type = $converttotype;
181 $file =~ s/\..*$/\.$type/;
182 }
183
184
185 # Add the image metadata
186 my $url = $file;
187
188 ##not know why it is required at the first place, it seems all works fine without it, so I comment it out
189 ##$url =~ s/ /%20/g;
190
191 my $utf8_filename_meta = $self->filename_to_metadata($url);
192 $doc_obj->add_utf8_metadata ($section, "Image", $utf8_filename_meta);
193
194 # Also want to set filename as 'Source' metadata to be
195 # consistent with other plugins
196 $doc_obj->add_utf8_metadata ($section, "Source", $utf8_filename_meta);
197
198 my ($image_type, $image_width, $image_height, $image_size)
199 = &identify($filename, $outhandle, $verbosity);
200
201 if ($image_type ne " ") {
202 $type = $image_type;
203 }
204
205 $doc_obj->add_metadata ($section, "FileFormat", $type);
206 $doc_obj->add_metadata ($section, "FileSize", $image_size);
207
208 $doc_obj->add_metadata ($section, "ImageType", $image_type);
209 $doc_obj->add_metadata ($section, "ImageWidth", $image_width);
210 $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
211 $doc_obj->add_metadata ($section, "ImageSize", $image_size);
212 $doc_obj->add_metadata ($section, "NoText", "1");
213
214 $doc_obj->add_metadata ($section, "srclink",
215 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">");
216 $doc_obj->add_metadata ($section, "/srclink", "</a>");
217
218 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\" width=100>");
219
220
221 # Add the image as an associated file
222 $doc_obj->associate_file($filename,$file,"image/$type",$section);
223
224
225 if (!$self->{'nothumbnail'}) {
226
227 # Make the thumbnail image
228 my $thumbnailsize = $self->{'thumbnailsize'} || 100;
229 my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
230
231 # Generate the thumbnail with convert
232 my $result = $self->convert($filename, $thumbnailtype, "-geometry $thumbnailsize" . "x$thumbnailsize", "THUMB");
233 my ($thumbnailfile) = ($result =~ /=>(.*\.$thumbnailtype)/);
234
235 # Add the thumbnail as an associated file ...
236 if (-e "$thumbnailfile") {
237 $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",
238 "image/$thumbnailtype",$section);
239 $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
240 $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");
241
242 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
243 }
244
245 # Extract Thumnail metadata from convert output
246 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
247 $doc_obj->add_metadata ($section, "ThumbWidth", $1);
248 $doc_obj->add_metadata ($section, "ThumbHeight", $2);
249 }
250
251 }
252
253
254 # Make a screen-sized version of the picture if requested
255 if (!$self->{'noscreenview'}) {
256
257 # To do: if the actual image smaller than the screenview size,
258 # we should use the original !
259
260 my $screenviewsize = $self->{'screenviewsize'};
261 my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
262
263 # make the screenview image
264 my $result = $self->convert($filename, $screenviewtype, "-geometry $screenviewsize" . "x$screenviewsize", "SCREEN");
265 my ($screenviewfilename) = ($result =~ /=>(.*\.$screenviewtype)/);
266
267 # get screenview dimensions, size and type
268 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
269 $doc_obj->add_metadata ($section, "ScreenWidth", $1);
270 $doc_obj->add_metadata ($section, "ScreenHeight", $2);
271 }
272 else {
273 $doc_obj->add_metadata ($section, "ScreenWidth", $image_width);
274 $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);
275 }
276
277 #add the screenview as an associated file ...
278 if (-e "$screenviewfilename") {
279 $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",
280 "image/$screenviewtype",$section);
281 $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
282 $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");
283
284 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
285 } else {
286 print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";
287 }
288 }
289
290 return $type;
291
292
293}
294
295
296
297# Discover the characteristics of an image file with the ImageMagick
298# "identify" command.
299
300sub identify {
301 my ($image, $outhandle, $verbosity) = @_;
302
303 # Use the ImageMagick "identify" command to get the file specs
304 my $command = "identify \"$image\" 2>&1";
305 print $outhandle "$command\n" if ($verbosity > 2);
306 my $result = '';
307 $result = `$command`;
308 print $outhandle "$result\n" if ($verbosity > 3);
309
310 # Read the type, width, and height
311 my $type = 'unknown';
312 my $width = 'unknown';
313 my $height = 'unknown';
314
315 my $image_safe = quotemeta $image;
316 if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
317 $type = $1;
318 $width = $2;
319 $height = $3;
320 }
321
322 # Read the size
323 my $size = "unknown";
324 if ($result =~ m/^.* ([0-9]+)b/) {
325 $size = $1;
326 }
327 elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) {
328 $size = 1024 * $1;
329 if (defined($2)) {
330 $size = $size + (1024 * $2);
331 # Truncate size (it isn't going to be very accurate anyway)
332 $size = int($size);
333 }
334 }
335 elsif ($result =~ m/^.* (([0-9]+)(\.([0-9]+))?e\+([0-9]+))(kb|b)?/) {
336 # Deals with file sizes on Linux of type "3.4e+02kb" where e+02 is 1*10^2.
337 # 3.4e+02 therefore evaluates to 3.4 x 1 x 10^2 = 340kb.
338 # Programming languages including Perl know how that 3.4e+02 is a number,
339 # so we don't need to do any calculations.
340 $size = $1*1; # turn the string into a number by multiplying it by 1
341 #if we did $size = $1; $size would be merely the string "3.4e+02"
342 $size = int($size); # truncate size
343 }
344 print $outhandle "file: $image:\t $type, $width, $height, $size\n"
345 if ($verbosity > 2);
346
347 # Return the specs
348 return ($type, $width, $height, $size);
349}
350
351
352sub convert
353{
354 my $self = shift(@_);
355 my $source_file_path = shift(@_);
356 my $target_file_type = shift(@_);
357 my $convert_options = shift(@_) || "";
358 my $convert_type = shift(@_) || "";
359
360 my $outhandle = $self->{'outhandle'};
361 my $verbosity = $self->{'verbosity'};
362
363 # Determine the full name and path of the output file
364 my $target_file_path = &util::get_tmp_filename() . "." . $target_file_type;
365 push(@{$self->{'tmp_file_paths'}}, $target_file_path);
366
367 # Generate and run the convert command
368 my $convert_command = "convert -interlace plane -verbose $convert_options \"$source_file_path\" \"$target_file_path\"";
369 print $outhandle "$convert_type $convert_command\n" if ($verbosity > 2);
370 my $result = `$convert_command 2>&1`;
371 print $outhandle "$convert_type RESULT = $result\n" if ($verbosity > 2);
372
373 return $result;
374}
375
376
377# The ImagePlug read() function.
378# ImagePlug overrides read() because there is no need to read the actual
379# text of the file in, because the contents of the file is not text...
380#
381# Return number of files processed, undef if can't process
382# Note that $base_dir might be "" and that $file might
383# include directories
384
385sub read {
386 my $self = shift (@_);
387 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
388
389 my $outhandle = $self->{'outhandle'};
390
391 #check process and block exps, smart block, etc
392 my ($block_status,$filename) = $self->read_block(@_);
393 return $block_status if ((!defined $block_status) || ($block_status==0));
394
395 print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
396 print $outhandle "ImagePlug processing $file\n"
397 if $self->{'verbosity'} > 1;
398
399 # None of this works very well on Windows 95/98...
400 if ($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT()) {
401 if ($gli) {
402 print STDERR "<ProcessingError n='$file' r='Windows 95/98 not supported'>\n";
403 }
404 print $outhandle "ImagePlug: Windows 95/98 not supported\n";
405 return -1;
406 }
407
408 # None of this is going to work very well without ImageMagick...
409 if ($self->{'imagemagick_not_installed'}) {
410 if ($gli) {
411 print STDERR "<ProcessingError n='$file' r='ImageMagick not installed'>\n";
412 }
413 print $outhandle "ImagePlug: ImageMagick not installed\n";
414 return -1;
415 }
416
417 #if there's a leading directory name, eat it...
418 $file =~ s/^.*[\/\\]//;
419
420 # create a new document
421 my $doc_obj = new doc ($filename, "indexed_doc");
422 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
423 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
424
425 #run convert to get the thumbnail and extract size and type info
426 my $result = generate_images($self, $filename, $file, $doc_obj);
427
428 if (!defined $result)
429 {
430 if ($gli) {
431 print STDERR "<ProcessingError n='$file'>\n";
432 }
433 print $outhandle "ImagePlug: couldn't process \"$filename\"\n";
434 return -1; # error during processing
435 }
436
437
438 #create an empty text string so we don't break downstream plugins
439 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
440
441 # include any metadata passed in from previous plugins
442 # note that this metadata is associated with the top level section
443 my $section = $doc_obj->get_top_section();
444 $self->extra_metadata ($doc_obj, $section, $metadata);
445
446 # do plugin specific processing of doc_obj
447 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
448 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
449 return -1;
450 }
451
452 # do any automatic metadata extraction
453 $self->auto_extract_metadata ($doc_obj);
454
455 # if we haven't found any Title so far, assign one
456 # this was shifted to here from inside read()
457 $self->title_fallback($doc_obj,$section,$file);
458 # add an OID
459 $doc_obj->set_OID();
460 $doc_obj->add_utf8_text($section, $text);
461
462 # process the document
463 $processor->process($doc_obj);
464
465 # clean up temporary files - we do this here instead of in
466 # generate_images becuase associated files aren't actually copied
467 # until after process has been run.
468 foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}})
469 {
470 if (-e $tmp_file_path)
471 {
472 &util::rm($tmp_file_path);
473 }
474 }
475
476 $self->{'num_processed'}++;
477
478 return 1;
479}
480
481# do plugin specific processing of doc_obj
482sub process {
483 my $self = shift (@_);
484 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
485 my $outhandle = $self->{'outhandle'};
486
487 return 1;
488}
489
4901;
491
492
493
494
495
496
497
498
499
500
501
Note: See TracBrowser for help on using the repository browser.