source: trunk/gsdl/perllib/plugins/ImagePlug.pm@ 7504

Last change on this file since 7504 was 7504, checked in by davidb, 20 years ago

ImagePlug, MP3Plug, UnknownPlug modified to set Title metadata based
on filename if no explicit Title metadata given. Also FileFormat
metadata is now also set to indicate type of file processed. To
share some of the functionality introduced some of the new code is
located in BasPlug.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.8 KB
RevLine 
[1733]1###########################################################################
2#
3# ImagePlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ImagePlug;
27
28use BasPlug;
29
30sub BEGIN {
31 @ISA = ('BasPlug');
32}
33
[4744]34my $arguments =
35 [ { 'name' => "process_exp",
[4873]36 'desc' => "{BasPlug.process_exp}",
[6408]37 'type' => "regexp",
[4744]38 'deft' => &get_default_process_exp(),
39 'reqd' => "no" },
40 { 'name' => "noscaleup",
[4873]41 'desc' => "{ImagePlug.noscaleup}",
[4744]42 'type' => "flag",
43 'reqd' => "no" },
44 { 'name' => "thumbnailsize",
[4873]45 'desc' => "{ImagePlug.thumbnailsize}",
[4744]46 'type' => "int",
47 'deft' => "100",
48 'reqd' => "no" },
49 { 'name' => "thumbnailtype",
[4873]50 'desc' => "{ImagePlug.thumbnailtype}",
[4744]51 'type' => "string",
52 'deft' => "gif",
53 'reqd' => "no" },
54 { 'name' => "screenviewsize",
[4873]55 'desc' => "{ImagePlug.screenviewsize}",
[4744]56 'type' => "int",
57 'deft' => "0",
58 'reqd' => "no" },
59 { 'name' => "screenviewtype",
[4873]60 'desc' => "{ImagePlug.screenviewtype}",
[4744]61 'type' => "string",
62 'deft' => "jpg",
63 'reqd' => "no" },
64 { 'name' => "converttotype",
[4873]65 'desc' => "{ImagePlug.converttotype}",
[4744]66 'type' => "string",
67 'deft' => "",
68 'reqd' => "no" },
69 { 'name' => "minimumsize",
[4873]70 'desc' => "{ImagePlug.minimumsize}",
[4744]71 'type' => "int",
72 'deft' => "100",
73 'reqd' => "no" } ];
[1758]74
[3540]75my $options = { 'name' => "ImagePlug",
[5680]76 'desc' => "{ImagePlug.desc}",
[6408]77 'abstract' => "no",
[3540]78 'inherits' => "yes",
79 'args' => $arguments };
80
81
[1744]82
[1733]83sub new {
84 my ($class) = @_;
[1744]85 my $plugin_name = shift (@_);
[1733]86 my $self = new BasPlug ("ImagePlug", @_);
[5924]87 $self->{'plugin_type'} = "ImagePlug";
[3540]88 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
89 my $option_list = $self->{'option_list'};
90 push( @{$option_list}, $options );
[4724]91
[1744]92 if (!parsargv::parse(\@_,
93 q^noscaleup^, \$self->{'noscaleup'},
94 q^converttotype/.*/^, \$self->{'converttotype'},
[2230]95 q^minimumsize/[0-9]*/100^, \$self->{'minimumsize'},
96
[1744]97 q^thumbnailsize/[0-9]*/100^, \$self->{'thumbnailsize'},
[2230]98 q^thumbnailtype/.*/gif^, \$self->{'thumbnailtype'},
99 q^screenviewsize/[0-9]*/0^, \$self->{'screenviewsize'},
100 q^screenviewtype/.*/jpg^, \$self->{'screenviewtype'},
[1744]101 "allow_extra_options")) {
102
[2207]103 print STDERR "\nImagePlug uses an incorrect option.\n";
104 print STDERR "Check your collect.cfg configuration file.\n";
[4873]105 $self->print_txt_usage(""); # Use default resource bundle
[1744]106 die "\n";
107 }
108
[1733]109 return bless $self, $class;
110}
111
112sub get_default_process_exp {
113 my $self = shift (@_);
114
[1758]115 return q^(?i)(\.jpe?g|\.gif|\.png|\.bmp|\.xbm|\.tif?f)$^;
[1733]116}
117
[2230]118
119# Create the thumbnail and screenview images, and discover the Image's
120# size, width, and height using the convert utility.
121
[1733]122sub run_convert {
123 my $self = shift (@_);
[2230]124 my $filename = shift (@_); # filename with full path
125 my $file = shift (@_); # filename without path
[1733]126 my $doc_obj = shift (@_);
[1744]127 my $section = $doc_obj->get_top_section();
[2230]128
129 my $verbosity = $self->{'verbosity'};
130 my $outhandle = $self->{'outhandle'};
[1733]131
[2230]132 # check the filename is okay
133 return 0 if ($file eq "" || $filename eq "");
134
[3307]135# Code now extended to quote filenames in 'convert' commnads
136# Allows spaces in filenames, but note needs spaces to be escaped in URL as well
137# if ($filename =~ m/ /) {
138# print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n";
139# return undef;
140# }
[1744]141
[1758]142 my $minimumsize = $self->{'minimumsize'};
[2230]143 if (defined $minimumsize && (-s $filename < $minimumsize)) {
144 print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"
145 if ($verbosity > 1);
146 }
147
[4724]148
[2230]149 # Convert the image to a new type (if required).
[1744]150 my $converttotype = $self->{'converttotype'};
[2230]151 my $originalfilename = ""; # only set if we do a conversion
[1733]152 my $type = "unknown";
[1758]153
154 if ($converttotype ne "" && $filename =~ m/$converttotype$/) {
[2230]155
[1744]156 $originalfilename = $filename;
[2230]157 $filename = &util::get_tmp_filename() . ".$converttotype";
[1744]158 $self->{'tmp_filename'} = $filename;
[2230]159
[3307]160 my $command = "convert -interlace plane -verbose \"$originalfilename\" \"$filename\"";
[2230]161 print $outhandle "$command\n" if ($verbosity > 2);
162 my $result = '';
[1744]163 $result = `$command`;
[4724]164 print $outhandle "RESULT = $result\n" if ($verbosity > 2);
[2230]165
[1744]166 $type = $converttotype;
[1733]167 }
[1744]168
[4724]169
[3137]170 # Add the image metadata
[3307]171 my $url = $file;
172 $url =~ s/ /%20/g;
173
174 $doc_obj->add_metadata ($section, "Image", $url);
[3517]175
176 # Also want to set filename as 'Source' metadata to be
177 # consistent with other plugins
178 $doc_obj->add_metadata ($section, "Source", $url);
179
[7504]180 $self->title_fallback($doc_obj,$section,$file);
181 $doc_obj->add_metadata ($section, "FileFormat", $type);
182
[3137]183 my ($image_type, $image_width, $image_height, $image_size)
184 = &identify($filename, $outhandle, $verbosity);
[2230]185
[3137]186 $doc_obj->add_metadata ($section, "ImageType", $image_type);
187 $doc_obj->add_metadata ($section, "ImageWidth", $image_width);
188 $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
189 $doc_obj->add_metadata ($section, "ImageSize", $image_size);
190
[4724]191
192 $doc_obj->add_metadata ($section, "srclink",
193 "<a href=_httpcollection_/index/assoc/[assocfilepath]/[Image]>");
194 $doc_obj->add_metadata ($section, "/srclink", "</a>");
195
196 $doc_obj->add_metadata ($section, "srcicon", "<img src=_httpcollection_/index/assoc/[assocfilepath]/[Image] width=100>");
197
198
[3137]199 # Add the image as an associated file
200 $doc_obj->associate_file($filename,$file,"image/$type",$section);
201
[4724]202
[2230]203 # Make the thumbnail image
[3137]204 my $thumbnailsize = $self->{'thumbnailsize'} || 100;
[2230]205 my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
[4724]206
[3137]207 my $thumbnailfile = &util::get_tmp_filename() . ".$thumbnailtype";
208 $self->{'tmp_filename2'} = $thumbnailfile;
[1733]209
[2230]210 # Generate the thumbnail with convert
[3307]211 my $command = "convert -interlace plane -verbose -geometry $thumbnailsize"
212 . "x$thumbnailsize \"$filename\" \"$thumbnailfile\"";
[4724]213 print $outhandle "THUMBNAIL: $command\n" if ($verbosity > 2);
[2230]214 my $result = '';
[2882]215 $result = `$command 2>&1` ;
[4724]216 print $outhandle "THUMB RESULT: $result\n" if ($verbosity > 2);
[2230]217
218 # Add the thumbnail as an associated file ...
[3137]219 if (-e "$thumbnailfile") {
220 $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",
[2230]221 "image/$thumbnailtype",$section);
222 $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
223 $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");
[4724]224
[4894]225 $doc_obj->add_metadata ($section, "thumbicon", "<img src=_httpcollection_/index/assoc/[assocfilepath]/[Thumb] width=[ThumbWidth] height=[ThumbHeight]>");
[2230]226 }
227
[3137]228 # Extract Thumnail metadata from convert output
229 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
230 $doc_obj->add_metadata ($section, "ThumbWidth", $1);
231 $doc_obj->add_metadata ($section, "ThumbHeight", $2);
[1733]232 }
[1758]233
[2230]234 # Make a screen-sized version of the picture if requested
235 if ($self->{'screenviewsize'}) {
236
[3137]237 # To do: if the actual image smaller than the screenview size,
238 # we should use the original !
239
[2230]240 my $screenviewsize = $self->{'screenviewsize'};
241 my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
242 my $screenviewfilename = &util::get_tmp_filename() . ".$screenviewtype";
243 $self->{'tmp_filename3'} = $screenviewfilename;
244
245 # make the screenview image
[3307]246 my $command = "convert -interlace plane -verbose -geometry $screenviewsize"
247 . "x$screenviewsize \"$filename\" \"$screenviewfilename\"";
[2230]248 print $outhandle "$command\n" if ($verbosity > 2);
249 my $result = "";
[2882]250 $result = `$command 2>&1` ;
[2230]251 print $outhandle "$result\n" if ($verbosity > 3);
252
253 # get screenview dimensions, size and type
254 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
255 $doc_obj->add_metadata ($section, "ScreenWidth", $1);
256 $doc_obj->add_metadata ($section, "ScreenHeight", $2);
257 }
[5078]258 else {
259 $doc_obj->add_metadata ($section, "ScreenWidth", $image_width);
260 $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);
261 }
262
[2230]263 #add the screenview as an associated file ...
264 if (-e "$screenviewfilename") {
265 $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",
266 "image/$screenviewtype",$section);
267 $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
268 $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");
[4724]269
[4908]270 $doc_obj->add_metadata ($section, "screenicon", "<img src=_httpcollection_/index/assoc/[assocfilepath]/[Screen] width=[ScreenWidth] height=[ScreenHeight]>");
[2230]271 } else {
272 print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";
273 }
274 }
275
[1733]276 return $type;
[4724]277
278
[3137]279}
[2230]280
281
[4724]282
[3137]283# Discover the characteristics of an image file with the ImageMagick
284# "identify" command.
285
286sub identify {
287 my ($image, $outhandle, $verbosity) = @_;
288
289 # Use the ImageMagick "identify" command to get the file specs
[5845]290 my $command = "identify \"$image\" 2>&1";
[3137]291 print $outhandle "$command\n" if ($verbosity > 2);
292 my $result = '';
293 $result = `$command`;
294 print $outhandle "$result\n" if ($verbosity > 3);
295
296 # Read the type, width, and height
297 my $type = 'unknown';
298 my $width = 'unknown';
299 my $height = 'unknown';
300
[4790]301 my $image_safe = quotemeta $image;
302 if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
[3137]303 $type = $1;
304 $width = $2;
305 $height = $3;
306 }
[5103]307
[3137]308 # Read the size
309 my $size = "unknown";
310 if ($result =~ m/^.* ([0-9]+)b/) {
311 $size = $1;
[5103]312 }
[5845]313 elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) {
[3137]314 $size = 1024 * $1;
[5103]315 if (defined($2)) {
316 $size = $size + (1024 * $2);
317 # Truncate size (it isn't going to be very accurate anyway)
318 $size = int($size);
319 }
[3137]320 }
321
322 print $outhandle "file: $image:\t $type, $width, $height, $size\n"
323 if ($verbosity > 2);
324
325 # Return the specs
326 return ($type, $width, $height, $size);
[1733]327}
328
329
330# The ImagePlug read() function. This function does all the right things
331# to make general options work for a given plugin. It calls the process()
332# function which does all the work specific to a plugin (like the old
333# read functions used to do). Most plugins should define their own
334# process() function and let this read() function keep control.
335#
336# ImagePlug overrides read() because there is no need to read the actual
337# text of the file in, because the contents of the file is not text...
338#
339# Return number of files processed, undef if can't process
340# Note that $base_dir might be "" and that $file might
341# include directories
342
343sub read {
344 my $self = shift (@_);
[6332]345 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
[1733]346
[2230]347 my $outhandle = $self->{'outhandle'};
348
[1733]349 my $filename = &util::filename_cat($base_dir, $file);
350 return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
351 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
352 return undef;
353 }
[6332]354
355 print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
[2230]356 print $outhandle "ImagePlug processing \"$filename\"\n"
[1758]357 if $self->{'verbosity'} > 1;
358
[2230]359 #if there's a leading directory name, eat it...
360 $file =~ s/^.*[\/\\]//;
[1733]361
362 # create a new document
363 my $doc_obj = new doc ($filename, "indexed_doc");
[2327]364 $doc_obj->set_OIDtype ($processor->{'OIDtype'});
[5919]365 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "$self->{'plugin_type'}", "1");
[2327]366
[1733]367 #run convert to get the thumbnail and extract size and type info
[1758]368 my $result = run_convert($self, $filename, $file, $doc_obj);
[1744]369
[1758]370 if (!defined $result)
[1744]371 {
372 print "ImagePlug: couldn't process \"$filename\"\n";
[7362]373 return -1; # error during processing
[1733]374 }
375
376 #create an empty text string so we don't break downstream plugins
[2226]377 my $text = "Dummy text to sidestep display bug.";
[1733]378
379 # include any metadata passed in from previous plugins
380 # note that this metadata is associated with the top level section
[1744]381 my $section = $doc_obj->get_top_section();
[1733]382 $self->extra_metadata ($doc_obj, $section, $metadata);
383
384 # do plugin specific processing of doc_obj
[7362]385 return -1 unless defined ($self->process (\$text, $pluginfo, $base_dir,
[2226]386 $file, $metadata, $doc_obj));
[1733]387
388 # do any automatic metadata extraction
389 $self->auto_extract_metadata ($doc_obj);
390
391 # add an OID
392 $doc_obj->set_OID();
[2226]393 $doc_obj->add_text($section, $text);
[1733]394
395 # process the document
396 $processor->process($doc_obj);
397
[2230]398 # clean up temporary files - we do this here instead of in
399 # run_convert becuase associated files aren't actually copied
400 # until after process has been run.
[1744]401 if (defined $self->{'tmp_filename'} &&
402 -e $self->{'tmp_filename'}) {
[2230]403 &util::rm($self->{'tmp_filename'})
[1733]404 }
[1744]405 if (defined $self->{'tmp_filename2'} &&
406 -e $self->{'tmp_filename2'}) {
[2230]407 &util::rm($self->{'tmp_filename2'})
[1744]408 }
[2230]409 if (defined $self->{'tmp_filename3'} &&
410 -e $self->{'tmp_filename3'}) {
411 &util::rm($self->{'tmp_filename3'})
412 }
[1733]413
[3307]414 $self->{'num_processed'}++;
415
[2207]416 return 1;
[1733]417}
418
419# do plugin specific processing of doc_obj
420sub process {
421 my $self = shift (@_);
422 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
423 my $outhandle = $self->{'outhandle'};
424
425 return 1;
426}
427
4281;
[4724]429
430
431
432
433
434
435
436
437
438
439
Note: See TracBrowser for help on using the repository browser.