source: trunk/gsdl/perllib/plugins/ImagePlug.pm@ 7504

Last change on this file since 7504 was 7504, checked in by davidb, 20 years ago

ImagePlug, MP3Plug, UnknownPlug modified to set Title metadata based
on filename if no explicit Title metadata given. Also FileFormat
metadata is now also set to indicate type of file processed. To
share some of the functionality introduced some of the new code is
located in BasPlug.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.8 KB
Line 
1###########################################################################
2#
3# ImagePlug.pm -- plugin for importing image files
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ImagePlug;
27
28use BasPlug;
29
# Establish BasPlug as the parent class while the package is being
# compiled, so inherited methods resolve as soon as the module is loaded.
BEGIN {
    our @ISA = qw(BasPlug);
}
33
# Descriptors for the options this plugin accepts. Each entry gives the
# option name, a description key (presumably looked up in a resource
# bundle -- confirm against BasPlug), its type, whether it is required
# and, where applicable, its default value.
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BasPlug.process_exp}',
        'type' => 'regexp',
        'deft' => get_default_process_exp(),
        'reqd' => 'no',
    },
    {
        'name' => 'noscaleup',
        'desc' => '{ImagePlug.noscaleup}',
        'type' => 'flag',
        'reqd' => 'no',
    },
    {
        'name' => 'thumbnailsize',
        'desc' => '{ImagePlug.thumbnailsize}',
        'type' => 'int',
        'deft' => '100',
        'reqd' => 'no',
    },
    {
        'name' => 'thumbnailtype',
        'desc' => '{ImagePlug.thumbnailtype}',
        'type' => 'string',
        'deft' => 'gif',
        'reqd' => 'no',
    },
    {
        'name' => 'screenviewsize',
        'desc' => '{ImagePlug.screenviewsize}',
        'type' => 'int',
        'deft' => '0',
        'reqd' => 'no',
    },
    {
        'name' => 'screenviewtype',
        'desc' => '{ImagePlug.screenviewtype}',
        'type' => 'string',
        'deft' => 'jpg',
        'reqd' => 'no',
    },
    {
        'name' => 'converttotype',
        'desc' => '{ImagePlug.converttotype}',
        'type' => 'string',
        'deft' => '',
        'reqd' => 'no',
    },
    {
        'name' => 'minimumsize',
        'desc' => '{ImagePlug.minimumsize}',
        'type' => 'int',
        'deft' => '100',
        'reqd' => 'no',
    },
];

# Top-level description of the plugin; new() appends this to the option
# list it gets from the BasPlug constructor.
my $options = {
    'name'     => 'ImagePlug',
    'desc'     => '{ImagePlug.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
80
81
82
# Constructor.
#
# Builds the object through the BasPlug constructor, registers this
# plugin's option description on the object's option list, and parses
# the ImagePlug-specific options out of the remaining arguments.
# Dies (after printing usage information) if option parsing fails.
#
# Returns the object blessed into the class the constructor was
# invoked on.
sub new {
    my $class = shift @_;

    my $self = BasPlug->new("ImagePlug", @_);
    $self->{'plugin_type'} = "ImagePlug";

    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
    my $inherited_options = $self->{'option_list'};
    push @{$inherited_options}, $options;

    # Option specifications paired with the slots that receive the values.
    my @option_spec = (
        q^noscaleup^,                \$self->{'noscaleup'},
        q^converttotype/.*/^,        \$self->{'converttotype'},
        q^minimumsize/[0-9]*/100^,   \$self->{'minimumsize'},

        q^thumbnailsize/[0-9]*/100^, \$self->{'thumbnailsize'},
        q^thumbnailtype/.*/gif^,     \$self->{'thumbnailtype'},
        q^screenviewsize/[0-9]*/0^,  \$self->{'screenviewsize'},
        q^screenviewtype/.*/jpg^,    \$self->{'screenviewtype'},
        "allow_extra_options",
    );

    my $parsed_ok = parsargv::parse(\@_, @option_spec);
    unless ($parsed_ok) {
        print STDERR "\nImagePlug uses an incorrect option.\n";
        print STDERR "Check your collect.cfg configuration file.\n";
        $self->print_txt_usage("");    # Use default resource bundle
        die "\n";
    }

    return bless $self, $class;
}
111
# Returns the default pattern (as a string) selecting the files this
# plugin processes: a case-insensitive match on the filename extensions
# .jpg/.jpeg, .gif, .png, .bmp, .xbm and .tif/.tiff.
# Any arguments (such as an invocant) are ignored.
sub get_default_process_exp {
    my $default_exp = q^(?i)(\.jpe?g|\.gif|\.png|\.bmp|\.xbm|\.tif?f)$^;
    return $default_exp;
}
117
118
# Create the thumbnail and (optionally) screenview images for one image
# file, and record the image's type, width, height and size as metadata,
# using the ImageMagick "convert" and "identify" utilities.
#
# Arguments:
#   $filename - image filename with full path
#   $file     - image filename without path
#   $doc_obj  - document object that receives the metadata and the
#               associated files
#
# Returns:
#   0      if either filename is empty;
#   undef  if the file is smaller than the 'minimumsize' option;
#   otherwise the image type string, which is the 'converttotype' value
#   when a conversion was performed and "unknown" when it was not.
#
# Temporary files created here are NOT removed here. Their names are
# stored in $self->{'tmp_filename'}, $self->{'tmp_filename2'} and
# $self->{'tmp_filename3'} so that the caller can delete them once the
# associated files have actually been copied.

sub run_convert {
    my $self     = shift (@_);
    my $filename = shift (@_);    # filename with full path
    my $file     = shift (@_);    # filename without path
    my $doc_obj  = shift (@_);
    my $section  = $doc_obj->get_top_section();

    my $verbosity = $self->{'verbosity'};
    my $outhandle = $self->{'outhandle'};

    # check the filename is okay
    return 0 if ($file eq "" || $filename eq "");

    # Filenames are quoted in the 'convert' commands below. This allows
    # spaces in filenames, but note that spaces need to be escaped in the
    # URL as well (see the %20 substitution further down).

    # Skip images below the configured minimum size. Returning undef
    # (rather than falling through) is what actually makes this a skip.
    my $minimumsize = $self->{'minimumsize'};
    if (defined $minimumsize && ((-s $filename) < $minimumsize)) {
        print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"
            if ($verbosity > 1);
        return undef;
    }

    # Convert the image to a new type (if required).
    my $converttotype = $self->{'converttotype'};
    my $type          = "unknown";

    # Only convert files that are NOT already of the requested type. The
    # type is quoted so that it is matched literally, and compared
    # case-insensitively so that e.g. "x.JPG" counts as a "jpg" file.
    if ($converttotype ne "" && $filename !~ m/\.\Q$converttotype\E$/i) {

        my $originalfilename = $filename;
        $filename = &util::get_tmp_filename() . ".$converttotype";
        $self->{'tmp_filename'} = $filename;

        my $command = "convert -interlace plane -verbose \"$originalfilename\" \"$filename\"";
        print $outhandle "$command\n" if ($verbosity > 2);
        # Capture stderr too, as the other convert invocations do.
        my $result = `$command 2>&1`;
        print $outhandle "RESULT = $result\n" if ($verbosity > 2);

        $type = $converttotype;

        # NOTE(review): $file (used as the associated file name and for
        # the Image/Source metadata) keeps its original extension after a
        # conversion -- confirm whether that is intended.
    }

    # Add the image metadata
    my $url = $file;
    $url =~ s/ /%20/g;

    $doc_obj->add_metadata ($section, "Image", $url);

    # Also want to set filename as 'Source' metadata to be
    # consistent with other plugins
    $doc_obj->add_metadata ($section, "Source", $url);

    $self->title_fallback($doc_obj, $section, $file);
    $doc_obj->add_metadata ($section, "FileFormat", $type);

    my ($image_type, $image_width, $image_height, $image_size)
        = &identify($filename, $outhandle, $verbosity);

    $doc_obj->add_metadata ($section, "ImageType",   $image_type);
    $doc_obj->add_metadata ($section, "ImageWidth",  $image_width);
    $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
    $doc_obj->add_metadata ($section, "ImageSize",   $image_size);

    $doc_obj->add_metadata ($section, "srclink",
        "<a href=_httpcollection_/index/assoc/[assocfilepath]/[Image]>");
    $doc_obj->add_metadata ($section, "/srclink", "</a>");

    $doc_obj->add_metadata ($section, "srcicon", "<img src=_httpcollection_/index/assoc/[assocfilepath]/[Image] width=100>");

    # Add the image as an associated file
    $doc_obj->associate_file($filename, $file, "image/$type", $section);

    # Make the thumbnail image
    my $thumbnailsize = $self->{'thumbnailsize'} || 100;
    my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';

    my $thumbnailfile = &util::get_tmp_filename() . ".$thumbnailtype";
    $self->{'tmp_filename2'} = $thumbnailfile;

    # Generate the thumbnail with convert
    my $command = "convert -interlace plane -verbose -geometry $thumbnailsize"
        . "x$thumbnailsize \"$filename\" \"$thumbnailfile\"";
    print $outhandle "THUMBNAIL: $command\n" if ($verbosity > 2);
    my $result = `$command 2>&1`;
    print $outhandle "THUMB RESULT: $result\n" if ($verbosity > 2);

    # Add the thumbnail as an associated file ...
    if (-e "$thumbnailfile") {
        $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",
            "image/$thumbnailtype", $section);
        $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
        $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");

        $doc_obj->add_metadata ($section, "thumbicon", "<img src=_httpcollection_/index/assoc/[assocfilepath]/[Thumb] width=[ThumbWidth] height=[ThumbHeight]>");
    }

    # Extract thumbnail metadata from the convert output, which reports
    # the resize as "<old width>x<old height>=><new width>x<new height>"
    if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
        $doc_obj->add_metadata ($section, "ThumbWidth",  $1);
        $doc_obj->add_metadata ($section, "ThumbHeight", $2);
    }

    # Make a screen-sized version of the picture if requested
    if ($self->{'screenviewsize'}) {

        # To do: if the actual image is smaller than the screenview size,
        # we should use the original !

        my $screenviewsize     = $self->{'screenviewsize'};
        my $screenviewtype     = $self->{'screenviewtype'} || 'jpeg';
        my $screenviewfilename = &util::get_tmp_filename() . ".$screenviewtype";
        $self->{'tmp_filename3'} = $screenviewfilename;

        # make the screenview image
        my $screen_command = "convert -interlace plane -verbose -geometry $screenviewsize"
            . "x$screenviewsize \"$filename\" \"$screenviewfilename\"";
        print $outhandle "$screen_command\n" if ($verbosity > 2);
        my $screen_result = `$screen_command 2>&1`;
        print $outhandle "$screen_result\n" if ($verbosity > 3);

        # get screenview dimensions; fall back to the dimensions reported
        # for the source image when convert's output cannot be parsed
        if ($screen_result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
            $doc_obj->add_metadata ($section, "ScreenWidth",  $1);
            $doc_obj->add_metadata ($section, "ScreenHeight", $2);
        }
        else {
            $doc_obj->add_metadata ($section, "ScreenWidth",  $image_width);
            $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);
        }

        # add the screenview as an associated file ...
        if (-e "$screenviewfilename") {
            $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",
                "image/$screenviewtype", $section);
            $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
            $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");

            $doc_obj->add_metadata ($section, "screenicon", "<img src=_httpcollection_/index/assoc/[assocfilepath]/[Screen] width=[ScreenWidth] height=[ScreenHeight]>");
        }
        else {
            print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";
        }
    }

    return $type;
}
280
281
282
# Discover the characteristics of an image file with the ImageMagick
# "identify" command.
#
# Arguments:
#   $image     - path of the image file to inspect
#   $outhandle - filehandle for diagnostic output
#   $verbosity - diagnostics are printed when this is greater than 2
#                (command and summary) or greater than 3 (raw output)
#
# Returns the list ($type, $width, $height, $size). Any element that
# cannot be parsed from the command's output is the string "unknown".
# $size is in bytes; sizes reported in kilobytes or megabytes are
# converted using 1024 as the multiplier and truncated to an integer.

sub identify {
    my ($image, $outhandle, $verbosity) = @_;

    # Use the ImageMagick "identify" command to get the file specs
    my $command = "identify \"$image\" 2>&1";
    print $outhandle "$command\n" if ($verbosity > 2);
    my $result = `$command`;
    $result = '' unless defined $result;    # command could not be run
    print $outhandle "$result\n" if ($verbosity > 3);

    # Read the type, width, and height
    my $type   = 'unknown';
    my $width  = 'unknown';
    my $height = 'unknown';

    # The output starts with the filename, optionally followed by a frame
    # index such as "[0]" for multi-frame images, then the type and the
    # dimensions.
    if ($result =~ /^\Q$image\E(?:\[[0-9]+\])? (\w+) (\d+)x(\d+)/) {
        $type   = $1;
        $width  = $2;
        $height = $3;
    }

    # Read the size. Units are matched case-insensitively because the
    # spelling differs between ImageMagick versions ("45kb", "2.36KB",
    # "3019B", "1.2MiB").
    my $size = "unknown";
    if ($result =~ m/^.* ([0-9]+)b/i) {
        $size = $1;
    }
    elsif ($result =~ m/^.* ([0-9]+)(\.[0-9]+)?(k(?:i?b)?|mi?b)/i) {
        my ($whole, $fraction, $unit) = ($1, $2, $3);
        my $multiplier = ($unit =~ m/^m/i) ? 1024 * 1024 : 1024;

        $size = $multiplier * $whole;
        if (defined($fraction)) {
            # Truncate size (it isn't going to be very accurate anyway)
            $size = int($size + ($multiplier * $fraction));
        }
    }

    print $outhandle "file: $image:\t $type, $width, $height, $size\n"
        if ($verbosity > 2);

    # Return the specs
    return ($type, $width, $height, $size);
}
328
329
# The ImagePlug read() function. This function does all the right things
# to make general options work for a given plugin. It calls the process()
# function which does all the work specific to a plugin (like the old
# read functions used to do). Most plugins should define their own
# process() function and let this read() function keep control.
#
# ImagePlug overrides read() because there is no need to read the actual
# text of the file in, because the contents of the file is not text...
#
# Arguments: ($pluginfo, $base_dir, $file, $metadata, $processor,
# $maxdocs, $gli). Note that $base_dir might be "" and that $file might
# include directories.
#
# Returns:
#   1      when the file was processed;
#   0      when the file matches the plugin's block expression;
#   undef  when the file does not match the process expression or is
#          not a plain file (i.e. this plugin can't process it);
#   -1     when an error occurred during processing.

sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;

    my $outhandle = $self->{'outhandle'};

    my $filename = &util::filename_cat($base_dir, $file);
    return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
        return undef;
    }

    print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
    print $outhandle "ImagePlug processing \"$filename\"\n"
        if $self->{'verbosity'} > 1;

    # if there's a leading directory name, eat it...
    $file =~ s/^.*[\/\\]//;

    # create a new document
    my $doc_obj = doc->new($filename, "indexed_doc");
    $doc_obj->set_OIDtype ($processor->{'OIDtype'});
    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "$self->{'plugin_type'}", "1");

    # run convert to get the thumbnail and extract size and type info
    my $result = run_convert($self, $filename, $file, $doc_obj);

    if (!defined $result) {
        # Report through the plugin's output handle like every other
        # diagnostic, and don't leave temporary files behind.
        print $outhandle "ImagePlug: couldn't process \"$filename\"\n";
        _remove_tmp_files($self);
        return -1;    # error during processing
    }

    # create a placeholder text string so we don't break downstream plugins
    my $text = "Dummy text to sidestep display bug.";

    # include any metadata passed in from previous plugins
    # note that this metadata is associated with the top level section
    my $section = $doc_obj->get_top_section();
    $self->extra_metadata ($doc_obj, $section, $metadata);

    # do plugin specific processing of doc_obj
    my $processed = $self->process (\$text, $pluginfo, $base_dir,
                                    $file, $metadata, $doc_obj);
    if (!defined $processed) {
        _remove_tmp_files($self);
        return -1;
    }

    # do any automatic metadata extraction
    $self->auto_extract_metadata ($doc_obj);

    # add an OID
    $doc_obj->set_OID();
    $doc_obj->add_text($section, $text);

    # process the document
    $processor->process($doc_obj);

    # clean up temporary files - we do this here instead of in
    # run_convert because associated files aren't actually copied
    # until after process has been run.
    _remove_tmp_files($self);

    $self->{'num_processed'}++;

    return 1;
}

# Delete the temporary files recorded on the plugin object under the
# keys 'tmp_filename', 'tmp_filename2' and 'tmp_filename3', and forget
# their names so that a later call cannot act on a stale path.
sub _remove_tmp_files {
    my ($self) = @_;

    foreach my $key ('tmp_filename', 'tmp_filename2', 'tmp_filename3') {
        my $tmp_file = delete $self->{$key};
        next unless defined $tmp_file;
        &util::rm($tmp_file) if -e $tmp_file;
    }

    return;
}
418
# Plugin-specific processing of the document object.
#
# Arguments: ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj).
# ImagePlug performs no additional processing at this stage, so this
# always returns 1.
sub process {
    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;

    my $outhandle = $self->{'outhandle'};

    return 1;
}
427
4281;
429
430
431
432
433
434
435
436
437
438
439
Note: See TracBrowser for help on using the repository browser.