source: main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm@ 24403

Last change on this file since 24403 was 24403, checked in by ak19, 13 years ago

Dr Bainbridge has fixed the conflict between OAIPlugin and EmbeddedMetadataPlugin which resulted in the oai tutorial (with the JCDL pictures) going wrong: meta was not attached to the images. Dr Bainbridge solved the problem by introducing a new method in BasePlugin: can_process_this_file_for_metadata, which by default returns undef so that things should work by default mostly. This method has been overridden in OAIPlugin and EmbeddedMetadataPlugin now to do the right thing there.

File size: 8.8 KB
Line 
1###########################################################################
2#
3# EmbeddedMetadataPlugin.pm -- A plugin for EXIF
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2007 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package EmbeddedMetadataPlugin;
29
30use BasePlugin;
31
32use Encode;
33use Image::ExifTool qw(:Public);
34use strict;
35
36no strict 'refs'; # allow filehandles to be variables and viceversa
37
38
# Compile-time setup for the package: STDERR is switched to the :utf8
# layer and BasePlugin is installed as the parent class.
sub BEGIN
{
    binmode(STDERR, ":utf8");
    @EmbeddedMetadataPlugin::ISA = qw(BasePlugin);
}
44
# Choices offered for the input_encoding option: an "auto" entry first,
# followed by every entry of $BasePlugin::encoding_list.
my $encoding_plus_auto_list = [
    {
        'name' => "auto",
        'desc' => "{ReadTextFile.input_encoding.auto}",
    },
    @{$BasePlugin::encoding_list},
];
49
# Option descriptors contributed by this plugin.  Each entry is a hash with
# a name, a description key and a type; some also carry a default ('deft'),
# a list of permitted values ('list') or a required marker ('reqd').
my $arguments = [
    {
        'name' => "metadata_field_separator",
        'desc' => "{HTMLPlugin.metadata_field_separator}",
        'type' => "string",
        'deft' => "",
    },
    {
        'name' => "input_encoding",
        'desc' => "{ReadTextFile.input_encoding}",
        'type' => "enum",
        'list' => $encoding_plus_auto_list,
        'reqd' => "no",
        'deft' => "auto",
    },
    {
        'name' => "join_before_split",
        'desc' => "{EmbeddedMetadataPlugin.join_before_split}",
        'type' => "flag",
    },
    {
        'name' => "join_character",
        'desc' => "{EmbeddedMetadataPlugin.join_character}",
        'type' => "string",
        'deft' => " ",
    },
    {
        'name' => "trim_whitespace",
        'desc' => "{EmbeddedMetadataPlugin.trim_whitespace}",
        'type' => "enum",
        'list' => [
            { 'name' => "true",  'desc' => "{common.true}" },
            { 'name' => "false", 'desc' => "{common.false}" },
        ],
        'deft' => "true",
    },
];
78
# Plugin description record: a concrete (non-abstract) plugin that
# inherits options and exposes the argument list declared in $arguments.
my $options = {
    'name'     => "EmbeddedMetadataPlugin",
    'desc'     => "{EmbeddedMetadataPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
85
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to BasePlugin->new
#   $hashArgOptLists - hash ref; this plugin's argument list is appended to
#                      its "ArgList" entry and its option record to "OptList"
#
# Returns the object built by BasePlugin->new, re-blessed into $class and
# carrying a configured Image::ExifTool instance under the 'exiftool' key.
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Arrow syntax rather than "new BasePlugin(...)": this package defines
    # its own new(), so the indirect-object form is ambiguous to the parser.
    my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    # Create and configure the ExifTool object used by extractEmbeddedMetadata
    my $exifTool = Image::ExifTool->new();
    $exifTool->Options(Duplicates => 0);
    $exifTool->Options(PrintConv => 0);
    $exifTool->Options(Unknown => 1);
    # NOTE(review): with a single argument Options() appears only to return
    # the current value of the option, so this call probably changes
    # nothing; confirm whether verbose output was meant to be enabled.
    $exifTool->Options('Verbose');
    $self->{'exiftool'} = $exifTool;

    return bless $self, $class;
}
107
108
# Default process expression for this plugin.  It currently matches every
# file name; the choice is provisional and still needs more thought.
sub get_default_process_exp()
{
    # a narrower candidate that was considered:
    #q^(?i)\.(wma|wmv|jpe?g|gif)$^;
    my $match_everything = ".*";
    return $match_everything;
}
115
116
117# This plugin doesn't block any files
118#sub get_default_block_exp()
119#{
120# return '';
121#}
122
# Tells the caller whether this plugin handles the file itself.  The answer
# is always 0: this plugin deals with metadata only, never with the file.
# (Plugins that rely on more than process_exp, e.g. XML plugins, can
# override this method.)
sub can_process_this_file {
    my ($self) = @_;

    return 0;
}
130
# Always answers undef.  This plugin looks for metadata in any file via
# metadata_read(), but it must not claim the file: an undef answer lets
# processing continue down the pipeline so that other plugins can also
# locate metadata and record it in the extrametakeys variables that are
# passed around.
sub can_process_this_file_for_metadata {
    my ($self) = @_;

    return undef;
}
146
147
# Harvest embedded metadata from one file with ExifTool and record it in
# the extra-metadata tables supplied by the caller.
#
# Arguments:
#   $file          - name used (after regex-escaping) as the key under
#                    which the metadata is stored
#   $filename      - path handed to ExifTool for extraction
#   $extrametadata - hash ref: file key -> { field name -> [values] }
#   $extrametakeys - array ref of file keys
#
# Every value found is stored under a field named "ex.<group>.<tag>".
# Values are decoded with the configured input_encoding ("auto" is treated
# as utf8), optionally split on metadata_field_separator, and passed
# through gsSafe().  Binary values are replaced by a short placeholder.
#
# If $extrametadata already holds an entry for the file, the new values are
# appended to it rather than replacing it, so metadata recorded earlier for
# the same file is not lost.
#
# No meaningful return value.
sub extractEmbeddedMetadata
{
    my $self = shift(@_);
    my ($file, $filename, $extrametadata, $extrametakeys) = @_;

    my %exif_metadata = ();

    my $outhandle = $self->{'outhandle'};
    my $exiftool  = $self->{'exiftool'};

    my $metadata_count = 0;

    # An empty separator means "never split values"
    my $separator = $self->{'metadata_field_separator'};
    if (defined $separator && $separator eq "") {
        undef $separator;
    }

    # Resolved once: the setting does not change from tag to tag
    my $encoding = $self->{'input_encoding'};
    if (!defined $encoding || $encoding eq "auto") {
        $encoding = "utf8";
    }

    # Store one piece of text under $field.  When $split_on is defined the
    # text is split on it and only pieces containing a non-space character
    # are kept; otherwise the text is stored as a single value.
    my $add_values = sub {
        my ($field, $text, $split_on) = @_;

        my @pieces = defined $split_on
            ? grep { /\S/ } split($split_on, $text)
            : ($text);

        foreach my $piece (@pieces) {
            push(@{$exif_metadata{$field}}, $self->gsSafe($piece));
        }
        $metadata_count += scalar(@pieces);
    };

    my @group_list = Image::ExifTool::GetAllGroups(0);
    foreach my $group (@group_list) {

        # Extract meta information for this group only
        $exiftool->Options(Group0 => [$group]);
        $exiftool->ExtractInfo($filename);

        # Get list of tags in the order they were found in the file
        my @tag_list = $exiftool->GetFoundTags('File');
        foreach my $tag (@tag_list) {

            # Strip any numbering suffix
            $tag =~ s/^([^\s]+)\s.*$/$1/i;
            my $value = $exiftool->GetValue($tag);
            next unless (defined $value && $value =~ /[a-z0-9]+/i);

            # Decode the field name before it is used as a key, so the
            # entry created here is the one the values are pushed onto
            my $field = Encode::decode($encoding, "ex.$group.$tag");
            if (!defined $exif_metadata{$field}) {
                $exif_metadata{$field} = [];
            }

            if (ref $value eq 'SCALAR') {
                # Binary payload: store a placeholder, never split it
                my $placeholder = ($$value =~ /^Binary data/)
                    ? "($$value)"
                    : "(Binary data " . length($$value) . " bytes)";
                $add_values->($field, $placeholder, undef);
            }
            elsif (ref $value eq 'ARRAY') {
                # Decode into a private copy; decoding the elements in
                # place would alter the list that ExifTool handed out
                my @decoded = map { Encode::decode($encoding, $_) } @$value;

                if ($self->{'join_before_split'}) {
                    # Join first, then split.  The join character is only
                    # inserted once some text has been accumulated, so
                    # leading empty elements add nothing.
                    my $allvals = "";
                    foreach my $v (@decoded) {
                        if ($allvals ne "") {
                            $allvals = $allvals . $self->{'join_character'};
                        }
                        $allvals = $allvals . $v;
                    }
                    $add_values->($field, $allvals, $separator);
                }
                else {
                    foreach my $v (@decoded) {
                        $add_values->($field, $v, $separator);
                    }
                }
            }
            else {
                $add_values->($field, Encode::decode($encoding, $value), $separator);
            }
        }
    }

    if ($metadata_count > 0) {
        print $outhandle " Extracted $metadata_count pieces of metadata from $filename EXIF block\n";
    }

    # Protect windows directory chars \
    $file = &util::filename_to_regex($file);

    # Associate the metadata now
    if (defined $extrametadata->{$file}) {
        # Something is already recorded for this file: append to it.  The
        # key is assumed to be in $extrametakeys already, so it is not
        # pushed a second time.
        my $existing = $extrametadata->{$file};
        foreach my $name (keys %exif_metadata) {
            push(@{$existing->{$name}}, @{$exif_metadata{$name}});
        }
    }
    else {
        $extrametadata->{$file} = \%exif_metadata;
        push(@$extrametakeys, $file);
    }

    return;
}
287
288
# Metadata pass for one file: extract its embedded metadata into the shared
# $extrametadata / $extrametakeys structures.
#
# Arguments: the usual metadata_read parameter list.  Only $base_dir,
# $file, $extrametakeys, $extrametadata and $gli are used here.
#
# Always returns undef, so the file is never reported as handled by this
# plugin.
sub metadata_read
{
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);

    # We don't want to process directories (or anything else that is not a
    # plain file).  can_process_this_file() is deliberately not consulted:
    # in this plugin it always answers 0, so testing it here would make
    # every call return before any metadata was extracted.
    if (!-f $filename_full_path) {
        return undef;
    }

    print STDERR "\n<Processing n='$file' p='EmbeddedMetadataPlugin'>\n" if ($gli);
    print STDERR "EmbeddedMetadataPlugin: processing $file\n" if ($self->{'verbosity'} > 1);

    $self->extractEmbeddedMetadata($filename_no_path, $filename_full_path,
                                   $extrametadata, $extrametakeys);

    return undef;
}
310
# Does nothing and answers undef: all of this plugin's work is done in
# metadata_read().
sub read
{
    my $no_result = undef;
    return $no_result;
}
315
# Not used by this plugin; answers undef.
sub process()
{
    my $no_result = undef;
    return $no_result;
}
321
# Returns a copy of $text in which the characters ( ) , < > [ ] { } are
# replaced by decimal character references.  When the trim_whitespace
# option equals "true", leading and trailing whitespace is removed too.
sub gsSafe() {
    my ($self, $text) = @_;

    # Potentially problematic characters and their replacements
    my %entity_for = (
        '(' => '&#40;',
        ')' => '&#41;',
        ',' => '&#44;',
        '<' => '&#60;',
        '>' => '&#62;',
        '[' => '&#91;',
        ']' => '&#93;',
        '{' => '&#123;',
        '}' => '&#125;',
    );

    # One pass is enough: no replacement contains a character that is
    # itself in the table.
    $text =~ s/([(),<>\[\]{}])/$entity_for{$1}/g;

    if ($self->{'trim_whitespace'} eq "true") {
        for ($text) {
            s/^\s+//;
            s/\s+$//;
        }
    }

    return $text;
}
345
3461;
Note: See TracBrowser for help on using the repository browser.