source: main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm@ 22552

Last change on this file since 22552 was 22552, checked in by kjdon, 14 years ago

By default we want this plugin to process all files, so the default process_exp was changed to ".*". metadata_read was then made to check process_exp before processing a file; otherwise processing could not be restricted to certain types of documents.

File size: 6.0 KB
Line 
1###########################################################################
2#
3# EmbeddedMetadataPlugin.pm -- A plugin for EXIF
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2007 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package EmbeddedMetadataPlugin;
29
30use BasePlugin;
31
32use Image::ExifTool qw(:Public);
33use strict;
34
35no strict 'refs'; # allow filehandles to be variables and viceversa
36
37
# Establish the inheritance chain at compile time: this plugin is a BasePlugin.
BEGIN {
    @EmbeddedMetadataPlugin::ISA = ('BasePlugin');
}
42
43
44
# Plugin-specific arguments. 'desc' values in braces are keys that are
# presumably resolved against Greenstone's string resources -- the
# separator description is shared with HTMLPlugin.
my $arguments = [
    {
        'name' => "metadata_field_separator",
        'desc' => "{HTMLPlugin.metadata_field_separator}",
        'type' => "string",
        'deft' => "",
    },
];

# Plugin description record; it is appended to the option list in new().
my $options = {
    'name'     => "EmbeddedMetadataPlugin",
    'desc'     => "{EmbeddedMetadataPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
59
# Constructor.
#
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to BasePlugin->new
#   $hashArgOptLists - hash ref; this plugin's argument and option
#                      descriptions are appended to its "ArgList" and
#                      "OptList" entries before BasePlugin sees it
#
# Returns the object built by BasePlugin, re-blessed into $class, with a
# pre-configured Image::ExifTool instance stored in $self->{'exiftool'}.
#
# The empty prototype was dropped (prototypes are ignored on method calls)
# and the constructors are invoked with arrow syntax: the indirect-object
# form "new BasePlugin(...)" is ambiguous to the parser inside a package
# that itself defines a sub called new.
sub new
{
    my ($class) = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}) if defined $arguments;
    push(@{$hashArgOptLists->{"OptList"}}, $options)      if defined $options;

    my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    # Create a new Image::ExifTool object
    my $exifTool = Image::ExifTool->new;
    $exifTool->Options(Duplicates => 0);   # one value per tag name
    $exifTool->Options(PrintConv  => 0);   # raw values, no print conversion
    $exifTool->Options(Unknown    => 1);   # also report unknown tags
    # NOTE(review): with only an option name, Options() just returns the
    # current setting, so this line does not switch verbose output on.
    # Kept as-is to preserve behaviour -- confirm what was intended.
    $exifTool->Options('Verbose');
    $self->{'exiftool'} = $exifTool;

    return bless $self, $class;
}
83
84
85# Need to think some more about this
# Default regular expression deciding which files this plugin looks at.
# It matches every file name; a narrower pattern that was considered
# earlier is q^(?i)\.(wma|wmv|jpe?g|gif)$^.
sub get_default_process_exp()
{
    my $match_everything = ".*";
    return $match_everything;
}
91
92
93# This plugin doesn't block any files
94#sub get_default_block_exp()
95#{
96# return '';
97#}
98
99
# Read the embedded metadata ExifTool can find in one file and record it in
# the caller's "extra metadata" tables.
#
#   $file          - file name as passed by metadata_read (without path);
#                    after util::filename_to_regex it becomes the key used
#                    in $extrametadata
#   $filename      - full path of the file, handed to ExifTool
#   $extrametadata - hash ref: key => { metadata name => [ values ] }
#   $extrametakeys - array ref of the keys stored in $extrametadata
#
# Metadata names have the form "ex.<group>.<tag>", where <group> is an
# ExifTool family-0 group name.  Every stored value has been passed
# through gsSafe.  The return value is not meaningful.
#
# If $extrametadata already holds a table for this key, the new values are
# appended to it instead of replacing it, so metadata stored earlier under
# the same key is no longer thrown away.
sub extractEmbeddedMetadata
{
    my $self = shift(@_);
    my ($file, $filename, $extrametadata, $extrametakeys) = @_;

    my %exif_metadata = ();

    my $outhandle = $self->{'outhandle'};
    my $exiftool  = $self->{'exiftool'};

    my $metadata_count = 0;

    # An unset or empty separator means "do not split values".
    my $separator = $self->{'metadata_field_separator'};
    if (defined $separator && $separator eq "") {
        undef $separator;
    }

    # The file is re-read once per family-0 group so that every value can
    # be labelled with the group it came from.
    foreach my $group (Image::ExifTool::GetAllGroups(0))
    {
        # Extract meta information from an image
        $exiftool->Options(Group0 => [$group]);
        $exiftool->ExtractInfo($filename);

        # Tags in the order they were found in the file
        foreach my $found_tag ($exiftool->GetFoundTags('File'))
        {
            # Strip any numbering suffix (work on a copy of the tag key)
            (my $tag = $found_tag) =~ s/^([^\s]+)\s.*$/$1/;

            my $value = $exiftool->GetValue($tag);
            next unless (defined $value && $value =~ /[a-z0-9]+/i);

            my $field = "ex.$group.$tag";
            $exif_metadata{$field} ||= [];

            my @values;
            if (ref $value eq 'SCALAR') {
                # Scalar refs are treated as binary data: store only a
                # short description, never the bytes themselves.
                if ($$value =~ /^Binary data/) {
                    @values = ("($$value)");
                }
                else {
                    my $len = length($$value);
                    @values = ("(Binary data $len bytes)");
                }
            }
            elsif (ref $value eq 'ARRAY') {
                @values = @$value;
            }
            elsif (defined $separator) {
                # The separator is used as a split pattern; pieces that
                # contain only whitespace are dropped.
                @values = grep { /\S/ } split($separator, $value);
            }
            else {
                @values = ($value);
            }

            foreach my $v (@values) {
                push (@{$exif_metadata{$field}}, $self->gsSafe($v));
                ++$metadata_count;
            }
        }
    }

    if ($metadata_count > 0) {
        print $outhandle " Extracted $metadata_count pieces of metadata from $filename EXIF block\n";
    }

    # Protect windows directory chars \
    $file = &util::filename_to_regex($file);

    # Associate the metadata now
    my $existing = $extrametadata->{$file};
    if (ref $existing eq 'HASH') {
        # A table is already stored for this key: append to it.  The key
        # is assumed to be in $extrametakeys already, so it is not pushed
        # a second time.
        foreach my $field (keys %exif_metadata) {
            my $old = $existing->{$field};
            if (!defined $old) {
                $existing->{$field} = $exif_metadata{$field};
            }
            else {
                $existing->{$field} = [$old] unless (ref $old eq 'ARRAY');
                push (@{$existing->{$field}}, @{$exif_metadata{$field}});
            }
        }
    }
    else {
        $extrametadata->{$file} = \%exif_metadata;
        push(@$extrametakeys, $file);
    }
}
198
199
# Metadata hook: for a regular file that can_process_this_file accepts,
# extract its embedded metadata into $extrametadata / $extrametakeys.
# Of the arguments only $base_dir, $file, $extrametakeys, $extrametadata
# and $gli are used here.  Always returns undef.
sub metadata_read()
{
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli) = @_;

    my ($full_path, $name_no_path) = &util::get_full_filenames($base_dir, $file);

    # Anything that is not a plain file (e.g. a directory) is skipped ...
    return undef unless (-f $full_path);
    # ... and so is any file this plugin is not meant to handle.
    return undef unless ($self->can_process_this_file($full_path));

    if ($gli) {
        print STDERR "\n<Processing n='$file' p='EmbeddedMetadataPlugin'>\n";
    }
    if ($self->{'verbosity'} > 1) {
        print STDERR "EmbeddedMetadataPlugin: processing $file\n";
    }

    $self->extractEmbeddedMetadata($name_no_path, $full_path,
                                   $extrametadata, $extrametakeys);

    return undef;
}
223
224
# Not used by this plugin: all of its work happens in metadata_read, so
# this simply returns undef.
sub process() {
    return undef;
}
230
# Return a copy of $text in which the characters ( ) , < > [ ] are replaced
# by their decimal numeric character references (e.g. "(" -> "&#40;").
#
#   $text - string to escape; undef is returned unchanged
#
# Previously "<" was substituted twice and ">" never, so ">" slipped
# through unescaped.  A single pass now handles all seven characters.
# The replacement text consists only of "&", "#", digits and ";", so
# nothing is ever escaped twice.
sub gsSafe
{
    my $self = shift(@_);
    my ($text) = @_;
    return $text unless defined $text;

    # Replace dangerous characters
    $text =~ s/([(),<>\[\]])/'&#' . ord($1) . ';'/ge;

    # Done
    return $text;
}
246
2471;
Note: See TracBrowser for help on using the repository browser.