source: main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm@ 22074

Last change on this file since 22074 was 22074, checked in by kjdon, 14 years ago

extrametadata needs the filename without its subfolder, because the subfolder is added in later. So pass the plain file name to extractEmbeddedMetadata instead of e.g. folder/file.jpg. Also, don't try to process folders; otherwise each file inside them will get some EXIF error metadata attached.

File size: 5.4 KB
Line 
1###########################################################################
2#
3# EmbeddedMetadataPlugin.pm -- A plugin for EXIF
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2007 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package EmbeddedMetadataPlugin;
29
30use BasePlugin;
31
32use Image::ExifTool qw(:Public);
33use strict;
34
35no strict 'refs'; # allow filehandles to be variables and viceversa
36
37
# Compile-time setup: make this package a subclass of BasePlugin.
BEGIN {
    @EmbeddedMetadataPlugin::ISA = ('BasePlugin');
}
42
43
44
# This plugin declares no arguments of its own (empty list).
my $arguments = [];

# Description record for this plugin; new() pushes it onto the caller's
# "OptList".  The 'desc' value looks like a lookup key rather than literal
# text -- presumably resolved elsewhere; confirm against the option parser.
my $options = {
    'name'     => "EmbeddedMetadataPlugin",
    'desc'     => "{EmbeddedMetadataPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
54
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the BasePlugin constructor
#   $hashArgOptLists - hash ref; this plugin's $arguments and $options are
#                      pushed onto its "ArgList" and "OptList" entries
#
# Returns the object built by BasePlugin->new, blessed into $class, with a
# configured Image::ExifTool instance stored under the 'exiftool' key.
#
# No prototype is declared: prototypes are ignored on method calls, so the
# former empty prototype only suggested (wrongly) that no arguments are taken.
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options)   { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    # Explicit Class->new form: the indirect-object form ("new BasePlugin ...")
    # is parsed heuristically and is ambiguous inside a package that itself
    # defines a sub called "new".
    my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    # One ExifTool object is created here and reused for every file.
    # Option meanings are per the Image::ExifTool Options documentation.
    my $exifTool = Image::ExifTool->new;
    $exifTool->Options(Duplicates => 0);  # keep one value per tag name
    $exifTool->Options(PrintConv  => 0);  # raw values, no print conversion
    $exifTool->Options(Unknown    => 1);  # also extract unknown tags
    # NOTE: the earlier code also called Options('Verbose') with no value.
    # With a single argument Options() only reads the current setting, so
    # that call changed nothing and has been dropped.  Verbose output remains
    # disabled, exactly as before.
    $self->{'exiftool'} = $exifTool;

    return bless $self, $class;
}
78
79
80# Need to think some more about this
# Default "process" expression for this plugin: the empty string.
# A candidate file-extension pattern is kept below, commented out, for
# reference only.
sub get_default_process_exp()
{
    # Candidate pattern, currently disabled:
    #   q^(?i)\.(wma|wmv|jpe?g|gif)$^
    return '';
}
86
87
88# This plugin doesn't block any files
89#sub get_default_block_exp()
90#{
91# return '';
92#}
93
94
# Read embedded metadata from one file with ExifTool and register it as
# "extra metadata" for that file.
#
# Arguments:
#   $file          - key under which the metadata is stored (metadata_read
#                    passes the file name without its path); it is run
#                    through util::filename_to_regex before use
#   $filename      - name handed to ExifTool for reading (metadata_read
#                    passes the full path)
#   $extrametadata - hash ref; gains an entry  regex-ified $file => { field => [values] }
#   $extrametakeys - array ref; the regex-ified $file is appended
#
# Field names have the form "<group>.<tag>".  An entry is stored for the file
# even when no values were found.
# The final statement is the push, so the sub's implicit return value is the
# new length of @$extrametakeys.
sub extractEmbeddedMetadata()
{
    my ($self, $file, $filename, $extrametadata, $extrametakeys) = @_;

    my $outhandle = $self->{'outhandle'};
    my $exiftool  = $self->{'exiftool'};

    my %values_for     = ();
    my $metadata_count = 0;

    # The file is re-read once per family-0 group, with the Group0 option
    # set to that single group each time.
    my @groups = Image::ExifTool::GetAllGroups(0);
    for my $group (@groups) {
        $exiftool->Options(Group0 => [$group]);
        $exiftool->ExtractInfo($filename);

        # Tags come back in the order they were found in the file
        my @found_tags = $exiftool->GetFoundTags('File');
        for my $found (@found_tags) {
            # Keep only the part before the first whitespace, dropping any
            # numbering suffix
            (my $tag = $found) =~ s/^([^\s]+)\s.*$/$1/i;

            my $value = $exiftool->GetValue($tag);

            # Skip values with no alphanumeric content
            next unless (defined $value && $value =~ /[a-z0-9]+/i);

            # A scalar reference is replaced by a short textual placeholder
            if (ref $value eq 'SCALAR') {
                $value = ($$value =~ /^Binary data/)
                    ? "($$value)"
                    : "(Binary data " . length($$value) . " bytes)";
            }

            # The list for a new field springs into existence on first push
            push (@{$values_for{"$group.$tag"}}, $self->gsSafe($value));
            ++$metadata_count;
        }
    }

    print $outhandle " Extracted $metadata_count pieces of metadata from $filename EXIF block\n"
        if ($metadata_count > 0);

    # Protect windows directory chars \
    $file = &util::filename_to_regex($file);

    # Associate the metadata with the file
    $extrametadata->{$file} = \%values_for;
    push(@$extrametakeys, $file);
}
168
169
# Plugin hook: collect embedded metadata for $file (relative to $base_dir).
#
# Only $base_dir, $file, $extrametakeys, $extrametadata and $gli are used
# here; the remaining parameters are accepted but ignored.
#
# Anything that is not a plain file (e.g. a directory) is skipped.  For plain
# files the work is delegated to extractEmbeddedMetadata, which records its
# results in $extrametadata / $extrametakeys.
#
# Always returns undef.
sub metadata_read()
{
    my ($self,
        $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli) = @_;

    my ($full_path, $bare_name) = &util::get_full_filenames($base_dir, $file);

    # we don't want to process directories
    unless (-f $full_path) {
        return undef;
    }

    if ($gli) {
        print STDERR "\n<Processing n='$file' p='EmbeddedMetadataPlugin'>\n";
    }
    if ($self->{'verbosity'} > 1) {
        print STDERR "EmbeddedMetadataPlugin: processing $file\n";
    }

    # The metadata is keyed on the bare file name; the full path is what
    # actually gets read.
    $self->extractEmbeddedMetadata($bare_name, $full_path,
                                   $extrametadata, $extrametakeys);

    return undef;
}
193
194
# Not used by this plugin: does no work, ignores its arguments and always
# returns undef.
sub process()
{
    return undef;
}
200
# Replace characters treated as dangerous with numeric character entities.
#
# Escaped characters:  ( ) , < > [ ]
#
# Arguments:
#   $text - the string to escape
#
# Returns the escaped copy; the caller's variable is not modified.  An
# undefined $text is returned as undef.
#
# Fix: the old code ran the "<" substitution twice (the second time with the
# "&#62;" replacement), so ">" was never escaped.  All characters are now
# handled in one pass from a lookup table.
#
# No prototype is declared, since prototypes are ignored on method calls.
sub gsSafe
{
    my $self = shift(@_);
    my ($text) = @_;

    my %entity_for = (
        '(' => '&#40;',
        ')' => '&#41;',
        ',' => '&#44;',
        '<' => '&#60;',
        '>' => '&#62;',
        '[' => '&#91;',
        ']' => '&#93;',
    );

    # Single pass: no replacement contains a character from the set, so
    # nothing is escaped twice.
    $text =~ s/([(),<>\[\]])/$entity_for{$1}/g;

    return $text;
}
216
2171;
Note: See TracBrowser for help on using the repository browser.