source: gsdl/trunk/perllib/plugins/MetadataEXIFPlugin.pm@ 18516

Last change on this file since 18516 was 18516, checked in by davidb, 15 years ago

Plugin for extracting EXIF metadata using the CPAN module of the same name. Works along similar lines to MetadataXMLPlugin. Extracts from image, audio and video file formats. Actually does more than just EXIF -- see ExifTool.pod for more details

File size: 5.3 KB
Line 
1###########################################################################
2#
3# MetadataEXIFPlugin.pm -- A plugin for EXIF
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2007 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package MetadataEXIFPlugin;
29
30use BasePlugin;
31
32use Image::ExifTool qw(:Public);
33use strict;
34
35no strict 'refs'; # allow filehandles to be variables and viceversa
36
37
38sub BEGIN
39{
40 @MetadataEXIFPlugin::ISA = ('BasePlugin');
41}
42
43
44
45my $arguments =
46 [ ];
47
48
49my $options = { 'name' => "MetadataEXIFPlugin",
50 'desc' => "{MetadataBasPlug.desc}",
51 'abstract' => "no",
52 'inherits' => "yes",
53 'args' => $arguments };
54
55sub new()
56{
57 my ($class) = shift (@_);
58 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
59 push(@$pluginlist, $class);
60
61 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
62 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
63
64 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
65
66
67 # Create a new Image::ExifTool object
68 my $exifTool = new Image::ExifTool;
69 $exifTool->Options(Duplicates => 0);
70 $exifTool->Options(PrintConv => 0);
71 $exifTool->Options(Unknown => 1);
72 $exifTool->Options('Verbose');
73 $self->{'exiftool'} = $exifTool;
74
75
76 return bless $self, $class;
77}
78
79
80# Need to think some more about this
81sub get_default_process_exp()
82{
83 return q^(?i)\.(wma|wmv|jpe?g|gif)$^;
84}
85
86
87# This plugin doesn't block any files
88sub get_default_block_exp()
89{
90 return '';
91}
92
93
94sub extractEmbeddedMetadata()
95{
96 my $self = shift(@_);
97 my ($file, $filename, $extrametadata, $extrametakeys) = @_;
98
99 my %exif_metadata = ();
100
101 my $verbosity = $self->{'verbosity'};
102 my $outhandle = $self->{'outhandle'};
103
104 my $metadata_count = 0;
105
106 my @group_list = Image::ExifTool::GetAllGroups(0);
107 foreach my $group (@group_list)
108 {
109## print STDERR "**** group = $group\n";
110
111 # Extract meta information from an image
112 $self->{'exiftool'}->Options(Group0 => [$group]);
113 $self->{'exiftool'}->ExtractInfo($filename);
114
115 # Get list of tags in the order they were found in the file
116 my @tag_list = $self->{'exiftool'}->GetFoundTags('File');
117 foreach my $tag (@tag_list)
118 {
119### print STDERR "**** tag = $tag\n";
120
121 # Strip any numbering suffix
122 $tag =~ s/^([^\s]+)\s.*$/$1/i;
123 my $value = $self->{'exiftool'}->GetValue($tag);
124 if (defined $value && $value =~ /[a-z0-9]+/i)
125 {
126 if (ref $value eq 'SCALAR')
127 {
128 if ($$value =~ /^Binary data/)
129 {
130 $value = "($$value)";
131 }
132 else
133 {
134 my $len = length($$value);
135 $value = "(Binary data $len bytes)";
136 }
137 }
138
139 my $field = "$group.$tag";
140
141 if (!defined $exif_metadata{$field})
142 {
143 $exif_metadata{$field} = [];
144 }
145 push (@{$exif_metadata{$field}}, $self->gsSafe($value));
146## print STDERR "**** adding $field: $value\n";
147
148 ++$metadata_count;
149 }
150 }
151 }
152
153
154 if ($metadata_count > 0) {
155 print $outhandle " Extracted $metadata_count pieces of metadata from $filename EXIF block\n";
156 }
157
158 # Associate the metadata now
159## print STDERR "**** file = $file\n";
160
161 $extrametadata->{$file} = \%exif_metadata;
162 push(@$extrametakeys, $file);
163
164}
165
166
167sub metadata_read()
168{
169 my $self = shift (@_);
170 my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
171
172
173
174 # See if we are looking at a PDF file... which may contain EXIF
175 my $filename = &util::filename_cat($base_dir, $file);
176# if ($filename !~ /\.pdf$/i || !-f $filename)
177# {
178# return undef;
179# }
180
181 print STDERR "\n<Processing n='$file' p='MetadataEXIFPlugin'>\n" if ($gli);
182 print STDERR "MetadataEXIFPlugin: processing $file\n" if ($self->{'verbosity'}) > 1;
183
184
185 $self->extractEmbeddedMetadata($file,$filename,
186 $extrametadata,$extrametakeys);
187
188
189 return undef;
190}
191
192
193sub process()
194{
195 # not used
196 return undef;
197}
198
199sub gsSafe()
200 {
201 my $self = shift(@_);
202 my ($text) = @_;
203 # Replace dangerous characters
204 $text =~ s/\(/&#40;/g;
205 $text =~ s/\)/&#41;/g;
206 $text =~ s/,/&#44;/g;
207 $text =~ s/\</&#60;/g;
208 $text =~ s/\</&#62;/g;
209 $text =~ s/\[/&#91;/g;
210 $text =~ s/\]/&#93;/g;
211 # Done
212 return $text;
213 }
214
2151;
Note: See TracBrowser for help on using the repository browser.