root/gsdl/trunk/perllib/plugins/MetadataEXIFPlugin.pm @ 18516

Revision 18516, 5.3 KB (checked in by davidb, 12 years ago)

Plugin for extracting EXIF metadata using the CPAN module of the same name. Works along similar lines to MetadataXMLPlugin. Extracts from image, audio and video file formats. Actually does more than just EXIF -- see ExifTool?.pod for more details

Line 
1###########################################################################
2#
3# MetadataEXIFPlugin.pm -- A plugin for EXIF
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2007 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package MetadataEXIFPlugin;
29
30use BasePlugin;
31
32use Image::ExifTool qw(:Public);
33use strict;
34
35no strict 'refs'; # allow filehandles to be variables and viceversa
36
37
38sub BEGIN
39{
40  @MetadataEXIFPlugin::ISA = ('BasePlugin');
41}
42
43
44
45my $arguments =
46    [ ];
47
48
49my $options = { 'name'     => "MetadataEXIFPlugin",
50        'desc'     => "{MetadataBasPlug.desc}",
51        'abstract' => "no",
52        'inherits' => "yes",
53        'args'     => $arguments };
54
55sub new()
56{
57    my ($class) = shift (@_);
58    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
59    push(@$pluginlist, $class);
60
61    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
62    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
63
64    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
65
66
67    # Create a new Image::ExifTool object
68    my $exifTool = new Image::ExifTool;
69    $exifTool->Options(Duplicates => 0);
70    $exifTool->Options(PrintConv => 0);
71    $exifTool->Options(Unknown => 1);
72    $exifTool->Options('Verbose');
73    $self->{'exiftool'} = $exifTool;
74
75
76    return bless $self, $class;
77}
78
79
80# Need to think some more about this
81sub get_default_process_exp()
82{
83    return q^(?i)\.(wma|wmv|jpe?g|gif)$^;
84}
85
86
87# This plugin doesn't block any files
88sub get_default_block_exp()
89{
90    return '';
91}
92
93
94sub extractEmbeddedMetadata()
95{
96    my $self = shift(@_);
97    my ($file, $filename, $extrametadata, $extrametakeys) = @_;
98
99    my %exif_metadata = ();
100
101    my $verbosity = $self->{'verbosity'};
102    my $outhandle = $self->{'outhandle'};
103
104    my $metadata_count = 0;
105   
106    my @group_list = Image::ExifTool::GetAllGroups(0);
107    foreach my $group (@group_list)
108      {
109##    print STDERR "**** group = $group\n";
110
111        # Extract meta information from an image
112        $self->{'exiftool'}->Options(Group0 => [$group]);
113        $self->{'exiftool'}->ExtractInfo($filename);
114
115        # Get list of tags in the order they were found in the file
116        my @tag_list = $self->{'exiftool'}->GetFoundTags('File');
117        foreach my $tag (@tag_list)
118          {
119###       print STDERR "**** tag = $tag\n";
120
121            # Strip any numbering suffix
122            $tag =~ s/^([^\s]+)\s.*$/$1/i;
123            my $value = $self->{'exiftool'}->GetValue($tag);
124            if (defined $value && $value =~ /[a-z0-9]+/i)
125              {
126                if (ref $value eq 'SCALAR')
127                  {
128                    if ($$value =~ /^Binary data/)
129                      {
130                        $value = "($$value)";
131                      }
132                    else
133                      {
134                        my $len = length($$value);
135                        $value = "(Binary data $len bytes)";
136                      }
137                  }
138
139        my $field = "$group.$tag";
140
141        if (!defined $exif_metadata{$field})
142        {
143            $exif_metadata{$field} = [];
144        }
145        push (@{$exif_metadata{$field}}, $self->gsSafe($value));
146##      print STDERR "**** adding $field: $value\n";
147
148        ++$metadata_count;
149              }
150          }
151      }
152
153
154    if ($metadata_count > 0) {
155    print $outhandle " Extracted $metadata_count pieces of metadata from $filename EXIF block\n";
156    }
157 
158  # Associate the metadata now
159##    print STDERR "**** file = $file\n";
160
161  $extrametadata->{$file} = \%exif_metadata;
162  push(@$extrametakeys, $file);
163
164}
165
166
167sub metadata_read()
168{
169  my $self = shift (@_);
170  my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
171 
172
173
174  # See if we are looking at a PDF file... which may contain EXIF
175  my $filename = &util::filename_cat($base_dir, $file);
176#  if ($filename !~ /\.pdf$/i || !-f $filename)
177#    {
178#      return undef;
179#    }
180
181  print STDERR "\n<Processing n='$file' p='MetadataEXIFPlugin'>\n" if ($gli);
182  print STDERR "MetadataEXIFPlugin: processing $file\n" if ($self->{'verbosity'}) > 1;
183
184
185  $self->extractEmbeddedMetadata($file,$filename,
186                 $extrametadata,$extrametakeys);
187
188   
189  return undef;
190}
191
192
193sub process()
194{
195    # not used
196    return undef;
197}
198
199sub gsSafe()
200  {
201    my $self = shift(@_);
202    my ($text) = @_;
203    # Replace dangerous characters
204    $text =~ s/\(/&#40;/g;
205    $text =~ s/\)/&#41;/g;
206    $text =~ s/,/&#44;/g;
207    $text =~ s/\</&#60;/g;
208    $text =~ s/\</&#62;/g;
209    $text =~ s/\[/&#91;/g;
210    $text =~ s/\]/&#93;/g;
211    # Done
212    return $text;
213  }
214
2151;
Note: See TracBrowser for help on using the browser.