source: main/trunk/greenstone2/perllib/plugins/EmbeddedMetadataPlugin.pm@ 21746

Last change on this file since 21746 was 20927, checked in by kjdon, 14 years ago

Added the process exp back in but made it empty, so that we don't get a warning about a non-recursive plugin not having a process exp.

File size: 5.5 KB
Line 
1###########################################################################
2#
3# EmbeddedMetadataPlugin.pm -- A plugin for EXIF
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2007 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package EmbeddedMetadataPlugin;
29
30use BasePlugin;
31
32use Image::ExifTool qw(:Public);
33use strict;
34
35no strict 'refs'; # allow filehandles to be variables and viceversa
36
37
# Set up inheritance at compile time: this plugin derives from BasePlugin.
BEGIN {
    @EmbeddedMetadataPlugin::ISA = qw(BasePlugin);
}
42
43
44
# This plugin declares no arguments of its own.
my $arguments = [];

# Description record for this plugin; new() appends it to the "OptList"
# array of the hash it is given.
my $options = {
    'name'     => "EmbeddedMetadataPlugin",
    'desc'     => "{EmbeddedMetadataPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
54
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - handed through unchanged to the BasePlugin constructor
#   $hashArgOptLists - hash ref; its "ArgList" and "OptList" arrays receive
#                      this plugin's argument and option definitions
#
# Returns the object built by BasePlugin, re-blessed into $class, with a
# configured Image::ExifTool instance stored under the 'exiftool' key.
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options)   { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    # Explicit Class->new(...) calls rather than indirect-object syntax.
    my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    # One ExifTool object is created here and reused for every file.
    # Option meanings below are as described in the Image::ExifTool docs.
    my $exifTool = Image::ExifTool->new();
    $exifTool->Options(Duplicates => 0);   # do not keep duplicate tags
    $exifTool->Options(PrintConv  => 0);   # no print conversion of values
    $exifTool->Options(Unknown    => 1);   # also extract unknown tags
    # NOTE: a former single-argument Options('Verbose') call was dropped;
    # that form only reads the current setting, so it never changed anything.
    $self->{'exiftool'} = $exifTool;

    return bless $self, $class;
}
78
79
# Default process expression for this plugin.
#
# Currently the empty string. A candidate file-extension pattern is kept in
# the body for reference.
# TODO: this still needs some more thought.
sub get_default_process_exp()
{
    # Candidate pattern, not enabled:
    #   q^(?i)\.(wma|wmv|jpe?g|gif)$^
    my $process_exp = "";
    return $process_exp;
}
86
87
88# This plugin doesn't block any files
89#sub get_default_block_exp()
90#{
91# return '';
92#}
93
94
# Collect embedded metadata from one file and register it as extra metadata.
#
# Arguments:
#   $file          - file name used (after util::filename_to_regex) as the key
#                    under which the metadata is stored
#   $filename      - path handed to ExifTool for extraction
#   $extrametadata - hash ref; receives  key => { "Group.Tag" => [values] }
#   $extrametakeys - array ref; the key is appended to it
#
# Every family-0 group reported by Image::ExifTool::GetAllGroups(0) is
# processed in turn, and each value is passed through gsSafe() before being
# stored. A summary line is printed to the plugin's 'outhandle' when at
# least one value was collected.
sub extractEmbeddedMetadata()
{
    my ($self, $file, $filename, $extrametadata, $extrametakeys) = @_;

    my $exiftool  = $self->{'exiftool'};
    my $outhandle = $self->{'outhandle'};

    my %exif_metadata  = ();
    my $metadata_count = 0;

    for my $group (Image::ExifTool::GetAllGroups(0))
    {
        # Extract meta information for this group only.
        # NOTE(review): presumably Group0 restricts extraction to the named
        # group -- confirm against the Image::ExifTool documentation.
        $exiftool->Options(Group0 => [$group]);
        $exiftool->ExtractInfo($filename);

        # Tags come back in the order they were found in the file.
        for my $found_tag ($exiftool->GetFoundTags('File'))
        {
            # Keep only the text before the first whitespace, which drops
            # any numbering suffix on the tag key.
            (my $tag = $found_tag) =~ s/^([^\s]+)\s.*$/$1/i;

            my $value = $exiftool->GetValue($tag);

            # Skip values that are missing or have no alphanumeric content.
            next unless defined $value && $value =~ /[a-z0-9]+/i;

            # A scalar reference is replaced by a short textual placeholder.
            if (ref $value eq 'SCALAR')
            {
                if ($$value !~ /^Binary data/)
                {
                    my $len = length($$value);
                    $value = "(Binary data $len bytes)";
                }
                else
                {
                    $value = "($$value)";
                }
            }

            # push autovivifies the per-field list on first use.
            push(@{$exif_metadata{"$group.$tag"}}, $self->gsSafe($value));
            ++$metadata_count;
        }
    }

    print $outhandle " Extracted $metadata_count pieces of metadata from $filename EXIF block\n"
        if $metadata_count > 0;

    # Protect windows directory chars \
    $file = &util::filename_to_regex($file);

    # Associate the metadata with the (escaped) file name.
    $extrametadata->{$file} = \%exif_metadata;
    push(@$extrametakeys, $file);
}
169
170
# Metadata-reading hook: extracts embedded metadata for $file.
#
# Only $base_dir, $file, $extrametakeys, $extrametadata and $gli are used;
# the remaining parameters are accepted but ignored here.
#
# No file-type filtering is applied (an earlier PDF-only check is disabled),
# so extraction is attempted for every file passed in.
#
# Always returns undef.
sub metadata_read()
{
    my ($self,
        $pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli) = @_;

    # Path of the file to hand to ExifTool.
    my $filename = &util::filename_cat($base_dir, $file);

    # Progress marker, emitted only when the $gli flag is set.
    if ($gli)
    {
        print STDERR "\n<Processing n='$file' p='EmbeddedMetadataPlugin'>\n";
    }

    if ($self->{'verbosity'} > 1)
    {
        print STDERR "EmbeddedMetadataPlugin: processing $file\n";
    }

    $self->extractEmbeddedMetadata($file, $filename,
                                   $extrametadata, $extrametakeys);

    return undef;
}
197
198
# Not used by this plugin; always returns undef.
sub process()
{
    my $unused_result = undef;
    return $unused_result;
}
204
# Replace characters that are unsafe in stored metadata values with their
# numeric character references.
#
# Escaped characters:  ( ) , < > [ ]
#   e.g. "(" becomes "&#40;" and ">" becomes "&#62;".
#
# Arguments:
#   $text - the string to escape
#
# Returns the escaped copy of $text; the caller's variable is not modified.
#
# Fix: the earlier implementation matched "<" twice, so ">" was never
# escaped. Each listed character is now rewritten in a single pass, using
# its own character code as the reference number.
sub gsSafe
{
    my $self = shift(@_);
    my ($text) = @_;

    # The replacement text never contains a character from the class, so a
    # single pass cannot re-escape its own output.
    $text =~ s/([(),<>\[\]])/'&#' . ord($1) . ';'/ge;

    return $text;
}
220
2211;
Note: See TracBrowser for help on using the repository browser.