source: gsdl/trunk/perllib/plugins/EmbeddedMetadataPlugin.pm@ 20862

Last change on this file since 20862 was 20862, checked in by kjdon, 14 years ago

plugin name change in file contents. Also, commented out process and block expressions, as this plugin doesn't block or process any files itself, just extracts metadata for the next plugin to use.

File size: 5.5 KB
Line 
1###########################################################################
2#
3# EmbeddedMetadataPlugin.pm -- A plugin that extracts embedded (e.g. EXIF)
3#                              metadata from files using Image::ExifTool
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2007 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package EmbeddedMetadataPlugin;
29
30use BasePlugin;
31
32use Image::ExifTool qw(:Public);
33use strict;
34
35no strict 'refs'; # allow filehandles to be variables and viceversa
36
37
# Declare BasePlugin as the parent class. Done inside BEGIN so the
# inheritance chain is already in place while the rest of this file compiles.
BEGIN {
    @EmbeddedMetadataPlugin::ISA = qw(BasePlugin);
}
42
43
44
# This plugin defines no command-line arguments of its own.
my $arguments = [];

# Option block handed to the plugin framework by new(); its 'args' entry
# refers to the very same (empty) list as $arguments.
my $options = {
    'name'     => "EmbeddedMetadataPlugin",
    'desc'     => "{EmbeddedMetadataPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
54
# Constructor.
#
# Appends this class to the plugin list, registers this plugin's argument
# list and option block, lets BasePlugin build the object, and then stores a
# configured Image::ExifTool instance in $self->{'exiftool'} for
# extractEmbeddedMetadata() to use.
#
# Parameters:
#   $pluginlist      - array ref; $class is pushed onto it
#   $inputargs       - passed through unchanged to BasePlugin->new
#   $hashArgOptLists - hash ref; its "ArgList" and "OptList" arrays are
#                      extended with $arguments and $options
#
# Returns: the object built by BasePlugin, re-blessed into $class.
#
# Declared without a prototype: the former empty prototype "()" claimed the
# sub takes no arguments (prototypes are ignored on method calls anyway).
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options)   { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    # Direct method-call syntax instead of the ambiguous indirect-object
    # form ("new BasePlugin(...)").
    my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    # Create and configure the Image::ExifTool object shared by all calls.
    my $exifTool = Image::ExifTool->new;
    $exifTool->Options(
        Duplicates => 0,
        PrintConv  => 0,
        Unknown    => 1,
    );
    # NOTE: the earlier code also called Options('Verbose') with a name but
    # no value. That form only reads the current setting and changes
    # nothing, so the call has been dropped; verbose output stays off.
    $self->{'exiftool'} = $exifTool;

    return bless $self, $class;
}
78
79
80# Need to think some more about this
81#sub get_default_process_exp()
82#{
83# return q^(?i)\.(wma|wmv|jpe?g|gif)$^;
84#}
85
86
87# This plugin doesn't block any files
88#sub get_default_block_exp()
89#{
90# return '';
91#}
92
93
# Extract embedded metadata from one file and register it as "extra
# metadata" for that file.
#
# For every group returned by Image::ExifTool::GetAllGroups(0) the file is
# re-scanned with extraction restricted to that group (Group0 option), and
# each usable tag value is stored under the field name "<group>.<tag>".
#
# Parameters:
#   $file           - file name as known to the caller; after being passed
#                     through util::filename_to_regex it becomes the key
#                     under which the metadata is stored
#   $filename       - path handed to ExifTool for reading
#   $extrametadata  - hash ref; receives { <key> => { field => [values] } }
#   $extrametakeys  - array ref; the key is appended to it
#
# Side effects: prints a one-line summary to $self->{'outhandle'} when at
# least one value was extracted. Any existing $extrametadata entry for the
# same key is replaced, and the key is appended even when nothing was found.
#
# Returns: nothing meaningful; metadata_read() ignores the result.
#
# Declared without a prototype: the former "()" wrongly advertised a
# zero-argument sub.
sub extractEmbeddedMetadata
{
    my $self = shift(@_);
    my ($file, $filename, $extrametadata, $extrametakeys) = @_;

    my $exiftool  = $self->{'exiftool'};
    my $outhandle = $self->{'outhandle'};

    my %exif_metadata  = ();
    my $metadata_count = 0;

    foreach my $group (Image::ExifTool::GetAllGroups(0))
    {
        # Restrict extraction to the current group, then (re)read the file.
        $exiftool->Options(Group0 => [$group]);
        $exiftool->ExtractInfo($filename);

        # Tags come back in the order they were found in the file.
        foreach my $found_tag ($exiftool->GetFoundTags('File'))
        {
            # Strip any numbering suffix (everything from the first
            # whitespace on). Work on a copy rather than the loop alias.
            (my $tag = $found_tag) =~ s/^([^\s]+)\s.*$/$1/i;

            my $value = $exiftool->GetValue($tag);

            # Skip missing values and values without any alphanumerics.
            next unless (defined $value && $value =~ /[a-z0-9]+/i);

            # A scalar reference is binary data: store a short placeholder
            # instead of the raw bytes.
            if (ref $value eq 'SCALAR')
            {
                if ($$value =~ /^Binary data/)
                {
                    $value = "($$value)";
                }
                else
                {
                    my $len = length($$value);
                    $value = "(Binary data $len bytes)";
                }
            }

            my $field = "$group.$tag";

            # The per-field array is created on first use by autovivification.
            push(@{$exif_metadata{$field}}, $self->gsSafe($value));

            ++$metadata_count;
        }
    }

    if ($metadata_count > 0) {
        print $outhandle " Extracted $metadata_count pieces of metadata from $filename EXIF block\n";
    }

    # Protect windows directory chars \
    $file = util::filename_to_regex($file);

    # Associate the metadata now
    $extrametadata->{$file} = \%exif_metadata;
    push(@$extrametakeys, $file);
}
168
169
# Metadata-pass hook: extract embedded metadata for one file.
#
# Builds the full path from $base_dir and $file, optionally announces the
# file on STDERR, and delegates to extractEmbeddedMetadata(), which records
# the result in $extrametadata / $extrametakeys.
#
# No file-type filter is applied here (an earlier PDF-only check is
# disabled), so every file handed to this method is passed to ExifTool.
#
# Parameters used: $base_dir, $file, $extrametakeys, $extrametadata, $gli.
# The remaining parameters ($pluginfo, $block_hash, $extrametafile,
# $processor, $maxdocs) are accepted but not used by this method.
#
# Returns: always undef. Per this file's change note, the plugin does not
# block or process files itself; it only extracts metadata for the next
# plugin to use.
#
# Declared without a prototype: the former "()" wrongly advertised a
# zero-argument sub.
sub metadata_read
{
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli) = @_;

    my $filename = util::filename_cat($base_dir, $file);

    # Progress marker emitted only when $gli is set.
    print STDERR "\n<Processing n='$file' p='EmbeddedMetadataPlugin'>\n" if ($gli);
    print STDERR "EmbeddedMetadataPlugin: processing $file\n" if ($self->{'verbosity'} > 1);

    $self->extractEmbeddedMetadata($file, $filename,
                                   $extrametadata, $extrametakeys);

    return undef;
}
196
197
# Document-processing hook; intentionally a no-op for this plugin.
#
# All work happens in metadata_read(). Any arguments are ignored.
#
# Returns: always undef.
#
# Declared without a prototype: the former "()" advertised a zero-argument
# sub, which is misleading for a method that receives at least $self.
sub process
{
    # not used
    return undef;
}
203
# Escape characters that are unsafe in stored metadata values by replacing
# them with numeric character entities:
#
#   (  )  ,  <  >  [  ]   ->   &#40; &#41; &#44; &#60; &#62; &#91; &#93;
#
# Parameters:
#   $text - the string to escape
#
# Returns: the escaped copy of $text; the caller's variable is not modified.
#
# Fix: the earlier implementation ran the "<" substitution twice and never
# matched ">", so ">" was left unescaped. All seven characters are now
# handled in a single pass through a lookup table. This is safe because
# none of the replacement entities contains a character that is escaped.
#
# Declared without a prototype: the former "()" wrongly advertised a
# zero-argument sub.
sub gsSafe
{
    my $self = shift(@_);
    my ($text) = @_;

    my %entity_for = (
        '(' => '&#40;',
        ')' => '&#41;',
        ',' => '&#44;',
        '<' => '&#60;',
        '>' => '&#62;',
        '[' => '&#91;',
        ']' => '&#93;',
    );

    # Replace dangerous characters
    $text =~ s/([(),<>\[\]])/$entity_for{$1}/g;

    return $text;
}
219
2201;
Note: See TracBrowser for help on using the repository browser.