########################################################################### # # MetadataEXIFPlugin.pm -- A plugin for EXIF # # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright 2007 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
#
###########################################################################

package MetadataEXIFPlugin;

use BasePlugin;
use Image::ExifTool qw(:Public);

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa

sub BEGIN {
    @MetadataEXIFPlugin::ISA = ('BasePlugin');
}

# This plugin declares no arguments of its own.
my $arguments = [ ];

my $options = { 'name'     => "MetadataEXIFPlugin",
                'desc'     => "{MetadataBasPlug.desc}",
                'abstract' => "no",
                'inherits' => "yes",
                'args'     => $arguments };


# Constructor.
#
# Appends this class to $pluginlist and this plugin's argument/option
# descriptions to $hashArgOptLists, builds the object through the BasePlugin
# constructor, and attaches a configured Image::ExifTool instance as
# $self->{'exiftool'}.
#
# Parameters: ($pluginlist, $inputargs, $hashArgOptLists) - handed on to
#             BasePlugin unchanged apart from the pushes described above.
# Returns:    the object blessed into $class.
sub new() {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Direct method-call syntax instead of the indirect "new Class(...)" form.
    my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    # Create a new Image::ExifTool object
    my $exifTool = Image::ExifTool->new;
    $exifTool->Options(Duplicates => 0);
    $exifTool->Options(PrintConv => 0);
    $exifTool->Options(Unknown => 1);
    # NOTE(review): with a single argument Options() only reads the current
    # value of the option, so this call does not change the Verbose setting.
    # Kept as in the original; confirm whether enabling Verbose was intended.
    $exifTool->Options('Verbose');
    $self->{'exiftool'} = $exifTool;

    return bless $self, $class;
}


# Returns the default regular expression of file names this plugin handles.
# Need to think some more about this
sub get_default_process_exp()
{
    return q^(?i)\.(wma|wmv|jpe?g|gif)$^;
}


# Returns the default block expression.
# This plugin doesn't block any files
sub get_default_block_exp()
{
    return '';
}


# Private helper: turns one value returned by ExifTool's GetValue() into the
# list of printable strings that should be stored for it.
#
#   * an ARRAY reference (a list-valued tag) yields one string per element;
#   * a SCALAR reference (binary data) is replaced by a short placeholder;
#   * undefined values and values containing no letter or digit are dropped.
#
# Returns a (possibly empty) list of strings.
sub _exif_value_strings
{
    my ($value) = @_;

    return () unless defined $value;

    my @candidates = (ref $value eq 'ARRAY') ? @$value : ($value);

    my @strings = ();
    foreach my $candidate (@candidates) {
        next unless defined $candidate;

        my $string = $candidate;
        if (ref $candidate eq 'SCALAR') {
            if ($$candidate =~ /^Binary data/) {
                $string = "($$candidate)";
            }
            else {
                my $len = length($$candidate);
                $string = "(Binary data $len bytes)";
            }
        }

        # Same usefulness test the original applied to every value.
        next unless $string =~ /[a-z0-9]+/i;

        push(@strings, $string);
    }

    return @strings;
}


# Extracts embedded metadata from $filename with ExifTool and records it.
#
# For every family-0 group reported by Image::ExifTool::GetAllGroups the file
# is extracted with the Group0 option restricted to that group, and each tag
# found is stored under the field name "<group>.<tag>".
#
# Parameters:
#   $file           - key under which the collected metadata is stored in
#                     $extrametadata; it is also pushed onto $extrametakeys
#   $filename       - path handed to ExifTool's ExtractInfo
#   $extrametadata  - hash reference receiving { $file => \%metadata }
#   $extrametakeys  - array reference receiving $file
#
# Each field maps to an array reference of values passed through gsSafe().
# The entry for $file is written even when no metadata was found.
sub extractEmbeddedMetadata()
{
    my $self = shift(@_);
    my ($file, $filename, $extrametadata, $extrametakeys) = @_;

    my %exif_metadata = ();

    my $outhandle = $self->{'outhandle'};
    my $exiftool  = $self->{'exiftool'};

    my $metadata_count = 0;

    my @group_list = Image::ExifTool::GetAllGroups(0);
    foreach my $group (@group_list)
    {
        ## print STDERR "**** group = $group\n";

        # Extract meta information from an image
        $exiftool->Options(Group0 => [$group]);
        $exiftool->ExtractInfo($filename);

        # Get list of tags in the order they were found in the file
        my @tag_list = $exiftool->GetFoundTags('File');
        foreach my $tag (@tag_list)
        {
            ### print STDERR "**** tag = $tag\n";

            # Strip any numbering suffix
            $tag =~ s/^([^\s]+)\s.*$/$1/i;

            # PrintConv is disabled in new(), so in scalar context a
            # list-valued tag comes back as an ARRAY reference. The helper
            # expands it; previously such tags were stored as the text
            # "ARRAY(0x...)".
            my $value = $exiftool->GetValue($tag);
            my @strings = _exif_value_strings($value);
            next unless @strings;

            my $field = "$group.$tag";
            if (!defined $exif_metadata{$field})
            {
                $exif_metadata{$field} = [];
            }

            foreach my $string (@strings)
            {
                push (@{$exif_metadata{$field}}, $self->gsSafe($string));
                ## print STDERR "**** adding $field: $string\n";
                ++$metadata_count;
            }
        }
    }

    if ($metadata_count > 0) {
        print $outhandle " Extracted $metadata_count pieces of metadata from $filename EXIF block\n";
    }

    # Associate the metadata now
    ## print STDERR "**** file = $file\n";
    $extrametadata->{$file} = \%exif_metadata;
    push(@$extrametakeys, $file);
}


# Metadata-read hook: builds the full path from $base_dir and $file and runs
# extractEmbeddedMetadata() on it, filling $extrametadata / $extrametakeys.
#
# Always returns undef.
#
# NOTE(review): no file-type or existence check is made here (the one below is
# commented out), so extraction is attempted for every file passed in -
# confirm that is intended.
sub metadata_read()
{
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;

    # See if we are looking at a PDF file... which may contain EXIF
    my $filename = &util::filename_cat($base_dir, $file);
#    if ($filename !~ /\.pdf$/i || !-f $filename)
#    {
#        return undef;
#    }

    print STDERR "\n\n" if ($gli);
    print STDERR "MetadataEXIFPlugin: processing $file\n"
        if ($self->{'verbosity'}) > 1;

    $self->extractEmbeddedMetadata($file, $filename,
                                   $extrametadata, $extrametakeys);

    return undef;
}


# Document-processing hook; this plugin does nothing here.
sub process()
{
    # not used
    return undef;
}


sub gsSafe() {
    my $self = shift(@_);
    my ($text) = @_;
    # Replace dangerous characters
    $text =~ s/\(/(/g;
    $text =~ s/\)/)/g;
    $text =~ s/,/,/g;
    $text =~ s/\