source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/iWork.pm@ 24107

Last change on this file since 24107 was 24107, checked in by sjm84, 13 years ago

Updating the ExifTool perl modules

  • Property svn:executable set to *
File size: 7.6 KB
Line 
1#------------------------------------------------------------------------------
2# File: iWork.pm
3#
4# Description: Read Apple iWork '09 XML+ZIP files
5#
6# Revisions: 2009/11/11 - P. Harvey Created
7#------------------------------------------------------------------------------
8
9package Image::ExifTool::iWork;
10
11use strict;
12use vars qw($VERSION);
13use Image::ExifTool qw(:DataAccess :Utils);
14use Image::ExifTool::XMP;
15use Image::ExifTool::ZIP;
16
17$VERSION = '1.02';
18
19# test for recognized iWork document extensions and outter XML elements
20my %iWorkType = (
21 # file extensions
22 NUMBERS => 'Apple Numbers',
23 PAGES => 'Apple Pages',
24 KEY => 'Apple Keynote',
25 KTH => 'Apple Keynote Theme',
26 NMBTEMPLATE => 'Apple Numbers Template',
27 # we don't support double extensions --
28 # "PAGES.TEMPLATE" => 'Apple Pages Template',
29 # outter XML elements
30 'ls:document' => 'Apple Numbers',
31 'sl:document' => 'Apple Pages',
32 'key:presentation' => 'Apple Keynote',
33);
34
35# MIME types for iWork files (Apple has not registered these yet, but these
36# are my best guess after doing some googling. I'm not 100% sure what "sff"
37# indicates, but I think it refers to the new "flattened" package format)
38my %mimeType = (
39 'Apple Numbers' => 'application/x-iwork-numbers-sffnumbers',
40 'Apple Pages' => 'application/x-iwork-pages-sffpages',
41 'Apple Keynote' => 'application/x-iWork-keynote-sffkey',
42 'Apple Numbers Template' => 'application/x-iwork-numbers-sfftemplate',
43 'Apple Pages Template' => 'application/x-iwork-pages-sfftemplate',
44 'Apple Keynote Theme' => 'application/x-iWork-keynote-sffkth',
45);
46
47# iWork tags
48%Image::ExifTool::iWork::Main = (
49 GROUPS => { 0 => 'XML', 1 => 'XML', 2 => 'Document' },
50 PROCESS_PROC => \&Image::ExifTool::XMP::ProcessXMP,
51 VARS => { NO_ID => 1 },
52 NOTES => q{
53 The Apple iWork '09 file format is a ZIP archive containing XML files
54 similar to the Office Open XML (OOXML) format. Metadata tags in iWork
55 files are extracted even if they don't appear below.
56 },
57 authors => { Name => 'Author', Groups => { 2 => 'Author' } },
58 comment => { },
59 copyright => { Groups => { 2 => 'Author' } },
60 keywords => { },
61 projects => { List => 1 },
62 title => { },
63);
64
65#------------------------------------------------------------------------------
66# Generate a tag ID for this XML tag
67# Inputs: 0) tag property name list ref
68# Returns: tagID
69sub GetTagID($)
70{
71 my $props = shift;
72 return 0 if $$props[-1] =~ /^\w+:ID$/; # ignore ID tags
73 return ($$props[0] =~ /.*?:(.*)/) ? $1 : $$props[0];
74}
75
76#------------------------------------------------------------------------------
77# We found an XMP property name/value
78# Inputs: 0) ExifTool object ref, 1) tag table ref
79# 2) reference to array of XMP property names (last is current property)
80# 3) property value, 4) attribute hash ref (not used here)
81# Returns: 1 if valid tag was found
82sub FoundTag($$$$;$)
83{
84 my ($exifTool, $tagTablePtr, $props, $val, $attrs) = @_;
85 return 0 unless @$props;
86 my $verbose = $exifTool->Options('Verbose');
87
88 $exifTool->VPrint(0, " | - Tag '", join('/',@$props), "'\n") if $verbose > 1;
89
90 # un-escape XML character entities
91 $val = Image::ExifTool::XMP::UnescapeXML($val);
92 # convert from UTF8 to ExifTool Charset
93 $val = $exifTool->Decode($val, 'UTF8');
94 my $tag = GetTagID($props) or return 0;
95
96 # add any unknown tags to table
97 unless ($$tagTablePtr{$tag}) {
98 $exifTool->VPrint(0, " [adding $tag]\n") if $verbose;
99 Image::ExifTool::AddTagToTable($tagTablePtr, $tag, { Name => ucfirst $tag });
100 }
101 # save the tag
102 $exifTool->HandleTag($tagTablePtr, $tag, $val);
103
104 return 1;
105}
106
107#------------------------------------------------------------------------------
108# Extract information from an iWork file
109# Inputs: 0) ExifTool object reference, 1) dirInfo reference
110# Returns: 1
111# Notes: Upon entry to this routine, the file type has already been verified
112# as ZIP and the dirInfo hash contains a 'ZIP' Archive::Zip object reference
113sub Process_iWork($$)
114{
115 my ($exifTool, $dirInfo) = @_;
116 my $zip = $$dirInfo{ZIP};
117 my ($type, $index, $indexFile, $status);
118
119 # try to determine the file type
120 local $SIG{'__WARN__'} = \&Image::ExifTool::ZIP::WarnProc;
121 # trust type given by file extension if available
122 $type = $iWorkType{$$exifTool{FILE_EXT}} if $$exifTool{FILE_EXT};
123 unless ($type) {
124 # read the index file
125 my @members = $zip->membersMatching('^index\.(xml|apxl)$');
126 if (@members) {
127 ($index, $status) = $zip->contents($members[0]);
128 unless ($status) {
129 $indexFile = $members[0]->fileName();
130 if ($index =~ /^\s*<\?xml version=[^<]+<(\w+:\w+)/s) {
131 $type = $iWorkType{$1} if $iWorkType{$1};
132 }
133 }
134 }
135 $type or $type = 'ZIP'; # assume ZIP by default
136 }
137 $exifTool->SetFileType($type, $mimeType{$type});
138
139 my @members = $zip->members();
140 my $docNum = 0;
141 my $member;
142 foreach $member (@members) {
143 # get filename of this ZIP member
144 my $file = $member->fileName();
145 next unless defined $file;
146 $exifTool->VPrint(0, "File: $file\n");
147 # set the document number and extract ZIP tags
148 $$exifTool{DOC_NUM} = ++$docNum;
149 Image::ExifTool::ZIP::HandleMember($exifTool, $member);
150
151 # process only the index XML and JPEG thumbnail files
152 next unless $file =~ m{^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg)$}i;
153 # get the file contents if necessary
154 # (CAREFUL! $buff MUST be local since we hand off a value ref to PreviewImage)
155 my ($buff, $buffPt);
156 if ($indexFile and $indexFile eq $file) {
157 # use the index file we already loaded
158 $buffPt = \$index;
159 } else {
160 ($buff, $status) = $zip->contents($member);
161 $status and $exifTool->Warn("Error extracting $file"), next;
162 $buffPt = \$buff;
163 }
164 # extract JPEG as PreviewImage (should only be QuickLook/Thumbnail.jpg)
165 if ($file =~ /\.jpg$/) {
166 $exifTool->FoundTag('PreviewImage', $buffPt);
167 next;
168 }
169 # process "metadata" section of XML index file
170 next unless $$buffPt =~ /<(\w+):metadata>/g;
171 my $ns = $1;
172 my $p1 = pos $$buffPt;
173 next unless $$buffPt =~ m{</${ns}:metadata>}g;
174 # construct XML data from "metadata" section only
175 $$buffPt = '<?xml version="1.0"?>' . substr($$buffPt, $p1, pos($$buffPt)-$p1);
176 my %dirInfo = (
177 DataPt => $buffPt,
178 DirLen => length $$buffPt,
179 DataLen => length $$buffPt,
180 XMPParseOpts => {
181 FoundProc => \&FoundTag,
182 },
183 );
184 my $tagTablePtr = GetTagTable('Image::ExifTool::iWork::Main');
185 $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);
186 undef $$buffPt; # (free memory now)
187 }
188 delete $$exifTool{DOC_NUM};
189 return 1;
190}
191
1921; # end
193
194__END__
195
196=head1 NAME
197
198Image::ExifTool::iWork - Read Apple iWork '09 XML+ZIP files
199
200=head1 SYNOPSIS
201
202This module is used by Image::ExifTool
203
204=head1 DESCRIPTION
205
206This module contains definitions required by Image::ExifTool to extract meta
207information from Apple iWork '09 XML+ZIP files.
208
209=head1 AUTHOR
210
211Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
212
213This library is free software; you can redistribute it and/or modify it
214under the same terms as Perl itself.
215
216=head1 SEE ALSO
217
218L<Image::ExifTool::TagNames/iWork Tags>,
219L<Image::ExifTool::TagNames/OOXML Tags>,
220L<Image::ExifTool(3pm)|Image::ExifTool>
221
222=cut
223
Note: See TracBrowser for help on using the repository browser.