1 | #------------------------------------------------------------------------------
|
---|
2 | # File: iWork.pm
|
---|
3 | #
|
---|
4 | # Description: Read Apple iWork '09 XML+ZIP files
|
---|
5 | #
|
---|
6 | # Revisions: 2009/11/11 - P. Harvey Created
|
---|
7 | #------------------------------------------------------------------------------
|
---|
8 |
|
---|
9 | package Image::ExifTool::iWork;
|
---|
10 |
|
---|
11 | use strict;
|
---|
12 | use vars qw($VERSION);
|
---|
13 | use Image::ExifTool qw(:DataAccess :Utils);
|
---|
14 | use Image::ExifTool::XMP;
|
---|
15 | use Image::ExifTool::ZIP;
|
---|
16 |
|
---|
17 | $VERSION = '1.02';
|
---|
18 |
|
---|
19 | # test for recognized iWork document extensions and outter XML elements
|
---|
20 | my %iWorkType = (
|
---|
21 | # file extensions
|
---|
22 | NUMBERS => 'Apple Numbers',
|
---|
23 | PAGES => 'Apple Pages',
|
---|
24 | KEY => 'Apple Keynote',
|
---|
25 | KTH => 'Apple Keynote Theme',
|
---|
26 | NMBTEMPLATE => 'Apple Numbers Template',
|
---|
27 | # we don't support double extensions --
|
---|
28 | # "PAGES.TEMPLATE" => 'Apple Pages Template',
|
---|
29 | # outter XML elements
|
---|
30 | 'ls:document' => 'Apple Numbers',
|
---|
31 | 'sl:document' => 'Apple Pages',
|
---|
32 | 'key:presentation' => 'Apple Keynote',
|
---|
33 | );
|
---|
34 |
|
---|
35 | # MIME types for iWork files (Apple has not registered these yet, but these
|
---|
36 | # are my best guess after doing some googling. I'm not 100% sure what "sff"
|
---|
37 | # indicates, but I think it refers to the new "flattened" package format)
|
---|
38 | my %mimeType = (
|
---|
39 | 'Apple Numbers' => 'application/x-iwork-numbers-sffnumbers',
|
---|
40 | 'Apple Pages' => 'application/x-iwork-pages-sffpages',
|
---|
41 | 'Apple Keynote' => 'application/x-iWork-keynote-sffkey',
|
---|
42 | 'Apple Numbers Template' => 'application/x-iwork-numbers-sfftemplate',
|
---|
43 | 'Apple Pages Template' => 'application/x-iwork-pages-sfftemplate',
|
---|
44 | 'Apple Keynote Theme' => 'application/x-iWork-keynote-sffkth',
|
---|
45 | );
|
---|
46 |
|
---|
47 | # iWork tags
|
---|
48 | %Image::ExifTool::iWork::Main = (
|
---|
49 | GROUPS => { 0 => 'XML', 1 => 'XML', 2 => 'Document' },
|
---|
50 | PROCESS_PROC => \&Image::ExifTool::XMP::ProcessXMP,
|
---|
51 | VARS => { NO_ID => 1 },
|
---|
52 | NOTES => q{
|
---|
53 | The Apple iWork '09 file format is a ZIP archive containing XML files
|
---|
54 | similar to the Office Open XML (OOXML) format. Metadata tags in iWork
|
---|
55 | files are extracted even if they don't appear below.
|
---|
56 | },
|
---|
57 | authors => { Name => 'Author', Groups => { 2 => 'Author' } },
|
---|
58 | comment => { },
|
---|
59 | copyright => { Groups => { 2 => 'Author' } },
|
---|
60 | keywords => { },
|
---|
61 | projects => { List => 1 },
|
---|
62 | title => { },
|
---|
63 | );
|
---|
64 |
|
---|
65 | #------------------------------------------------------------------------------
|
---|
66 | # Generate a tag ID for this XML tag
|
---|
67 | # Inputs: 0) tag property name list ref
|
---|
68 | # Returns: tagID
|
---|
69 | sub GetTagID($)
|
---|
70 | {
|
---|
71 | my $props = shift;
|
---|
72 | return 0 if $$props[-1] =~ /^\w+:ID$/; # ignore ID tags
|
---|
73 | return ($$props[0] =~ /.*?:(.*)/) ? $1 : $$props[0];
|
---|
74 | }
|
---|
75 |
|
---|
76 | #------------------------------------------------------------------------------
|
---|
77 | # We found an XMP property name/value
|
---|
78 | # Inputs: 0) ExifTool object ref, 1) tag table ref
|
---|
79 | # 2) reference to array of XMP property names (last is current property)
|
---|
80 | # 3) property value, 4) attribute hash ref (not used here)
|
---|
81 | # Returns: 1 if valid tag was found
|
---|
82 | sub FoundTag($$$$;$)
|
---|
83 | {
|
---|
84 | my ($exifTool, $tagTablePtr, $props, $val, $attrs) = @_;
|
---|
85 | return 0 unless @$props;
|
---|
86 | my $verbose = $exifTool->Options('Verbose');
|
---|
87 |
|
---|
88 | $exifTool->VPrint(0, " | - Tag '", join('/',@$props), "'\n") if $verbose > 1;
|
---|
89 |
|
---|
90 | # un-escape XML character entities
|
---|
91 | $val = Image::ExifTool::XMP::UnescapeXML($val);
|
---|
92 | # convert from UTF8 to ExifTool Charset
|
---|
93 | $val = $exifTool->Decode($val, 'UTF8');
|
---|
94 | my $tag = GetTagID($props) or return 0;
|
---|
95 |
|
---|
96 | # add any unknown tags to table
|
---|
97 | unless ($$tagTablePtr{$tag}) {
|
---|
98 | $exifTool->VPrint(0, " [adding $tag]\n") if $verbose;
|
---|
99 | Image::ExifTool::AddTagToTable($tagTablePtr, $tag, { Name => ucfirst $tag });
|
---|
100 | }
|
---|
101 | # save the tag
|
---|
102 | $exifTool->HandleTag($tagTablePtr, $tag, $val);
|
---|
103 |
|
---|
104 | return 1;
|
---|
105 | }
|
---|
106 |
|
---|
107 | #------------------------------------------------------------------------------
|
---|
108 | # Extract information from an iWork file
|
---|
109 | # Inputs: 0) ExifTool object reference, 1) dirInfo reference
|
---|
110 | # Returns: 1
|
---|
111 | # Notes: Upon entry to this routine, the file type has already been verified
|
---|
112 | # as ZIP and the dirInfo hash contains a 'ZIP' Archive::Zip object reference
|
---|
113 | sub Process_iWork($$)
|
---|
114 | {
|
---|
115 | my ($exifTool, $dirInfo) = @_;
|
---|
116 | my $zip = $$dirInfo{ZIP};
|
---|
117 | my ($type, $index, $indexFile, $status);
|
---|
118 |
|
---|
119 | # try to determine the file type
|
---|
120 | local $SIG{'__WARN__'} = \&Image::ExifTool::ZIP::WarnProc;
|
---|
121 | # trust type given by file extension if available
|
---|
122 | $type = $iWorkType{$$exifTool{FILE_EXT}} if $$exifTool{FILE_EXT};
|
---|
123 | unless ($type) {
|
---|
124 | # read the index file
|
---|
125 | my @members = $zip->membersMatching('^index\.(xml|apxl)$');
|
---|
126 | if (@members) {
|
---|
127 | ($index, $status) = $zip->contents($members[0]);
|
---|
128 | unless ($status) {
|
---|
129 | $indexFile = $members[0]->fileName();
|
---|
130 | if ($index =~ /^\s*<\?xml version=[^<]+<(\w+:\w+)/s) {
|
---|
131 | $type = $iWorkType{$1} if $iWorkType{$1};
|
---|
132 | }
|
---|
133 | }
|
---|
134 | }
|
---|
135 | $type or $type = 'ZIP'; # assume ZIP by default
|
---|
136 | }
|
---|
137 | $exifTool->SetFileType($type, $mimeType{$type});
|
---|
138 |
|
---|
139 | my @members = $zip->members();
|
---|
140 | my $docNum = 0;
|
---|
141 | my $member;
|
---|
142 | foreach $member (@members) {
|
---|
143 | # get filename of this ZIP member
|
---|
144 | my $file = $member->fileName();
|
---|
145 | next unless defined $file;
|
---|
146 | $exifTool->VPrint(0, "File: $file\n");
|
---|
147 | # set the document number and extract ZIP tags
|
---|
148 | $$exifTool{DOC_NUM} = ++$docNum;
|
---|
149 | Image::ExifTool::ZIP::HandleMember($exifTool, $member);
|
---|
150 |
|
---|
151 | # process only the index XML and JPEG thumbnail files
|
---|
152 | next unless $file =~ m{^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg)$}i;
|
---|
153 | # get the file contents if necessary
|
---|
154 | # (CAREFUL! $buff MUST be local since we hand off a value ref to PreviewImage)
|
---|
155 | my ($buff, $buffPt);
|
---|
156 | if ($indexFile and $indexFile eq $file) {
|
---|
157 | # use the index file we already loaded
|
---|
158 | $buffPt = \$index;
|
---|
159 | } else {
|
---|
160 | ($buff, $status) = $zip->contents($member);
|
---|
161 | $status and $exifTool->Warn("Error extracting $file"), next;
|
---|
162 | $buffPt = \$buff;
|
---|
163 | }
|
---|
164 | # extract JPEG as PreviewImage (should only be QuickLook/Thumbnail.jpg)
|
---|
165 | if ($file =~ /\.jpg$/) {
|
---|
166 | $exifTool->FoundTag('PreviewImage', $buffPt);
|
---|
167 | next;
|
---|
168 | }
|
---|
169 | # process "metadata" section of XML index file
|
---|
170 | next unless $$buffPt =~ /<(\w+):metadata>/g;
|
---|
171 | my $ns = $1;
|
---|
172 | my $p1 = pos $$buffPt;
|
---|
173 | next unless $$buffPt =~ m{</${ns}:metadata>}g;
|
---|
174 | # construct XML data from "metadata" section only
|
---|
175 | $$buffPt = '<?xml version="1.0"?>' . substr($$buffPt, $p1, pos($$buffPt)-$p1);
|
---|
176 | my %dirInfo = (
|
---|
177 | DataPt => $buffPt,
|
---|
178 | DirLen => length $$buffPt,
|
---|
179 | DataLen => length $$buffPt,
|
---|
180 | XMPParseOpts => {
|
---|
181 | FoundProc => \&FoundTag,
|
---|
182 | },
|
---|
183 | );
|
---|
184 | my $tagTablePtr = GetTagTable('Image::ExifTool::iWork::Main');
|
---|
185 | $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);
|
---|
186 | undef $$buffPt; # (free memory now)
|
---|
187 | }
|
---|
188 | delete $$exifTool{DOC_NUM};
|
---|
189 | return 1;
|
---|
190 | }
|
---|
191 |
|
---|
192 | 1; # end
|
---|
193 |
|
---|
194 | __END__
|
---|
195 |
|
---|
196 | =head1 NAME
|
---|
197 |
|
---|
198 | Image::ExifTool::iWork - Read Apple iWork '09 XML+ZIP files
|
---|
199 |
|
---|
200 | =head1 SYNOPSIS
|
---|
201 |
|
---|
202 | This module is used by Image::ExifTool
|
---|
203 |
|
---|
204 | =head1 DESCRIPTION
|
---|
205 |
|
---|
206 | This module contains definitions required by Image::ExifTool to extract meta
|
---|
207 | information from Apple iWork '09 XML+ZIP files.
|
---|
208 |
|
---|
209 | =head1 AUTHOR
|
---|
210 |
|
---|
211 | Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
|
---|
212 |
|
---|
213 | This library is free software; you can redistribute it and/or modify it
|
---|
214 | under the same terms as Perl itself.
|
---|
215 |
|
---|
216 | =head1 SEE ALSO
|
---|
217 |
|
---|
218 | L<Image::ExifTool::TagNames/iWork Tags>,
|
---|
219 | L<Image::ExifTool::TagNames/OOXML Tags>,
|
---|
220 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
221 |
|
---|
222 | =cut
|
---|
223 |
|
---|