1 | #------------------------------------------------------------------------------
|
---|
2 | # File: ZIP.pm
|
---|
3 | #
|
---|
4 | # Description: Read ZIP archive meta information
|
---|
5 | #
|
---|
6 | # Revisions: 10/28/2007 - P. Harvey Created
|
---|
7 | #
|
---|
8 | # References: 1) http://www.pkware.com/documents/casestudies/APPNOTE.TXT
|
---|
9 | # 2) http://www.cpanforum.com/threads/9046
|
---|
10 | # 3) http://www.gzip.org/zlib/rfc-gzip.html
|
---|
11 | # 4) http://DataCompression.info/ArchiveFormats/RAR202.txt
|
---|
12 | #------------------------------------------------------------------------------
|
---|
13 |
|
---|
14 | package Image::ExifTool::ZIP;
|
---|
15 |
|
---|
16 | use strict;
|
---|
17 | use vars qw($VERSION $warnString);
|
---|
18 | use Image::ExifTool qw(:DataAccess :Utils);
|
---|
19 |
|
---|
20 | $VERSION = '1.07';
|
---|
21 |
|
---|
22 | sub WarnProc($) { $warnString = $_[0]; }
|
---|
23 |
|
---|
24 | # file types for recognized Open Document "mimetype" values
|
---|
25 | my %openDocType = (
|
---|
26 | 'application/vnd.oasis.opendocument.presentation' => 'ODP',
|
---|
27 | 'application/vnd.oasis.opendocument.spreadsheet' => 'ODS',
|
---|
28 | 'application/vnd.oasis.opendocument.text' => 'ODT',
|
---|
29 | );
|
---|
30 |
|
---|
31 | # ZIP metadata blocks
|
---|
32 | %Image::ExifTool::ZIP::Main = (
|
---|
33 | PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
|
---|
34 | GROUPS => { 2 => 'Other' },
|
---|
35 | FORMAT => 'int16u',
|
---|
36 | NOTES => q{
|
---|
37 | The following tags are extracted from ZIP archives. ExifTool also extracts
|
---|
38 | additional meta information from compressed documents inside some ZIP-based
|
---|
39 | files such Office Open XML (DOCX, PPTX and XLSX), Open Document (ODP, ODS
|
---|
40 | and ODT), iWork (KEY, PAGES, NUMBERS), and Capture One Enhanced Image
|
---|
41 | Package (EIP). The ExifTool family 3 groups may be used to organize the
|
---|
42 | output by embedded document number (ie. the exiftool C<-g3> option).
|
---|
43 | },
|
---|
44 | 2 => 'ZipRequiredVersion',
|
---|
45 | 3 => {
|
---|
46 | Name => 'ZipBitFlag',
|
---|
47 | PrintConv => '$val ? sprintf("0x%.4x",$val) : $val',
|
---|
48 | },
|
---|
49 | 4 => {
|
---|
50 | Name => 'ZipCompression',
|
---|
51 | PrintConv => {
|
---|
52 | 0 => 'None',
|
---|
53 | 1 => 'Shrunk',
|
---|
54 | 2 => 'Reduced with compression factor 1',
|
---|
55 | 3 => 'Reduced with compression factor 2',
|
---|
56 | 4 => 'Reduced with compression factor 3',
|
---|
57 | 5 => 'Reduced with compression factor 4',
|
---|
58 | 6 => 'Imploded',
|
---|
59 | 7 => 'Tokenized',
|
---|
60 | 8 => 'Deflated',
|
---|
61 | 9 => 'Enhanced Deflate using Deflate64(tm)',
|
---|
62 | 10 => 'Imploded (old IBM TERSE)',
|
---|
63 | 12 => 'BZIP2',
|
---|
64 | 14 => 'LZMA (EFS)',
|
---|
65 | 18 => 'IBM TERSE (new)',
|
---|
66 | 19 => 'IBM LZ77 z Architecture (PFS)',
|
---|
67 | 96 => 'JPEG recompressed', #2
|
---|
68 | 97 => 'WavPack compressed', #2
|
---|
69 | 98 => 'PPMd version I, Rev 1',
|
---|
70 | },
|
---|
71 | },
|
---|
72 | 5 => {
|
---|
73 | Name => 'ZipModifyDate',
|
---|
74 | Format => 'int32u',
|
---|
75 | Groups => { 2 => 'Time' },
|
---|
76 | ValueConv => sub {
|
---|
77 | my $val = shift;
|
---|
78 | return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d',
|
---|
79 | ($val >> 25) + 1980, # year
|
---|
80 | ($val >> 21) & 0x0f, # month
|
---|
81 | ($val >> 16) & 0x1f, # day
|
---|
82 | ($val >> 11) & 0x1f, # hour
|
---|
83 | ($val >> 5) & 0x3f, # minute
|
---|
84 | $val & 0x1f # second
|
---|
85 | );
|
---|
86 | },
|
---|
87 | PrintConv => '$self->ConvertDateTime($val)',
|
---|
88 | },
|
---|
89 | 7 => { Name => 'ZipCRC', Format => 'int32u', PrintConv => 'sprintf("0x%.8x",$val)' },
|
---|
90 | 9 => { Name => 'ZipCompressedSize', Format => 'int32u' },
|
---|
91 | 11 => { Name => 'ZipUncompressedSize', Format => 'int32u' },
|
---|
92 | 13 => {
|
---|
93 | Name => 'ZipFileNameLength',
|
---|
94 | # don't store a tag -- just extract the value for use with ZipFileName
|
---|
95 | Hidden => 1,
|
---|
96 | RawConv => '$$self{ZipFileNameLength} = $val; undef',
|
---|
97 | },
|
---|
98 | # 14 => 'ZipExtraFieldLength',
|
---|
99 | 15 => {
|
---|
100 | Name => 'ZipFileName',
|
---|
101 | Format => 'string[$$self{ZipFileNameLength}]',
|
---|
102 | },
|
---|
103 | );
|
---|
104 |
|
---|
105 | # GNU ZIP tags (ref 3)
|
---|
106 | %Image::ExifTool::ZIP::GZIP = (
|
---|
107 | PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
|
---|
108 | GROUPS => { 2 => 'Other' },
|
---|
109 | NOTES => q{
|
---|
110 | These tags are extracted from GZIP (GNU ZIP) archives, but currently only
|
---|
111 | for the first file in the archive.
|
---|
112 | },
|
---|
113 | 2 => {
|
---|
114 | Name => 'Compression',
|
---|
115 | PrintConv => {
|
---|
116 | 8 => 'Deflated',
|
---|
117 | },
|
---|
118 | },
|
---|
119 | 3 => {
|
---|
120 | Name => 'Flags',
|
---|
121 | PrintConv => { BITMASK => {
|
---|
122 | 0 => 'Text',
|
---|
123 | 1 => 'CRC16',
|
---|
124 | 2 => 'ExtraFields',
|
---|
125 | 3 => 'FileName',
|
---|
126 | 4 => 'Comment',
|
---|
127 | }},
|
---|
128 | },
|
---|
129 | 4 => {
|
---|
130 | Name => 'ModifyDate',
|
---|
131 | Format => 'int32u',
|
---|
132 | Groups => { 2 => 'Time' },
|
---|
133 | ValueConv => 'ConvertUnixTime($val,1)',
|
---|
134 | PrintConv => '$self->ConvertDateTime($val)',
|
---|
135 | },
|
---|
136 | 8 => {
|
---|
137 | Name => 'ExtraFlags',
|
---|
138 | PrintConv => {
|
---|
139 | 0 => '(none)',
|
---|
140 | 2 => 'Maximum Compression',
|
---|
141 | 4 => 'Fastest Algorithm',
|
---|
142 | },
|
---|
143 | },
|
---|
144 | 9 => {
|
---|
145 | Name => 'OperatingSystem',
|
---|
146 | PrintConv => {
|
---|
147 | 0 => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)',
|
---|
148 | 1 => 'Amiga',
|
---|
149 | 2 => 'VMS (or OpenVMS)',
|
---|
150 | 3 => 'Unix',
|
---|
151 | 4 => 'VM/CMS',
|
---|
152 | 5 => 'Atari TOS',
|
---|
153 | 6 => 'HPFS filesystem (OS/2, NT)',
|
---|
154 | 7 => 'Macintosh',
|
---|
155 | 8 => 'Z-System',
|
---|
156 | 9 => 'CP/M',
|
---|
157 | 10 => 'TOPS-20',
|
---|
158 | 11 => 'NTFS filesystem (NT)',
|
---|
159 | 12 => 'QDOS',
|
---|
160 | 13 => 'Acorn RISCOS',
|
---|
161 | 255 => 'unknown',
|
---|
162 | },
|
---|
163 | },
|
---|
164 | 10 => 'ArchivedFileName',
|
---|
165 | 11 => 'Comment',
|
---|
166 | );
|
---|
167 |
|
---|
168 | # RAR tags (ref 4)
|
---|
169 | %Image::ExifTool::ZIP::RAR = (
|
---|
170 | PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
|
---|
171 | GROUPS => { 2 => 'Other' },
|
---|
172 | NOTES => 'These tags are extracted from RAR archive files.',
|
---|
173 | 0 => {
|
---|
174 | Name => 'CompressedSize',
|
---|
175 | Format => 'int32u',
|
---|
176 | },
|
---|
177 | 4 => {
|
---|
178 | Name => 'UncompressedSize',
|
---|
179 | Format => 'int32u',
|
---|
180 | },
|
---|
181 | 8 => {
|
---|
182 | Name => 'OperatingSystem',
|
---|
183 | PrintConv => {
|
---|
184 | 0 => 'MS-DOS',
|
---|
185 | 1 => 'OS/2',
|
---|
186 | 2 => 'Win32',
|
---|
187 | 3 => 'Unix',
|
---|
188 | },
|
---|
189 | },
|
---|
190 | 13 => {
|
---|
191 | Name => 'ModifyDate',
|
---|
192 | Format => 'int32u',
|
---|
193 | Groups => { 2 => 'Time' },
|
---|
194 | ValueConv => sub {
|
---|
195 | my $val = shift;
|
---|
196 | return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d',
|
---|
197 | ($val >> 25) + 1980, # year
|
---|
198 | ($val >> 21) & 0x0f, # month
|
---|
199 | ($val >> 16) & 0x1f, # day
|
---|
200 | ($val >> 11) & 0x1f, # hour
|
---|
201 | ($val >> 5) & 0x3f, # minute
|
---|
202 | $val & 0x1f # second
|
---|
203 | );
|
---|
204 | },
|
---|
205 | PrintConv => '$self->ConvertDateTime($val)',
|
---|
206 | },
|
---|
207 | 18 => {
|
---|
208 | Name => 'PackingMethod',
|
---|
209 | PrintHex => 1,
|
---|
210 | PrintConv => {
|
---|
211 | 0x30 => 'Stored',
|
---|
212 | 0x31 => 'Fastest',
|
---|
213 | 0x32 => 'Fast',
|
---|
214 | 0x33 => 'Normal',
|
---|
215 | 0x34 => 'Good Compression',
|
---|
216 | 0x35 => 'Best Compression',
|
---|
217 | },
|
---|
218 | },
|
---|
219 | 19 => {
|
---|
220 | Name => 'FileNameLength',
|
---|
221 | Format => 'int16u',
|
---|
222 | Hidden => 1,
|
---|
223 | RawConv => '$$self{FileNameLength} = $val; undef',
|
---|
224 | },
|
---|
225 | 25 => {
|
---|
226 | Name => 'ArchivedFileName',
|
---|
227 | Format => 'string[$$self{FileNameLength}]',
|
---|
228 | },
|
---|
229 | );
|
---|
230 |
|
---|
231 | #------------------------------------------------------------------------------
|
---|
232 | # Extract information from a RAR file (ref 4)
|
---|
233 | # Inputs: 0) ExifTool object reference, 1) dirInfo reference
|
---|
234 | # Returns: 1 on success, 0 if this wasn't a valid RAR file
|
---|
235 | sub ProcessRAR($$)
|
---|
236 | {
|
---|
237 | my ($exifTool, $dirInfo) = @_;
|
---|
238 | my $raf = $$dirInfo{RAF};
|
---|
239 | my ($flags, $buff);
|
---|
240 |
|
---|
241 | return 0 unless $raf->Read($buff, 7) and $buff eq "Rar!\x1a\x07\0";
|
---|
242 |
|
---|
243 | $exifTool->SetFileType();
|
---|
244 | SetByteOrder('II');
|
---|
245 | my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR');
|
---|
246 | my $docNum = 0;
|
---|
247 |
|
---|
248 | for (;;) {
|
---|
249 | # read block header
|
---|
250 | $raf->Read($buff, 7) == 7 or last;
|
---|
251 | my ($type, $flags, $size) = unpack('xxCvv', $buff);
|
---|
252 | $size -= 7;
|
---|
253 | if ($flags & 0x8000) {
|
---|
254 | $raf->Read($buff, 4) == 4 or last;
|
---|
255 | $size += unpack('V',$buff) - 4;
|
---|
256 | }
|
---|
257 | last if $size < 0;
|
---|
258 | next unless $size; # ignore blocks with no data
|
---|
259 | # don't try to read very large blocks unless LargeFileSupport is enabled
|
---|
260 | if ($size > 0x80000000 and not $exifTool->Options('LargeFileSupport')) {
|
---|
261 | $exifTool->Warn('Large block encountered. Aborting.');
|
---|
262 | last;
|
---|
263 | }
|
---|
264 | # process the block
|
---|
265 | if ($type == 0x74) { # file block
|
---|
266 | # read maximum 4 KB from a file block
|
---|
267 | my $n = $size > 4096 ? 4096 : $size;
|
---|
268 | $raf->Read($buff, $n) == $n or last;
|
---|
269 | # add compressed size to start of data so we can extract it with the other tags
|
---|
270 | $buff = pack('V',$size) . $buff;
|
---|
271 | $$exifTool{DOC_NUM} = ++$docNum;
|
---|
272 | $exifTool->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr);
|
---|
273 | $size -= $n;
|
---|
274 | } elsif ($type == 0x75 and $size > 6) { # comment block
|
---|
275 | $raf->Read($buff, $size) == $size or last;
|
---|
276 | # save comment, only if "Stored" (this is untested)
|
---|
277 | if (Get8u(\$buff, 3) == 0x30) {
|
---|
278 | $exifTool->FoundTag('Comment', substr($buff, 6));
|
---|
279 | }
|
---|
280 | next;
|
---|
281 | }
|
---|
282 | # seek to the start of the next block
|
---|
283 | $raf->Seek($size, 1) or last if $size;
|
---|
284 | }
|
---|
285 | $$exifTool{DOC_NUM} = 0;
|
---|
286 |
|
---|
287 | return 1;
|
---|
288 | }
|
---|
289 |
|
---|
290 | #------------------------------------------------------------------------------
|
---|
291 | # Extract information from a GNU ZIP file (ref 3)
|
---|
292 | # Inputs: 0) ExifTool object reference, 1) dirInfo reference
|
---|
293 | # Returns: 1 on success, 0 if this wasn't a valid GZIP file
|
---|
294 | sub ProcessGZIP($$)
|
---|
295 | {
|
---|
296 | my ($exifTool, $dirInfo) = @_;
|
---|
297 | my $raf = $$dirInfo{RAF};
|
---|
298 | my ($flags, $buff);
|
---|
299 |
|
---|
300 | return 0 unless $raf->Read($buff, 10) and $buff =~ /^\x1f\x8b\x08/;
|
---|
301 |
|
---|
302 | $exifTool->SetFileType();
|
---|
303 | SetByteOrder('II');
|
---|
304 |
|
---|
305 | my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::GZIP');
|
---|
306 | $exifTool->HandleTag($tagTablePtr, 2, Get8u(\$buff, 2));
|
---|
307 | $exifTool->HandleTag($tagTablePtr, 3, $flags = Get8u(\$buff, 3));
|
---|
308 | $exifTool->HandleTag($tagTablePtr, 4, Get32u(\$buff, 4));
|
---|
309 | $exifTool->HandleTag($tagTablePtr, 8, Get8u(\$buff, 8));
|
---|
310 | $exifTool->HandleTag($tagTablePtr, 9, Get8u(\$buff, 9));
|
---|
311 |
|
---|
312 | # extract file name and comment if they exist
|
---|
313 | if ($flags & 0x18) {
|
---|
314 | if ($flags & 0x04) {
|
---|
315 | # skip extra field
|
---|
316 | $raf->Read($buff, 2) == 2 or return 1;
|
---|
317 | my $len = Get16u(\$buff, 0);
|
---|
318 | $raf->Read($buff, $len) == $len or return 1;
|
---|
319 | }
|
---|
320 | $raf->Read($buff, 4096) or return 1;
|
---|
321 | my $pos = 0;
|
---|
322 | my $tagID;
|
---|
323 | # loop for ArchivedFileName (10) and Comment (11) tags
|
---|
324 | foreach $tagID (10, 11) {
|
---|
325 | my $mask = $tagID == 10 ? 0x08 : 0x10;
|
---|
326 | next unless $flags & $mask;
|
---|
327 | my $end = $buff =~ /\0/g ? pos($buff) - 1 : length($buff);
|
---|
328 | # (the doc specifies the string should be ISO 8859-1,
|
---|
329 | # but in OS X it seems to be UTF-8, so don't translate
|
---|
330 | # it because I could just as easily screw it up)
|
---|
331 | my $str = substr($buff, $pos, $end - $pos);
|
---|
332 | $exifTool->HandleTag($tagTablePtr, $tagID, $str);
|
---|
333 | last if $end >= length $buff;
|
---|
334 | $pos = $end + 1;
|
---|
335 | }
|
---|
336 | }
|
---|
337 | return 1;
|
---|
338 | }
|
---|
339 |
|
---|
340 | #------------------------------------------------------------------------------
|
---|
341 | # Call HandleTags for attributes of an Archive::Zip member
|
---|
342 | # Inputs: 0) ExifTool object ref, 1) member ref, 2) optional tag table ref
|
---|
343 | sub HandleMember($$;$)
|
---|
344 | {
|
---|
345 | my ($exifTool, $member, $tagTablePtr) = @_;
|
---|
346 | $tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
|
---|
347 | $exifTool->HandleTag($tagTablePtr, 2, $member->versionNeededToExtract());
|
---|
348 | $exifTool->HandleTag($tagTablePtr, 3, $member->bitFlag());
|
---|
349 | $exifTool->HandleTag($tagTablePtr, 4, $member->compressionMethod());
|
---|
350 | $exifTool->HandleTag($tagTablePtr, 5, $member->lastModFileDateTime());
|
---|
351 | $exifTool->HandleTag($tagTablePtr, 7, $member->crc32());
|
---|
352 | $exifTool->HandleTag($tagTablePtr, 9, $member->compressedSize());
|
---|
353 | $exifTool->HandleTag($tagTablePtr, 11, $member->uncompressedSize());
|
---|
354 | $exifTool->HandleTag($tagTablePtr, 15, $member->fileName());
|
---|
355 | }
|
---|
356 |
|
---|
357 | #------------------------------------------------------------------------------
|
---|
358 | # Extract information from an ZIP file
|
---|
359 | # Inputs: 0) ExifTool object reference, 1) dirInfo reference
|
---|
360 | # Returns: 1 on success, 0 if this wasn't a valid ZIP file
|
---|
361 | sub ProcessZIP($$)
|
---|
362 | {
|
---|
363 | my ($exifTool, $dirInfo) = @_;
|
---|
364 | my $raf = $$dirInfo{RAF};
|
---|
365 | my ($buff, $buf2, $zip, $docNum);
|
---|
366 |
|
---|
367 | return 0 unless $raf->Read($buff, 30) and $buff =~ /^PK\x03\x04/;
|
---|
368 |
|
---|
369 | my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
|
---|
370 |
|
---|
371 | # use Archive::Zip if avilable
|
---|
372 | for (;;) {
|
---|
373 | unless (eval 'require Archive::Zip' and eval 'require IO::File') {
|
---|
374 | if ($$exifTool{FILE_EXT} and $$exifTool{FILE_EXT} ne 'ZIP') {
|
---|
375 | $exifTool->Warn("Install Archive::Zip to decode compressed ZIP information");
|
---|
376 | }
|
---|
377 | last;
|
---|
378 | }
|
---|
379 | # Archive::Zip requires a seekable IO::File object
|
---|
380 | my $fh = $raf->{FILE_PT};
|
---|
381 | if ($fh and seek($fh, 0, 0)) {
|
---|
382 | unless (eval 'require IO::File') {
|
---|
383 | # (this shouldn't happen because IO::File is a prerequisite of Archive::Zip)
|
---|
384 | $exifTool->Warn("Install IO::File to decode compressed ZIP information");
|
---|
385 | last;
|
---|
386 | }
|
---|
387 | bless $fh, 'IO::File'; # Archive::Zip expects an IO::File object
|
---|
388 | } elsif (eval 'require IO::String') {
|
---|
389 | # read the whole file into memory (what else can I do?)
|
---|
390 | $raf->Slurp();
|
---|
391 | $fh = new IO::String ${$raf->{BUFF_PT}};
|
---|
392 | } else {
|
---|
393 | my $type = $fh ? 'pipe or socket' : 'scalar reference';
|
---|
394 | $exifTool->Warn("Install IO::String to decode compressed ZIP information from a $type");
|
---|
395 | last;
|
---|
396 | }
|
---|
397 | $exifTool->VPrint(1, " --- using Archive::Zip ---\n");
|
---|
398 | $zip = new Archive::Zip;
|
---|
399 | # catch all warnings! (Archive::Zip is bad for this)
|
---|
400 | local $SIG{'__WARN__'} = \&WarnProc;
|
---|
401 | my $status = $zip->readFromFileHandle($fh);
|
---|
402 | if ($status) {
|
---|
403 | undef $zip;
|
---|
404 | my %err = ( 1=>'Stream end error', 3=>'Format error', 4=>'IO error' );
|
---|
405 | my $err = $err{$status} || "Error $status";
|
---|
406 | $exifTool->Warn("$err reading ZIP file");
|
---|
407 | last;
|
---|
408 | }
|
---|
409 | $$dirInfo{ZIP} = $zip;
|
---|
410 |
|
---|
411 | # check for an Office Open file (DOCX, etc)
|
---|
412 | # --> read '[Content_Types].xml' to determine the file type
|
---|
413 | my ($mime, @members);
|
---|
414 | my $cType = $zip->memberNamed('[Content_Types].xml');
|
---|
415 | if ($cType) {
|
---|
416 | ($buff, $status) = $zip->contents($cType);
|
---|
417 | if (not $status and $buff =~ /ContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/) {
|
---|
418 | $mime = $2;
|
---|
419 | }
|
---|
420 | }
|
---|
421 | # check for docProps if we couldn't find a MIME type
|
---|
422 | $mime or @members = $zip->membersMatching('^docProps/.*\.(xml|XML)$');
|
---|
423 | if ($mime or @members) {
|
---|
424 | $$dirInfo{MIME} = $mime;
|
---|
425 | require Image::ExifTool::OOXML;
|
---|
426 | Image::ExifTool::OOXML::ProcessDOCX($exifTool, $dirInfo);
|
---|
427 | delete $$dirInfo{MIME};
|
---|
428 | last;
|
---|
429 | }
|
---|
430 |
|
---|
431 | # check for an EIP file
|
---|
432 | @members = $zip->membersMatching('^CaptureOne/.*\.(cos|COS)$');
|
---|
433 | if (@members) {
|
---|
434 | require Image::ExifTool::CaptureOne;
|
---|
435 | Image::ExifTool::CaptureOne::ProcessEIP($exifTool, $dirInfo);
|
---|
436 | last;
|
---|
437 | }
|
---|
438 |
|
---|
439 | # check for an iWork file
|
---|
440 | @members = $zip->membersMatching('^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg)$');
|
---|
441 | if (@members) {
|
---|
442 | require Image::ExifTool::iWork;
|
---|
443 | Image::ExifTool::iWork::Process_iWork($exifTool, $dirInfo);
|
---|
444 | last;
|
---|
445 | }
|
---|
446 |
|
---|
447 | # check for an Open Document file
|
---|
448 | my $mType = $zip->memberNamed('mimetype');
|
---|
449 | if ($mType) {
|
---|
450 | ($mime, $status) = $zip->contents($mType);
|
---|
451 | unless ($status) {
|
---|
452 | chomp $mime;
|
---|
453 | if ($openDocType{$mime}) {
|
---|
454 | $exifTool->SetFileType($openDocType{$mime}, $mime);
|
---|
455 | # extract Open Document metadata from "meta.xml"
|
---|
456 | my $meta = $zip->memberNamed('meta.xml');
|
---|
457 | if ($meta) {
|
---|
458 | ($buff, $status) = $zip->contents($meta);
|
---|
459 | unless ($status) {
|
---|
460 | my %dirInfo = (
|
---|
461 | DataPt => \$buff,
|
---|
462 | DirLen => length $buff,
|
---|
463 | DataLen => length $buff,
|
---|
464 | );
|
---|
465 | my $xmpTable = GetTagTable('Image::ExifTool::XMP::Main');
|
---|
466 | $exifTool->ProcessDirectory(\%dirInfo, $xmpTable);
|
---|
467 | }
|
---|
468 | }
|
---|
469 | # extract preview image(s) from "Thumbnails" directory
|
---|
470 | my $type;
|
---|
471 | my %tag = ( jpg => 'PreviewImage', png => 'PreviewPNG' );
|
---|
472 | foreach $type ('jpg', 'png') {
|
---|
473 | my $thumb = $zip->memberNamed("Thumbnails/thumbnail.$type");
|
---|
474 | next unless $thumb;
|
---|
475 | ($buff, $status) = $zip->contents($thumb);
|
---|
476 | $exifTool->FoundTag($tag{$type}, $buff) unless $status;
|
---|
477 | }
|
---|
478 | last;
|
---|
479 | }
|
---|
480 | }
|
---|
481 | }
|
---|
482 |
|
---|
483 | # otherwise just extract general ZIP information
|
---|
484 | $exifTool->SetFileType();
|
---|
485 | @members = $zip->members();
|
---|
486 | $docNum = 0;
|
---|
487 | my $member;
|
---|
488 | foreach $member (@members) {
|
---|
489 | $$exifTool{DOC_NUM} = ++$docNum;
|
---|
490 | HandleMember($exifTool, $member, $tagTablePtr);
|
---|
491 | }
|
---|
492 | last;
|
---|
493 | }
|
---|
494 | # all done if we processed this using Archive::Zip
|
---|
495 | if ($zip) {
|
---|
496 | delete $$dirInfo{ZIP};
|
---|
497 | delete $$exifTool{DOC_NUM};
|
---|
498 | return 1;
|
---|
499 | }
|
---|
500 | #
|
---|
501 | # process the ZIP file by hand (funny, but this seems easier than using Archive::Zip)
|
---|
502 | #
|
---|
503 | $docNum = 0;
|
---|
504 | $exifTool->VPrint(1, " -- processing as binary data --\n");
|
---|
505 | $raf->Seek(30, 0);
|
---|
506 | $exifTool->SetFileType();
|
---|
507 | SetByteOrder('II');
|
---|
508 |
|
---|
509 | # A. Local file header:
|
---|
510 | # local file header signature 0) 4 bytes (0x04034b50)
|
---|
511 | # version needed to extract 4) 2 bytes
|
---|
512 | # general purpose bit flag 6) 2 bytes
|
---|
513 | # compression method 8) 2 bytes
|
---|
514 | # last mod file time 10) 2 bytes
|
---|
515 | # last mod file date 12) 2 bytes
|
---|
516 | # crc-32 14) 4 bytes
|
---|
517 | # compressed size 18) 4 bytes
|
---|
518 | # uncompressed size 22) 4 bytes
|
---|
519 | # file name length 26) 2 bytes
|
---|
520 | # extra field length 28) 2 bytes
|
---|
521 | for (;;) {
|
---|
522 | my $len = Get16u(\$buff, 26) + Get16u(\$buff, 28);
|
---|
523 | $raf->Read($buf2, $len) == $len or last;
|
---|
524 |
|
---|
525 | $$exifTool{DOC_NUM} = ++$docNum;
|
---|
526 | $buff .= $buf2;
|
---|
527 | my %dirInfo = (
|
---|
528 | DataPt => \$buff,
|
---|
529 | DataPos => $raf->Tell() - 30 - $len,
|
---|
530 | DataLen => 30 + $len,
|
---|
531 | DirStart => 0,
|
---|
532 | DirLen => 30 + $len,
|
---|
533 | );
|
---|
534 | $exifTool->ProcessDirectory(\%dirInfo, $tagTablePtr);
|
---|
535 | my $flags = Get16u(\$buff, 6);
|
---|
536 | if ($flags & 0x08) {
|
---|
537 | # we don't yet support skipping stream mode data
|
---|
538 | # (when this happens, the CRC, compressed size and uncompressed
|
---|
539 | # sizes are set to 0 in the header. Instead, they are stored
|
---|
540 | # after the compressed data with an optional header of 0x08074b50)
|
---|
541 | $exifTool->Warn('Stream mode data encountered, file list may be incomplete');
|
---|
542 | last;
|
---|
543 | }
|
---|
544 | $len = Get32u(\$buff, 18); # file data length
|
---|
545 | $raf->Seek($len, 1) or last; # skip file data
|
---|
546 | $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/ or last;
|
---|
547 | }
|
---|
548 | delete $$exifTool{DOC_NUM};
|
---|
549 | return 1;
|
---|
550 | }
|
---|
551 |
|
---|
552 | 1; # end
|
---|
553 |
|
---|
554 | __END__
|
---|
555 |
|
---|
556 | =head1 NAME
|
---|
557 |
|
---|
558 | Image::ExifTool::ZIP - Read ZIP archive meta information
|
---|
559 |
|
---|
560 | =head1 SYNOPSIS
|
---|
561 |
|
---|
562 | This module is used by Image::ExifTool
|
---|
563 |
|
---|
564 | =head1 DESCRIPTION
|
---|
565 |
|
---|
566 | This module contains definitions required by Image::ExifTool to extract meta
|
---|
567 | information from ZIP, GZIP and RAR archives. This includes ZIP-based file
|
---|
568 | types like DOCX, PPTX, XLSX, ODP, ODS, ODT and EIP.
|
---|
569 |
|
---|
570 | =head1 AUTHOR
|
---|
571 |
|
---|
572 | Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
|
---|
573 |
|
---|
574 | This library is free software; you can redistribute it and/or modify it
|
---|
575 | under the same terms as Perl itself.
|
---|
576 |
|
---|
577 | =head1 REFERENCES
|
---|
578 |
|
---|
579 | =over 4
|
---|
580 |
|
---|
581 | =item L<http://www.pkware.com/documents/casestudies/APPNOTE.TXT>
|
---|
582 |
|
---|
583 | =item L<http://www.gzip.org/zlib/rfc-gzip.html>
|
---|
584 |
|
---|
585 | =item L<http://DataCompression.info/ArchiveFormats/RAR202.txt>
|
---|
586 |
|
---|
587 | =back
|
---|
588 |
|
---|
589 | =head1 SEE ALSO
|
---|
590 |
|
---|
591 | L<Image::ExifTool::TagNames/ZIP Tags>,
|
---|
592 | L<Image::ExifTool::TagNames/OOXML Tags>,
|
---|
593 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
594 |
|
---|
595 | =cut
|
---|
596 |
|
---|