source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/EXE.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

  • Property svn:executable set to *
File size: 50.3 KB
Line 
1#------------------------------------------------------------------------------
2# File: EXE.pm
3#
4# Description: Read meta information of various executable file formats
5#
6# Revisions: 2008/08/28 - P. Harvey Created
7# 2011/07/12 - P. Harvey Added CHM (ok, not EXE, but it fits here)
8#
9# References: 1) http://www.openwatcom.org/ftp/devel/docs/pecoff.pdf
10# 2) http://support.microsoft.com/kb/65122
11# 3) http://www.opensource.apple.com
12# 4) http://www.skyfree.org/linux/references/ELF_Format.pdf
13# 5) http://msdn.microsoft.com/en-us/library/ms809762.aspx
14# 6) http://code.google.com/p/pefile/
15# 7) http://www.codeproject.com/KB/DLL/showver.aspx
16#------------------------------------------------------------------------------
17
18package Image::ExifTool::EXE;
19
20use strict;
21use vars qw($VERSION);
22use Image::ExifTool qw(:DataAccess :Utils);
23
24$VERSION = '1.17';
25
26sub ProcessPEResources($$);
27sub ProcessPEVersion($$);
28
# Lookup of PE resource type names by numeric resource type ID (ref 6).
# Used to label the top-level entries of the PE resource directory.
my %resourceType = (
    1  => 'Cursor',
    2  => 'Bitmap',
    3  => 'Icon',
    4  => 'Menu',
    5  => 'Dialog',
    6  => 'String',
    7  => 'Font Dir',
    8  => 'Font',
    9  => 'Accelerator',
    10 => 'RC Data',
    11 => 'Message Table',
    12 => 'Group Cursor',
    14 => 'Group Icon',
    16 => 'Version',
    17 => 'Dialog Include',
    19 => 'Plug-n-Play',
    20 => 'VxD',
    21 => 'Animated Cursor',
    22 => 'Animated Icon',
    23 => 'HTML',
    24 => 'Manifest',
);
53
# Microsoft language codes, shared as the PrintConv lookup for the PEString and
# CHM LanguageCode tags.
# NOTE: keys must be 4-digit UPPERCASE hex strings.  Both users of this table
# generate uppercase values before the lookup (uc() on the StringFileInfo key,
# and sprintf("%.4X") for CHM), so a lowercase key could never be matched.
my %languageCode = (
    Notes => q{
        See L<https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid>
        for the full list of Microsoft language codes.
    },
    '0000' => 'Neutral',
    '007F' => 'Invariant',
    '0400' => 'Process default',
    '0401' => 'Arabic',
    '0402' => 'Bulgarian',
    '0403' => 'Catalan',
    '0404' => 'Chinese (Traditional)',
    '0405' => 'Czech',
    '0406' => 'Danish',
    '0407' => 'German',
    '0408' => 'Greek',
    '0409' => 'English (U.S.)',
    '040A' => 'Spanish (Castilian)',
    '040B' => 'Finnish',
    '040C' => 'French',
    '040D' => 'Hebrew',
    '040E' => 'Hungarian',
    '040F' => 'Icelandic',
    '0410' => 'Italian',
    '0411' => 'Japanese',
    '0412' => 'Korean',
    '0413' => 'Dutch',
    '0414' => 'Norwegian (Bokml)',
    '0415' => 'Polish',
    '0416' => 'Portuguese (Brazilian)',
    '0417' => 'Rhaeto-Romanic',
    '0418' => 'Romanian',
    '0419' => 'Russian',
    '041A' => 'Croato-Serbian (Latin)',
    '041B' => 'Slovak',
    '041C' => 'Albanian',
    '041D' => 'Swedish',
    '041E' => 'Thai',
    '041F' => 'Turkish',
    '0420' => 'Urdu',
    # 0421-0493 ref 6
    '0421' => 'Indonesian',
    '0422' => 'Ukrainian',
    '0423' => 'Belarusian',
    '0424' => 'Slovenian',
    '0425' => 'Estonian',
    '0426' => 'Latvian',
    '0427' => 'Lithuanian',
    '0428' => 'Maori',
    '0429' => 'Farsi',
    '042A' => 'Vietnamese',
    '042B' => 'Armenian',
    '042C' => 'Azeri',
    '042D' => 'Basque',
    '042E' => 'Sorbian',
    '042F' => 'Macedonian',
    '0430' => 'Sutu',
    '0431' => 'Tsonga',
    '0432' => 'Tswana',
    '0433' => 'Venda',
    '0434' => 'Xhosa',
    '0435' => 'Zulu',
    '0436' => 'Afrikaans',
    '0437' => 'Georgian',
    '0438' => 'Faeroese',
    '0439' => 'Hindi',
    '043A' => 'Maltese',
    '043B' => 'Saami',
    '043C' => 'Gaelic',
    '043E' => 'Malay',
    '043F' => 'Kazak',
    '0440' => 'Kyrgyz',
    '0441' => 'Swahili',
    '0443' => 'Uzbek',
    '0444' => 'Tatar',
    '0445' => 'Bengali',
    '0446' => 'Punjabi',
    '0447' => 'Gujarati',
    '0448' => 'Oriya',
    '0449' => 'Tamil',
    '044A' => 'Telugu',
    '044B' => 'Kannada',
    '044C' => 'Malayalam',
    '044D' => 'Assamese',
    '044E' => 'Marathi',
    '044F' => 'Sanskrit',
    '0450' => 'Mongolian',
    '0456' => 'Galician',
    '0457' => 'Konkani',
    '0458' => 'Manipuri',
    '0459' => 'Sindhi',
    '045A' => 'Syriac',
    '0460' => 'Kashmiri',
    '0461' => 'Nepali',
    '0465' => 'Divehi',
    '047F' => 'Invariant',
    '048F' => 'Esperanto',
    '0490' => 'Walon',
    '0491' => 'Cornish',
    '0492' => 'Welsh',
    '0493' => 'Breton',
    '0800' => 'Neutral 2',
    '0804' => 'Chinese (Simplified)',
    '0807' => 'German (Swiss)',
    '0809' => 'English (British)',
    '080A' => 'Spanish (Mexican)',
    '080C' => 'French (Belgian)',
    '0810' => 'Italian (Swiss)',
    '0813' => 'Dutch (Belgian)',
    '0814' => 'Norwegian (Nynorsk)',
    '0816' => 'Portuguese',
    '081A' => 'Serbo-Croatian (Cyrillic)',
    '0C07' => 'German (Austrian)',
    '0C09' => 'English (Australian)',
    '0C0A' => 'Spanish (Modern)',
    '0C0C' => 'French (Canadian)',
    '1009' => 'English (Canadian)',
    '100C' => 'French (Swiss)',
);
173
# Tag table for the header of a PE COFF (Windows EXE/DLL) file.
# The default FORMAT is int16u, so a tag index is an offset in 16-bit words
# unless the tag specifies its own Format.
%Image::ExifTool::EXE::Main = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS => { 2 => 'Other' },
    FORMAT => 'int16u',
    NOTES => q{
        This module extracts information from various types of Windows, MacOS and
        Unix executable and library files.  The first table below lists information
        extracted from the header of Windows PE (Portable Executable) EXE files and
        DLL libraries.
    },
    0 => {
        Name => 'MachineType',
        PrintHex => 1,
        PrintConv => {
            0x014c => 'Intel 386 or later, and compatibles',
            0x014d => 'Intel i860', #5
            0x0162 => 'MIPS R3000',
            0x0166 => 'MIPS little endian (R4000)',
            0x0168 => 'MIPS R10000',
            0x0169 => 'MIPS little endian WCI v2',
            0x0183 => 'Alpha AXP (old)', #5
            0x0184 => 'Alpha AXP',
            0x01a2 => 'Hitachi SH3',
            0x01a3 => 'Hitachi SH3 DSP',
            0x01a6 => 'Hitachi SH4',
            0x01a8 => 'Hitachi SH5',
            0x01c0 => 'ARM little endian',
            0x01c2 => 'Thumb',
            0x01d3 => 'Matsushita AM33',
            0x01f0 => 'PowerPC little endian',
            0x01f1 => 'PowerPC with floating point support',
            0x0200 => 'Intel IA64',
            0x0266 => 'MIPS16',
            0x0268 => 'Motorola 68000 series',
            0x0284 => 'Alpha AXP 64-bit',
            0x0366 => 'MIPS with FPU',
            0x0466 => 'MIPS16 with FPU',
            0x0ebc => 'EFI Byte Code',
            0x8664 => 'AMD AMD64',
            0x9041 => 'Mitsubishi M32R little endian',
            0xc0ee => 'clr pure MSIL',
        },
    },
    2 => {
        Name => 'TimeStamp',
        Format => 'int32u',
        Groups => { 2 => 'Time' },
        ValueConv => 'ConvertUnixTime($val,1)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    9 => {
        Name => 'ImageFileCharacteristics',
        # ref https://docs.microsoft.com/en-us/windows/desktop/api/winnt/ns-winnt-_image_file_header
        PrintConv => {
            BITMASK => {
                0  => 'No relocs',
                1  => 'Executable',
                2  => 'No line numbers',
                3  => 'No symbols',
                4  => 'Aggressive working-set trim',
                5  => 'Large address aware',
                7  => 'Bytes reversed lo',
                8  => '32-bit',
                9  => 'No debug',
                10 => 'Removable run from swap',
                11 => 'Net run from swap',
                12 => 'System file',
                13 => 'DLL',
                14 => 'Uniprocessor only',
                15 => 'Bytes reversed hi',
            },
        },
    },
    10 => {
        Name => 'PEType',
        PrintHex => 1,
        PrintConv => {
            0x107 => 'ROM Image',
            0x10b => 'PE32',
            0x20b => 'PE32+',
        },
    },
    # the version tags below are stored as a pair of numbers, which are
    # joined with a "." by the ValueConv
    11 => {
        Name => 'LinkerVersion',
        Format => 'int8u[2]',
        ValueConv => '$val=~tr/ /./; $val',
    },
    12 => { Name => 'CodeSize',              Format => 'int32u' },
    14 => { Name => 'InitializedDataSize',   Format => 'int32u' },
    16 => { Name => 'UninitializedDataSize', Format => 'int32u' },
    18 => {
        Name => 'EntryPoint',
        Format => 'int32u',
        PrintConv => 'sprintf("0x%.4x", $val)',
    },
    30 => {
        Name => 'OSVersion',
        Format => 'int16u[2]',
        ValueConv => '$val=~tr/ /./; $val',
    },
    32 => {
        Name => 'ImageVersion',
        Format => 'int16u[2]',
        ValueConv => '$val=~tr/ /./; $val',
    },
    34 => {
        Name => 'SubsystemVersion',
        Format => 'int16u[2]',
        ValueConv => '$val=~tr/ /./; $val',
    },
    44 => {
        Name => 'Subsystem',
        PrintConv => {
            0  => 'Unknown',
            1  => 'Native',
            2  => 'Windows GUI',
            3  => 'Windows command line',
            5  => 'OS/2 command line', #5
            7  => 'POSIX command line',
            9  => 'Windows CE GUI',
            10 => 'EFI application',
            11 => 'EFI boot service',
            12 => 'EFI runtime driver',
            13 => 'EFI ROM', #6
            14 => 'XBOX', #6
        },
    },
);
310
# Tag table for the fixed-size part of the PE version resource (ref 6).
# The default FORMAT is int32u, so a tag index is an offset in 32-bit words.
%Image::ExifTool::EXE::PEVersion = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS => { 2 => 'Other' },
    FORMAT => 'int32u',
    NOTES => q{
        Information extracted from the VS_VERSION_INFO structure of Windows PE
        files.
    },
    # (boring -- always 0xfeef04bd)
    #0 => {
    #    Name => 'Signature',
    #    PrintConv => 'sprintf("0x%.4x",$val)',
    #},
    # (boring -- always 1.0)
    #1 => {
    #    Name => 'StructVersion',
    #    Format => 'int16u[2]',
    #    ValueConv => 'my @a=split(" ",$val); "$a[1].$a[0]"',
    #},
    # the two version numbers are read as four 16-bit words, and the ValueConv
    # swaps the words within each pair before joining them with "."
    2 => {
        Name => 'FileVersionNumber',
        Format => 'int16u[4]',
        ValueConv => 'my @a=split(" ",$val); "$a[1].$a[0].$a[3].$a[2]"',
    },
    4 => {
        Name => 'ProductVersionNumber',
        Format => 'int16u[4]',
        ValueConv => 'my @a=split(" ",$val); "$a[1].$a[0].$a[3].$a[2]"',
    },
    6 => {
        Name => 'FileFlagsMask',
        PrintConv => 'sprintf("0x%.4x",$val)',
    },
    7 => { # ref Cygwin /usr/include/w32api/winver.h
        Name => 'FileFlags',
        PrintConv => {
            BITMASK => {
                0 => 'Debug',
                1 => 'Pre-release',
                2 => 'Patched',
                3 => 'Private build',
                4 => 'Info inferred',
                5 => 'Special build',
            },
        },
    },
    8 => {
        Name => 'FileOS',
        PrintHex => 1,
        PrintConv => { # ref Cygwin /usr/include/w32api/winver.h
            0x00001 => 'Win16',
            0x00002 => 'PM-16',
            0x00003 => 'PM-32',
            0x00004 => 'Win32',
            0x10000 => 'DOS',
            0x20000 => 'OS/2 16-bit',
            0x30000 => 'OS/2 32-bit',
            0x40000 => 'Windows NT',
            0x10001 => 'Windows 16-bit',
            0x10004 => 'Windows 32-bit',
            0x20002 => 'OS/2 16-bit PM-16',
            0x30003 => 'OS/2 32-bit PM-32',
            0x40004 => 'Windows NT 32-bit',
        },
    },
    9 => { # ref Cygwin /usr/include/w32api/winver.h
        Name => 'ObjectFileType',
        PrintConv => {
            0 => 'Unknown',
            1 => 'Executable application',
            2 => 'Dynamic link library',
            3 => 'Driver',
            4 => 'Font',
            5 => 'VxD',
            7 => 'Static library',
        },
    },
    10 => 'FileSubtype',
    # (these are usually zero, so ignore them)
    # 11 => 'FileDateMS',
    # 12 => 'FileDateLS',
);
392
# Tag table for the StringFileInfo resource strings of a Windows PE file
# (see http://msdn.microsoft.com/en-us/library/aa381049.aspx)
# Tags are keyed by the string name itself rather than by a numerical ID.
%Image::ExifTool::EXE::PEString = (
    GROUPS => { 2 => 'Other' },
    VARS => { NO_ID => 1 },
    NOTES => q{
        Resource strings found in Windows PE files.  The B<TagID>'s are not shown
        because they are the same as the B<Tag Name>.  ExifTool will extract any
        existing StringFileInfo tags even if not listed in this table.
    },
    LanguageCode => {
        Notes => 'Windows code page; extracted from the StringFileInfo value',
        # ref http://techsupt.winbatch.com/TS/T000001050F49.html
        # (also see http://support.bigfix.com/fixlet/documents/WinInspectors-2006-08-10.pdf)
        # (also see ftp://ftp.dyu.edu.tw/pub/cpatch/faq/tech/tech_nlsnt.txt)
        # (not a complete set)
        PrintString => 1,
        SeparateTable => 1,
        PrintConv => \%languageCode,
    },
    CharacterSet => {
        Notes => 'extracted from the StringFileInfo value',
        # ref http://techsupt.winbatch.com/TS/T000001050F49.html
        # (also see http://blog.chinaunix.net/u1/41189/showart_345768.html)
        PrintString => 1,
        PrintConv => {
            '0000' => 'ASCII',
            '03A4' => 'Windows, Japan (Shift - JIS X-0208)', # cp932
            '03A8' => 'Windows, Chinese (Simplified)', # cp936
            '03B5' => 'Windows, Korea (Shift - KSC 5601)', # cp949
            '03B6' => 'Windows, Taiwan (Big5)', # cp950
            '04B0' => 'Unicode', # UCS-2
            '04E2' => 'Windows, Latin2 (Eastern European)',
            '04E3' => 'Windows, Cyrillic',
            '04E4' => 'Windows, Latin1',
            '04E5' => 'Windows, Greek',
            '04E6' => 'Windows, Turkish',
            '04E7' => 'Windows, Hebrew',
            '04E8' => 'Windows, Arabic',
        },
    },
    # the remaining tags are plain strings with no conversions
    BuildDate        => { Groups => { 2 => 'Time' } }, # (non-standard)
    BuildVersion     => { }, # (non-standard)
    Comments         => { },
    CompanyName      => { },
    Copyright        => { }, # (non-standard)
    FileDescription  => { },
    FileVersion      => { },
    InternalName     => { },
    LegalCopyright   => { },
    LegalTrademarks  => { },
    OriginalFilename => { Name => 'OriginalFileName' },
    PrivateBuild     => { },
    ProductName      => { },
    ProductVersion   => { },
    SpecialBuild     => { },
);
450
# Tag table for information from the Mach-O (Mac OS X) file header.
# This table has no PROCESS_PROC; tags are identified by an index label.
# CPUSubtype values are looked up by the string "<CPUType> <CPUSubtype>".
%Image::ExifTool::EXE::MachO = (
    GROUPS => { 2 => 'Other' },
    VARS => { ID_LABEL => 'Index' },
    NOTES => q{
        Information extracted from Mach-O (Mac OS X) executable files and DYLIB
        libraries.
    },
    # ref http://www.opensource.apple.com/darwinsource/DevToolsOct2007/cctools-622.9/include/mach/machine.h
    0 => 'CPUArchitecture',
    1 => 'CPUByteOrder',
    2 => 'CPUCount',
    # ref /System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach/machine.h
    3 => {
        Name => 'CPUType',
        List => 1,
        PrintConv => {
            # handle 64-bit flag (0x1000000): called for values not found in
            # this hash, it retries the lookup with the flag bit cleared
            OTHER => sub {
                my ($val, $inv, $conv) = @_;
                my $v = $val & 0xfeffffff;
                return $$conv{$v} ? "$$conv{$v} 64-bit" : "Unknown ($val)";
            },
            -1 => 'Any',
            1 => 'VAX',
            2 => 'ROMP',
            4 => 'NS32032',
            5 => 'NS32332',
            6 => 'MC680x0',
            7 => 'x86',
            8 => 'MIPS',
            9 => 'NS32532',
            10 => 'MC98000',
            11 => 'HPPA',
            12 => 'ARM',
            13 => 'MC88000',
            14 => 'SPARC',
            15 => 'i860 big endian',
            16 => 'i860 little endian',
            17 => 'RS6000',
            18 => 'PowerPC',
            255 => 'VEO',
        },
    },
    # ref /System/Library/Frameworks/Kernel.framework/Versions/A/Headers/mach/machine.h
    4 => {
        Name => 'CPUSubtype',
        List => 1,
        PrintConv => {
            # handle 64-bit flags on CPUType (0x1000000) and CPUSubtype (0x80000000)
            # by clearing them from both numbers and retrying the lookup
            OTHER => sub {
                my ($val, $inv, $conv) = @_;
                my @v = split ' ', $val;
                my $v = ($v[0] & 0xfeffffff) . ' ' . ($v[1] & 0x7fffffff);
                return $$conv{$v} ? "$$conv{$v} 64-bit" : "Unknown ($val)";
            },
            # in theory, subtype can be -1 for multiple CPU types,
            # but in practice I'm not sure anyone uses this - PH
            '1 0' => 'VAX (all)',
            '1 1' => 'VAX780',
            '1 2' => 'VAX785',
            '1 3' => 'VAX750',
            '1 4' => 'VAX730',
            '1 5' => 'UVAXI',
            '1 6' => 'UVAXII',
            '1 7' => 'VAX8200',
            '1 8' => 'VAX8500',
            '1 9' => 'VAX8600',
            '1 10' => 'VAX8650',
            '1 11' => 'VAX8800',
            '1 12' => 'UVAXIII',
            '2 0' => 'RT (all)',
            '2 1' => 'RT PC',
            '2 2' => 'RT APC',
            '2 3' => 'RT 135',
            # 32032/32332/32532 subtypes.
            '4 0' => 'NS32032 (all)',
            '4 1' => 'NS32032 DPC (032 CPU)',
            '4 2' => 'NS32032 SQT',
            '4 3' => 'NS32032 APC FPU (32081)',
            '4 4' => 'NS32032 APC FPA (Weitek)',
            '4 5' => 'NS32032 XPC (532)',
            '5 0' => 'NS32332 (all)',
            '5 1' => 'NS32332 DPC (032 CPU)',
            '5 2' => 'NS32332 SQT',
            '5 3' => 'NS32332 APC FPU (32081)',
            '5 4' => 'NS32332 APC FPA (Weitek)',
            '5 5' => 'NS32332 XPC (532)',
            '6 1' => 'MC680x0 (all)',
            '6 2' => 'MC68040',
            '6 3' => 'MC68030',
            '7 3' => 'i386 (all)',
            '7 4' => 'i486',
            '7 132' => 'i486SX',
            '7 5' => 'i586',
            '7 22' => 'Pentium Pro',
            '7 54' => 'Pentium II M3',
            '7 86' => 'Pentium II M5',
            '7 103' => 'Celeron',
            '7 119' => 'Celeron Mobile',
            '7 8' => 'Pentium III',
            '7 24' => 'Pentium III M',
            '7 40' => 'Pentium III Xeon',
            '7 9' => 'Pentium M',
            '7 10' => 'Pentium 4',
            '7 26' => 'Pentium 4 M',
            '7 11' => 'Itanium',
            '7 27' => 'Itanium 2',
            '7 12' => 'Xeon',
            '7 28' => 'Xeon MP',
            '8 0' => 'MIPS (all)',
            '8 1' => 'MIPS R2300',
            '8 2' => 'MIPS R2600',
            '8 3' => 'MIPS R2800',
            '8 4' => 'MIPS R2000a',
            '8 5' => 'MIPS R2000',
            '8 6' => 'MIPS R3000a',
            '8 7' => 'MIPS R3000',
            '10 0' => 'MC98000 (all)',
            '10 1' => 'MC98601',
            '11 0' => 'HPPA (all)',
            '11 1' => 'HPPA 7100LC',
            '12 0' => 'ARM (all)',
            '12 1' => 'ARM A500 ARCH',
            '12 2' => 'ARM A500',
            '12 3' => 'ARM A440',
            '12 4' => 'ARM M4',
            '12 5' => 'ARM A680/V4T',
            '12 6' => 'ARM V6',
            '12 7' => 'ARM V5TEJ',
            '12 8' => 'ARM XSCALE',
            '12 9' => 'ARM V7',
            '13 0' => 'MC88000 (all)',
            '13 1' => 'MC88100',
            '13 2' => 'MC88110',
            '14 0' => 'SPARC (all)',
            '14 1' => 'SUN 4/260',
            '14 2' => 'SUN 4/110',
            '15 0' => 'i860 (all)',
            '15 1' => 'i860 860',
            '16 0' => 'i860 little (all)',
            '16 1' => 'i860 little',
            '17 0' => 'RS6000 (all)',
            '17 1' => 'RS6000',
            '18 0' => 'PowerPC (all)',
            '18 1' => 'PowerPC 601',
            '18 2' => 'PowerPC 602',
            '18 3' => 'PowerPC 603',
            '18 4' => 'PowerPC 603e',
            '18 5' => 'PowerPC 603ev',
            '18 6' => 'PowerPC 604',
            '18 7' => 'PowerPC 604e',
            '18 8' => 'PowerPC 620',
            '18 9' => 'PowerPC 750',
            '18 10' => 'PowerPC 7400',
            '18 11' => 'PowerPC 7450',
            '18 100' => 'PowerPC 970',
            '255 1' => 'VEO 1',
            '255 2' => 'VEO 2',
        },
    },
    5 => {
        Name => 'ObjectFileType',
        PrintHex => 1,
        # ref https://svn.red-bean.com/pyobjc/branches/pyobjc-20x-branch/macholib/macholib/mach_o.py
        PrintConv => {
            -1 => 'Static library', #PH (internal use only)
            1 => 'Relocatable object',
            2 => 'Demand paged executable',
            3 => 'Fixed VM shared library',
            4 => 'Core',
            5 => 'Preloaded executable',
            6 => 'Dynamically bound shared library',
            7 => 'Dynamic link editor',
            8 => 'Dynamically bound bundle',
            9 => 'Shared library stub for static linking',
            # (the following from Apple loader.h header file)
            10 => 'Debug information',
            11 => 'x86_64 kexts',
        },
    },
    6 => {
        Name => 'ObjectFlags',
        PrintHex => 1,
        # ref Apple loader.h header file
        PrintConv => { BITMASK => {
            0 => 'No undefs',
            1 => 'Incremental link', # (was misspelt "Incrementa link")
            2 => 'Dyld link',
            3 => 'Bind at load',
            4 => 'Prebound',
            5 => 'Split segs',
            6 => 'Lazy init',
            7 => 'Two level',
            8 => 'Force flat',
            9 => 'No multi defs',
            10 => 'No fix prebinding',
            11 => 'Prebindable',
            12 => 'All mods bound',
            13 => 'Subsections via symbols',
            14 => 'Canonical',
            15 => 'Weak defines',
            16 => 'Binds to weak',
            17 => 'Allow stack execution',
            18 => 'Dead strippable dylib',
            19 => 'Root safe',
            20 => 'No reexported dylibs',
            21 => 'Random address',
        }},
    },
);
662
# Tag table for the header of a PEF (Classic MacOS executable) file.
# The default FORMAT is int32u, so a tag index is an offset in 32-bit words.
%Image::ExifTool::EXE::PEF = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS => { 2 => 'Other' },
    NOTES => q{
        Information extracted from PEF (Classic MacOS) executable files and
        libraries.
    },
    FORMAT => 'int32u',
    2 => {
        Name => 'CPUArchitecture',
        Format => 'undef[4]',
        # stored as a 4-character code
        PrintConv => {
            pwpc => 'PowerPC',
            m68k => '68000',
        },
    },
    3 => 'PEFVersion',
    4 => {
        Name => 'TimeStamp',
        Groups => { 2 => 'Time' },
        # timestamp is relative to Jan 1, 1904, so subtract the number of
        # seconds in 66 years (including 17 leap days) before converting
        ValueConv => 'ConvertUnixTime($val - ((66 * 365 + 17) * 24 * 3600))',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    #5 => 'OldDefVersion',
    #6 => 'OldImpVersion',
    #7 => 'CurrentVersion',
);
692
# Tag table for the header of an ELF (Unix executable) file.
# No table FORMAT is given here; only the two 16-bit tags set their own Format.
%Image::ExifTool::EXE::ELF = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS => { 2 => 'Other' },
    NOTES => q{
        Information extracted from ELF (Unix) executable files and SO libraries.
    },
    4 => {
        Name => 'CPUArchitecture',
        PrintConv => {
            1 => '32 bit',
            2 => '64 bit',
        },
    },
    5 => {
        Name => 'CPUByteOrder',
        PrintConv => {
            1 => 'Little endian',
            2 => 'Big endian',
        },
    },
    16 => {
        Name => 'ObjectFileType',
        Format => 'int16u',
        PrintConv => {
            0 => 'None',
            1 => 'Relocatable file',
            2 => 'Executable file',
            3 => 'Shared object file',
            4 => 'Core file',
        },
    },
    18 => {
        Name => 'CPUType',
        Format => 'int16u',
        # ref /usr/include/linux/elf-em.h
        PrintConv => {
            0  => 'None',
            1  => 'AT&T WE 32100',
            2  => 'SPARC',
            3  => 'i386',
            4  => 'Motorola 68000',
            5  => 'Motorola 88000',
            6  => 'i486',
            7  => 'i860',
            8  => 'MIPS R3000',
            10 => 'MIPS R4000',
            15 => 'HPPA',
            18 => 'Sun v8plus',
            20 => 'PowerPC',
            21 => 'PowerPC 64-bit',
            22 => 'IBM S/390',
            23 => 'Cell BE SPU',
            42 => 'SuperH',
            43 => 'SPARC v9 64-bit',
            46 => 'Renesas H8/300,300H,H8S',
            50 => 'HP/Intel IA-64',
            62 => 'AMD x86-64',
            76 => 'Axis Communications 32-bit embedded processor',
            87 => 'NEC v850',
            88 => 'Renesas M32R',
            0x5441 => 'Fujitsu FR-V',
            0x9026 => 'Alpha', # (interim value)
            0x9041 => 'm32r (old)',
            0x9080 => 'v850 (old)',
            0xa390 => 'S/390 (old)',
        },
    },
);
762
# Tag table for the member header of a static library archive
# (ref http://opensource.apple.com//source/xnu/xnu-1456.1.26/EXTERNAL_HEADERS/ar.h)
# Only the date field is extracted; the other header fields are listed in
# the comments for reference.
%Image::ExifTool::EXE::AR = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS => { 2 => 'Other' },
    NOTES => q{
        Information extracted from static libraries.
    },
    #  0 string[16] ar_name
    16 => {
        Name => 'CreateDate',
        Groups => { 2 => 'Time' },
        Format => 'string[12]',
        ValueConv => 'ConvertUnixTime($val,1)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    # 28 string[6] ar_uid
    # 34 string[6] ar_gid
    # 40 string[8] ar_mode
    # 48 string[10] ar_size
    # 58 string[2] terminator "`\n"
);
785
# Tag table for the header of a Microsoft compiled help (CHM) file
# (ref http://www.russotto.net/chm/chmformat.html)
# The default FORMAT is int32u, so a tag index is an offset in 32-bit words.
%Image::ExifTool::EXE::CHM = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS => { 2 => 'Other' },
    NOTES => 'Tags extracted from Microsoft Compiled HTML files.',
    FORMAT => 'int32u',
    1 => { Name => 'CHMVersion' },
    # 2 - total header length
    # 3 - 1
    # 4 - low bits of date/time value plus 42 (ref http://www.nongnu.org/chmspec/latest/ITSF.html)
    5 => {
        Name => 'LanguageCode',
        SeparateTable => 1,
        # format as a 4-digit uppercase hex string for the language code lookup
        ValueConv => 'sprintf("%.4X", $val)',
        PrintConv => \%languageCode,
    },
);
803
#------------------------------------------------------------------------------
# Extract information from a CHM file
# Inputs: 0) ExifTool object reference, 1) dirInfo reference (RAF is used)
# Returns: 1 on success, 0 if this wasn't a valid CHM file
sub ProcessCHM($$)
{
    my ($et, $dirInfo) = @_;
    my $header;

    # the 56-byte header must be fully readable
    return 0 if $$dirInfo{RAF}->Read($header, 56) != 56;
    # it must begin with "ITSF" and contain a fixed 16-byte sequence at offset 24
    return 0 if $header !~ /^ITSF.{20}\x10\xfd\x01\x7c\xaa\x7b\xd0\x11\x9e\x0c\0\xa0\xc9\x22\xe6\xec/s;

    my $chmTable = GetTagTable('Image::ExifTool::EXE::CHM');
    $et->SetFileType();
    SetByteOrder('II');     # header values are read as little-endian
    $et->ProcessDirectory({ DataPt => \$header }, $chmTable);
    return 1;
}
822
#------------------------------------------------------------------------------
# Read a null-terminated 2-byte-per-character Unicode string from resource data
# Inputs: 0) data ref, 1) start offset, 2) data end, 3) optional ExifTool object ref
# Returns: 0) string decoded from little-endian UCS2 to UTF8, or to the current
#             Charset option if an ExifTool ref was given
#          1) position after the string, bumped by 2 if not a multiple of 4
sub ReadUnicodeStr($$$;$)
{
    my ($dataPt, $pos, $end, $et) = @_;
    my $dataLen = length $$dataPt;
    $end = $dataLen if $end > $dataLen;     # (never read past the data)

    # gather 2-byte characters up to the terminator or the end of the data
    my @chars;
    while ($pos + 2 <= $end) {
        my $ch = substr($$dataPt, $pos, 2);
        $pos += 2;      # (the terminator is consumed too)
        last if $ch eq "\0\0";
        push @chars, $ch;
    }
    $pos += 2 if $pos & 0x03;

    my $to = $et ? $et->Options('Charset') : 'UTF8';
    my $str = join '', @chars;
    return (Image::ExifTool::Decode(undef,$str,'UCS2','II',$to), $pos);
}
843
#------------------------------------------------------------------------------
# Process Windows PE Version Resource
# Inputs: 0) ExifTool object ref, 1) dirInfo ref (uses DataPt, DirStart, DirLen)
# Returns: true on success, 0 if the resource is invalid or truncated
# Notes: - DirStart and DirLen of dirInfo are overwritten when the fixed version
#          information is processed
#        - each block starts with three int16u values (length, value length and
#          type) followed by a null-terminated Unicode key string
sub ProcessPEVersion($$)
{
    my ($et, $dirInfo) = @_;
    my $dataPt = $$dirInfo{DataPt};
    my $pos = $$dirInfo{DirStart};
    my $end = $pos + $$dirInfo{DirLen};
    my ($index, $len, $valLen, $type, $string, $strEnd);

    # get VS_VERSION_INFO
    for ($index = 0; ; ++$index) {
        $pos = ($pos + 3) & 0xfffffffc; # align on a 4-byte boundary
        last if $pos + 6 > $end;
        $len = Get16u($dataPt, $pos);
        $valLen = Get16u($dataPt, $pos + 2);
        $type = Get16u($dataPt, $pos + 4);
        return 0 unless $len or $valLen; # prevent possible infinite loop
        ($string, $strEnd) = ReadUnicodeStr($dataPt, $pos + 6, $pos + $len);
        return 0 if $strEnd + $valLen > $end;
        # the very first block must be the VS_VERSION_INFO header
        unless ($index or $string eq 'VS_VERSION_INFO') {
            $et->Warn('Invalid Version Info block');
            return 0;
        }
        if ($string eq 'VS_VERSION_INFO') {
            # parse the fixed version info
            $$dirInfo{DirStart} = $strEnd;
            $$dirInfo{DirLen} = $valLen;
            my $subTablePtr = GetTagTable('Image::ExifTool::EXE::PEVersion');
            $et->ProcessDirectory($dirInfo, $subTablePtr);
            $pos = $strEnd + $valLen;
        } elsif ($string eq 'StringFileInfo' and $valLen == 0) {
            $pos += $len;
            # don't scan past the end of the resource even if the block
            # length says it extends further
            my $tableEnd = $pos < $end ? $pos : $end;
            my $pt = $strEnd;
            # parse string table
            my $tagTablePtr = GetTagTable('Image::ExifTool::EXE::PEString');
            # (this loop uses its own variables so it doesn't disturb the
            #  state of the outer loop)
            my $entry;
            for ($entry = 0; $pt + 6 < $tableEnd; ++$entry) {
                my $entryLen = Get16u($dataPt, $pt);
                my $entryValLen = Get16u($dataPt, $pt + 2);
                # $type = Get16u($dataPt, $pt + 4);
                my $entryEnd = $pt + $entryLen;
                # get tag ID (converted to UTF8)
                my $tag;
                ($tag, $pt) = ReadUnicodeStr($dataPt, $pt + 6, $entryEnd);
                unless ($entry) {
                    # the first entry is the string table header: its key holds
                    # the language code and character set, and the string
                    # entries follow directly after the key
                    # (not sure what the CharacterSet tag is for, but the string
                    # values stored here are UCS-2 in all my files even if the
                    # CharacterSet is otherwise)
                    my $char;
                    if (length($tag) > 4) {
                        $char = substr($tag, 4);
                        $tag = substr($tag, 0, 4);
                    }
                    $et->HandleTag($tagTablePtr, 'LanguageCode', uc $tag);
                    $et->HandleTag($tagTablePtr, 'CharacterSet', uc $char) if $char;
                    next;
                }
                # create entry in tag table if it doesn't already exist
                unless ($$tagTablePtr{$tag}) {
                    my $name = $tag;
                    $name =~ tr/-_a-zA-Z0-9//dc; # remove illegal characters
                    unless (length $name) {
                        # can't name this tag, so skip the whole entry including
                        # its value (otherwise the value would be parsed as the
                        # next entry), but only ever step forward so a bad
                        # length can't stall the loop
                        my $next = ($entryEnd + 3) & 0xfffffffc;
                        $pt = $next if $next > $pt;
                        next;
                    }
                    AddTagToTable($tagTablePtr, $tag, { Name => $name });
                }
                # get tag value (converted to current Charset)
                my $value = '';
                if ($entryValLen) {
                    ($value, $pt) = ReadUnicodeStr($dataPt, $pt, $entryEnd, $et);
                }
                $et->HandleTag($tagTablePtr, $tag, $value);
                # step to next entry (padded to an even word)
                $pt = ($entryEnd + 3) & 0xfffffffc;
            }
        } else {
            $pos += $len + $valLen;
            # ignore other information (for now)
        }
    }
    return 1;
}
928
#------------------------------------------------------------------------------
# Process Windows PE Resources
# Inputs: 0) ExifTool object ref, 1) dirInfo ref (uses RAF, Base, DirStart,
#            Level and Sections; sets ResType, DirStart, Level and GotVersion)
# Returns: true on success
# Notes: calls itself for each sub-directory, to a maximum depth of 10
sub ProcessPEResources($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my $base = $$dirInfo{Base};
    my $dirStart = $$dirInfo{DirStart} + $base;
    my $level = $$dirInfo{Level} || 0;
    my $verbose = $et->Options('Verbose');
    my $buff;

    return 0 if $level > 10; # protect against deep recursion

    # read the resource header, which ends with the counts of named and ID entries
    return 0 unless $raf->Seek($dirStart, 0) and $raf->Read($buff, 16) == 16;
    my $count = Get16u(\$buff, 12) + Get16u(\$buff, 14);
    # read all of the 8-byte directory entries at once
    my $tableLen = $count * 8;
    return 0 unless $raf->Read($buff, $tableLen) == $tableLen;

    # loop through all resource entries
    foreach my $item (0 .. $count - 1) {
        my $entry = $item * 8;
        my $name = Get32u(\$buff, $entry);
        my $entryPos = Get32u(\$buff, $entry + 4);
        if (not $level) {
            # set resource type if this is the 0th level directory
            my $resType = $resourceType{$name} || sprintf('Unknown (0x%x)', $name);
            # ignore everything but the Version resource unless verbose
            if ($verbose) {
                $et->VPrint(0, "$resType resource:\n");
            } elsif ($resType ne 'Version') {
                next;
            }
            $$dirInfo{ResType} = $resType;
        }
        if ($entryPos & 0x80000000) {   # is this a directory?
            # descend into next directory level
            $$dirInfo{DirStart} = $entryPos & 0x7fffffff;
            $$dirInfo{Level} = $level + 1;
            ProcessPEResources($et, $dirInfo) or return 0;
            --$$dirInfo{Level};
            next;
        }
        # (only process first Version resource)
        next unless $$dirInfo{ResType} eq 'Version' and $level == 2 and
                    not $$dirInfo{GotVersion};

        # get position of this resource in the file
        my $data;
        return 0 unless $raf->Seek($entryPos + $base, 0) and $raf->Read($data, 16) == 16;
        my $off = Get32u(\$data, 0);
        my $len = Get32u(\$data, 4);
        # determine which section this is in so we can convert the virtual address
        my $filePos;
        foreach my $section (@{$$dirInfo{Sections}}) {
            my $start = $$section{VirtualAddress};
            if ($off >= $start and $off < $start + $$section{Size}) {
                $filePos = $off + $$section{Base} - $start;
                last;
            }
        }
        return 0 unless $filePos;
        return 0 unless $raf->Seek($filePos, 0) and $raf->Read($data, $len) == $len;
        my %versionDir = (
            DataPt => \$data,
            DataLen => $len,
            DirStart => 0,
            DirLen => $len,
        );
        ProcessPEVersion($et, \%versionDir) or $et->Warn('Possibly corrupt Version resource');
        $$dirInfo{GotVersion} = 1;  # set flag so we don't do this again
    }
    return 1;
}
1001
#------------------------------------------------------------------------------
# Process Windows PE file data dictionary
# Inputs: 0) ExifTool object ref, 1) dirInfo ref (uses RAF and DataPt, where
#            DataPt refers to a sequence of 40-byte section entries)
# Returns: true on success
sub ProcessPEDict($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my $dataPt = $$dirInfo{DataPt};
    my $numSections = int(length($$dataPt) / 40);   # (complete entries only)
    my (@sections, %rsrcInfo, $rsrcFound);

    # loop through all sections
    foreach my $num (0 .. $numSections - 1) {
        my $pos = $num * 40;
        my $name = substr($$dataPt, $pos, 8);
        my $va = Get32u($dataPt, $pos + 12);
        my $size = Get32u($dataPt, $pos + 16);
        my $offset = Get32u($dataPt, $pos + 20);
        # remember the section offsets for the VirtualAddress lookup later
        push @sections, { Base => $offset, Size => $size, VirtualAddress => $va };
        # save details of the first resource section (or .text if .rsrc not found, ref forum11465)
        if ($name eq ".rsrc\0\0\0" and not $rsrcFound) {
            $rsrcFound = 1;
        } elsif ($name ne ".text\0\0\0" or %rsrcInfo) {
            next;
        }
        %rsrcInfo = (
            RAF => $raf,
            Base => $offset,
            DirStart => 0, # (relative to Base)
            DirLen => $size,
            Sections => \@sections,
        );
    }
    # process the first resource section
    if (%rsrcInfo) {
        ProcessPEResources($et, \%rsrcInfo) or return 0;
    }
    return 1;
}
1037
#------------------------------------------------------------------------------
# Override file type if necessary for Mach object files and libraries
# Inputs: 0) ExifTool ref, 1) ObjectFileType number, 2) flag for fat binary
# Notes: Object types missing from the lookup below leave the file type alone
# lookup: ObjectFileType number => [ description, file extension ]
my %machOverride = (
    1 => [ 'object file',               'O'     ],
    6 => [ 'dynamic link library',      'DYLIB' ],
    8 => [ 'dynamic bound bundle',      'DYLIB' ],
    9 => [ 'dynamic link library stub', 'DYLIB' ],
);
sub MachOverride($$;$)
{
    my ($et, $objType, $fat) = @_;
    my $entry = $machOverride{$objType};
    if ($entry) {
        my ($what, $ext) = @$entry;
        # build "Mach-O [fat ]<description>"
        my $desc = join ' ', 'Mach-O', ($fat ? 'fat' : ()), $what;
        $et->OverrideFileType($desc, undef, $ext);
    }
}
1056
#------------------------------------------------------------------------------
# Extract tags from Mach header
# Inputs: 0) ExifTool ref, 1) data ref, 2) flag to extract object type
# Returns: true if Mach header was found
# Notes: The data must hold at least the first 12 bytes of the header (magic,
#        cputype, cpusubtype), otherwise nothing is extracted and 0 is returned.
#        With the object-type flag set, the file type needs 16 bytes and the
#        flags need 28 bytes; fields beyond the end of the data are skipped
#        instead of being read past the end of the buffer.
# Mach type based on magic number
# [bit depth, byte order starting with "Little" or "Big"]
my %machType = (
    "\xfe\xed\xfa\xce" => ['32 bit', 'Big endian'],
    "\xce\xfa\xed\xfe" => ['32 bit', 'Little endian'],
    "\xfe\xed\xfa\xcf" => ['64 bit', 'Big endian'],
    "\xcf\xfa\xed\xfe" => ['64 bit', 'Little endian'],
);
sub ExtractMachTags($$;$)
{
    my ($et, $dataPt, $doObj) = @_;
    my $len = length $$dataPt;
    # get information about mach header based on the magic number (first 4 bytes)
    my $info = $machType{substr($$dataPt, 0, 4)};
    # not a Mach header, or too short to contain the CPU type fields
    return 0 unless $info and $len >= 12;

    # Mach header structure:
    #  0 int32u magic
    #  4 int32u cputype
    #  8 int32u cpusubtype
    # 12 int32u filetype
    # 16 int32u ncmds
    # 20 int32u sizeofcmds
    # 24 int32u flags
    my $tagTablePtr = GetTagTable('Image::ExifTool::EXE::MachO');
    SetByteOrder($$info[1]);
    my $cpuType = Get32s($dataPt, 4);
    my $subType = Get32s($dataPt, 8);
    $et->HandleTag($tagTablePtr, 0, $$info[0]);
    $et->HandleTag($tagTablePtr, 1, $$info[1]);
    $et->HandleTag($tagTablePtr, 3, $cpuType);
    $et->HandleTag($tagTablePtr, 4, "$cpuType $subType");
    if ($doObj) {
        # (callers may supply as few as 16 bytes, so check before each read)
        if ($len >= 16) {
            my $objType = Get32u($dataPt, 12);
            $et->HandleTag($tagTablePtr, 5, $objType);
            $et->HandleTag($tagTablePtr, 6, Get32u($dataPt, 24)) if $len >= 28;
            # override file type if this is an object file or library
            MachOverride($et, $objType);
        }
    } else { # otherwise this was a static library
        $et->OverrideFileType('Mach-O static library', undef, 'A');
    }
    return 1;
}
1105
#------------------------------------------------------------------------------
# Extract information from an EXE file
# Inputs: 0) ExifTool object reference, 1) dirInfo reference
# Returns: 1 on success, 0 if this wasn't a valid EXE file
# Notes: The format is recognized from the first 64 bytes of the file: DOS and
#        Windows executables (MZ with optional NE, PE or LE header), Mach-O
#        (thin and fat), PEF, ELF, "ar" static libraries and "#!" scripts.
#        With FastScan == 3 only the file type is set for formats that would
#        otherwise need further reads.
sub ProcessEXE($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $buf2, $type, $mime, $ext, $tagTablePtr, %dirInfo);

    my $size = $raf->Read($buff, 0x40) or return 0;
    my $fast3 = $$et{OPTIONS}{FastScan} && $$et{OPTIONS}{FastScan} == 3;
#
# DOS and Windows EXE
#
    if ($buff =~ /^MZ/ and $size == 0x40) {
        # DOS/Windows executable
        # validate DOS header
        # (ref http://www.delphidabbler.com/articles?article=8&part=2)
        #   0 int16u     magic    - Magic number ("MZ")
        #   2 int16u     cblp     - Bytes on last page of file
        #   4 int16u     cp       - Pages in file
        #   6 int16u     crlc     - Relocations
        #   8 int16u     cparhdr  - Size of header in paragraphs
        #  10 int16u     minalloc - Minimum extra paragraphs needed
        #  12 int16u     maxalloc - Maximum extra paragraphs needed
        #  14 int16u     ss       - Initial (relative) SS value
        #  16 int16u     sp       - Initial SP value
        #  18 int16u     csum     - Checksum
        #  20 int16u     ip       - Initial IP value
        #  22 int16u     cs       - Initial (relative) CS value
        #  24 int16u     lfarlc   - Address of relocation table
        #  26 int16u     ovno     - Overlay number
        #  28 int16u[4]  res      - Reserved words
        #  36 int16u     oemid    - OEM identifier (for oeminfo)
        #  38 int16u     oeminfo  - OEM info; oemid specific
        #  40 int16u[10] res2     - Reserved words
        #  60 int32u     lfanew   - File address of new exe header
        SetByteOrder('II');
        my ($cblp, $cp, $lfarlc, $lfanew) = unpack('x2v2x18vx34V', $buff);
        my $fileSize = ($cp - ($cblp ? 1 : 0)) * 512 + $cblp;
        #(patch to accommodate observed 64-bit files)
        #return 0 if $fileSize < 0x40 or $fileSize < $lfarlc;
        #return 0 if $fileSize < 0x40; (changed to warning in ExifTool 12.08)
        $et->Warn('Invalid file size in DOS header') if $fileSize < 0x40;
        # read the Windows NE, PE or LE (virtual device driver) header
        # (update $size to the number of bytes actually read so the header
        #  size checks below test this buffer and not the DOS header)
        #if ($lfarlc == 0x40 and $fileSize > $lfanew + 2 and ...
        if ($raf->Seek($lfanew, 0) and ($size = $raf->Read($buff, 0x40)) and $buff =~ /^(NE|PE|LE)/) {
            if ($1 eq 'NE') {
                if ($size >= 0x40) { # NE header is 64 bytes (ref 2)
                    # check for DLL
                    my $appFlags = Get16u(\$buff, 0x0c);
                    $ext = $appFlags & 0x80 ? 'DLL' : 'EXE';
                    $type = "Win16 $ext";
                    # offset 0x02 is 2 bytes with linker version and revision numbers
                    # offset 0x36 is executable type (2 = Windows)
                }
            } elsif ($1 eq 'PE') {
                # PE header comes at byte 4 in buff:
                #   4 int16u Machine
                #   6 int16u NumberOfSections
                #   8 int32u TimeDateStamp
                #  12 int32u PointerToSymbolTable
                #  16 int32u NumberOfSymbols
                #  20 int16u SizeOfOptionalHeader
                #  22 int16u Characteristics
                if ($size >= 24) {  # PE header is 24 bytes (plus optional header)
                    my $mach = Get16u(\$buff, 4);   # MachineType
                    my $flags = Get16u(\$buff, 22); # ImageFileCharacteristics
                    my $machine = $Image::ExifTool::EXE::Main{0}{PrintConv}{$mach} || '';
                    my $winType = $machine =~ /64/ ? 'Win64' : 'Win32';
                    $ext = $flags & 0x2000 ? 'DLL' : 'EXE';
                    $et->SetFileType("$winType $ext", undef, $ext);
                    return 1 if $fast3;
                    # read the rest of the optional header if necessary
                    my $optSize = Get16u(\$buff, 20);
                    my $more = $optSize + 24 - $size;
                    if ($more > 0) {
                        if ($raf->Read($buf2, $more) == $more) {
                            $buff .= $buf2;
                            $size += $more;
                            my $magic = Get16u(\$buff, 24);
                            # verify PE magic number
                            unless ($magic == 0x107 or $magic == 0x10b or $magic == 0x20b) {
                                $et->Warn('Unknown PE magic number');
                                return 1;
                            }
                            # --> 64-bit if $magic is 0x20b ????
                        } else {
                            $et->Warn('Error reading optional header');
                        }
                    }
                    # process PE COFF file header
                    $tagTablePtr = GetTagTable('Image::ExifTool::EXE::Main');
                    %dirInfo = (
                        DataPt => \$buff,
                        DataPos => $lfanew, # ($buff always starts at the new exe header)
                        DataLen => $size,
                        DirStart => 4,
                        DirLen => $size - 4,
                    );
                    $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
                    # process data dictionary
                    # (the section table directly follows the optional header, so seek
                    #  there explicitly: the file pointer is past it when the optional
                    #  header is shorter than the 64 bytes already read)
                    my $num = Get16u(\$buff, 6);    # NumberOfSections
                    if ($raf->Seek($lfanew + 24 + $optSize, 0) and
                        $raf->Read($buff, 40 * $num) == 40 * $num)
                    {
                        %dirInfo = (
                            RAF => $raf,
                            DataPt => \$buff,
                        );
                        ProcessPEDict($et, \%dirInfo) or $et->Warn('Error processing PE data dictionary');
                    }
                    return 1;
                }
            } else {
                $type = 'Virtual Device Driver';
                $ext = '386';
            }
        } else {
            $type = 'DOS EXE';
            $ext = 'exe';
        }
#
# Mach-O (Mac OS X)
#
    } elsif ($buff =~ /^(\xca\xfe\xba\xbe|\xfe\xed\xfa(\xce|\xcf)|(\xce|\xcf)\xfa\xed\xfe)/ and $size > 12) {
        # Mach-O executable
        # (ref http://developer.apple.com/documentation/DeveloperTools/Conceptual/MachORuntime/Reference/reference.html)
        $tagTablePtr = GetTagTable('Image::ExifTool::EXE::MachO');
        if ($1 eq "\xca\xfe\xba\xbe") {
            SetByteOrder('MM');
            $et->SetFileType('Mach-O fat binary executable', undef, '');
            return 1 if $fast3;
            my $count = Get32u(\$buff, 4);  # get architecture count
            # (fat header is 8 bytes, followed by one 20-byte entry per architecture)
            my $more = $count * 20 - ($size - 8);
            if ($more > 0) {
                unless ($raf->Read($buf2, $more) == $more) {
                    $et->Warn('Error reading fat-arch headers');
                    return 1;
                }
                $buff .= $buf2;
                $size += $more;
            }
            $et->HandleTag($tagTablePtr, 2, $count);
            my $i;
            for ($i=0; $i<$count; ++$i) {
                my $cpuType = Get32s(\$buff, 8 + $i * 20);
                my $subType = Get32s(\$buff, 12 + $i * 20);
                $et->HandleTag($tagTablePtr, 3, $cpuType);
                $et->HandleTag($tagTablePtr, 4, "$cpuType $subType");
            }
            # load first Mach-O header to get the object file type
            my $offset = Get32u(\$buff, 16);
            if ($raf->Seek($offset, 0) and $raf->Read($buf2, 16) == 16) {
                if ($buf2 =~ /^(\xfe\xed\xfa(\xce|\xcf)|(\xce|\xcf)\xfa\xed\xfe)/) {
                    SetByteOrder($buf2 =~ /^\xfe\xed/ ? 'MM' : 'II');
                    my $objType = Get32u(\$buf2, 12);
                    $et->HandleTag($tagTablePtr, 5, $objType);
                    # override file type if this is a library or object file
                    MachOverride($et, $objType, 'fat');
                } elsif ($buf2 =~ /^!<arch>\x0a/) {
                    # .a libraries use this magic number
                    $et->HandleTag($tagTablePtr, 5, -1);
                    # override file type since this is a library
                    $et->OverrideFileType('Mach-O fat static library', undef, 'A');
                } else {
                    $et->Warn('Unrecognized object file type');
                }
            } else {
                $et->Warn('Error reading file');
            }
        } elsif ($size >= 16) {
            $et->SetFileType('Mach-O executable', undef, '');
            return 1 if $fast3;
            ExtractMachTags($et, \$buff, 1);
        }
        return 1;
#
# PEF (classic MacOS)
#
    } elsif ($buff =~ /^Joy!peff/ and $size > 12) {
        # ref http://developer.apple.com/documentation/mac/pdf/MacOS_RT_Architectures.pdf
        $et->SetFileType('Classic MacOS executable', undef, '');
        return 1 if $fast3;
        SetByteOrder('MM');
        $tagTablePtr = GetTagTable('Image::ExifTool::EXE::PEF');
        %dirInfo = (
            DataPt => \$buff,
            DataPos => 0,
            DataLen => $size,
            DirStart => 0,
            DirLen => $size,
        );
        $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
        return 1;
#
# ELF (Unix)
#
    } elsif ($buff =~ /^\x7fELF/ and $size >= 16) {
        $et->SetFileType('ELF executable', undef, '');
        return 1 if $fast3;
        # (byte 5 of the ELF identification gives the data encoding)
        SetByteOrder(Get8u(\$buff,5) == 1 ? 'II' : 'MM');
        $tagTablePtr = GetTagTable('Image::ExifTool::EXE::ELF');
        %dirInfo = (
            DataPt => \$buff,
            DataPos => 0,
            DataLen => $size,
            DirLen => $size,
        );
        $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
        # override file type if this is a library or object file
        my $override = {
            1 => [ 'ELF object file', 'O' ],
            3 => [ 'ELF shared library', 'SO' ],
        }->{$$et{VALUE}{ObjectFileType} || 0};
        $et->OverrideFileType($$override[0], undef, $$override[1]) if $override;
        return 1;
#
# .a libraries
#
    } elsif ($buff =~ /^!<arch>\x0a/) {
        $et->SetFileType('Static library', undef, 'A');
        return 1 if $fast3;
        my $pos = 8;    # current file position
        my $max = 10;   # maximum number of archive files to check
        # read into list of ar structures (each 60 bytes long):
        while ($max-- > 0) {
            # seek to start of the ar structure and read it
            $raf->Seek($pos, 0) and $raf->Read($buff, 60) == 60 or last;
            substr($buff, 58, 2) eq "`\n" or $et->Warn('Invalid archive header'), last;
            unless ($tagTablePtr) {
                # extract some information from first file in archive
                $tagTablePtr = GetTagTable('Image::ExifTool::EXE::AR');
                %dirInfo = (
                    DataPt => \$buff,
                    DataPos => $pos,
                );
                $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
            }
            my $name = substr($buff, 0, 16);
            if ($name =~ m{^#1/(\d+) *$}) { # check for extended archive (BSD variant)
                my $len = $1;
                $len > 256 and $et->Warn('Invalid extended archive name length'), last;
                # (we read the name here just to move the file pointer)
                $raf->Read($name, $len) == $len or $et->Warn('Error reading archive name'), last;
            }
            my $arSize = substr($buff, 48, 10);
            $arSize =~ s/^(\d+).*/$1/s or last; # make sure archive size is a number
            $raf->Read($buff, 28) == 28 or last; # read (possible) Mach header
            ExtractMachTags($et, \$buff) and last; # try to extract tags
            $pos += 60 + $arSize;   # step to next entry
            ++$pos if $pos & 0x01;  # padded to an even byte
        }
        return 1;
#
# various scripts (perl, sh, etc...)
#
    } elsif ($buff =~ m{^#!\s*/\S*bin/(\w+)}) {
        my $prog = $1;
        # (for "env" launchers, use the interpreter named later on the line)
        $prog = $1 if $prog eq 'env' and $buff =~ /\b(perl|python|ruby|php)\b/;
        $type = "$prog script";
        $mime = "text/x-$prog";
        $ext = {
            perl   => 'pl',
            python => 'py',
            ruby   => 'rb',
            php    => 'php',
        }->{$prog};
        # use '.sh' for extension of all shell scripts
        $ext = $prog =~ /sh$/ ? 'sh' : '' unless defined $ext;
    }
    # (type is still unset for unrecognized data and for NE/PE headers that
    #  were too short to validate)
    return 0 unless $type;
    $et->SetFileType($type, $mime, $ext);
    return 1;
}
1380
13811; # end
1382
1383__END__
1384
1385=head1 NAME
1386
1387Image::ExifTool::EXE - Read executable file meta information
1388
1389=head1 SYNOPSIS
1390
1391This module is used by Image::ExifTool
1392
1393=head1 DESCRIPTION
1394
1395This module contains definitions required by Image::ExifTool to extract meta
1396information from various types of Windows, MacOS and Unix executable and
1397library files.
1398
1399=head1 AUTHOR
1400
1401Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
1402
1403This library is free software; you can redistribute it and/or modify it
1404under the same terms as Perl itself.
1405
1406=head1 REFERENCES
1407
1408=over 4
1409
1410=item L<http://www.openwatcom.org/ftp/devel/docs/pecoff.pdf>
1411
1412=item L<http://support.microsoft.com/kb/65122>
1413
1414=item L<http://www.opensource.apple.com>
1415
1416=item L<http://www.skyfree.org/linux/references/ELF_Format.pdf>
1417
1418=item L<http://msdn.microsoft.com/en-us/library/ms809762.aspx>
1419
1420=item L<http://code.google.com/p/pefile/>
1421
1422=item L<http://www.codeproject.com/KB/DLL/showver.aspx>
1423
1424=back
1425
1426=head1 SEE ALSO
1427
1428L<Image::ExifTool::TagNames/EXE Tags>,
1429L<Image::ExifTool(3pm)|Image::ExifTool>
1430
1431=cut
1432
Note: See TracBrowser for help on using the repository browser.