source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/Font.pm@ 24107

Last change on this file since 24107 was 24107, checked in by sjm84, 13 years ago

Updating the ExifTool perl modules

  • Property svn:executable set to *
File size: 27.0 KB
Line 
1#------------------------------------------------------------------------------
2# File: Font.pm
3#
4# Description: Read meta information from font files
5#
6# Revisions: 2010/01/15 - P. Harvey Created
7#
8# References: 1) http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6.html
9# 2) http://www.microsoft.com/typography/otspec/otff.htm
10# 3) http://partners.adobe.com/public/developer/opentype/index_font_file.html
11# 4) http://partners.adobe.com/public/developer/en/font/5178.PFM.pdf
12# 5) http://opensource.adobe.com/svn/opensource/flex/sdk/trunk/modules/compiler/src/java/flex2/compiler/util/MimeMappings.java
13# 6) http://www.adobe.com/devnet/font/pdfs/5004.AFM_Spec.pdf
14#------------------------------------------------------------------------------
15
16package Image::ExifTool::Font;
17
18use strict;
19use vars qw($VERSION %ttLang);
20use Image::ExifTool qw(:DataAccess :Utils);
21
22$VERSION = '1.05';
23
24sub ProcessOTF($$);
25
# TrueType 'name' platform ID => platform name.  The names double as the
# first-level keys of the %ttCharset and %ttLang lookups.
my %ttPlatform = do {
    my $id = 0;     # platform IDs are consecutive, starting at zero
    map { ($id++, $_) } qw(Unicode Macintosh ISO Windows Custom);
};
34
# TrueType 'name' encoding ID => ExifTool Charset name, keyed first by the
# platform name from %ttPlatform (ref 1/2).  An empty string marks an encoding
# that is known but not translated; a missing key is an unknown encoding.
my %ttCharset = (
    Macintosh => {
        0  => 'MacRoman',
        1  => 'MacJapanese',
        2  => 'MacChineseTW',
        3  => 'MacKorean',
        4  => 'MacArabic',
        5  => 'MacHebrew',
        6  => 'MacGreek',
        7  => 'MacCyrillic',    # 7=Russian
        8  => 'MacRSymbol',
        9  => 'MacDevanagari',
        10 => 'MacGurmukhi',
        11 => 'MacGujarati',
        12 => 'MacOriya',
        13 => 'MacBengali',
        14 => 'MacTamil',
        15 => 'MacTelugu',
        16 => 'MacKannada',
        17 => 'MacMalayalam',
        18 => 'MacSinhalese',
        19 => 'MacBurmese',
        20 => 'MacKhmer',
        21 => 'MacThai',
        22 => 'MacLaotian',
        23 => 'MacGeorgian',
        24 => 'MacArmenian',
        25 => 'MacChineseCN',
        26 => 'MacTibetan',
        27 => 'MacMongolian',
        28 => 'MacGeez',
        29 => 'MacCyrillic',    # 29=Slavic
        30 => 'MacVietnam',
        31 => 'MacSindhi',
        32 => '',               # 32=uninterpreted
    },
    Windows => {
        0  => 'Symbol',
        1  => 'UCS2',
        2  => 'ShiftJIS',
        3  => 'PRC',
        4  => 'Big5',
        5  => 'Wansung',
        6  => 'Johab',
        10 => 'UCS4',
    },
    Unicode => {
        # (we don't currently handle the various Unicode flavours, so every
        #  encoding is treated as UCS2)
        #   0 = Unicode 1.0 semantics
        #   1 = Unicode 1.1 semantics
        #   2 = ISO 10646 semantics
        #   3 = Unicode 2.0 and onwards semantics, Unicode BMP only
        #   4 = Unicode 2.0 and onwards semantics, Unicode full repertoire
        #   5 = Unicode Variation Sequences (not used in Naming table, so omitted)
        map { ($_, 'UCS2') } 0 .. 4
    },
    ISO => { # (deprecated)
        0 => 'UTF8',    # (7-bit ASCII)
        1 => 'UCS2',    # ISO 10646
        2 => 'Latin',   # ISO 8859-1
    },
    Custom => { },
);
78
# TrueType 'name' language ID => ExifTool language code, keyed first by the
# platform name from %ttPlatform.  Entries are listed in ascending ID order.
%ttLang = (
    # Macintosh language codes (also used by QuickTime.pm)
    # oddities:
    #    49 - Cyrillic version     83 - Roman
    #    50 - Arabic version       84 - Arabic
    #   146 - with dot above
    Macintosh => {
        0   => 'en',    1   => 'fr',    2   => 'de',    3   => 'it',
        4   => 'nl',    5   => 'sv',    6   => 'es',    7   => 'da',
        8   => 'pt',    9   => 'no',    10  => 'he',    11  => 'ja',
        12  => 'ar',    13  => 'fi',    14  => 'iu',    15  => 'is',
        16  => 'mt',    17  => 'tr',    18  => 'hr',    19  => 'zh-TW',
        20  => 'ur',    21  => 'hi',    22  => 'th',    23  => 'ko',
        24  => 'lt',    25  => 'pl',    26  => 'hu',    27  => 'et',
        28  => 'lv',    29  => 'smi',   30  => 'fo',    31  => 'fa',
        32  => 'ru',    33  => 'zh-CN', 34  => 'nl',    35  => 'ga',
        36  => 'sq',    37  => 'ro',    38  => 'cs',    39  => 'sk',
        40  => 'sl',    41  => 'yi',    42  => 'sr',    43  => 'mk',
        44  => 'bg',    45  => 'uk',    46  => 'be',    47  => 'uz',
        48  => 'kk',    49  => 'az',    50  => 'az',    51  => 'hy',
        52  => 'ka',    53  => 'ro',    54  => 'ky',    55  => 'tg',
        56  => 'tk',    57  => 'mn-MN', 58  => 'mn-CN', 59  => 'ps',
        60  => 'ku',    61  => 'ks',    62  => 'sd',    63  => 'bo',
        64  => 'ne',    65  => 'sa',    66  => 'mr',    67  => 'bn',
        68  => 'as',    69  => 'gu',    70  => 'pa',    71  => 'or',
        72  => 'ml',    73  => 'kn',    74  => 'ta',    75  => 'te',
        76  => 'si',    77  => 'my',    78  => 'km',    79  => 'lo',
        80  => 'vi',    81  => 'id',    82  => 'tl',    83  => 'ms-MY',
        84  => 'ms-BN', 85  => 'am',    86  => 'ti',    87  => 'om',
        88  => 'so',    89  => 'sw',    90  => 'rw',    91  => 'rn',
        92  => 'ny',    93  => 'mg',    94  => 'eo',
        128 => 'cy',    129 => 'eu',    130 => 'ca',    131 => 'la',
        132 => 'qu',    133 => 'gn',    134 => 'ay',    135 => 'tt',
        136 => 'ug',    137 => 'dz',    138 => 'jv',    139 => 'su',
        140 => 'gl',    141 => 'af',    142 => 'br',    144 => 'gd',
        145 => 'vg',    146 => 'ga',    147 => 'rar',   148 => 'el',
        149 => 'kl',    150 => 'az',
    },
    # Windows language codes (http://msdn.microsoft.com/en-us/library/0h88fahh(VS.85).aspx)
    # Notes: This isn't an exact science.  The reference above gives language codes
    # which are different from some ISO 639-1 numbers.  Also, some Windows language
    # codes don't appear to have ISO 639-1 equivalents.
    #   0x0428 - fa by ref above
    #   0x048c - no ISO equivalent
    #   0x081a/0x83c - sr-SP
    #   0x0c0a - modern?
    #   0x2409 - Caribbean country code not found in ISO 3166-1
    Windows => {
        # 0x04xx
        0x0401 => 'ar-SA',  0x0402 => 'bg',     0x0403 => 'ca',     0x0404 => 'zh-TW',
        0x0405 => 'cs',     0x0406 => 'da',     0x0407 => 'de-DE',  0x0408 => 'el',
        0x0409 => 'en-US',  0x040a => 'es-ES',  0x040b => 'fi',     0x040c => 'fr-FR',
        0x040d => 'he',     0x040e => 'hu',     0x040f => 'is',     0x0410 => 'it-IT',
        0x0411 => 'ja',     0x0412 => 'ko',     0x0413 => 'nl-NL',  0x0414 => 'no-NO',
        0x0415 => 'pl',     0x0416 => 'pt-BR',  0x0417 => 'rm',     0x0418 => 'ro',
        0x0419 => 'ru',     0x041a => 'hr',     0x041b => 'sk',     0x041c => 'sq',
        0x041d => 'sv-SE',  0x041e => 'th',     0x041f => 'tr',     0x0420 => 'ur',
        0x0421 => 'id',     0x0422 => 'uk',     0x0423 => 'be',     0x0424 => 'sl',
        0x0425 => 'et',     0x0426 => 'lv',     0x0427 => 'lt',     0x0428 => 'tg',
        0x042a => 'vi',     0x042b => 'hy',     0x042c => 'az-AZ',  0x042d => 'eu',
        0x042e => 'hsb',    0x042f => 'mk',     0x0430 => 'st',     0x0431 => 'ts',
        0x0432 => 'tn',     0x0434 => 'xh',     0x0435 => 'zu',     0x0436 => 'af',
        0x0437 => 'ka',     0x0438 => 'fo',     0x0439 => 'hi',     0x043a => 'mt',
        0x043b => 'se-NO',  0x043c => 'gd',     0x043d => 'yi',     0x043e => 'ms-MY',
        0x043f => 'kk',     0x0440 => 'ky',     0x0441 => 'sw',     0x0442 => 'tk',
        0x0443 => 'uz-UZ',  0x0444 => 'tt',     0x0445 => 'bn-IN',  0x0446 => 'pa',
        0x0447 => 'gu',     0x0448 => 'wo',     0x0449 => 'ta',     0x044a => 'te',
        0x044b => 'kn',     0x044c => 'ml',     0x044d => 'as',     0x044e => 'mr',
        0x044f => 'sa',     0x0450 => 'mn-MN',  0x0451 => 'bo',     0x0452 => 'cy',
        0x0453 => 'km',     0x0454 => 'lo',     0x0456 => 'gl',     0x0457 => 'kok',
        0x045a => 'syr',    0x045b => 'si',     0x045d => 'iu-CA',  0x045e => 'am',
        0x0461 => 'ne',     0x0462 => 'fy',     0x0463 => 'ps',     0x0464 => 'fil',
        0x0465 => 'dv',     0x0468 => 'ha',     0x046a => 'yo',     0x046b => 'qu-BO',
        0x046c => 'st',     0x046d => 'ba',     0x046e => 'lb',     0x046f => 'kl',
        0x0470 => 'ig',     0x0478 => 'yi',     0x047a => 'arn',    0x047c => 'moh',
        0x047e => 'br',     0x0480 => 'ug',     0x0481 => 'mi',     0x0482 => 'oc',
        0x0483 => 'co',     0x0484 => 'gsw',    0x0485 => 'sah',    0x0486 => 'ny',
        0x0487 => 'rw',     0x048c => 'Dari',
        # 0x08xx
        0x0801 => 'ar-IQ',  0x0804 => 'zh-CN',  0x0807 => 'de-CH',  0x0809 => 'en-GB',
        0x080a => 'es-MX',  0x080c => 'fr-BE',  0x0810 => 'it-CH',  0x0813 => 'nl-BE',
        0x0814 => 'nn',     0x0816 => 'pt-PT',  0x0818 => 'ro-MO',  0x0819 => 'ru-MO',
        0x081a => 'sr-RS',  0x081d => 'sv-FI',  0x082c => 'az-AZ',  0x082e => 'dsb',
        0x083b => 'se-SE',  0x083c => 'ga',     0x083e => 'ms-BN',  0x0843 => 'uz-UZ',
        0x0845 => 'bn-BD',  0x0850 => 'mn-CN',  0x085d => 'iu-CA',  0x085f => 'tmh',
        0x086b => 'qu-EC',
        # 0x0cxx
        0x0c01 => 'ar-EG',  0x0c04 => 'zh-HK',  0x0c07 => 'de-AT',  0x0c09 => 'en-AU',
        0x0c0a => 'es-ES',  0x0c0c => 'fr-CA',  0x0c1a => 'sr-RS',  0x0c3b => 'se-FI',
        0x0c6b => 'qu-PE',
        # 0x10xx
        0x1001 => 'ar-LY',  0x1004 => 'zh-SG',  0x1007 => 'de-LU',  0x1009 => 'en-CA',
        0x100a => 'es-GT',  0x100c => 'fr-CH',  0x101a => 'hr-BA',  0x103b => 'smj-NO',
        # 0x14xx
        0x1401 => 'ar-DZ',  0x1404 => 'zh-MO',  0x1407 => 'de-LI',  0x1409 => 'en-NZ',
        0x140a => 'es-CR',  0x140c => 'fr-LU',  0x141a => 'bs-BA',  0x143b => 'smj-SE',
        # 0x18xx
        0x1801 => 'ar-MA',  0x1809 => 'en-IE',  0x180a => 'es-PA',  0x180c => 'fr-MC',
        0x181a => 'sr-BA',  0x183b => 'sma-NO',
        # 0x1cxx
        0x1c01 => 'ar-TN',  0x1c09 => 'en-ZA',  0x1c0a => 'es-DO',  0x1c1a => 'sr-BA',
        0x1c3b => 'sma-SE',
        # 0x20xx
        0x2001 => 'ar-OM',  0x2009 => 'en-JM',  0x200a => 'es-VE',  0x201a => 'bs-BA',
        0x203b => 'sms',
        # 0x24xx
        0x2401 => 'ar-YE',  0x2409 => 'en-CB',  0x240a => 'es-CO',  0x243b => 'smn',
        # 0x28xx
        0x2801 => 'ar-SY',  0x2809 => 'en-BZ',  0x280a => 'es-PE',
        # 0x2cxx
        0x2c01 => 'ar-JO',  0x2c09 => 'en-TT',  0x2c0a => 'es-AR',
        # 0x30xx
        0x3001 => 'ar-LB',  0x3009 => 'en-ZW',  0x300a => 'es-EC',
        # 0x34xx
        0x3401 => 'ar-KW',  0x3409 => 'en-PH',  0x340a => 'es-CL',
        # 0x38xx
        0x3801 => 'ar-AE',  0x380a => 'es-UY',
        # 0x3cxx
        0x3c01 => 'ar-BH',  0x3c0a => 'es-PY',
        # 0x40xx
        0x4001 => 'ar-QA',  0x4009 => 'en-IN',  0x400a => 'es-BO',
        # 0x44xx
        0x4409 => 'en-MY',  0x440a => 'es-SV',
        # 0x48xx
        0x4809 => 'en-SG',  0x480a => 'es-HN',
        # 0x4cxx and above
        0x4c0a => 'es-NI',  0x500a => 'es-PR',  0x540a => 'es-US',
    },
    Unicode => { },
    ISO => { },
    Custom => { },
);
180
# Eclectic table of tags for various format font files.  The SubDirectory
# entries hand their data to the format-specific tables defined in this module;
# the remaining entries are plain tags.
%Image::ExifTool::Font::Main = (
    GROUPS => { 2 => 'Document' },
    # (user-facing documentation text: "current" corrected to "currently")
    NOTES => q{
 This table contains a collection of tags found in font files of various
 formats. ExifTool currently recognizes OTF, TTF, TTC, DFONT, PFA, PFB, PFM,
 AFM, ACFM and AMFM font files.
 },
    # TrueType 'name' table
    name => {
        SubDirectory => { TagTable => 'Image::ExifTool::Font::Name' },
    },
    # PFM file header
    PFM => {
        Name => 'PFMHeader',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::PFM' },
    },
    # PostScript FontInfo attributes
    PSInfo => {
        Name => 'PSFontInfo',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::PSInfo' },
    },
    # Adobe Font Metrics
    AFM => {
        Name => 'AFM',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::AFM' },
    },
    numfonts => 'NumFonts',
    fontname => 'FontName',
    postfont => {
        Name => 'PostScriptFontName',
        Description => 'PostScript Font Name',
    },
);
211
# TrueType name tags, keyed by the name ID of each 'name' table record (ref 1/2)
%Image::ExifTool::Font::Name = (
    GROUPS => { 2 => 'Document' },
    # (the example tag name must use the spelling of the tag defined in this
    #  table, "License", not "Licence")
    NOTES => q{
 The following tags are extracted from the TrueType font "name" table found
 in OTF, TTF, TTC and DFONT files. These tags support localized languages by
 adding a hyphen followed by a language code to the end of the tag name (ie.
 "Copyright-fr" or "License-en-US"). Tags with no language code use the
 default language of "en".
 },
    0  => {
        Name => 'Copyright',
        Groups => { 2 => 'Author' },
    },
    1  => 'FontFamily',
    2  => 'FontSubfamily',
    3  => 'FontSubfamilyID',
    4  => 'FontName', # full name
    5  => 'NameTableVersion',
    6  => {
        Name => 'PostScriptFontName',
        Description => 'PostScript Font Name',
    },
    7  => 'Trademark',
    8  => 'Manufacturer',
    9  => 'Designer',
    10 => 'Description',
    11 => 'VendorURL',
    12 => 'DesignerURL',
    13 => 'License',
    14 => 'LicenseInfoURL',
    # (there is no entry for name ID 15)
    16 => 'PreferredFamily',
    17 => 'PreferredSubfamily',
    18 => 'CompatibleFontName',
    19 => 'SampleText',
    20 => {
        Name => 'PostScriptFontName',
        Description => 'PostScript Font Name',
    },
    21 => 'WWSFamilyName',
    22 => 'WWSSubfamilyName',
);
248
# PostScript Font Metric file header (ref 4)
# (keys are byte offsets into the header; it is parsed by ProcessBinaryData)
%Image::ExifTool::Font::PFM = (
    GROUPS => { 2 => 'Document' },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    NOTES => 'Tags extracted from the PFM file header.',
    0 => {
        Name => 'PFMVersion',
        Format => 'int16u',
        # high byte is the major version, low byte the minor (both in hex)
        PrintConv => 'sprintf("%x.%.2x",$val>>8,$val&0xff)',
    },
    6 => {
        Name => 'Copyright',
        Format => 'string[60]',
        Groups => { 2 => 'Author' },
    },
    # fields with an explicit 16-bit unsigned format
    (map { ($$_[0], { Name => $$_[1], Format => 'int16u' }) } (
        [ 66, 'FontType' ],
        [ 68, 'PointSize' ],
        [ 70, 'YResolution' ],
        [ 72, 'XResolution' ],
        [ 74, 'Ascent' ],
        [ 76, 'InternalLeading' ],
        [ 78, 'ExternalLeading' ],
        [ 83, 'Weight' ],
        [ 86, 'PixWidth' ],
        [ 88, 'PixHeight' ],
        [ 91, 'AvgWidth' ],
        [ 93, 'MaxWidth' ],
        [ 99, 'WidthBytes' ],
    )),
    # fields which leave the format unspecified
    (map { ($$_[0], { Name => $$_[1] }) } (
        [ 80, 'Italic' ],
        [ 81, 'Underline' ],
        [ 82, 'Strikeout' ],
        [ 85, 'CharacterSet' ],
        [ 90, 'PitchAndFamily' ],
        [ 95, 'FirstChar' ],
        [ 96, 'LastChar' ],
        [ 97, 'DefaultChar' ],
        [ 98, 'BreakChar' ],
    )),
    # (offsets that are deliberately not extracted)
    # 101 => { Name => 'DeviceTypeOffset', Format => 'int32u' },
    # 105 => { Name => 'FontNameOffset', Format => 'int32u' },
    # 109 => { Name => 'BitsPointer', Format => 'int32u' },
    # 113 => { Name => 'BitsOffset', Format => 'int32u' },
);
287
# PostScript FontInfo attributes (PFA, PFB) (ref PH)
%Image::ExifTool::Font::PSInfo = (
    GROUPS => { 2 => 'Document' },
    NOTES => 'Tags extracted from PostScript font files (PFA and PFB).',
    # attributes needing extra tag information
    FamilyName => { Name => 'FontFamily' },
    Copyright  => { Groups => { 2 => 'Author' } },
    Notice     => { Groups => { 2 => 'Author' } },
    # attributes defined with no extra tag information
    # (each one gets its own empty hash)
    (map { ($_, { }) } qw(
        FullName
        Weight
        ItalicAngle
        isFixedPitch
        UnderlinePosition
        UnderlineThickness
        version
        FontName
        FontType
        FSType
    )),
);
306
# Adobe Font Metrics tags (AFM) (ref 6)
# (keys are the keywords found at the start of each line in the file)
%Image::ExifTool::Font::AFM = (
    GROUPS => { 2 => 'Document' },
    NOTES => 'Tags extracted from Adobe Font Metrics files (AFM, ACFM and AMFM).',
    # keywords needing extra tag information
    'Creation Date' => { Name => 'CreateDate', Groups => { 2 => 'Time' } },
    FamilyName      => { Name => 'FontFamily' },
    Notice          => { Groups => { 2 => 'Author' } },
    # keywords defined with no extra tag information
    # (each one gets its own empty hash; VVector is deliberately left out)
    (map { ($_, { }) } qw(
        FontName
        FullName
        Weight
        Version
        EncodingScheme
        MappingScheme
        EscChar
        CharacterSet
        Characters
        IsBaseFont
        IsFixedV
        CapHeight
        XHeight
        Ascender
        Descender
    )),
);
331
#------------------------------------------------------------------------------
# Extract information from each font of a TrueType font collection (TTC)
# (refs 2,3)
# Inputs: 0) ExifTool ref, 1) dirInfo ref (the file is read through RAF)
# Returns: 1 on success, 0 if this wasn't a valid TrueType font collection
# Notes: - parsing stops quietly at the first font that can't be reached or
#          isn't recognized, and this still counts as success
sub ProcessTTC($$)
{
    my ($exifTool, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($header, $offsets);

    # collection header: 'ttcf', version (1.0 or 2.0), then the font count
    $raf->Read($header, 12) == 12 or return 0;
    $header =~ /^ttcf\0[\x01\x02]\0\0/ or return 0;
    SetByteOrder('MM');
    my $fontCount = Get32u(\$header, 8);
    # might as well put a limit on the number of fonts we will parse (< 256)
    return 0 if $fontCount >= 0x100;
    # the header is followed by one 32-bit file offset for each font
    my $tableLen = $fontCount * 4;
    $raf->Read($offsets, $tableLen) == $tableLen or return 0;

    $exifTool->SetFileType('TTC');
    my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Main');
    $exifTool->HandleTag($tagTablePtr, 'numfonts', $fontCount);

    # parse the fonts in turn, tagging each with its own family 1 group suffix
    foreach my $fontNum (1 .. $fontCount) {
        $exifTool->VPrint(0, "Font $fontNum:\n");
        $$exifTool{SET_GROUP1} = "+$fontNum";
        my $offset = Get32u(\$offsets, ($fontNum - 1) * 4);
        $raf->Seek($offset, 0) or last;
        ProcessOTF($exifTool, $dirInfo) or last;
    }
    delete $$exifTool{SET_GROUP1};
    return 1;
}
363
#------------------------------------------------------------------------------
# Extract the 'name' table information from a TrueType font (OTF or TTF)
# (refs 1,2)
# Inputs: 0) ExifTool ref, 1) dirInfo ref (the font is read from the current
#            position of RAF; an optional Base is added to the table offsets)
# Returns: 1 on success, 0 if this wasn't a valid TrueType font file
# Notes: - in verbose mode every table in the font directory is listed, but
#          only the 'name' table is ever parsed
sub ProcessOTF($$)
{
    my ($exifTool, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my $base = $$dirInfo{Base} || 0;
    my ($header, $tableDir);

    # font header: 4-byte type signature followed by the 16-bit table count
    $raf->Read($header, 12) == 12 or return 0;
    $header =~ /^(\0\x01\0\0|OTTO|true|typ1)[\0\x01]/ or return 0;

    $exifTool->SetFileType($1 eq 'OTTO' ? 'OTF' : 'TTF');
    SetByteOrder('MM');
    my $numTables = Get16u(\$header, 4);
    return 0 if $numTables <= 0 or $numTables >= 0x200;
    # read the table directory (16 bytes per table)
    my $dirLen = $numTables * 16;
    $raf->Read($tableDir, $dirLen) == $dirLen or return 0;

    my $verbose = $exifTool->Options('Verbose');
    my $savedIndent = $$exifTool{INDENT};
    $$exifTool{INDENT} .= '| ';
    $verbose and $exifTool->VerboseDir('TrueType', $numTables);

    foreach my $index (0 .. $numTables - 1) {
        my $entry = $index * 16;
        my $tag = substr($tableDir, $entry, 4);
        # skip everything but the 'name' table unless listing tables verbosely
        next if $tag ne 'name' and not $verbose;
        my $offset = Get32u(\$tableDir, $entry + 8);
        my $size = Get32u(\$tableDir, $entry + 12);
        my $data;
        unless ($raf->Seek($offset + $base, 0) and $raf->Read($data, $size) == $size) {
            $exifTool->Warn("Error reading '$tag' data");
            next;
        }
        if ($verbose) {
            # escape any non-printable characters in the tag before printing
            $tag =~ s/([\0-\x1f\x80-\xff])/sprintf('\x%.2x',ord $1)/ge;
            $exifTool->VPrint(0, sprintf("%s%d) Tag '%s' (offset 0x%.4x, %d bytes)\n",
                                         $$exifTool{INDENT}, $index, $tag, $offset, $size));
            $exifTool->VerboseDump(\$data, Addr => $offset) if $verbose > 2;
            next unless $tag eq 'name';
        }
        next if $size < 8;

        # name table header: format, record count, offset of string storage
        my $entries = Get16u(\$data, 2);
        my $recEnd = 6 + $entries * 12;     # end of the 12-byte name records
        if ($recEnd > $size) {
            $exifTool->Warn('Truncated name record');
            last;
        }
        my $strStart = Get16u(\$data, 4);
        if ($strStart < $recEnd or $strStart > $size) {
            $exifTool->Warn('Invalid string offset');
            last;
        }

        # parse language-tag record (in format 1 Naming table only) (ref 2)
        my %langTag;
        if (Get16u(\$data, 0) == 1 and $recEnd + 2 <= $size) {
            my $langTags = Get16u(\$data, $recEnd);
            if ($langTags and $recEnd + 2 + $langTags * 4 < $size) {
                foreach my $n (0 .. $langTags - 1) {
                    my $rec = $recEnd + 2 + $n * 4;
                    my $langLen = Get16u(\$data, $rec);
                    # make sure the language string length is reasonable (UTF-16BE)
                    last if $langLen == 0 or $langLen & 0x01 or $langLen > 40;
                    my $langPt = Get16u(\$data, $rec + 2) + $strStart;
                    last if $langPt + $langLen > $size;
                    my $lang = $exifTool->Decode(substr($data, $langPt, $langLen),
                                                 'UCS2', 'MM', 'UTF8');
                    $lang =~ tr/-_a-zA-Z0-9//dc;    # remove naughty characters
                    # language-tag records are referenced by language IDs from 0x8000
                    $langTag{$n + 0x8000} = $lang;
                }
            }
        }

        my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Name');
        $$exifTool{INDENT} .= '| ';
        $verbose and $exifTool->VerboseDir('Name', $entries);

        foreach my $n (0 .. $entries - 1) {
            # each name record consists of six 16-bit values
            my $rec = 6 + $n * 12;
            my ($platform, $encoding, $langID, $nameID, $strLen, $strOff) =
                map { Get16u(\$data, $rec + 2 * $_) } 0 .. 5;
            my $strPt = $strOff + $strStart;
            # ignore records whose string lies outside of the table
            next if $strPt + $strLen > $size;

            my $val = substr($data, $strPt, $strLen);
            my ($lang, $charset, $extra);
            my $sys = $ttPlatform{$platform};
            if (not $sys) {
                $exifTool->Warn("Unknown platform ($platform) for name $nameID");
            } else {
                # translate from specified encoding
                $lang = $ttLang{$sys}{$langID} || $langTag{$langID};
                $charset = $ttCharset{$sys}{$encoding};
                if ($charset) {
                    # translate to ExifTool character set
                    $val = $exifTool->Decode($val, $charset);
                } elsif (not defined $charset and not $$exifTool{FontWarn}) {
                    # (warn only once, and never for an empty charset name)
                    $exifTool->Warn("Unknown $sys character set ($encoding)");
                    $$exifTool{FontWarn} = 1;
                }
            }
            # get the tagInfo for our specific language (use 'en' for default)
            my $tagInfo = $exifTool->GetTagInfo($tagTablePtr, $nameID);
            if ($tagInfo and $lang and $lang ne 'en') {
                $tagInfo = Image::ExifTool::GetLangInfo($tagInfo, $lang) || $tagInfo;
            }
            if ($verbose) {
                # show the larger language IDs in hex
                $langID = sprintf('0x%x', $langID) if $langID > 0x400;
                $extra = join(', ', '',
                    "Plat=$platform/" . ($sys || 'Unknown'),
                    "Enc=$encoding/" . ($charset || 'Unknown'),
                    "Lang=$langID/" . ($lang || 'Unknown'));
            }
            $exifTool->HandleTag($tagTablePtr, $nameID, $val,
                TagInfo => $tagInfo,
                DataPt  => \$data,
                DataPos => $offset,
                Start   => $strPt,
                Size    => $strLen,
                Index   => $n,
                Extra   => $extra,
            );
        }
        $$exifTool{INDENT} = $savedIndent . '| ';
        # all done with the 'name' table (continue only to list other tables)
        last unless $verbose;
    }
    $$exifTool{INDENT} = $savedIndent;
    return 1;
}
499
#------------------------------------------------------------------------------
# Read information from an Adobe Font Metrics file (AFM, ACFM, AMFM) (ref 6)
# Inputs: 0) ExifTool ref, 1) dirInfo ref (the file is read through RAF)
# Returns: 1 on success, 0 if this wasn't a recognized AFM-type file
# Notes: - consecutive "Comment" lines are combined into a single Comment tag
#        - parsing ends at the first unrecognized "Start..." section other
#          than StartDirection
sub ProcessAFM($$)
{
    my ($exifTool, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $comment);

    require Image::ExifTool::PostScript;
    local $/ = Image::ExifTool::PostScript::GetInputRecordSeparator($raf);
    # (test the read so we never match against an undefined buffer)
    return 0 unless $raf->ReadLine($buff) and defined $buff;
    return 0 unless $buff =~ /^Start(Comp|Master)?FontMetrics\s+\d+/;
    my $ftyp = $1 ? ($1 eq 'Comp' ? 'ACFM' : 'AMFM') : 'AFM';
    $exifTool->SetFileType($ftyp, 'application/x-font-afm');
    my $tagTablePtr = GetTagTable('Image::ExifTool::Font::AFM');

    for (;;) {
        $raf->ReadLine($buff) or last;
        # a run of comments ends at the first line which isn't a comment
        if (defined $comment and $buff !~ /^Comment\s/) {
            $exifTool->FoundTag('Comment', $comment);
            undef $comment;
        }
        # split the line into keyword and value (the line must be terminated)
        $buff =~ /^(\w+)\s+(.*?)[\x0d\x0a]/ or next;
        my ($tag, $val) = ($1, $2);
        # the creation date is stored as a specially formatted comment
        if ($tag eq 'Comment' and $val =~ /^(Creation Date):\s+(.*)/) {
            ($tag, $val) = ($1, $2);
        }
        $val =~ s/^\((.*)\)$/$1/;   # (some values may be in brackets)
        if ($tag eq 'Comment') {
            # concatenate all comments into a single value
            $comment = defined($comment) ? "$comment\n$val" : $val;
            next;
        }
        unless ($exifTool->HandleTag($tagTablePtr, $tag, $val)) {
            # end parsing if we start any subsection
            last if $tag =~ /^Start/ and $tag ne 'StartDirection';
        }
    }
    # save a comment still pending at the end of the file (previously it was
    # lost because no later line arrived to trigger the flush above)
    $exifTool->FoundTag('Comment', $comment) if defined $comment;
    return 1;
}
542
#------------------------------------------------------------------------------
# Identify the format of a font file and extract its information
# Inputs: 0) ExifTool ref, 1) dirInfo ref (the file is read through RAF)
# Returns: 1 on success, 0 if this wasn't a recognized Font file
# Notes: - the format is determined from the first 24 bytes of the file
sub ProcessFont($$)
{
    my ($exifTool, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $devType);

    $raf->Read($buff, 24) and $raf->Seek(0,0) or return 0;

    # OTF, TTF
    if ($buff =~ /^(\0\x01\0\0|OTTO|true|typ1)[\0\x01]/) {
        my $result = ProcessOTF($exifTool, $dirInfo);
        return $result;
    }
    # TTC
    if ($buff =~ /^ttcf\0[\x01\x02]\0\0/) {
        my $result = ProcessTTC($exifTool, $dirInfo);
        return $result;
    }
    # AFM
    if ($buff =~ /^Start(Comp|Master)?FontMetrics\s+\d+/s) {
        my $result = ProcessAFM($exifTool, $dirInfo);
        return $result;
    }
    # PFA, PFB
    if ($buff =~ /^(.{6})?%!(PS-(AdobeFont-|Bitstream )|FontType1-)/s) {
        # 6 bytes in front of the PostScript signature indicate a PFB file
        if ($1) {
            $raf->Seek(6,0) and $exifTool->SetFileType('PFB');
        }
        require Image::ExifTool::PostScript;
        my $result = Image::ExifTool::PostScript::ProcessPS($exifTool, $dirInfo);
        return $result;
    }

    # PFM (the only remaining possibility)
    return 0 unless $buff =~ /^\0[\x01\x02]/ and $raf->Seek(0, 2);
    # validate file size
    my $fileSize = $raf->Tell();
    return 0 unless $fileSize > 117 and $fileSize == unpack('x2V',$buff);
    # read PFM header
    return 0 unless $raf->Seek(0,0) and $raf->Read($buff,117) == 117;
    # validate "DeviceType" string (must be "PostScript\0")
    return 0 unless SetByteOrder('II') and $raf->Seek(Get32u(\$buff, 101), 0);
    # the DeviceType should be "PostScript\0", but FontForge
    # incorrectly writes "Postscript\0", so ignore case
    return 0 unless $raf->Read($devType, 11) == 11 and lc($devType) eq "postscript\0";

    $exifTool->SetFileType('PFM');
    SetByteOrder('II');
    my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Main');
    # process the PFM header
    $exifTool->HandleTag($tagTablePtr, 'PFM', $buff);
    # extract the font names (two consecutive null-terminated strings)
    my $nameOff = Get32u(\$buff, 105);
    if ($raf->Seek($nameOff, 0) and $raf->Read($buff, 256) and
        $buff =~ /^([\x20-\xff]+)\0([\x20-\xff]+)\0/)
    {
        my ($fontName, $postName) = ($1, $2);
        $exifTool->HandleTag($tagTablePtr, 'fontname', $fontName);
        $exifTool->HandleTag($tagTablePtr, 'postfont', $postName);
    }
    return 1;
}
593
5941; # end
595
596__END__
597
598=head1 NAME
599
600Image::ExifTool::Font - Read meta information from font files
601
602=head1 SYNOPSIS
603
604This module is used by Image::ExifTool
605
606=head1 DESCRIPTION
607
608This module contains the routines required by Image::ExifTool to read meta
609information from various format font files. Currently recognized font file
610types are OTF, TTF, TTC, DFONT, PFA, PFB, PFM, AFM, ACFM and AMFM.
611
612=head1 AUTHOR
613
614Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
615
616This library is free software; you can redistribute it and/or modify it
617under the same terms as Perl itself.
618
619=head1 REFERENCES
620
621=over 4
622
623=item L<http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6.html>
624
625=item L<http://www.microsoft.com/typography/otspec/otff.htm>
626
627=item L<http://partners.adobe.com/public/developer/opentype/index_font_file.html>
628
629=item L<http://partners.adobe.com/public/developer/en/font/5178.PFM.pdf>
630
631=item L<http://opensource.adobe.com/svn/opensource/flex/sdk/trunk/modules/compiler/src/java/flex2/compiler/util/MimeMappings.java>
632
633=item L<http://www.adobe.com/devnet/font/pdfs/5004.AFM_Spec.pdf>
634
635=back
636
637=head1 SEE ALSO
638
639L<Image::ExifTool::TagNames/Font Tags>,
640L<Image::ExifTool(3pm)|Image::ExifTool>
641
642=cut
643
Note: See TracBrowser for help on using the repository browser.