source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/Font.pm@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to, which previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked that it works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

  • Property svn:executable set to *
File size: 27.3 KB
Line 
1#------------------------------------------------------------------------------
2# File: Font.pm
3#
4# Description: Read meta information from font files
5#
6# Revisions: 2010/01/15 - P. Harvey Created
7#
8# References: 1) http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6.html
9# 2) http://www.microsoft.com/typography/otspec/otff.htm
10# 3) http://partners.adobe.com/public/developer/opentype/index_font_file.html
11# 4) http://partners.adobe.com/public/developer/en/font/5178.PFM.pdf
12# 5) http://opensource.adobe.com/svn/opensource/flex/sdk/trunk/modules/compiler/src/java/flex2/compiler/util/MimeMappings.java
13# 6) http://www.adobe.com/devnet/font/pdfs/5004.AFM_Spec.pdf
14#------------------------------------------------------------------------------
15
16package Image::ExifTool::Font;
17
18use strict;
19use vars qw($VERSION %ttLang);
20use Image::ExifTool qw(:DataAccess :Utils);
21
22$VERSION = '1.10';
23
24sub ProcessOTF($$);
25
# Platform IDs found in TrueType 'name' records, mapped to the platform names
# that key the %ttCharset and %ttLang lookups
my %ttPlatform;
@ttPlatform{0 .. 4} = qw(Unicode Macintosh ISO Windows Custom);
34
# TrueType 'name' encoding IDs, grouped by platform name, mapped to the
# ExifTool Charset used to decode the name string (ref 1/2).  An empty string
# marks an encoding that is known but not decoded.
my %ttCharset = (
    Macintosh => {
        0  => 'MacRoman',
        1  => 'MacJapanese',
        2  => 'MacChineseTW',
        3  => 'MacKorean',
        4  => 'MacArabic',
        5  => 'MacHebrew',
        6  => 'MacGreek',
        7  => 'MacCyrillic',    # 7=Russian
        8  => 'MacRSymbol',
        9  => 'MacDevanagari',
        10 => 'MacGurmukhi',
        11 => 'MacGujarati',
        12 => 'MacOriya',
        13 => 'MacBengali',
        14 => 'MacTamil',
        15 => 'MacTelugu',
        16 => 'MacKannada',
        17 => 'MacMalayalam',
        18 => 'MacSinhalese',
        19 => 'MacBurmese',
        20 => 'MacKhmer',
        21 => 'MacThai',
        22 => 'MacLaotian',
        23 => 'MacGeorgian',
        24 => 'MacArmenian',
        25 => 'MacChineseCN',
        26 => 'MacTibetan',
        27 => 'MacMongolian',
        28 => 'MacGeez',
        29 => 'MacCyrillic',    # 29=Slavic
        30 => 'MacVietnam',
        31 => 'MacSindhi',
        32 => '',               # 32=uninterpreted
    },
    Windows => {
        0  => 'Symbol',
        1  => 'UCS2',
        2  => 'ShiftJIS',
        3  => 'PRC',
        4  => 'Big5',
        5  => 'Wansung',
        6  => 'Johab',
        10 => 'UCS4',
    },
    Unicode => {
        # (we don't currently handle the various Unicode flavours)
        #   0 = Unicode 1.0 semantics
        #   1 = Unicode 1.1 semantics
        #   2 = ISO 10646 semantics
        #   3 = Unicode 2.0 and onwards semantics, Unicode BMP only
        #   4 = Unicode 2.0 and onwards semantics, Unicode full repertoire
        #   5 = Unicode Variation Sequences (not used in Naming table)
        map { ($_ => 'UCS2') } 0 .. 4
    },
    ISO => { # (deprecated)
        0 => 'UTF8',    # (7-bit ASCII)
        1 => 'UCS2',    # ISO 10646
        2 => 'Latin',   # ISO 8859-1
    },
    Custom => { },
);
78
# TrueType 'name' language IDs mapped to ExifTool language codes.  %ttLang is
# keyed by platform name, then by numeric language ID; the two populated
# platform tables are written out separately below for readability.

# Macintosh language codes (also used by QuickTime.pm)
# oddities:
#    49 - Cyrillic version      83 - Roman
#    50 - Arabic version        84 - Arabic
#   146 - with dot above
my %macLang = (
    0 => 'en',      24 => 'lt',     48 => 'kk',     72 => 'ml',     129 => 'eu',
    1 => 'fr',      25 => 'pl',     49 => 'az',     73 => 'kn',     130 => 'ca',
    2 => 'de',      26 => 'hu',     50 => 'az',     74 => 'ta',     131 => 'la',
    3 => 'it',      27 => 'et',     51 => 'hy',     75 => 'te',     132 => 'qu',
    4 => 'nl-NL',   28 => 'lv',     52 => 'ka',     76 => 'si',     133 => 'gn',
    5 => 'sv',      29 => 'smi',    53 => 'ro',     77 => 'my',     134 => 'ay',
    6 => 'es',      30 => 'fo',     54 => 'ky',     78 => 'km',     135 => 'tt',
    7 => 'da',      31 => 'fa',     55 => 'tg',     79 => 'lo',     136 => 'ug',
    8 => 'pt',      32 => 'ru',     56 => 'tk',     80 => 'vi',     137 => 'dz',
    9 => 'no',      33 => 'zh-CN',  57 => 'mn-MN',  81 => 'id',     138 => 'jv',
    10 => 'he',     34 => 'nl-BE',  58 => 'mn-CN',  82 => 'tl',     139 => 'su',
    11 => 'ja',     35 => 'ga',     59 => 'ps',     83 => 'ms-MY',  140 => 'gl',
    12 => 'ar',     36 => 'sq',     60 => 'ku',     84 => 'ms-BN',  141 => 'af',
    13 => 'fi',     37 => 'ro',     61 => 'ks',     85 => 'am',     142 => 'br',
    14 => 'el',     38 => 'cs',     62 => 'sd',     86 => 'ti',     144 => 'gd',
    15 => 'is',     39 => 'sk',     63 => 'bo',     87 => 'om',     145 => 'gv',
    16 => 'mt',     40 => 'sl',     64 => 'ne',     88 => 'so',     146 => 'ga',
    17 => 'tr',     41 => 'yi',     65 => 'sa',     89 => 'sw',     147 => 'to',
    18 => 'hr',     42 => 'sr',     66 => 'mr',     90 => 'rw',     148 => 'el',
    19 => 'zh-TW',  43 => 'mk',     67 => 'bn',     91 => 'rn',     149 => 'kl',
    20 => 'ur',     44 => 'bg',     68 => 'as',     92 => 'ny',     150 => 'az',
    21 => 'hi',     45 => 'uk',     69 => 'gu',     93 => 'mg',
    22 => 'th',     46 => 'be',     70 => 'pa',     94 => 'eo',
    23 => 'ko',     47 => 'uz',     71 => 'or',     128 => 'cy',
);

# Windows language codes (http://msdn.microsoft.com/en-us/library/0h88fahh(VS.85).aspx)
# Notes: This isn't an exact science.  The reference above gives language codes
# which are different from some ISO 639-1 numbers.  Also, some Windows language
# codes don't appear to have ISO 639-1 equivalents.
#   0x0428 - fa by ref above
#   0x048c - no ISO equivalent
#   0x081a/0x83c - sr-SP
#   0x0c0a - modern?
#   0x2409 - Caribbean country code not found in ISO 3166-1
my %winLang = (
    0x0401 => 'ar-SA', 0x0438 => 'fo',    0x0481 => 'mi',     0x1409 => 'en-NZ',
    0x0402 => 'bg',    0x0439 => 'hi',    0x0482 => 'oc',     0x140a => 'es-CR',
    0x0403 => 'ca',    0x043a => 'mt',    0x0483 => 'co',     0x140c => 'fr-LU',
    0x0404 => 'zh-TW', 0x043b => 'se-NO', 0x0484 => 'gsw',    0x141a => 'bs-BA',
    0x0405 => 'cs',    0x043c => 'gd',    0x0485 => 'sah',    0x143b => 'smj-SE',
    0x0406 => 'da',    0x043d => 'yi',    0x0486 => 'ny',     0x1801 => 'ar-MA',
    0x0407 => 'de-DE', 0x043e => 'ms-MY', 0x0487 => 'rw',     0x1809 => 'en-IE',
    0x0408 => 'el',    0x043f => 'kk',    0x048c => 'Dari',   0x180a => 'es-PA',
    0x0409 => 'en-US', 0x0440 => 'ky',    0x0801 => 'ar-IQ',  0x180c => 'fr-MC',
    0x040a => 'es-ES', 0x0441 => 'sw',    0x0804 => 'zh-CN',  0x181a => 'sr-BA',
    0x040b => 'fi',    0x0442 => 'tk',    0x0807 => 'de-CH',  0x183b => 'sma-NO',
    0x040c => 'fr-FR', 0x0443 => 'uz-UZ', 0x0809 => 'en-GB',  0x1c01 => 'ar-TN',
    0x040d => 'he',    0x0444 => 'tt',    0x080a => 'es-MX',  0x1c09 => 'en-ZA',
    0x040e => 'hu',    0x0445 => 'bn-IN', 0x080c => 'fr-BE',  0x1c0a => 'es-DO',
    0x040f => 'is',    0x0446 => 'pa',    0x0810 => 'it-CH',  0x1c1a => 'sr-BA',
    0x0410 => 'it-IT', 0x0447 => 'gu',    0x0813 => 'nl-BE',  0x1c3b => 'sma-SE',
    0x0411 => 'ja',    0x0448 => 'wo',    0x0814 => 'nn',     0x2001 => 'ar-OM',
    0x0412 => 'ko',    0x0449 => 'ta',    0x0816 => 'pt-PT',  0x2009 => 'en-JM',
    0x0413 => 'nl-NL', 0x044a => 'te',    0x0818 => 'ro-MO',  0x200a => 'es-VE',
    0x0414 => 'no-NO', 0x044b => 'kn',    0x0819 => 'ru-MO',  0x201a => 'bs-BA',
    0x0415 => 'pl',    0x044c => 'ml',    0x081a => 'sr-RS',  0x203b => 'sms',
    0x0416 => 'pt-BR', 0x044d => 'as',    0x081d => 'sv-FI',  0x2401 => 'ar-YE',
    0x0417 => 'rm',    0x044e => 'mr',    0x082c => 'az-AZ',  0x2409 => 'en-CB',
    0x0418 => 'ro',    0x044f => 'sa',    0x082e => 'dsb',    0x240a => 'es-CO',
    0x0419 => 'ru',    0x0450 => 'mn-MN', 0x083b => 'se-SE',  0x243b => 'smn',
    0x041a => 'hr',    0x0451 => 'bo',    0x083c => 'ga',     0x2801 => 'ar-SY',
    0x041b => 'sk',    0x0452 => 'cy',    0x083e => 'ms-BN',  0x2809 => 'en-BZ',
    0x041c => 'sq',    0x0453 => 'km',    0x0843 => 'uz-UZ',  0x280a => 'es-PE',
    0x041d => 'sv-SE', 0x0454 => 'lo',    0x0845 => 'bn-BD',  0x2c01 => 'ar-JO',
    0x041e => 'th',    0x0456 => 'gl',    0x0850 => 'mn-CN',  0x2c09 => 'en-TT',
    0x041f => 'tr',    0x0457 => 'kok',   0x085d => 'iu-CA',  0x2c0a => 'es-AR',
    0x0420 => 'ur',    0x045a => 'syr',   0x085f => 'tmh',    0x3001 => 'ar-LB',
    0x0421 => 'id',    0x045b => 'si',    0x086b => 'qu-EC',  0x3009 => 'en-ZW',
    0x0422 => 'uk',    0x045d => 'iu-CA', 0x0c01 => 'ar-EG',  0x300a => 'es-EC',
    0x0423 => 'be',    0x045e => 'am',    0x0c04 => 'zh-HK',  0x3401 => 'ar-KW',
    0x0424 => 'sl',    0x0461 => 'ne',    0x0c07 => 'de-AT',  0x3409 => 'en-PH',
    0x0425 => 'et',    0x0462 => 'fy',    0x0c09 => 'en-AU',  0x340a => 'es-CL',
    0x0426 => 'lv',    0x0463 => 'ps',    0x0c0a => 'es-ES',  0x3801 => 'ar-AE',
    0x0427 => 'lt',    0x0464 => 'fil',   0x0c0c => 'fr-CA',  0x380a => 'es-UY',
    0x0428 => 'tg',    0x0465 => 'dv',    0x0c1a => 'sr-RS',  0x3c01 => 'ar-BH',
    0x042a => 'vi',    0x0468 => 'ha',    0x0c3b => 'se-FI',  0x3c0a => 'es-PY',
    0x042b => 'hy',    0x046a => 'yo',    0x0c6b => 'qu-PE',  0x4001 => 'ar-QA',
    0x042c => 'az-AZ', 0x046b => 'qu-BO', 0x1001 => 'ar-LY',  0x4009 => 'en-IN',
    0x042d => 'eu',    0x046c => 'st',    0x1004 => 'zh-SG',  0x400a => 'es-BO',
    0x042e => 'hsb',   0x046d => 'ba',    0x1007 => 'de-LU',  0x4409 => 'en-MY',
    0x042f => 'mk',    0x046e => 'lb',    0x1009 => 'en-CA',  0x440a => 'es-SV',
    0x0430 => 'st',    0x046f => 'kl',    0x100a => 'es-GT',  0x4809 => 'en-SG',
    0x0431 => 'ts',    0x0470 => 'ig',    0x100c => 'fr-CH',  0x480a => 'es-HN',
    0x0432 => 'tn',    0x0478 => 'yi',    0x101a => 'hr-BA',  0x4c0a => 'es-NI',
    0x0434 => 'xh',    0x047a => 'arn',   0x103b => 'smj-NO', 0x500a => 'es-PR',
    0x0435 => 'zu',    0x047c => 'moh',   0x1401 => 'ar-DZ',  0x540a => 'es-US',
    0x0436 => 'af',    0x047e => 'br',    0x1404 => 'zh-MO',
    0x0437 => 'ka',    0x0480 => 'ug',    0x1407 => 'de-LI',
);

# the remaining platforms have no predefined language codes
%ttLang = (
    Macintosh => \%macLang,
    Windows   => \%winLang,
    Unicode   => { },
    ISO       => { },
    Custom    => { },
);
180
# eclectic table of tags for various format font files
%Image::ExifTool::Font::Main = (
    GROUPS => { 2 => 'Document' },
    NOTES => q{
        This table contains a collection of tags found in font files of various
        formats. ExifTool currently recognizes OTF, TTF, TTC, DFONT, PFA, PFB, PFM,
        AFM, ACFM and AMFM font files.
    },
    # sub-tables for the individual font formats
    name => {
        SubDirectory => { TagTable => 'Image::ExifTool::Font::Name' },
    },
    # (ProcessFont passes the raw 117-byte PFM header as the value of this tag)
    PFM => {
        Name => 'PFMHeader',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::PFM' },
    },
    PSInfo => {
        Name => 'PSFontInfo',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::PSInfo' },
    },
    AFM => {
        Name => 'AFM',
        SubDirectory => { TagTable => 'Image::ExifTool::Font::AFM' },
    },
    # number of fonts in a TrueType collection (set by ProcessTTC)
    numfonts => 'NumFonts',
    # the two names stored in a PFM file (set by ProcessFont)
    fontname => 'FontName',
    postfont => {
        Name => 'PostScriptFontName',
        Description => 'PostScript Font Name',
    },
);
211
# Tags for the records of a TrueType 'name' table, keyed by name ID (ref 1/2)
# (ID 15 has no entry in this table)
%Image::ExifTool::Font::Name = (
    GROUPS => { 2 => 'Document' },
    NOTES => q{
        The following tags are extracted from the TrueType font "name" table found
        in OTF, TTF, TTC and DFONT files. These tags support localized languages by
        adding a hyphen followed by a language code to the end of the tag name (eg.
        "Copyright-fr" or "License-en-US"). Tags with no language code use the
        default language of "en".
    },
    0  => {
        Name => 'Copyright',
        Groups => { 2 => 'Author' },
    },
    1  => 'FontFamily',
    2  => 'FontSubfamily',
    3  => 'FontSubfamilyID',
    4  => 'FontName', # full name
    5  => 'NameTableVersion',
    6  => {
        Name => 'PostScriptFontName',
        Description => 'PostScript Font Name',
    },
    7  => 'Trademark',
    8  => 'Manufacturer',
    9  => 'Designer',
    10 => 'Description',
    11 => 'VendorURL',
    12 => 'DesignerURL',
    13 => 'License',
    14 => 'LicenseInfoURL',
    16 => 'PreferredFamily',
    17 => 'PreferredSubfamily',
    18 => 'CompatibleFontName',
    19 => 'SampleText',
    20 => { Name => 'PostScriptFontName', Description => 'PostScript Font Name' },
    21 => 'WWSFamilyName',
    22 => 'WWSSubfamilyName',
);
248
# Binary layout of the PostScript Font Metric (PFM) file header (ref 4).
# Keys are byte offsets into the header; entries that give no Format use the
# table's default format.
%Image::ExifTool::Font::PFM = (
    GROUPS       => { 2 => 'Document' },
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    NOTES        => 'Tags extracted from the PFM file header.',
    0 => {
        Name      => 'PFMVersion',
        Format    => 'int16u',
        # shown as two hex fields: high byte, then low byte as 2 digits
        PrintConv => 'sprintf("%x.%.2x",$val>>8,$val&0xff)',
    },
    6 => {
        Name   => 'Copyright',
        Format => 'string[60]',
        Groups => { 2 => 'Author' },
    },
    66 => { Name => 'FontType',         Format => 'int16u' },
    68 => { Name => 'PointSize',        Format => 'int16u' },
    70 => { Name => 'YResolution',      Format => 'int16u' },
    72 => { Name => 'XResolution',      Format => 'int16u' },
    74 => { Name => 'Ascent',           Format => 'int16u' },
    76 => { Name => 'InternalLeading',  Format => 'int16u' },
    78 => { Name => 'ExternalLeading',  Format => 'int16u' },
    80 => { Name => 'Italic' },
    81 => { Name => 'Underline' },
    82 => { Name => 'Strikeout' },
    83 => { Name => 'Weight',           Format => 'int16u' },
    85 => { Name => 'CharacterSet' },
    86 => { Name => 'PixWidth',         Format => 'int16u' },
    88 => { Name => 'PixHeight',        Format => 'int16u' },
    90 => { Name => 'PitchAndFamily' },
    91 => { Name => 'AvgWidth',         Format => 'int16u' },
    93 => { Name => 'MaxWidth',         Format => 'int16u' },
    95 => { Name => 'FirstChar' },
    96 => { Name => 'LastChar' },
    97 => { Name => 'DefaultChar' },
    98 => { Name => 'BreakChar' },
    99 => { Name => 'WidthBytes',       Format => 'int16u' },
    # (the remaining header fields are offsets/pointers and are not extracted)
    # 101 => { Name => 'DeviceTypeOffset', Format => 'int32u' },
    # 105 => { Name => 'FontNameOffset',   Format => 'int32u' },
    # 109 => { Name => 'BitsPointer',      Format => 'int32u' },
    # 113 => { Name => 'BitsOffset',       Format => 'int32u' },
);
287
# PostScript FontInfo attributes (PFA, PFB) (ref PH)
%Image::ExifTool::Font::PSInfo = (
    GROUPS => { 2 => 'Document' },
    NOTES => 'Tags extracted from PostScript font files (PFA and PFB).',
    # attributes that need no more than an empty tag definition
    (map { ($_ => { }) } qw(
        FullName Weight ItalicAngle isFixedPitch UnderlinePosition
        UnderlineThickness version FontName FontType FSType
    )),
    # attributes that are renamed or moved to another group
    FamilyName => { Name => 'FontFamily' },
    Copyright  => { Groups => { 2 => 'Author' } },
    Notice     => { Groups => { 2 => 'Author' } },
);
306
# Adobe Font Metrics tags (AFM) (ref 6)
%Image::ExifTool::Font::AFM = (
    GROUPS => { 2 => 'Document' },
    NOTES => 'Tags extracted from Adobe Font Metrics files (AFM, ACFM and AMFM).',
    # (ProcessAFM generates this key from a "Comment Creation Date: ..." line)
    'Creation Date' => { Name => 'CreateDate', Groups => { 2 => 'Time' } },
    FamilyName      => { Name => 'FontFamily' },
    Notice          => { Groups => { 2 => 'Author' } },
    # keywords that need no more than an empty tag definition
    # (VVector is deliberately left out)
    (map { ($_ => { }) } qw(
        FontName FullName Weight Version EncodingScheme MappingScheme
        EscChar CharacterSet Characters IsBaseFont IsFixedV
        CapHeight XHeight Ascender Descender
    )),
);
331
#------------------------------------------------------------------------------
# Extract information from each font of a TrueType font collection (TTC) (refs 2,3)
# Inputs: 0) ExifTool ref, 1) dirInfo ref (the collection is read from its RAF)
# Returns: 1 on success, 0 if this wasn't a valid TrueType font collection
sub ProcessTTC($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($header, $offsets);

    # 12-byte header: 'ttcf' signature, version bytes, then the font count
    $raf->Read($header, 12) == 12 or return 0;
    $header =~ /^ttcf\0[\x01\x02]\0\0/ or return 0;
    SetByteOrder('MM');
    my $numFonts = Get32u(\$header, 8);
    # might as well put a limit on the number of fonts we will parse (< 256)
    return 0 unless $numFonts < 0x100;
    # one 32-bit file offset follows for each font
    my $tableLen = $numFonts * 4;
    return 0 unless $raf->Read($offsets, $tableLen) == $tableLen;

    $et->SetFileType('TTC');
    my $fastScan = $$et{OPTIONS}{FastScan};
    return 1 if $fastScan and $fastScan == 3;

    my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Main');
    $et->HandleTag($tagTablePtr, 'numfonts', $numFonts);

    # process the fonts in order, stopping at the first one that can't be
    # reached or parsed
    foreach my $fontNum (1 .. $numFonts) {
        $et->VPrint(0, "Font $fontNum:\n");
        $$et{SET_GROUP1} = "+$fontNum";
        my $fontPos = Get32u(\$offsets, ($fontNum - 1) * 4);
        last unless $raf->Seek($fontPos, 0);
        last unless ProcessOTF($et, $dirInfo);
    }
    delete $$et{SET_GROUP1};
    return 1;
}
364
#------------------------------------------------------------------------------
# Extract the 'name' table strings from a TrueType font (OTF or TTF) (refs 1,2)
# Inputs: 0) ExifTool ref, 1) dirInfo ref (reads from RAF; an optional Base
#            is added to each table offset before seeking)
# Returns: 1 on success, 0 if this wasn't a valid TrueType font file
# Notes: with the Verbose option set, every table in the font directory is
#        listed; otherwise only the 'name' table is read
sub ProcessOTF($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my $base = $$dirInfo{Base} || 0;
    my ($header, $tableDir, $data);

    # check the signature in the 12-byte font header
    $raf->Read($header, 12) == 12 or return 0;
    $header =~ /^(\0\x01\0\0|OTTO|true|typ1|\xa5(kbd|lst))[\0\x01]/ or return 0;

    $et->SetFileType($1 eq 'OTTO' ? 'OTF' : 'TTF');
    my $fastScan = $$et{OPTIONS}{FastScan};
    return 1 if $fastScan and $fastScan == 3;

    # read the table directory (16 bytes per table)
    SetByteOrder('MM');
    my $numTables = Get16u(\$header, 4);
    return 0 if $numTables <= 0 or $numTables >= 0x200;
    my $dirLen = $numTables * 16;
    $raf->Read($tableDir, $dirLen) == $dirLen or return 0;

    my $verbose = $et->Options('Verbose');
    my $savedIndent = $$et{INDENT};
    $$et{INDENT} .= '| ';
    $et->VerboseDir('TrueType', $numTables) if $verbose;

    foreach my $tableNum (0 .. $numTables - 1) {
        my $entryPos = $tableNum * 16;
        # look for 'name' table
        my $tag = substr($tableDir, $entryPos, 4);
        next unless $verbose or $tag eq 'name';
        my $offset = Get32u(\$tableDir, $entryPos + 8);
        my $size   = Get32u(\$tableDir, $entryPos + 12);
        unless ($raf->Seek($offset+$base, 0) and $raf->Read($data, $size) == $size) {
            $et->Warn("Error reading '${tag}' data");
            next;
        }
        if ($verbose) {
            # make the table tag printable before listing it
            $tag =~ s/([\0-\x1f\x80-\xff])/sprintf('\x%.2x',ord $1)/ge;
            $et->VPrint(0, sprintf("%s%d) Tag '%s' (offset 0x%.4x, %d bytes)\n",
                                   $$et{INDENT}, $tableNum, $tag, $offset, $size));
            $et->VerboseDump(\$data, Addr => $offset) if $verbose > 2;
            next unless $tag eq 'name';
        }
        next if $size < 8;

        # name table header: format, record count, start of string storage
        my $entries = Get16u(\$data, 2);
        my $recEnd = 6 + $entries * 12;
        if ($recEnd > $size) {
            $et->Warn('Truncated name record');
            last;
        }
        my $strStart = Get16u(\$data, 4);
        if ($strStart < $recEnd or $strStart > $size) {
            $et->Warn('Invalid string offset');
            last;
        }

        # parse language-tag record (in format 1 Naming table only) (ref 2)
        my %langTag;
        if (Get16u(\$data, 0) == 1 and $recEnd + 2 <= $size) {
            my $numLangTags = Get16u(\$data, $recEnd);
            if ($numLangTags and $recEnd + 2 + $numLangTags * 4 < $size) {
                foreach my $tagNum (0 .. $numLangTags - 1) {
                    my $recPos = $recEnd + 2 + $tagNum * 4;
                    my $langLen = Get16u(\$data, $recPos);
                    # make sure the language string length is reasonable (UTF-16BE)
                    last if $langLen == 0 or $langLen & 0x01 or $langLen > 40;
                    my $langPos = Get16u(\$data, $recPos + 2) + $strStart;
                    last if $langPos + $langLen > $size;
                    my $langStr = $et->Decode(substr($data, $langPos, $langLen),
                                              'UCS2', 'MM', 'UTF8');
                    $langStr =~ tr/-_a-zA-Z0-9//dc; # remove naughty characters
                    # language-tag strings are referenced by IDs from 0x8000 up
                    $langTag{$tagNum + 0x8000} = $langStr;
                }
            }
        }

        my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Name');
        $$et{INDENT} .= '| ';
        $et->VerboseDir('Name', $entries) if $verbose;

        foreach my $index (0 .. $entries - 1) {
            # each 12-byte name record holds six 16-bit values
            my $recPos = 6 + $index * 12;
            my ($platform, $encoding, $langID, $nameID, $strLen, $strOff) =
                map { Get16u(\$data, $recPos + 2 * $_) } 0 .. 5;
            my $strPos = $strOff + $strStart;
            # ignore strings that run past the end of the table
            next if $strPos + $strLen > $size;

            my $val = substr($data, $strPos, $strLen);
            my ($lang, $charset, $extra);
            my $sys = $ttPlatform{$platform};
            if (not $sys) {
                $et->Warn("Unknown platform ($platform) for name $nameID");
            } else {
                # translate from specified encoding
                $lang = $ttLang{$sys}{$langID} || $langTag{$langID};
                $charset = $ttCharset{$sys}{$encoding};
                if ($charset) {
                    # translate to ExifTool character set
                    $val = $et->Decode($val, $charset);
                } elsif (not defined $charset and not $$et{FontWarn}) {
                    # (warn only once, and not for a known but empty charset)
                    $et->Warn("Unknown $sys character set ($encoding)");
                    $$et{FontWarn} = 1;
                }
            }
            # get the tagInfo for our specific language (use 'en' for default)
            my $tagInfo = $et->GetTagInfo($tagTablePtr, $nameID);
            if ($tagInfo and $lang and $lang ne 'en') {
                my $langInfo = Image::ExifTool::GetLangInfo($tagInfo, $lang);
                $tagInfo = $langInfo if $langInfo;
            }
            if ($verbose) {
                $langID = sprintf('0x%x', $langID) if $langID > 0x400;
                $extra = sprintf(', Plat=%s/%s, Enc=%s/%s, Lang=%s/%s',
                                 $platform, $sys     || 'Unknown',
                                 $encoding, $charset || 'Unknown',
                                 $langID,   $lang    || 'Unknown');
            }
            $et->HandleTag($tagTablePtr, $nameID, $val,
                TagInfo => $tagInfo,
                DataPt  => \$data,
                DataPos => $offset,
                Start   => $strPos,
                Size    => $strLen,
                Index   => $index,
                Extra   => $extra,
            );
        }
        $$et{INDENT} = $savedIndent . '| ';
        # the name table is all we want unless listing every table
        last unless $verbose;
    }
    $$et{INDENT} = $savedIndent;
    return 1;
}
501
#------------------------------------------------------------------------------
# Read information from an Adobe Font Metrics file (AFM, ACFM, AMFM) (ref 6)
# Inputs: 0) ExifTool ref, 1) dirInfo ref
# Returns: 1 on success, 0 if this wasn't a recognized AFM-type file
# Notes: consecutive "Comment" lines are joined with newlines into a single
#        Comment tag; a "Comment Creation Date: ..." line is extracted as the
#        'Creation Date' tag instead
sub ProcessAFM($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $comment);

    require Image::ExifTool::PostScript;
    local $/ = Image::ExifTool::PostScript::GetInputRecordSeparator($raf);
    # (a file whose first line can't be read is not an AFM file)
    $raf->ReadLine($buff) or return 0;
    return 0 unless $buff =~ /^Start(Comp|Master)?FontMetrics\s+\d+/;
    my $ftyp = $1 ? ($1 eq 'Comp' ? 'ACFM' : 'AMFM') : 'AFM';
    $et->SetFileType($ftyp, 'application/x-font-afm');
    return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
    my $tagTablePtr = GetTagTable('Image::ExifTool::Font::AFM');

    for (;;) {
        $raf->ReadLine($buff) or last;
        # the first non-comment line ends the current run of comments
        if (defined $comment and $buff !~ /^Comment\s/) {
            $et->FoundTag('Comment', $comment);
            undef $comment;
        }
        # split the line into keyword and value (the line must be terminated)
        $buff =~ /^(\w+)\s+(.*?)[\x0d\x0a]/ or next;
        my ($tag, $val) = ($1, $2);
        if ($tag eq 'Comment' and $val =~ /^(Creation Date):\s+(.*)/) {
            ($tag, $val) = ($1, $2);
        }
        $val =~ s/^\((.*)\)$/$1/;   # (some values may be in brackets)
        if ($tag eq 'Comment') {
            # concatenate all comments into a single value
            $comment = defined($comment) ? "$comment\n$val" : $val;
            next;
        }
        unless ($et->HandleTag($tagTablePtr, $tag, $val)) {
            # end parsing if we start any subsection
            last if $tag =~ /^Start/ and $tag ne 'StartDirection';
        }
    }
    # save comments that ran to the end of the file (these would otherwise be
    # lost because no later line triggers the flush inside the loop)
    $et->FoundTag('Comment', $comment) if defined $comment;
    return 1;
}
545
#------------------------------------------------------------------------------
# Identify a font file from its first 24 bytes and extract its information
# Inputs: 0) ExifTool ref, 1) dirInfo ref
# Returns: 1 on success, 0 if this wasn't a recognized Font file
# Notes: the formats are tested in a fixed order because the PFM test moves
#        the file position and reloads the header buffer while validating
sub ProcessFont($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($buff, $devType);
    my $result = 0;     # (assume the file isn't recognized)

    return 0 unless $raf->Read($buff, 24) and $raf->Seek(0,0);

    if ($buff =~ /^(\0\x01\0\0|OTTO|true|typ1)[\0\x01]/) {              # OTF, TTF
        $result = ProcessOTF($et, $dirInfo);
    } elsif ($buff =~ /^ttcf\0[\x01\x02]\0\0/) {                        # TTC
        $result = ProcessTTC($et, $dirInfo);
    } elsif ($buff =~ /^Start(Comp|Master)?FontMetrics\s+\d+/s) {       # AFM
        $result = ProcessAFM($et, $dirInfo);
    } elsif ($buff =~ /^(.{6})?%!(PS-(AdobeFont-|Bitstream )|FontType1-)/s) {# PFA, PFB
        # six bytes in front of the PostScript header indicate a PFB file
        if ($1) {
            $raf->Seek(6,0) and $et->SetFileType('PFB');
        }
        require Image::ExifTool::PostScript;
        $result = Image::ExifTool::PostScript::ProcessPS($et, $dirInfo);
    } elsif ($buff =~ /^\0[\x01\x02]/ and $raf->Seek(0, 2) and          # PFM
        # validate file size against the 32-bit value stored at offset 2
        $raf->Tell() > 117 and $raf->Tell() == unpack('x2V',$buff) and
        # read PFM header
        $raf->Seek(0,0) and $raf->Read($buff,117) == 117 and
        # validate "DeviceType" string (must be "PostScript\0")
        SetByteOrder('II') and $raf->Seek(Get32u(\$buff, 101), 0) and
        # the DeviceType should be "PostScript\0", but FontForge
        # incorrectly writes "Postscript\0", so ignore case
        $raf->Read($devType, 11) == 11 and lc($devType) eq "postscript\0")
    {
        $et->SetFileType('PFM');
        my $fastScan = $$et{OPTIONS}{FastScan};
        return 1 if $fastScan and $fastScan == 3;
        SetByteOrder('II');
        my $tagTablePtr = GetTagTable('Image::ExifTool::Font::Main');
        # process the PFM header
        $et->HandleTag($tagTablePtr, 'PFM', $buff);
        # extract the font names (two null-terminated strings found at the
        # offset stored in header bytes 105-108)
        my $nameOff = Get32u(\$buff, 105);
        if ($raf->Seek($nameOff, 0) and $raf->Read($buff, 256) and
            $buff =~ /^([\x20-\xff]+)\0([\x20-\xff]+)\0/)
        {
            my ($fontName, $postName) = ($1, $2);
            $et->HandleTag($tagTablePtr, 'fontname', $fontName);
            $et->HandleTag($tagTablePtr, 'postfont', $postName);
        }
        $result = 1;
    } elsif ($buff =~ /^(wOF[F2])/) {                                   # WOFF, WOFF2
        my $type = $1 eq 'wOFF' ? 'woff' : 'woff2';
        $et->SetFileType(uc($type), "font/$type");
        # (don't yet extract metadata from these files)
        $result = 1;
    }
    return $result;
}
602
6031; # end
604
605__END__
606
607=head1 NAME
608
609Image::ExifTool::Font - Read meta information from font files
610
611=head1 SYNOPSIS
612
613This module is used by Image::ExifTool
614
615=head1 DESCRIPTION
616
617This module contains the routines required by Image::ExifTool to read meta
618information from various format font files. Currently recognized font file
619types are OTF, TTF, TTC, DFONT, PFA, PFB, PFM, AFM, ACFM and AMFM. As well,
620WOFF and WOFF2 font files are identified, but metadata is not currently
621extracted from these formats.
622
623=head1 AUTHOR
624
625Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
626
627This library is free software; you can redistribute it and/or modify it
628under the same terms as Perl itself.
629
630=head1 REFERENCES
631
632=over 4
633
634=item L<http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6.html>
635
636=item L<http://www.microsoft.com/typography/otspec/otff.htm>
637
638=item L<http://partners.adobe.com/public/developer/opentype/index_font_file.html>
639
640=item L<http://partners.adobe.com/public/developer/en/font/5178.PFM.pdf>
641
642=item L<http://opensource.adobe.com/svn/opensource/flex/sdk/trunk/modules/compiler/src/java/flex2/compiler/util/MimeMappings.java>
643
644=item L<http://www.adobe.com/devnet/font/pdfs/5004.AFM_Spec.pdf>
645
646=back
647
648=head1 SEE ALSO
649
650L<Image::ExifTool::TagNames/Font Tags>,
651L<Image::ExifTool(3pm)|Image::ExifTool>
652
653=cut
654
Note: See TracBrowser for help on using the repository browser.