1 | #------------------------------------------------------------------------------
|
---|
2 | # File: PostScript.pm
|
---|
3 | #
|
---|
4 | # Description: Read PostScript meta information
|
---|
5 | #
|
---|
6 | # Revisions: 07/08/2005 - P. Harvey Created
|
---|
7 | #
|
---|
8 | # References: 1) http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf
|
---|
9 | # 2) http://partners.adobe.com/public/developer/en/ps/5001.DSC_Spec.pdf
|
---|
10 | # 3) http://partners.adobe.com/public/developer/en/illustrator/sdk/AI7FileFormat.pdf
|
---|
11 | #------------------------------------------------------------------------------
|
---|
12 |
|
---|
13 | package Image::ExifTool::PostScript;
|
---|
14 |
|
---|
15 | use strict;
|
---|
16 | use vars qw($VERSION $AUTOLOAD);
|
---|
17 | use Image::ExifTool qw(:DataAccess :Utils);
|
---|
18 |
|
---|
19 | $VERSION = '1.33';
|
---|
20 |
|
---|
21 | sub WritePS($$);
|
---|
22 | sub ProcessPS($$;$);
|
---|
23 |
|
---|
24 | # PostScript tag table
|
---|
25 | %Image::ExifTool::PostScript::Main = (
|
---|
26 | PROCESS_PROC => \&ProcessPS,
|
---|
27 | WRITE_PROC => \&WritePS,
|
---|
28 | PREFERRED => 1, # always add these tags when writing
|
---|
29 | GROUPS => { 2 => 'Image' },
|
---|
30 | # Note: Make all of these tags priority 0 since the first one found at
|
---|
31 | # the start of the file should take priority (in case multiples exist)
|
---|
32 | Author => { Priority => 0, Groups => { 2 => 'Author' }, Writable => 'string' },
|
---|
33 | BoundingBox => { Priority => 0 },
|
---|
34 | Copyright => { Priority => 0, Writable => 'string' }, #2
|
---|
35 | CreationDate => {
|
---|
36 | Name => 'CreateDate',
|
---|
37 | Priority => 0,
|
---|
38 | Groups => { 2 => 'Time' },
|
---|
39 | Writable => 'string',
|
---|
40 | PrintConv => '$self->ConvertDateTime($val)',
|
---|
41 | },
|
---|
42 | Creator => { Priority => 0, Writable => 'string' },
|
---|
43 | ImageData => { Priority => 0 },
|
---|
44 | For => { Priority => 0, Writable => 'string', Notes => 'for whom the document was prepared'},
|
---|
45 | Keywords => { Priority => 0, Writable => 'string' },
|
---|
46 | ModDate => {
|
---|
47 | Name => 'ModifyDate',
|
---|
48 | Priority => 0,
|
---|
49 | Groups => { 2 => 'Time' },
|
---|
50 | Writable => 'string',
|
---|
51 | PrintConv => '$self->ConvertDateTime($val)',
|
---|
52 | },
|
---|
53 | Pages => { Priority => 0 },
|
---|
54 | Routing => { Priority => 0, Writable => 'string' }, #2
|
---|
55 | Subject => { Priority => 0, Writable => 'string' },
|
---|
56 | Title => { Priority => 0, Writable => 'string' },
|
---|
57 | Version => { Priority => 0, Writable => 'string' }, #2
|
---|
58 | # these subdirectories for documentation only
|
---|
59 | BeginPhotoshop => {
|
---|
60 | Name => 'PhotoshopData',
|
---|
61 | SubDirectory => {
|
---|
62 | TagTable => 'Image::ExifTool::Photoshop::Main',
|
---|
63 | },
|
---|
64 | },
|
---|
65 | BeginICCProfile => {
|
---|
66 | Name => 'ICC_Profile',
|
---|
67 | SubDirectory => {
|
---|
68 | TagTable => 'Image::ExifTool::ICC_Profile::Main',
|
---|
69 | },
|
---|
70 | },
|
---|
71 | begin_xml_packet => {
|
---|
72 | Name => 'XMP',
|
---|
73 | SubDirectory => {
|
---|
74 | TagTable => 'Image::ExifTool::XMP::Main',
|
---|
75 | },
|
---|
76 | },
|
---|
77 | TIFFPreview => {
|
---|
78 | Binary => 1,
|
---|
79 | Notes => q{
|
---|
80 | not a real tag ID, but used to represent the TIFF preview extracted from DOS
|
---|
81 | EPS images
|
---|
82 | },
|
---|
83 | },
|
---|
84 | BeginDocument => {
|
---|
85 | Name => 'EmbeddedFile',
|
---|
86 | SubDirectory => {
|
---|
87 | TagTable => 'Image::ExifTool::PostScript::Main',
|
---|
88 | },
|
---|
89 | Notes => 'extracted with ExtractEmbedded option',
|
---|
90 | },
|
---|
91 | EmbeddedFileName => {
|
---|
92 | Notes => q{
|
---|
93 | not a real tag ID, but the file name from a BeginDocument statement.
|
---|
94 | Extracted with document metadata when ExtractEmbedded option is used
|
---|
95 | },
|
---|
96 | },
|
---|
97 | );
|
---|
98 |
|
---|
99 | # composite tags
|
---|
100 | %Image::ExifTool::PostScript::Composite = (
|
---|
101 | GROUPS => { 2 => 'Image' },
|
---|
102 | # BoundingBox is in points, not pixels,
|
---|
103 | # but use it anyway if ImageData is not available
|
---|
104 | ImageWidth => {
|
---|
105 | Desire => {
|
---|
106 | 0 => 'Main:PostScript:ImageData',
|
---|
107 | 1 => 'PostScript:BoundingBox',
|
---|
108 | },
|
---|
109 | ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 0)',
|
---|
110 | },
|
---|
111 | ImageHeight => {
|
---|
112 | Desire => {
|
---|
113 | 0 => 'Main:PostScript:ImageData',
|
---|
114 | 1 => 'PostScript:BoundingBox',
|
---|
115 | },
|
---|
116 | ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 1)',
|
---|
117 | },
|
---|
118 | );
|
---|
119 |
|
---|
120 | # add our composite tags
|
---|
121 | Image::ExifTool::AddCompositeTags('Image::ExifTool::PostScript');
|
---|
122 |
|
---|
123 | #------------------------------------------------------------------------------
|
---|
124 | # AutoLoad our writer routines when necessary
|
---|
125 | #
|
---|
126 | sub AUTOLOAD
|
---|
127 | {
|
---|
128 | return Image::ExifTool::DoAutoLoad($AUTOLOAD, @_);
|
---|
129 | }
|
---|
130 |
|
---|
131 | #------------------------------------------------------------------------------
|
---|
132 | # Is this a PC system
|
---|
133 | # Returns: true for PC systems
|
---|
134 | my %isPC = (MSWin32 => 1, os2 => 1, dos => 1, NetWare => 1, symbian => 1, cygwin => 1);
|
---|
135 | sub IsPC()
|
---|
136 | {
|
---|
137 | return $isPC{$^O};
|
---|
138 | }
|
---|
139 |
|
---|
140 | #------------------------------------------------------------------------------
|
---|
141 | # Get image width or height
|
---|
142 | # Inputs: 0) value list ref (ImageData, BoundingBox), 1) true to get height
|
---|
143 | sub ImageSize($$)
|
---|
144 | {
|
---|
145 | my ($vals, $getHeight) = @_;
|
---|
146 | my ($w, $h);
|
---|
147 | if ($$vals[0] and $$vals[0] =~ /^(\d+) (\d+)/) {
|
---|
148 | ($w, $h) = ($1, $2);
|
---|
149 | } elsif ($$vals[1] and $$vals[1] =~ /^(\d+) (\d+) (\d+) (\d+)/) {
|
---|
150 | ($w, $h) = ($3 - $1, $4 - $2);
|
---|
151 | }
|
---|
152 | return $getHeight ? $h : $w;
|
---|
153 | }
|
---|
154 |
|
---|
155 | #------------------------------------------------------------------------------
|
---|
156 | # Set PostScript format error warning
|
---|
157 | # Inputs: 0) ExifTool object reference, 1) error string
|
---|
158 | # Returns: 1
|
---|
159 | sub PSErr($$)
|
---|
160 | {
|
---|
161 | my ($exifTool, $str) = @_;
|
---|
162 | # set file type if not done already
|
---|
163 | my $ext = $$exifTool{FILE_EXT};
|
---|
164 | $exifTool->SetFileType(($ext and $ext eq 'AI') ? 'AI' : 'PS');
|
---|
165 | $exifTool->Warn("PostScript format error ($str)");
|
---|
166 | return 1;
|
---|
167 | }
|
---|
168 |
|
---|
169 | #------------------------------------------------------------------------------
|
---|
170 | # Return input record separator to use for the specified file
|
---|
171 | # Inputs: 0) RAF reference
|
---|
172 | # Returns: Input record separator or undef on error
|
---|
173 | sub GetInputRecordSeparator($)
|
---|
174 | {
|
---|
175 | my $raf = shift;
|
---|
176 | my $pos = $raf->Tell(); # save current position
|
---|
177 | my ($data, $sep);
|
---|
178 | $raf->Read($data,256) or return undef;
|
---|
179 | my ($a, $d) = (999,999);
|
---|
180 | $a = pos($data), pos($data) = 0 if $data =~ /\x0a/g;
|
---|
181 | $d = pos($data) if $data =~ /\x0d/g;
|
---|
182 | my $diff = $a - $d;
|
---|
183 | if ($diff eq 1) {
|
---|
184 | $sep = "\x0d\x0a";
|
---|
185 | } elsif ($diff eq -1) {
|
---|
186 | $sep = "\x0a\x0d";
|
---|
187 | } elsif ($diff > 0) {
|
---|
188 | $sep = "\x0d";
|
---|
189 | } elsif ($diff < 0) {
|
---|
190 | $sep = "\x0a";
|
---|
191 | } # else error
|
---|
192 | $raf->Seek($pos, 0); # restore original position
|
---|
193 | return $sep;
|
---|
194 | }
|
---|
195 |
|
---|
196 | #------------------------------------------------------------------------------
|
---|
197 | # Decode comment from PostScript file
|
---|
198 | # Inputs: 0) comment string, 1) RAF ref, 2) reference to lines array
|
---|
199 | # 3) optional data reference for extra lines read from file
|
---|
200 | # Returns: Decoded comment string (may be an array reference)
|
---|
201 | # - handles multi-line comments and escape sequences
|
---|
202 | sub DecodeComment($$$;$)
|
---|
203 | {
|
---|
204 | my ($val, $raf, $lines, $dataPt) = @_;
|
---|
205 | $val =~ s/\x0d*\x0a*$//; # remove trailing CR, LF or CR/LF
|
---|
206 | # check for continuation comments
|
---|
207 | for (;;) {
|
---|
208 | unless (@$lines) {
|
---|
209 | my $buff;
|
---|
210 | $raf->ReadLine($buff) or last;
|
---|
211 | my $altnl = $/ eq "\x0d" ? "\x0a" : "\x0d";
|
---|
212 | if ($buff =~ /$altnl/) {
|
---|
213 | # split into separate lines
|
---|
214 | @$lines = split /$altnl/, $buff, -1;
|
---|
215 | # handle case of DOS newline data inside file using Unix newlines
|
---|
216 | @$lines = ( $$lines[0] . $$lines[1] ) if @$lines == 2 and $$lines[1] eq $/;
|
---|
217 | } else {
|
---|
218 | push @$lines, $buff;
|
---|
219 | }
|
---|
220 | }
|
---|
221 | last unless $$lines[0] =~ /^%%\+/; # is the next line a continuation?
|
---|
222 | $$dataPt .= $$lines[0] if $dataPt; # add to data if necessary
|
---|
223 | $$lines[0] =~ s/\x0d*\x0a*$//; # remove trailing CR, LF or CR/LF
|
---|
224 | $val .= substr(shift(@$lines), 3); # add to value (without leading "%%+")
|
---|
225 | }
|
---|
226 | my @vals;
|
---|
227 | # handle bracketed string values
|
---|
228 | if ($val =~ s/^\((.*)\)$/$1/) { # remove brackets if necessary
|
---|
229 | # split into an array of strings if necessary
|
---|
230 | my $nesting = 1;
|
---|
231 | while ($val =~ /(\(|\))/g) {
|
---|
232 | my $bra = $1;
|
---|
233 | my $pos = pos($val) - 2;
|
---|
234 | my $backslashes = 0;
|
---|
235 | while ($pos and substr($val, $pos, 1) eq '\\') {
|
---|
236 | --$pos;
|
---|
237 | ++$backslashes;
|
---|
238 | }
|
---|
239 | next if $backslashes & 0x01; # escaped if odd number
|
---|
240 | if ($bra eq '(') {
|
---|
241 | ++$nesting;
|
---|
242 | } else {
|
---|
243 | --$nesting;
|
---|
244 | unless ($nesting) {
|
---|
245 | push @vals, substr($val, 0, pos($val)-1);
|
---|
246 | $val = substr($val, pos($val));
|
---|
247 | ++$nesting if $val =~ s/\s*\(//;
|
---|
248 | }
|
---|
249 | }
|
---|
250 | }
|
---|
251 | push @vals, $val;
|
---|
252 | foreach $val (@vals) {
|
---|
253 | # decode escape sequences in bracketed strings
|
---|
254 | # (similar to code in PDF.pm, but without line continuation)
|
---|
255 | while ($val =~ /\\(.)/sg) {
|
---|
256 | my $n = pos($val) - 2;
|
---|
257 | my $c = $1;
|
---|
258 | my $r;
|
---|
259 | if ($c =~ /[0-7]/) {
|
---|
260 | # get up to 2 more octal digits
|
---|
261 | $c .= $1 if $val =~ /\G([0-7]{1,2})/g;
|
---|
262 | # convert octal escape code
|
---|
263 | $r = chr(oct($c) & 0xff);
|
---|
264 | } else {
|
---|
265 | # convert escaped characters
|
---|
266 | ($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
|
---|
267 | }
|
---|
268 | substr($val, $n, length($c)+1) = $r;
|
---|
269 | # continue search after this character
|
---|
270 | pos($val) = $n + length($r);
|
---|
271 | }
|
---|
272 | }
|
---|
273 | $val = @vals > 1 ? \@vals : $vals[0];
|
---|
274 | }
|
---|
275 | return $val;
|
---|
276 | }
|
---|
277 |
|
---|
278 | #------------------------------------------------------------------------------
|
---|
279 | # Unescape PostScript string
|
---|
280 | # Inputs: 0) string
|
---|
281 | # Returns: unescaped string
|
---|
282 | sub UnescapePostScript($)
|
---|
283 | {
|
---|
284 | my $str = shift;
|
---|
285 | # decode escape sequences in literal strings
|
---|
286 | while ($str =~ /\\(.)/sg) {
|
---|
287 | my $n = pos($str) - 2;
|
---|
288 | my $c = $1;
|
---|
289 | my $r;
|
---|
290 | if ($c =~ /[0-7]/) {
|
---|
291 | # get up to 2 more octal digits
|
---|
292 | $c .= $1 if $str =~ /\G([0-7]{1,2})/g;
|
---|
293 | # convert octal escape code
|
---|
294 | $r = chr(oct($c) & 0xff);
|
---|
295 | } elsif ($c eq "\x0d") {
|
---|
296 | # the string is continued if the line ends with '\'
|
---|
297 | # (also remove "\x0d\x0a")
|
---|
298 | $c .= $1 if $str =~ /\G(\x0a)/g;
|
---|
299 | $r = '';
|
---|
300 | } elsif ($c eq "\x0a") {
|
---|
301 | $r = '';
|
---|
302 | } else {
|
---|
303 | # convert escaped characters
|
---|
304 | ($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
|
---|
305 | }
|
---|
306 | substr($str, $n, length($c)+1) = $r;
|
---|
307 | # continue search after this character
|
---|
308 | pos($str) = $n + length($r);
|
---|
309 | }
|
---|
310 | return $str;
|
---|
311 | }
|
---|
312 |
|
---|
313 | #------------------------------------------------------------------------------
|
---|
314 | # Extract information from EPS, PS or AI file
|
---|
315 | # Inputs: 0) ExifTool object reference, 1) dirInfo reference, 2) optional tag table ref
|
---|
316 | # Returns: 1 if this was a valid PostScript file
|
---|
317 | sub ProcessPS($$;$)
|
---|
318 | {
|
---|
319 | my ($exifTool, $dirInfo, $tagTablePtr) = @_;
|
---|
320 | my $raf = $$dirInfo{RAF};
|
---|
321 | my $embedded = $exifTool->Options('ExtractEmbedded');
|
---|
322 | my ($data, $dos, $endDoc, $fontTable, $comment);
|
---|
323 |
|
---|
324 | # allow read from data
|
---|
325 | $raf = new File::RandomAccess($$dirInfo{DataPt}) unless $raf;
|
---|
326 | #
|
---|
327 | # determine if this is a postscript file
|
---|
328 | #
|
---|
329 | $raf->Read($data, 4) == 4 or return 0;
|
---|
330 | # accept either ASCII or DOS binary postscript file format
|
---|
331 | return 0 unless $data =~ /^(%!PS|%!Ad|%!Fo|\xc5\xd0\xd3\xc6)/;
|
---|
332 | if ($data =~ /^%!Ad/) {
|
---|
333 | # I've seen PS files start with "%!Adobe-PS"...
|
---|
334 | return 0 unless $raf->Read($data, 6) == 6 and $data eq "obe-PS";
|
---|
335 | } elsif ($data =~ /^\xc5\xd0\xd3\xc6/) {
|
---|
336 | # process DOS binary file header
|
---|
337 | # - save DOS header then seek ahead and check PS header
|
---|
338 | $raf->Read($dos, 26) == 26 or return 0;
|
---|
339 | SetByteOrder('II');
|
---|
340 | unless ($raf->Seek(Get32u(\$dos, 0), 0) and
|
---|
341 | $raf->Read($data, 4) == 4 and $data eq '%!PS')
|
---|
342 | {
|
---|
343 | return PSErr($exifTool, 'invalid header');
|
---|
344 | }
|
---|
345 | } else {
|
---|
346 | # check for PostScript font file (PFA or PFB)
|
---|
347 | my $d2;
|
---|
348 | $data .= $d2 if $raf->Read($d2,12);
|
---|
349 | if ($data =~ /^%!(PS-(AdobeFont-|Bitstream )|FontType1-)/) {
|
---|
350 | $exifTool->SetFileType('PFA'); # PostScript ASCII font file
|
---|
351 | $fontTable = GetTagTable('Image::ExifTool::Font::PSInfo');
|
---|
352 | # PostScript font files may contain an unformatted comments which may
|
---|
353 | # contain useful information, so accumulate these for the Comment tag
|
---|
354 | $comment = 1;
|
---|
355 | }
|
---|
356 | $raf->Seek(-length($data), 1);
|
---|
357 | }
|
---|
358 | #
|
---|
359 | # set the newline type based on the first newline found in the file
|
---|
360 | #
|
---|
361 | local $/ = GetInputRecordSeparator($raf);
|
---|
362 | $/ or return PSErr($exifTool, 'invalid PS data');
|
---|
363 |
|
---|
364 | # set file type (PostScript or EPS)
|
---|
365 | $raf->ReadLine($data) or $data = '';
|
---|
366 | my $type;
|
---|
367 | if ($data =~ /EPSF/) {
|
---|
368 | $type = 'EPS';
|
---|
369 | } else {
|
---|
370 | # read next line to see if this is an Illustrator file
|
---|
371 | my $line2;
|
---|
372 | my $pos = $raf->Tell();
|
---|
373 | if ($raf->ReadLine($line2) and $line2 =~ /^%%Creator: Adobe Illustrator/) {
|
---|
374 | $type = 'AI';
|
---|
375 | } else {
|
---|
376 | $type = 'PS';
|
---|
377 | }
|
---|
378 | $raf->Seek($pos, 0);
|
---|
379 | }
|
---|
380 | $exifTool->SetFileType($type);
|
---|
381 | #
|
---|
382 | # extract TIFF information from DOS header
|
---|
383 | #
|
---|
384 | $tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::PostScript::Main');
|
---|
385 | if ($dos) {
|
---|
386 | my $base = Get32u(\$dos, 16);
|
---|
387 | if ($base) {
|
---|
388 | my $pos = $raf->Tell();
|
---|
389 | # extract the TIFF preview
|
---|
390 | my $len = Get32u(\$dos, 20);
|
---|
391 | my $val = $exifTool->ExtractBinary($base, $len, 'TIFFPreview');
|
---|
392 | if (defined $val and $val =~ /^(MM\0\x2a|II\x2a\0|Binary)/) {
|
---|
393 | $exifTool->HandleTag($tagTablePtr, 'TIFFPreview', $val);
|
---|
394 | } else {
|
---|
395 | $exifTool->Warn('Bad TIFF preview image');
|
---|
396 | }
|
---|
397 | # extract information from TIFF in DOS header
|
---|
398 | # (set Parent to '' to avoid setting FileType tag again)
|
---|
399 | my %dirInfo = (
|
---|
400 | Parent => '',
|
---|
401 | RAF => $raf,
|
---|
402 | Base => $base,
|
---|
403 | );
|
---|
404 | $exifTool->ProcessTIFF(\%dirInfo) or $exifTool->Warn('Bad embedded TIFF');
|
---|
405 | # position file pointer to extract PS information
|
---|
406 | $raf->Seek($pos, 0);
|
---|
407 | }
|
---|
408 | }
|
---|
409 | #
|
---|
410 | # parse the postscript
|
---|
411 | #
|
---|
412 | my ($buff, $mode, $beginToken, $endToken, $docNum, $subDocNum, $changedNL);
|
---|
413 | my (@lines, $altnl);
|
---|
414 | if ($/ eq "\x0d") {
|
---|
415 | $altnl = "\x0a";
|
---|
416 | } else {
|
---|
417 | $/ = "\x0a"; # end on any LF (even if DOS CR+LF)
|
---|
418 | $altnl = "\x0d";
|
---|
419 | }
|
---|
420 | for (;;) {
|
---|
421 | if (@lines) {
|
---|
422 | $data = shift @lines;
|
---|
423 | } else {
|
---|
424 | $raf->ReadLine($data) or last;
|
---|
425 | # check for alternate newlines as efficiently as possible
|
---|
426 | if ($data =~ /$altnl/) {
|
---|
427 | if (length($data) > 500000 and IsPC()) {
|
---|
428 | # Windows can't split very long lines due to poor memory handling,
|
---|
429 | # so re-read the file with the other newline character instead
|
---|
430 | # (slower but uses less memory)
|
---|
431 | unless ($changedNL) {
|
---|
432 | $changedNL = 1;
|
---|
433 | my $t = $/;
|
---|
434 | $/ = $altnl;
|
---|
435 | $altnl = $t;
|
---|
436 | $raf->Seek(-length($data), 1);
|
---|
437 | next;
|
---|
438 | }
|
---|
439 | } else {
|
---|
440 | # split into separate lines
|
---|
441 | @lines = split /$altnl/, $data, -1;
|
---|
442 | $data = shift @lines;
|
---|
443 | if (@lines == 1 and $lines[0] eq $/) {
|
---|
444 | # handle case of DOS newline data inside file using Unix newlines
|
---|
445 | $data .= $lines[0];
|
---|
446 | undef @lines;
|
---|
447 | }
|
---|
448 | }
|
---|
449 | }
|
---|
450 | }
|
---|
451 | undef $changedNL;
|
---|
452 | if ($mode) {
|
---|
453 | if (not $endToken) {
|
---|
454 | $buff .= $data;
|
---|
455 | next unless $data =~ m{<\?xpacket end=.(w|r).\?>($/|$)};
|
---|
456 | } elsif ($data !~ /^$endToken/i) {
|
---|
457 | if ($mode eq 'XMP') {
|
---|
458 | $buff .= $data;
|
---|
459 | } elsif ($mode eq 'Document') {
|
---|
460 | # ignore embedded documents, but keep track of nesting level
|
---|
461 | $docNum .= '-1' if $data =~ /^$beginToken/;
|
---|
462 | } else {
|
---|
463 | # data is ASCII-hex encoded
|
---|
464 | $data =~ tr/0-9A-Fa-f//dc; # remove all but hex characters
|
---|
465 | $buff .= pack('H*', $data); # translate from hex
|
---|
466 | }
|
---|
467 | next;
|
---|
468 | } elsif ($mode eq 'Document') {
|
---|
469 | $docNum =~ s/-?\d+$//; # decrement document nesting level
|
---|
470 | # done with Document mode if we are back at the top level
|
---|
471 | undef $mode unless $docNum;
|
---|
472 | next;
|
---|
473 | }
|
---|
474 | } elsif ($endDoc and $data =~ /^$endDoc/i) {
|
---|
475 | $docNum =~ s/-?(\d+)$//; # decrement nesting level
|
---|
476 | $subDocNum = $1; # remember our last sub-document number
|
---|
477 | $$exifTool{DOC_NUM} = $docNum;
|
---|
478 | undef $endDoc unless $docNum; # done with document if top level
|
---|
479 | next;
|
---|
480 | } elsif ($data =~ /^(%{1,2})(Begin)(_xml_packet|Photoshop|ICCProfile|Document|Binary)/i) {
|
---|
481 | # the beginning of a data block
|
---|
482 | my %modeLookup = (
|
---|
483 | _xml_packet => 'XMP',
|
---|
484 | photoshop => 'Photoshop',
|
---|
485 | iccprofile => 'ICC_Profile',
|
---|
486 | document => 'Document',
|
---|
487 | binary => undef, # (we will try to skip this)
|
---|
488 | );
|
---|
489 | $mode = $modeLookup{lc $3};
|
---|
490 | unless ($mode) {
|
---|
491 | if (not @lines and $data =~ /^%{1,2}BeginBinary:\s*(\d+)/i) {
|
---|
492 | $raf->Seek($1, 1) or last; # skip binary data
|
---|
493 | }
|
---|
494 | next;
|
---|
495 | }
|
---|
496 | $buff = '';
|
---|
497 | $beginToken = $1 . $2 . $3;
|
---|
498 | $endToken = $1 . ($2 eq 'begin' ? 'end' : 'End') . $3;
|
---|
499 | if ($mode eq 'Document') {
|
---|
500 | # this is either the 1st sub-document or Nth document
|
---|
501 | if ($docNum) {
|
---|
502 | # increase nesting level
|
---|
503 | $docNum .= '-' . (++$subDocNum);
|
---|
504 | } else {
|
---|
505 | # this is the Nth document
|
---|
506 | $docNum = $$exifTool{DOC_COUNT} + 1;
|
---|
507 | }
|
---|
508 | $subDocNum = 0; # new level, so reset subDocNum
|
---|
509 | next unless $embedded; # skip over this document
|
---|
510 | # set document number for family 4-7 group names
|
---|
511 | $$exifTool{DOC_NUM} = $docNum;
|
---|
512 | $$exifTool{LIST_TAGS} = { }; # don't build lists across different documents
|
---|
513 | $exifTool->{PROCESSED} = { }; # re-initialize processed directory lookup too
|
---|
514 | $endDoc = $endToken; # parse to EndDocument token
|
---|
515 | # reset mode to allow parsing into sub-directories
|
---|
516 | undef $endToken;
|
---|
517 | undef $mode;
|
---|
518 | # save document name if available
|
---|
519 | if ($data =~ /^$beginToken:\s+([^\n\r]+)/i) {
|
---|
520 | my $docName = $1;
|
---|
521 | # remove brackets if necessary
|
---|
522 | $docName = $1 if $docName =~ /^\((.*)\)$/;
|
---|
523 | $exifTool->HandleTag($tagTablePtr, 'EmbeddedFileName', $docName);
|
---|
524 | }
|
---|
525 | }
|
---|
526 | next;
|
---|
527 | } elsif ($data =~ /^<\?xpacket begin=.{7,13}W5M0MpCehiHzreSzNTczkc9d/) {
|
---|
528 | # pick up any stray XMP data
|
---|
529 | $mode = 'XMP';
|
---|
530 | $buff = $data;
|
---|
531 | undef $endToken; # no end token (just look for xpacket end)
|
---|
532 | # XMP could be contained in a single line (if newlines are different)
|
---|
533 | next unless $data =~ m{<\?xpacket end=.(w|r).\?>($/|$)};
|
---|
534 | } elsif ($data =~ /^%%?(\w+): ?(.*)/s and $$tagTablePtr{$1}) {
|
---|
535 | my ($tag, $val) = ($1, $2);
|
---|
536 | # only allow 'ImageData' to have single leading '%'
|
---|
537 | next unless $data =~ /^%%/ or $1 eq 'ImageData';
|
---|
538 | # decode comment string (reading continuation lines if necessary)
|
---|
539 | $val = DecodeComment($val, $raf, \@lines);
|
---|
540 | $exifTool->HandleTag($tagTablePtr, $tag, $val);
|
---|
541 | next;
|
---|
542 | } elsif ($embedded and $data =~ /^%AI12_CompressedData/) {
|
---|
543 | # the rest of the file is compressed
|
---|
544 | unless (eval 'require Compress::Zlib') {
|
---|
545 | $exifTool->Warn('Install Compress::Zlib to extract compressed embedded data');
|
---|
546 | last;
|
---|
547 | }
|
---|
548 | # seek back to find the start of the compressed data in the file
|
---|
549 | my $tlen = length($data) + @lines;
|
---|
550 | $tlen += length $_ foreach @lines;
|
---|
551 | my $backTo = $raf->Tell() - $tlen - 64;
|
---|
552 | $backTo = 0 if $backTo < 0;
|
---|
553 | last unless $raf->Seek($backTo, 0) and $raf->Read($data, 2048);
|
---|
554 | last unless $data =~ s/.*?%AI12_CompressedData//;
|
---|
555 | my $inflate = Compress::Zlib::inflateInit();
|
---|
556 | $inflate or $exifTool->Warn('Error initializing inflate'), last;
|
---|
557 | # generate a PS-like file in memory from the compressed data
|
---|
558 | my $verbose = $exifTool->Options('Verbose');
|
---|
559 | if ($verbose > 1) {
|
---|
560 | $exifTool->VerboseDir('AI12_CompressedData (first 4kB)');
|
---|
561 | $exifTool->VerboseDump(\$data);
|
---|
562 | }
|
---|
563 | # remove header if it exists (Windows AI files only)
|
---|
564 | $data =~ s/^.{0,256}EndData[\x0d\x0a]+//s;
|
---|
565 | my $val;
|
---|
566 | for (;;) {
|
---|
567 | my ($v2, $stat) = $inflate->inflate($data);
|
---|
568 | $stat == Compress::Zlib::Z_STREAM_END() and $val .= $v2, last;
|
---|
569 | $stat != Compress::Zlib::Z_OK() and undef($val), last;
|
---|
570 | if (defined $val) {
|
---|
571 | $val .= $v2;
|
---|
572 | } elsif ($v2 =~ /^%!PS/) {
|
---|
573 | $val = $v2;
|
---|
574 | } else {
|
---|
575 | # add postscript header (for file recognition) if it doesn't exist
|
---|
576 | $val = "%!PS-Adobe-3.0$/" . $v2;
|
---|
577 | }
|
---|
578 | $raf->Read($data, 65536) or last;
|
---|
579 | }
|
---|
580 | defined $val or $exifTool->Warn('Error inflating AI compressed data'), last;
|
---|
581 | if ($verbose > 1) {
|
---|
582 | $exifTool->VerboseDir('Uncompressed AI12 Data');
|
---|
583 | $exifTool->VerboseDump(\$val);
|
---|
584 | }
|
---|
585 | # extract information from embedded images in the uncompressed data
|
---|
586 | $val = # add PS header in case it needs one
|
---|
587 | ProcessPS($exifTool, { DataPt => \$val });
|
---|
588 | last;
|
---|
589 | } elsif ($fontTable) {
|
---|
590 | if (defined $comment) {
|
---|
591 | # extract initial comments from PostScript Font files
|
---|
592 | if ($data =~ /^%\s+(.*?)[\x0d\x0a]/) {
|
---|
593 | $comment .= "\n" if $comment;
|
---|
594 | $comment .= $1;
|
---|
595 | next;
|
---|
596 | } elsif ($data !~ /^%/) {
|
---|
597 | # stop extracting comments at the first non-comment line
|
---|
598 | $exifTool->FoundTag('Comment', $comment) if length $comment;
|
---|
599 | undef $comment;
|
---|
600 | }
|
---|
601 | }
|
---|
602 | if ($data =~ m{^\s*/(\w+)\s*(.*)} and $$fontTable{$1}) {
|
---|
603 | my ($tag, $val) = ($1, $2);
|
---|
604 | if ($val =~ /^\((.*)\)/) {
|
---|
605 | $val = UnescapePostScript($1);
|
---|
606 | } elsif ($val =~ m{/?(\S+)}) {
|
---|
607 | $val = $1;
|
---|
608 | }
|
---|
609 | $exifTool->HandleTag($fontTable, $tag, $val);
|
---|
610 | } elsif ($data =~ /^currentdict end/) {
|
---|
611 | # only extract tags from initial FontInfo dict
|
---|
612 | undef $fontTable;
|
---|
613 | }
|
---|
614 | next;
|
---|
615 | } else {
|
---|
616 | next;
|
---|
617 | }
|
---|
618 | # extract information from buffered data
|
---|
619 | my %dirInfo = (
|
---|
620 | DataPt => \$buff,
|
---|
621 | DataLen => length $buff,
|
---|
622 | DirStart => 0,
|
---|
623 | DirLen => length $buff,
|
---|
624 | Parent => 'PostScript',
|
---|
625 | );
|
---|
626 | my $subTablePtr = GetTagTable("Image::ExifTool::${mode}::Main");
|
---|
627 | unless ($exifTool->ProcessDirectory(\%dirInfo, $subTablePtr)) {
|
---|
628 | $exifTool->Warn("Error processing $mode information in PostScript file");
|
---|
629 | }
|
---|
630 | undef $buff;
|
---|
631 | undef $mode;
|
---|
632 | }
|
---|
633 | $mode = 'Document' if $endDoc and not $mode;
|
---|
634 | $mode and PSErr($exifTool, "unterminated $mode data");
|
---|
635 | return 1;
|
---|
636 | }
|
---|
637 |
|
---|
638 | #------------------------------------------------------------------------------
|
---|
639 | # Extract information from EPS file
|
---|
640 | # Inputs: 0) ExifTool object reference, 1) dirInfo reference
|
---|
641 | # Returns: 1 if this was a valid PostScript file
|
---|
642 | sub ProcessEPS($$)
|
---|
643 | {
|
---|
644 | return ProcessPS($_[0],$_[1]);
|
---|
645 | }
|
---|
646 |
|
---|
647 | 1; # end
|
---|
648 |
|
---|
649 |
|
---|
650 | __END__
|
---|
651 |
|
---|
652 | =head1 NAME
|
---|
653 |
|
---|
654 | Image::ExifTool::PostScript - Read PostScript meta information
|
---|
655 |
|
---|
656 | =head1 SYNOPSIS
|
---|
657 |
|
---|
658 | This module is loaded automatically by Image::ExifTool when required.
|
---|
659 |
|
---|
660 | =head1 DESCRIPTION
|
---|
661 |
|
---|
662 | This code reads meta information from EPS (Encapsulated PostScript), PS
|
---|
663 | (PostScript) and AI (Adobe Illustrator) files.
|
---|
664 |
|
---|
665 | =head1 AUTHOR
|
---|
666 |
|
---|
667 | Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
|
---|
668 |
|
---|
669 | This library is free software; you can redistribute it and/or modify it
|
---|
670 | under the same terms as Perl itself.
|
---|
671 |
|
---|
672 | =head1 REFERENCES
|
---|
673 |
|
---|
674 | =over 4
|
---|
675 |
|
---|
676 | =item L<http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf>
|
---|
677 |
|
---|
678 | =item L<http://partners.adobe.com/public/developer/en/ps/5001.DSC_Spec.pdf>
|
---|
679 |
|
---|
680 | =item L<http://partners.adobe.com/public/developer/en/illustrator/sdk/AI7FileFormat.pdf>
|
---|
681 |
|
---|
682 | =back
|
---|
683 |
|
---|
684 | =head1 SEE ALSO
|
---|
685 |
|
---|
686 | L<Image::ExifTool::TagNames/PostScript Tags>,
|
---|
687 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
688 |
|
---|
689 | =cut
|
---|