1 | #------------------------------------------------------------------------------
|
---|
2 | # File: WritePDF.pl
|
---|
3 | #
|
---|
4 | # Description: Write PDF meta information
|
---|
5 | #
|
---|
6 | # Revisions: 12/08/2007 - P. Harvey Created
|
---|
7 | #
|
---|
8 | # References: 1) http://partners.adobe.com/public/developer/pdf/index_reference.html
|
---|
9 | #
|
---|
10 | # Notes: The special "PDF-update" group can be deleted to revert exiftool updates
|
---|
11 | #------------------------------------------------------------------------------
|
---|
12 | package Image::ExifTool::PDF;
|
---|
13 |
|
---|
14 | use strict;
|
---|
15 | use vars qw($lastFetched);
|
---|
16 |
|
---|
17 | sub WriteObject($$);
|
---|
18 | sub EncodeString($);
|
---|
19 | sub CryptObject($);
|
---|
20 |
|
---|
21 | # comments to mark beginning and end of ExifTool incremental update
|
---|
22 | my $beginComment = '%BeginExifToolUpdate';
|
---|
23 | my $endComment = '%EndExifToolUpdate ';
|
---|
24 |
|
---|
25 | my $keyExt; # crypt key extension
|
---|
26 | my $pdfVer; # version of PDF file we are currently writing
|
---|
27 |
|
---|
28 | # internal tags used in dictionary objects
|
---|
29 | my %myDictTags = (
|
---|
30 | _tags => 1, _stream => 1, _decrypted => 1, _needCrypt => 1,
|
---|
31 | _filtered => 1, _entry_size => 1, _table => 1,
|
---|
32 | );
|
---|
33 |
|
---|
34 | #------------------------------------------------------------------------------
|
---|
35 | # Validate raw PDF values for writing (string date integer real boolean name)
|
---|
36 | # Inputs: 0) ExifTool object ref, 1) tagInfo hash ref, 2) raw value ref
|
---|
37 | # Returns: error string or undef (and possibly changes value) on success
|
---|
38 | sub CheckPDF($$$)
|
---|
39 | {
|
---|
40 | my ($exifTool, $tagInfo, $valPtr) = @_;
|
---|
41 | my $format = $$tagInfo{Writable} || $tagInfo->{Table}->{WRITABLE};
|
---|
42 | if (not $format) {
|
---|
43 | return 'No writable format';
|
---|
44 | } elsif ($format eq 'string') {
|
---|
45 | # convert to Unicode if necessary
|
---|
46 | if ($$valPtr =~ /[\x80-\xff]/) {
|
---|
47 | $$valPtr = "\xfe\xff" . $exifTool->Encode($$valPtr,'UCS2','MM');
|
---|
48 | }
|
---|
49 | } elsif ($format eq 'date') {
|
---|
50 | # be flexible about this for now
|
---|
51 | return 'Bad date format' unless $$valPtr =~ /^\d{4}/;
|
---|
52 | } elsif ($format eq 'integer') {
|
---|
53 | return 'Not an integer' unless Image::ExifTool::IsInt($$valPtr);
|
---|
54 | } elsif ($format eq 'real') {
|
---|
55 | return 'Not a real number' unless $$valPtr =~ /^[+-]?(?=\d|\.\d)\d*(\.\d*)?$/;
|
---|
56 | } elsif ($format eq 'boolean') {
|
---|
57 | $$valPtr = ($$valPtr and $$valPtr !~ /^f/i) ? 'true' : 'false';
|
---|
58 | } elsif ($format eq 'name') {
|
---|
59 | return 'Invalid PDF name' if $$valPtr =~ /\0/;
|
---|
60 | } else {
|
---|
61 | return "Invalid PDF format '$format'";
|
---|
62 | }
|
---|
63 | return undef; # value is OK
|
---|
64 | }
|
---|
65 |
|
---|
66 | #------------------------------------------------------------------------------
|
---|
67 | # Format value for writing to PDF file
|
---|
68 | # Inputs: 0) value, 1) format string (string,date,integer,real,boolean,name)
|
---|
69 | # Returns: formatted value or undef on error
|
---|
70 | # Notes: Called at write time, so $pdfVer may be checked
|
---|
71 | sub WritePDFValue($$)
|
---|
72 | {
|
---|
73 | my ($val, $format) = @_;
|
---|
74 | if (not $format) {
|
---|
75 | return undef;
|
---|
76 | } elsif ($format eq 'string') {
|
---|
77 | EncodeString(\$val);
|
---|
78 | } elsif ($format eq 'date') {
|
---|
79 | # convert date to "D:YYYYmmddHHMMSS+-HH'MM'" format
|
---|
80 | $val =~ s/([-+]\d{2}):(\d{2})/$1'$2'/; # change timezone delimiters if necessary
|
---|
81 | $val =~ tr/ ://d; # remove spaces and colons
|
---|
82 | $val = "D:$val"; # add leading "D:"
|
---|
83 | EncodeString(\$val);
|
---|
84 | } elsif ($format =~ /^(integer|real|boolean)$/) {
|
---|
85 | # no reformatting necessary
|
---|
86 | } elsif ($format eq 'name') {
|
---|
87 | return undef if $val =~ /\0/;
|
---|
88 | if ($pdfVer >= 1.2) {
|
---|
89 | $val =~ s/([\t\n\f\r ()<>[\]{}\/%#])/sprintf('#%.2x',ord $1)/sge;
|
---|
90 | } else {
|
---|
91 | return undef if $val =~ /[\t\n\f\r ()<>[\]{}\/%]/;
|
---|
92 | }
|
---|
93 | $val = "/$val"; # add leading '/'
|
---|
94 | } else {
|
---|
95 | return undef;
|
---|
96 | }
|
---|
97 | return $val;
|
---|
98 | }
|
---|
99 |
|
---|
100 | #------------------------------------------------------------------------------
|
---|
101 | # Encode PDF string
|
---|
102 | # Inputs: 0) reference to PDF string
|
---|
103 | # Returns: (updates string with encoded data)
|
---|
104 | sub EncodeString($)
|
---|
105 | {
|
---|
106 | my $strPt = shift;
|
---|
107 | if (ref $$strPt eq 'ARRAY') {
|
---|
108 | my $str;
|
---|
109 | foreach $str (@{$$strPt}) {
|
---|
110 | EncodeString(\$str);
|
---|
111 | }
|
---|
112 | return;
|
---|
113 | }
|
---|
114 | Crypt($strPt, $keyExt, 1); # encrypt if necessary
|
---|
115 | # encode as hex if we have any control characters (except tab)
|
---|
116 | if ($$strPt=~/[\x00-\x08\x0a-\x1f\x7f\xff]/) {
|
---|
117 | # encode as hex
|
---|
118 | my $str='';
|
---|
119 | my $len = length $$strPt;
|
---|
120 | my $i = 0;
|
---|
121 | for (;;) {
|
---|
122 | my $n = $len - $i or last;
|
---|
123 | $n = 40 if $n > 40; # break into reasonable-length lines
|
---|
124 | $str .= $/ if $i;
|
---|
125 | $str .= unpack('H*', substr($$strPt, $i, $n));
|
---|
126 | $i += $n;
|
---|
127 | }
|
---|
128 | $$strPt = "<$str>";
|
---|
129 | } else {
|
---|
130 | $$strPt =~ s/([()\\])/\\$1/g; # must escape round brackets and backslashes
|
---|
131 | $$strPt = "($$strPt)";
|
---|
132 | }
|
---|
133 | }
|
---|
134 |
|
---|
135 | #------------------------------------------------------------------------------
|
---|
136 | # Encrypt an object
|
---|
137 | # Inputs: 0) PDF object (encrypts in place)
|
---|
138 | # Notes: Encrypts according to "_needCrypt" dictionary entry,
|
---|
139 | # then deletes "_needCrypt" when done
|
---|
140 | sub CryptObject($)
|
---|
141 | {
|
---|
142 | my $obj = $_[0];
|
---|
143 | if (not ref $obj) {
|
---|
144 | # only literal strings and hex strings are encrypted
|
---|
145 | if ($obj =~ /^[(<]/) {
|
---|
146 | undef $lastFetched; # (reset this just in case)
|
---|
147 | my $val = ReadPDFValue($obj);
|
---|
148 | EncodeString(\$val);
|
---|
149 | $_[0] = $val;
|
---|
150 | }
|
---|
151 | } elsif (ref $obj eq 'HASH') {
|
---|
152 | my $tag;
|
---|
153 | my $needCrypt = $$obj{_needCrypt};
|
---|
154 | foreach $tag (keys %$obj) {
|
---|
155 | next if $myDictTags{$tag};
|
---|
156 | # re-encrypt necessary objects only (others are still encrypted)
|
---|
157 | # (this is really annoying, but is necessary because objects stored
|
---|
158 | # in encrypted streams are decrypted when extracting, but strings stored
|
---|
159 | # as direct objects are decrypted later since they must be decoded
|
---|
160 | # before being decrypted)
|
---|
161 | if ($needCrypt) {
|
---|
162 | next unless defined $$needCrypt{$tag} ? $$needCrypt{$tag} : $$needCrypt{'*'};
|
---|
163 | }
|
---|
164 | CryptObject($$obj{$tag});
|
---|
165 | }
|
---|
166 | delete $$obj{_needCrypt}; # avoid re-re-crypting
|
---|
167 | } elsif (ref $obj eq 'ARRAY') {
|
---|
168 | my $val;
|
---|
169 | foreach $val (@$obj) {
|
---|
170 | CryptObject($val);
|
---|
171 | }
|
---|
172 | }
|
---|
173 | }
|
---|
174 |
|
---|
175 | #------------------------------------------------------------------------------
|
---|
176 | # Get free entries from xref stream dictionary that we wrote previously
|
---|
177 | # Inputs: 0) xref dictionary reference
|
---|
178 | # Returns: free entry hash (keys are object numbers, values are xref entry list refs)
|
---|
179 | sub GetFreeEntries($)
|
---|
180 | {
|
---|
181 | my $dict = shift;
|
---|
182 | my %xrefFree;
|
---|
183 | # from the start we have only written xref stream entries in 'CNn' format,
|
---|
184 | # so we can simplify things for now and only support this type of entry
|
---|
185 | my $w = $$dict{W};
|
---|
186 | if (ref $w eq 'ARRAY' and "@$w" eq '1 4 2') {
|
---|
187 | my $size = $$dict{_entry_size}; # this will be 7 for 'CNn'
|
---|
188 | my $index = $$dict{Index};
|
---|
189 | my $len = length $$dict{_stream};
|
---|
190 | # scan the table for free objects
|
---|
191 | my $num = scalar(@$index) / 2;
|
---|
192 | my $pos = 0;
|
---|
193 | my ($i, $j);
|
---|
194 | for ($i=0; $i<$num; ++$i) {
|
---|
195 | my $start = $$index[$i*2];
|
---|
196 | my $count = $$index[$i*2+1];
|
---|
197 | for ($j=0; $j<$count; ++$j) {
|
---|
198 | last if $pos + $size > $len;
|
---|
199 | my @t = unpack("x$pos CNn", $$dict{_stream});
|
---|
200 | # add entry if object was free
|
---|
201 | $xrefFree{$start+$j} = [ $t[1], $t[2], 'f' ] if $t[0] == 0;
|
---|
202 | $pos += $size; # step to next entry
|
---|
203 | }
|
---|
204 | }
|
---|
205 | }
|
---|
206 | return \%xrefFree;
|
---|
207 | }
|
---|
208 |
|
---|
209 | #------------------------------------------------------------------------------
|
---|
210 | # Write PDF object
|
---|
211 | # Inputs: 0) output file or scalar ref, 1) PDF object
|
---|
212 | # Returns: true on success
|
---|
213 | # Notes: inserts white space before object, but none afterward
|
---|
214 | sub WriteObject($$)
|
---|
215 | {
|
---|
216 | my ($outfile, $obj) = @_;
|
---|
217 | if (ref $obj eq 'SCALAR') {
|
---|
218 | Write($outfile, ' ', $$obj) or return 0;
|
---|
219 | } elsif (ref $obj eq 'ARRAY') {
|
---|
220 | # write array
|
---|
221 | Write($outfile, @$obj > 10 ? $/ : ' ', '[') or return 0;
|
---|
222 | my $item;
|
---|
223 | foreach $item (@$obj) {
|
---|
224 | WriteObject($outfile, $item) or return 0;
|
---|
225 | }
|
---|
226 | Write($outfile, ' ]') or return 0;
|
---|
227 | } elsif (ref $obj eq 'HASH') {
|
---|
228 | # write dictionary
|
---|
229 | my $tag;
|
---|
230 | Write($outfile, $/, '<<') or return 0;
|
---|
231 | # prepare object as required if it has a stream
|
---|
232 | if ($$obj{_stream}) {
|
---|
233 | # encrypt stream if necessary (must be done before determining Length)
|
---|
234 | CryptStream($obj, $keyExt) if $$obj{_decrypted};
|
---|
235 | # write "Length" entry in dictionary
|
---|
236 | $$obj{Length} = length $$obj{_stream};
|
---|
237 | push @{$$obj{_tags}}, 'Length';
|
---|
238 | # delete Filter-related entries since we don't yet write filtered streams
|
---|
239 | delete $$obj{Filter};
|
---|
240 | delete $$obj{DecodeParms};
|
---|
241 | delete $$obj{DL};
|
---|
242 | }
|
---|
243 | # don't write my internal entries
|
---|
244 | my %wrote = %myDictTags;
|
---|
245 | # write tags in original order, adding new ones later alphabetically
|
---|
246 | foreach $tag (@{$$obj{_tags}}, sort keys %$obj) {
|
---|
247 | # ignore already-written or missing entries
|
---|
248 | next if $wrote{$tag} or not defined $$obj{$tag};
|
---|
249 | Write($outfile, $/, "/$tag") or return 0;
|
---|
250 | WriteObject($outfile, $$obj{$tag}) or return 0;
|
---|
251 | $wrote{$tag} = 1;
|
---|
252 | }
|
---|
253 | Write($outfile, $/, '>>') or return 0;
|
---|
254 | if ($$obj{_stream}) {
|
---|
255 | # write object stream
|
---|
256 | # (a single 0x0d may not follow 'stream', so use 0x0d+0x0a here to be sure)
|
---|
257 | Write($outfile, $/, "stream\x0d\x0a") or return 0;
|
---|
258 | Write($outfile, $$obj{_stream}, $/, 'endstream') or return 0;
|
---|
259 | }
|
---|
260 | } else {
|
---|
261 | # write string, number, name or object reference
|
---|
262 | Write($outfile, ' ', $obj);
|
---|
263 | }
|
---|
264 | return 1;
|
---|
265 | }
|
---|
266 |
|
---|
267 | #------------------------------------------------------------------------------
|
---|
268 | # Write PDF File
|
---|
269 | # Inputs: 0) ExifTool object reference, 1) dirInfo reference
|
---|
270 | # Returns: 1 on success, 0 if not valid PDF file, -1 on write error
|
---|
271 | # Notes: dictionary structure: Main --+--> Info
|
---|
272 | # +--> Root --> Metadata
|
---|
273 | sub WritePDF($$)
|
---|
274 | {
|
---|
275 | my ($exifTool, $dirInfo) = @_;
|
---|
276 | my $raf = $$dirInfo{RAF};
|
---|
277 | my $outfile = $$dirInfo{OutFile};
|
---|
278 | my ($buff, %capture, %newXRef, %newObj, $objRef);
|
---|
279 | my ($out, $id, $gen, $obj);
|
---|
280 |
|
---|
281 | # make sure this is a PDF file
|
---|
282 | my $pos = $raf->Tell();
|
---|
283 | $raf->Read($buff, 10) >= 8 or return 0;
|
---|
284 | $buff =~ /^%PDF-(\d+\.\d+)/ or return 0;
|
---|
285 | $raf->Seek($pos, 0);
|
---|
286 |
|
---|
287 | # create a new ExifTool object and use it to read PDF and XMP information
|
---|
288 | my $newTool = new Image::ExifTool;
|
---|
289 | $newTool->Options(List => 1);
|
---|
290 | $newTool->Options(Password => $exifTool->Options('Password'));
|
---|
291 | $$newTool{PDF_CAPTURE} = \%capture;
|
---|
292 | my $info = $newTool->ImageInfo($raf, 'XMP', 'PDF:*', 'Error', 'Warning');
|
---|
293 | # not a valid PDF file unless we got a version number
|
---|
294 | # (note: can't just check $$info{PDFVersion} due to possibility of XMP-pdf:PDFVersion)
|
---|
295 | my $vers = $newTool->GetInfo('PDF:PDFVersion');
|
---|
296 | ($pdfVer) = values %$vers;
|
---|
297 | $pdfVer or $exifTool->Error('Missing PDF:PDFVersion'), return 0;
|
---|
298 | # check version number
|
---|
299 | if ($pdfVer > 1.7) {
|
---|
300 | if ($pdfVer >= 2.0) {
|
---|
301 | $exifTool->Error("Can't yet write PDF version $pdfVer"); # (future major version changes)
|
---|
302 | return 1;
|
---|
303 | }
|
---|
304 | $exifTool->Warn("ExifTool is untested with PDF version $pdfVer files", 1);
|
---|
305 | }
|
---|
306 | # fail if we had any serious errors while extracting information
|
---|
307 | if ($capture{Error} or $$info{Error}) {
|
---|
308 | $exifTool->Error($capture{Error} || $$info{Error});
|
---|
309 | return 1;
|
---|
310 | }
|
---|
311 | # make sure we have everything we need to rewrite this file
|
---|
312 | foreach $obj (qw(Main Root xref)) {
|
---|
313 | next if $capture{$obj};
|
---|
314 | # any warning we received may give a clue about why this object is missing
|
---|
315 | $exifTool->Error($$info{Warning}) if $$info{Warning};
|
---|
316 | $exifTool->Error("Can't find $obj object");
|
---|
317 | return 1;
|
---|
318 | }
|
---|
319 |
|
---|
320 | # copy file up to start of previous exiftool update or end of file
|
---|
321 | # (comment, startxref & EOF with 11-digit offsets and 2-byte newlines is 63 bytes)
|
---|
322 | $raf->Seek(-64,2) and $raf->Read($buff,64) and $raf->Seek(0,0) or return -1;
|
---|
323 | my $rtn = 1;
|
---|
324 | my $prevUpdate;
|
---|
325 | # (now $endComment is before "startxref", but pre-7.41 we wrote it after the EOF)
|
---|
326 | if ($buff =~ /$endComment(\d+)\s+(startxref\s+\d+\s+%%EOF\s+)?$/s) {
|
---|
327 | $prevUpdate = $1;
|
---|
328 | # rewrite the file up to the original EOF
|
---|
329 | Image::ExifTool::CopyBlock($raf, $outfile, $prevUpdate) or $rtn = -1;
|
---|
330 | # verify that we are now at the start of an ExifTool update
|
---|
331 | unless ($raf->Read($buff, length $beginComment) and $buff eq $beginComment) {
|
---|
332 | $exifTool->Error('Previous ExifTool update is corrupted');
|
---|
333 | return $rtn;
|
---|
334 | }
|
---|
335 | $raf->Seek($prevUpdate, 0) or $rtn = -1;
|
---|
336 | if ($exifTool->{DEL_GROUP}->{'PDF-update'}) {
|
---|
337 | $exifTool->VPrint(0, " Reverted previous ExifTool updates\n");
|
---|
338 | ++$$exifTool{CHANGED};
|
---|
339 | return $rtn;
|
---|
340 | }
|
---|
341 | } elsif ($exifTool->{DEL_GROUP}->{'PDF-update'}) {
|
---|
342 | $exifTool->Error('File contains no previous ExifTool update');
|
---|
343 | return $rtn;
|
---|
344 | } else {
|
---|
345 | # rewrite the whole file
|
---|
346 | while ($raf->Read($buff, 65536)) {
|
---|
347 | Write($outfile, $buff) or $rtn = -1;
|
---|
348 | }
|
---|
349 | }
|
---|
350 | $out = $exifTool->Options('TextOut') if $exifTool->Options('Verbose');
|
---|
351 | #
|
---|
352 | # create our new PDF objects to write
|
---|
353 | #
|
---|
354 | my $xref = $capture{xref};
|
---|
355 | my $mainDict = $capture{Main};
|
---|
356 | my $metaRef = $capture{Root}->{Metadata};
|
---|
357 | my $nextObject;
|
---|
358 |
|
---|
359 | # start by finding reference for info object in case it was deleted
|
---|
360 | # in a previous edit so we can re-use it here if adding PDF Info
|
---|
361 | my $prevInfoRef;
|
---|
362 | if ($prevUpdate) {
|
---|
363 | unless ($capture{Prev}) {
|
---|
364 | $exifTool->Error("Can't locate trailer dictionary prior to last edit");
|
---|
365 | return $rtn;
|
---|
366 | }
|
---|
367 | $prevInfoRef = $capture{Prev}->{Info};
|
---|
368 | # start from previous size so the xref table doesn't continue
|
---|
369 | # to grow if we repeatedly add and delete the Metadata object
|
---|
370 | $nextObject = $capture{Prev}->{Size};
|
---|
371 | # don't re-use Meta reference if object was added in a previous update
|
---|
372 | undef $metaRef if $metaRef and $$metaRef=~/^(\d+)/ and $1 >= $nextObject;
|
---|
373 | } else {
|
---|
374 | $prevInfoRef = $$mainDict{Info};
|
---|
375 | $nextObject = $$mainDict{Size};
|
---|
376 | }
|
---|
377 |
|
---|
378 | # delete entire PDF group if specified
|
---|
379 | my $infoChanged = 0;
|
---|
380 | if ($exifTool->{DEL_GROUP}->{PDF} and $capture{Info}) {
|
---|
381 | delete $capture{Info};
|
---|
382 | $info = { XMP => $$info{XMP} }; # remove extracted PDF tags
|
---|
383 | print $out " Deleting PDF Info dictionary\n" if $out;
|
---|
384 | ++$infoChanged;
|
---|
385 | }
|
---|
386 |
|
---|
387 | # create new Info dictionary if necessary
|
---|
388 | $capture{Info} = { _tags => [ ] } unless $capture{Info};
|
---|
389 | my $infoDict = $capture{Info};
|
---|
390 |
|
---|
391 | # must pre-determine Info reference to be used in encryption
|
---|
392 | my $infoRef = $prevInfoRef || \ "$nextObject 0 R";
|
---|
393 | $keyExt = $$infoRef;
|
---|
394 |
|
---|
395 | # must encrypt all values in dictionary if they came from an encrypted stream
|
---|
396 | CryptObject($infoDict) if $$infoDict{_needCrypt};
|
---|
397 |
|
---|
398 | # must set line separator before calling WritePDFValue()
|
---|
399 | local $/ = $capture{newline};
|
---|
400 |
|
---|
401 | # rewrite PDF Info tags
|
---|
402 | my $newTags = $exifTool->GetNewTagInfoHash(\%Image::ExifTool::PDF::Info);
|
---|
403 | my $tagID;
|
---|
404 | foreach $tagID (sort keys %$newTags) {
|
---|
405 | my $tagInfo = $$newTags{$tagID};
|
---|
406 | my $nvHash = $exifTool->GetNewValueHash($tagInfo);
|
---|
407 | my (@vals, $deleted);
|
---|
408 | my $tag = $$tagInfo{Name};
|
---|
409 | my $val = $$info{$tag};
|
---|
410 | my $tagKey = $tag;
|
---|
411 | unless (defined $val) {
|
---|
412 | # must check for tag key with copy number
|
---|
413 | ($tagKey) = grep /^$tag/, keys %$info;
|
---|
414 | $val = $$info{$tagKey} if $tagKey;
|
---|
415 | }
|
---|
416 | if (defined $val) {
|
---|
417 | my @oldVals;
|
---|
418 | if (ref $val eq 'ARRAY') {
|
---|
419 | @oldVals = @$val;
|
---|
420 | $val = shift @oldVals;
|
---|
421 | }
|
---|
422 | for (;;) {
|
---|
423 | if (Image::ExifTool::IsOverwriting($nvHash, $val) > 0) {
|
---|
424 | $deleted = 1;
|
---|
425 | $exifTool->VerboseValue("- PDF:$tag", $val);
|
---|
426 | ++$infoChanged;
|
---|
427 | } else {
|
---|
428 | push @vals, $val;
|
---|
429 | }
|
---|
430 | last unless @oldVals;
|
---|
431 | $val = shift @oldVals;
|
---|
432 | }
|
---|
433 | # don't write this out if we deleted all values
|
---|
434 | delete $$infoDict{$tagID} unless @vals;
|
---|
435 | }
|
---|
436 | # decide whether we want to write this tag
|
---|
437 | # (always create native PDF information, so don't check IsCreating())
|
---|
438 | next unless $deleted or $$tagInfo{List} or not exists $$infoDict{$tagID};
|
---|
439 |
|
---|
440 | # add new values to existing ones
|
---|
441 | my @newVals = Image::ExifTool::GetNewValues($nvHash);
|
---|
442 | if (@newVals) {
|
---|
443 | push @vals, @newVals;
|
---|
444 | ++$infoChanged;
|
---|
445 | if ($out) {
|
---|
446 | foreach $val (@newVals) {
|
---|
447 | $exifTool->VerboseValue("+ PDF:$tag", $val);
|
---|
448 | }
|
---|
449 | }
|
---|
450 | }
|
---|
451 | unless (@vals) {
|
---|
452 | # remove this entry from the Info dictionary if no values remain
|
---|
453 | delete $$infoDict{$tagID};
|
---|
454 | next;
|
---|
455 | }
|
---|
456 | # format value(s) for writing to PDF file
|
---|
457 | my $writable = $$tagInfo{Writable} || $Image::ExifTool::PDF::Info{WRITABLE};
|
---|
458 | if (not $$tagInfo{List}) {
|
---|
459 | $val = WritePDFValue(shift @vals, $writable);
|
---|
460 | } elsif ($$tagInfo{List} eq 'array') {
|
---|
461 | foreach $val (@vals) {
|
---|
462 | $val = WritePDFValue($val, $writable);
|
---|
463 | defined $val or undef(@vals), last;
|
---|
464 | }
|
---|
465 | $val = @vals ? \@vals : undef;
|
---|
466 | } else {
|
---|
467 | $val = WritePDFValue(join($exifTool->Options('ListSep'), @vals), $writable);
|
---|
468 | }
|
---|
469 | if (defined $val) {
|
---|
470 | $$infoDict{$tagID} = $val;
|
---|
471 | ++$infoChanged;
|
---|
472 | } else {
|
---|
473 | $exifTool->Warn("Error converting $$tagInfo{Name} value");
|
---|
474 | }
|
---|
475 | }
|
---|
476 | if ($infoChanged) {
|
---|
477 | $$exifTool{CHANGED} += $infoChanged;
|
---|
478 | } elsif ($prevUpdate) {
|
---|
479 | # must still write Info dictionary if it was previously updated
|
---|
480 | my $oldPos = LocateObject($xref, $$infoRef);
|
---|
481 | $infoChanged = 1 if $oldPos and $oldPos > $prevUpdate;
|
---|
482 | }
|
---|
483 |
|
---|
484 | # create new Info dictionary if necessary
|
---|
485 | if ($infoChanged) {
|
---|
486 | # increment object count if we used a new object here
|
---|
487 | if (scalar(keys %{$capture{Info}}) > 1) {
|
---|
488 | $newObj{$$infoRef} = $capture{Info};# save to write later
|
---|
489 | $$mainDict{Info} = $infoRef; # add reference to trailer dictionary
|
---|
490 | ++$nextObject unless $prevInfoRef;
|
---|
491 | } else {
|
---|
492 | # remove Info from Main (trailer) dictionary
|
---|
493 | delete $$mainDict{Info};
|
---|
494 | # write free entry in xref table if Info existed prior to all edits
|
---|
495 | $newObj{$$infoRef} = '' if $prevInfoRef;
|
---|
496 | }
|
---|
497 | }
|
---|
498 |
|
---|
499 | # rewrite XMP
|
---|
500 | my %xmpInfo = (
|
---|
501 | DataPt => $$info{XMP},
|
---|
502 | Parent => 'PDF',
|
---|
503 | );
|
---|
504 | my $xmpTable = Image::ExifTool::GetTagTable('Image::ExifTool::XMP::Main');
|
---|
505 | my $oldChanged = $$exifTool{CHANGED};
|
---|
506 | my $newXMP = $exifTool->WriteDirectory(\%xmpInfo, $xmpTable);
|
---|
507 | $newXMP = $$info{XMP} ? ${$$info{XMP}} : '' unless defined $newXMP;
|
---|
508 |
|
---|
509 | # WriteDirectory() will increment CHANGED erroneously if non-existent
|
---|
510 | # XMP is deleted as a block -- so check for this
|
---|
511 | unless ($newXMP or $$info{XMP}) {
|
---|
512 | $$exifTool{CHANGED} = $oldChanged;
|
---|
513 | $exifTool->VPrint(0, " (XMP not changed -- still empty)\n");
|
---|
514 | }
|
---|
515 | my ($metaChanged, $rootChanged);
|
---|
516 |
|
---|
517 | if ($$exifTool{CHANGED} != $oldChanged and defined $newXMP) {
|
---|
518 | $metaChanged = 1;
|
---|
519 | } elsif ($prevUpdate and $capture{Root}->{Metadata}) {
|
---|
520 | # must still write Metadata dictionary if it was previously updated
|
---|
521 | my $oldPos = LocateObject($xref, ${$capture{Root}->{Metadata}});
|
---|
522 | $metaChanged = 1 if $oldPos and $oldPos > $prevUpdate;
|
---|
523 | }
|
---|
524 | if ($metaChanged) {
|
---|
525 | if ($newXMP) {
|
---|
526 | unless ($metaRef) {
|
---|
527 | # allocate new PDF object
|
---|
528 | $metaRef = \ "$nextObject 0 R";
|
---|
529 | ++$nextObject;
|
---|
530 | $capture{Root}->{Metadata} = $metaRef;
|
---|
531 | $rootChanged = 1; # set flag to replace Root dictionary
|
---|
532 | }
|
---|
533 | # create the new metadata dictionary to write later
|
---|
534 | $newObj{$$metaRef} = {
|
---|
535 | Type => '/Metadata',
|
---|
536 | Subtype => '/XML',
|
---|
537 | # Length => length $newXMP, (set by WriteObject)
|
---|
538 | _tags => [ qw(Type Subtype) ],
|
---|
539 | _stream => $newXMP,
|
---|
540 | _decrypted => 1, # (this will be ignored if EncryptMetadata is false)
|
---|
541 | };
|
---|
542 | } elsif ($capture{Root}->{Metadata}) {
|
---|
543 | # free existing metadata object
|
---|
544 | $newObj{${$capture{Root}->{Metadata}}} = '';
|
---|
545 | delete $capture{Root}->{Metadata};
|
---|
546 | $rootChanged = 1; # set flag to replace Root dictionary
|
---|
547 | }
|
---|
548 | }
|
---|
549 | # add new Root dictionary if necessary
|
---|
550 | my $rootRef = $$mainDict{Root};
|
---|
551 | unless ($rootRef) {
|
---|
552 | $exifTool->Error("Can't find Root dictionary");
|
---|
553 | return $rtn;
|
---|
554 | }
|
---|
555 | if (not $rootChanged and $prevUpdate) {
|
---|
556 | # must still write Root dictionary if it was previously updated
|
---|
557 | my $oldPos = LocateObject($xref, $$rootRef);
|
---|
558 | $rootChanged = 1 if $oldPos and $oldPos > $prevUpdate;
|
---|
559 | }
|
---|
560 | $newObj{$$rootRef} = $capture{Root} if $rootChanged;
|
---|
561 | #
|
---|
562 | # write incremental update if anything was changed
|
---|
563 | #
|
---|
564 | if ($$exifTool{CHANGED}) {
|
---|
565 | # remember position of original EOF
|
---|
566 | my $oldEOF = Tell($outfile);
|
---|
567 | Write($outfile, $beginComment) or $rtn = -1;
|
---|
568 |
|
---|
569 | # write new objects
|
---|
570 | foreach $objRef (sort keys %newObj) {
|
---|
571 | $objRef =~ /^(\d+) (\d+)/ or $rtn = -1, last;
|
---|
572 | ($id, $gen) = ($1, $2);
|
---|
573 | if (not $newObj{$objRef}) {
|
---|
574 | ++$gen if $gen < 65535;
|
---|
575 | # write free entry in xref table
|
---|
576 | $newXRef{$id} = [ 0, $gen, 'f' ];
|
---|
577 | next;
|
---|
578 | }
|
---|
579 | # create new entry for xref table
|
---|
580 | $newXRef{$id} = [ Tell($outfile) + length($/), $gen, 'n' ];
|
---|
581 | $keyExt = "$id $gen obj"; # (must set for stream encryption)
|
---|
582 | Write($outfile, $/, $keyExt) or $rtn = -1;
|
---|
583 | WriteObject($outfile, $newObj{$objRef}) or $rtn = -1;
|
---|
584 | Write($outfile, $/, 'endobj') or $rtn = -1;
|
---|
585 | }
|
---|
586 |
|
---|
587 | # Prev points to old xref table
|
---|
588 | $$mainDict{Prev} = $capture{startxref} unless $prevUpdate;
|
---|
589 |
|
---|
590 | # add xref entry for head of free-object list
|
---|
591 | $newXRef{0} = [ 0, 65535, 'f' ];
|
---|
592 |
|
---|
593 | # must insert free xref entries from previous exiftool update if applicable
|
---|
594 | if ($prevUpdate) {
|
---|
595 | my $mainFree;
|
---|
596 | # extract free entries from our previous Main xref stream
|
---|
597 | if ($$mainDict{Type} and $$mainDict{Type} eq '/XRef') {
|
---|
598 | $mainFree = GetFreeEntries($xref->{dicts}->[0]);
|
---|
599 | } else {
|
---|
600 | # free entries from Main xref table already captured for us
|
---|
601 | $mainFree = $capture{mainFree};
|
---|
602 | }
|
---|
603 | foreach $id (sort { $a <=> $b } keys %$mainFree) {
|
---|
604 | $newXRef{$id} = $$mainFree{$id} unless $newXRef{$id};
|
---|
605 | }
|
---|
606 | }
|
---|
607 |
|
---|
608 | # connect linked list of free object in our xref table
|
---|
609 | my $prevFree = 0;
|
---|
610 | foreach $id (sort { $b <=> $a } keys %newXRef) { # (reverse sort)
|
---|
611 | next unless $newXRef{$id}->[2] eq 'f'; # skip if not free
|
---|
612 | # no need to add free entry for objects added by us
|
---|
613 | # in previous edits then freed again
|
---|
614 | if ($id >= $nextObject) {
|
---|
615 | delete $newXRef{$id}; # Note: deleting newXRef entry!
|
---|
616 | next;
|
---|
617 | }
|
---|
618 | $newXRef{$id}->[0] = $prevFree;
|
---|
619 | $prevFree = $id;
|
---|
620 | }
|
---|
621 |
|
---|
622 | # prepare our main dictionary for writing
|
---|
623 | $$mainDict{Size} = $nextObject; # update number of objects
|
---|
624 | # must change the ID if it exists
|
---|
625 | if (ref $$mainDict{ID} eq 'ARRAY' and @{$$mainDict{ID}} > 1) {
|
---|
626 | # increment first byte since this is an easy change to make
|
---|
627 | $id = $mainDict->{ID}->[1];
|
---|
628 | if ($id =~ /^<([0-9a-f]{2})/i) {
|
---|
629 | my $byte = unpack('H2',chr((hex($1) + 1) & 0xff));
|
---|
630 | substr($id, 1, 2) = $byte;
|
---|
631 | } elsif ($id =~ /^\((.)/s) {
|
---|
632 | substr($id, 1, 1) = chr((ord($1) + 1) & 0xff);
|
---|
633 | }
|
---|
634 | $mainDict->{ID}->[1] = $id;
|
---|
635 | }
|
---|
636 |
|
---|
637 | # remember position of xref table in file (we will write this next)
|
---|
638 | my $startxref = Tell($outfile) + length($/);
|
---|
639 |
|
---|
640 | # must write xref as a stream in xref-stream-only files
|
---|
641 | if ($$mainDict{Type} and $$mainDict{Type} eq '/XRef') {
|
---|
642 |
|
---|
643 | # create entry for the xref stream object itself
|
---|
644 | $newXRef{$nextObject++} = [ Tell($outfile) + length($/), 0, 'n' ];
|
---|
645 | $$mainDict{Size} = $nextObject;
|
---|
646 | # create xref stream and Index entry
|
---|
647 | $$mainDict{W} = [ 1, 4, 2 ]; # int8u, int32u, int16u ('CNn')
|
---|
648 | $$mainDict{Index} = [ ];
|
---|
649 | $$mainDict{_stream} = '';
|
---|
650 | my @ids = sort { $a <=> $b } keys %newXRef;
|
---|
651 | while (@ids) {
|
---|
652 | my $startID = $ids[0];
|
---|
653 | for (;;) {
|
---|
654 | $id = shift @ids;
|
---|
655 | my ($pos, $gen, $type) = @{$newXRef{$id}};
|
---|
656 | if ($pos > 0xffffffff) {
|
---|
657 | $exifTool->Error('Huge files not yet supported');
|
---|
658 | last;
|
---|
659 | }
|
---|
660 | $$mainDict{_stream} .= pack('CNn', $type eq 'f' ? 0 : 1, $pos, $gen);
|
---|
661 | last if not @ids or $ids[0] != $id + 1;
|
---|
662 | }
|
---|
663 | # add Index entries for this section of the xref stream
|
---|
664 | push @{$$mainDict{Index}}, $startID, $id - $startID + 1;
|
---|
665 | }
|
---|
666 | # write the xref stream object
|
---|
667 | $keyExt = "$id 0 obj"; # (set anyway, but xref stream should NOT be encrypted)
|
---|
668 | Write($outfile, $/, $keyExt) or $rtn = -1;
|
---|
669 | WriteObject($outfile, $mainDict) or $rtn = -1;
|
---|
670 | Write($outfile, $/, 'endobj') or $rtn = -1;
|
---|
671 |
|
---|
672 | } else {
|
---|
673 |
|
---|
674 | # write new xref table
|
---|
675 | Write($outfile, $/, 'xref', $/) or $rtn = -1;
|
---|
676 | # lines must be exactly 20 bytes, so pad newline if necessary
|
---|
677 | my $endl = (length($/) == 1 ? ' ' : '') . $/;
|
---|
678 | my @ids = sort { $a <=> $b } keys %newXRef;
|
---|
679 | while (@ids) {
|
---|
680 | my $startID = $ids[0];
|
---|
681 | $buff = '';
|
---|
682 | for (;;) {
|
---|
683 | $id = shift @ids;
|
---|
684 | $buff .= sprintf("%.10d %.5d %s%s", @{$newXRef{$id}}, $endl);
|
---|
685 | last if not @ids or $ids[0] != $id + 1;
|
---|
686 | }
|
---|
687 | # write this (contiguous-numbered object) section of the xref table
|
---|
688 | Write($outfile, $startID, ' ', $id - $startID + 1, $/, $buff) or $rtn = -1;
|
---|
689 | }
|
---|
690 |
|
---|
691 | # write main (trailer) dictionary
|
---|
692 | Write($outfile, 'trailer') or $rtn = -1;
|
---|
693 | WriteObject($outfile, $mainDict) or $rtn = -1;
|
---|
694 | }
|
---|
695 | # write trailing comment (marker to allow edits to be reverted)
|
---|
696 | Write($outfile, $/, $endComment, $oldEOF, $/) or $rtn = -1;
|
---|
697 |
|
---|
698 | # write pointer to main xref table and EOF marker
|
---|
699 | Write($outfile, 'startxref', $/, $startxref, $/, '%%EOF', $/) or $rtn = -1;
|
---|
700 |
|
---|
701 | } elsif ($prevUpdate) {
|
---|
702 |
|
---|
703 | # nothing new changed, so copy over previous incremental update
|
---|
704 | $raf->Seek($prevUpdate, 0) or $rtn = -1;
|
---|
705 | while ($raf->Read($buff, 65536)) {
|
---|
706 | Write($outfile, $buff) or $rtn = -1;
|
---|
707 | }
|
---|
708 | }
|
---|
709 | undef $newTool;
|
---|
710 | undef %capture;
|
---|
711 | return $rtn;
|
---|
712 | }
|
---|
713 |
|
---|
714 |
|
---|
715 | 1; # end
|
---|
716 |
|
---|
717 | __END__
|
---|
718 |
|
---|
719 | =head1 NAME
|
---|
720 |
|
---|
721 | Image::ExifTool::WritePDF.pl - Write PDF meta information
|
---|
722 |
|
---|
723 | =head1 SYNOPSIS
|
---|
724 |
|
---|
725 | These routines are autoloaded by Image::ExifTool::PDF.
|
---|
726 |
|
---|
727 | =head1 DESCRIPTION
|
---|
728 |
|
---|
729 | This file contains routines to write PDF metadata.
|
---|
730 |
|
---|
731 | =head1 NOTES
|
---|
732 |
|
---|
733 | When writing a PDF, exiftool does not modify the existing data. Instead,
|
---|
734 | the PDF file is appended with an incremental update which can easily be
|
---|
735 | removed to revert the file (by using ExifTool to delete the special
|
---|
736 | C<PDF-update> pseudo group).
|
---|
737 |
|
---|
738 | =head1 AUTHOR
|
---|
739 |
|
---|
740 | Copyright 2003-2011, Phil Harvey (phil at owl.phy.queensu.ca)
|
---|
741 |
|
---|
742 | This library is free software; you can redistribute it and/or modify it
|
---|
743 | under the same terms as Perl itself.
|
---|
744 |
|
---|
745 | =head1 REFERENCES
|
---|
746 |
|
---|
747 | =over 4
|
---|
748 |
|
---|
749 | =item L<http://partners.adobe.com/public/developer/pdf/index_reference.html>
|
---|
750 |
|
---|
751 | =back
|
---|
752 |
|
---|
753 | =head1 SEE ALSO
|
---|
754 |
|
---|
755 | L<Image::ExifTool::PDF(3pm)|Image::ExifTool::PDF>,
|
---|
756 | L<Image::ExifTool(3pm)|Image::ExifTool>
|
---|
757 |
|
---|
758 | =cut
|
---|