source: main/trunk/greenstone2/perllib/cpan/Image/ExifTool/WritePDF.pl@ 34921

Last change on this file since 34921 was 34921, checked in by anupama, 3 years ago

Committing the improvements to EmbeddedMetaPlugin's processing of Keywords vs other metadata fields. Keywords were literally stored as arrays of words rather than phrases in PDFs (at least in Diego's sample PDF), whereas other meta fields like Subjects and Creators stored them as arrays of phrases. To get both to work, Kathy updated EXIF to a newer version, to retrieve the actual EXIF values stored in the PDF. And Kathy and Dr Bainbridge came up with a new option that I added called apply_join_before_split_to_metafields that's a regex which can list the metadata fields to apply the join_before_split to and whcih previously always got applied to all metadata fields. Now it's applied to any *Keywords metafields by default, as that's the metafield we have experience of that behaves differently to the others, as it stores by word instead of phrases. Tested on Diego's sample PDF. Diego has double-checked it to works on his sample PDF too, setting the split char to ; and turning on the join_before_split and leaving apply_join_before_split_to_metafields at its default of .*Keywords. File changes are strings.properties for the tooltip, the plugin introducing the option and working with it and Kathy's EXIF updates affecting cpan/File and cpan/Image.

  • Property svn:executable set to *
File size: 29.0 KB
Line 
1#------------------------------------------------------------------------------
2# File: WritePDF.pl
3#
4# Description: Write PDF meta information
5#
6# Revisions: 12/08/2007 - P. Harvey Created
7#
8# References: 1) http://partners.adobe.com/public/developer/pdf/index_reference.html
9#
10# Notes: The special "PDF-update" group can be deleted to revert exiftool updates
11#------------------------------------------------------------------------------
12package Image::ExifTool::PDF;
13
14use strict;
15use vars qw($lastFetched);
16
17sub WriteObject($$);
18sub EncodeString($);
19sub CryptObject($);
20
21# comments to mark beginning and end of ExifTool incremental update
22my $beginComment = '%BeginExifToolUpdate';
23my $endComment = '%EndExifToolUpdate ';
24
25my $keyExt; # crypt key extension
26my $pdfVer; # version of PDF file we are currently writing
27
28# internal tags used in dictionary objects
29my %myDictTags = (
30 _tags => 1, _stream => 1, _decrypted => 1, _needCrypt => 1,
31 _filtered => 1, _entry_size => 1, _table => 1,
32);
33
34# map for directories that we can add
35my %pdfMap = (
36 XMP => 'PDF',
37);
38
39#------------------------------------------------------------------------------
40# Validate raw PDF values for writing (string date integer real boolean name)
41# Inputs: 0) ExifTool object ref, 1) tagInfo hash ref, 2) raw value ref
42# Returns: error string or undef (and possibly changes value) on success
43sub CheckPDF($$$)
44{
45 my ($et, $tagInfo, $valPtr) = @_;
46 my $format = $$tagInfo{Writable} || $tagInfo->{Table}->{WRITABLE};
47 if (not $format) {
48 return 'No writable format';
49 } elsif ($format eq 'string') {
50 # (encode later because list-type string tags need to be encoded as a unit)
51 } elsif ($format eq 'date') {
52 # be flexible about this for now
53 return 'Bad date format' unless $$valPtr =~ /^\d{4}/;
54 } elsif ($format eq 'integer') {
55 return 'Not an integer' unless Image::ExifTool::IsInt($$valPtr);
56 } elsif ($format eq 'real') {
57 return 'Not a real number' unless $$valPtr =~ /^[+-]?(?=\d|\.\d)\d*(\.\d*)?$/;
58 } elsif ($format eq 'boolean') {
59 $$valPtr = ($$valPtr and $$valPtr !~ /^f/i) ? 'true' : 'false';
60 } elsif ($format eq 'name') {
61 return 'Invalid PDF name' if $$valPtr =~ /\0/;
62 } else {
63 return "Invalid PDF format '${format}'";
64 }
65 return undef; # value is OK
66}
67
68#------------------------------------------------------------------------------
69# Format value for writing to PDF file
70# Inputs: 0) ExifTool ref, 1) value, 2) format string (string,date,integer,real,boolean,name)
71# Returns: formatted value or undef on error
72# Notes: Called at write time, so $pdfVer may be checked
73sub WritePDFValue($$$)
74{
75 my ($et, $val, $format) = @_;
76 if (not $format) {
77 return undef;
78 } elsif ($format eq 'string') {
79 # encode as UCS2 if it contains any special characters
80 $val = "\xfe\xff" . $et->Encode($val,'UCS2','MM') if $val =~ /[\x80-\xff]/;
81 EncodeString(\$val);
82 } elsif ($format eq 'date') {
83 # convert date to "D:YYYYmmddHHMMSS+-HH'MM'" format
84 $val =~ s/([-+]\d{2}):(\d{2})/${1}'${2}'/; # change timezone delimiters if necessary
85 $val =~ tr/ ://d; # remove spaces and colons
86 $val = "D:$val"; # add leading "D:"
87 EncodeString(\$val);
88 } elsif ($format =~ /^(integer|real|boolean)$/) {
89 # no reformatting necessary
90 } elsif ($format eq 'name') {
91 return undef if $val =~ /\0/;
92 if ($pdfVer >= 1.2) {
93 $val =~ s/([\t\n\f\r ()<>[\]{}\/%#])/sprintf('#%.2x',ord $1)/sge;
94 } else {
95 return undef if $val =~ /[\t\n\f\r ()<>[\]{}\/%]/;
96 }
97 $val = "/$val"; # add leading '/'
98 } else {
99 return undef;
100 }
101 return $val;
102}
103
104#------------------------------------------------------------------------------
105# Encode PDF string
106# Inputs: 0) reference to PDF string
107# Returns: (updates string with encoded data)
108sub EncodeString($)
109{
110 my $strPt = shift;
111 if (ref $$strPt eq 'ARRAY') {
112 my $str;
113 foreach $str (@{$$strPt}) {
114 EncodeString(\$str);
115 }
116 return;
117 }
118 Crypt($strPt, $keyExt, 1); # encrypt if necessary
119 # encode as hex if we have any control characters (except tab)
120 if ($$strPt=~/[\x00-\x08\x0a-\x1f\x7f\xff]/) {
121 # encode as hex
122 my $str='';
123 my $len = length $$strPt;
124 my $i = 0;
125 for (;;) {
126 my $n = $len - $i or last;
127 $n = 40 if $n > 40; # break into reasonable-length lines
128 $str .= $/ if $i;
129 $str .= unpack('H*', substr($$strPt, $i, $n));
130 $i += $n;
131 }
132 $$strPt = "<$str>";
133 } else {
134 $$strPt =~ s/([()\\])/\\$1/g; # must escape round brackets and backslashes
135 $$strPt = "($$strPt)";
136 }
137}
138
139#------------------------------------------------------------------------------
140# Encrypt an object
141# Inputs: 0) PDF object (encrypts in place)
142# Notes: Encrypts according to "_needCrypt" dictionary entry,
143# then deletes "_needCrypt" when done
144sub CryptObject($)
145{
146 my $obj = $_[0];
147 if (not ref $obj) {
148 # only literal strings and hex strings are encrypted
149 if ($obj =~ /^[(<]/) {
150 undef $lastFetched; # (reset this just in case)
151 my $val = ReadPDFValue($obj);
152 EncodeString(\$val);
153 $_[0] = $val;
154 }
155 } elsif (ref $obj eq 'HASH') {
156 my $tag;
157 my $needCrypt = $$obj{_needCrypt};
158 foreach $tag (keys %$obj) {
159 next if $myDictTags{$tag};
160 # re-encrypt necessary objects only (others are still encrypted)
161 # (this is really annoying, but is necessary because objects stored
162 # in encrypted streams are decrypted when extracting, but strings stored
163 # as direct objects are decrypted later since they must be decoded
164 # before being decrypted)
165 if ($needCrypt) {
166 next unless defined $$needCrypt{$tag} ? $$needCrypt{$tag} : $$needCrypt{'*'};
167 }
168 CryptObject($$obj{$tag});
169 }
170 delete $$obj{_needCrypt}; # avoid re-re-crypting
171 } elsif (ref $obj eq 'ARRAY') {
172 my $val;
173 foreach $val (@$obj) {
174 CryptObject($val);
175 }
176 }
177}
178
179#------------------------------------------------------------------------------
180# Get free entries from xref stream dictionary that we wrote previously
181# Inputs: 0) xref dictionary reference
182# Returns: free entry hash (keys are object numbers, values are xref entry list refs)
183sub GetFreeEntries($)
184{
185 my $dict = shift;
186 my %xrefFree;
187 # from the start we have only written xref stream entries in 'CNn' format,
188 # so we can simplify things for now and only support this type of entry
189 my $w = $$dict{W};
190 if (ref $w eq 'ARRAY' and "@$w" eq '1 4 2') {
191 my $size = $$dict{_entry_size}; # this will be 7 for 'CNn'
192 my $index = $$dict{Index};
193 my $len = length $$dict{_stream};
194 # scan the table for free objects
195 my $num = scalar(@$index) / 2;
196 my $pos = 0;
197 my ($i, $j);
198 for ($i=0; $i<$num; ++$i) {
199 my $start = $$index[$i*2];
200 my $count = $$index[$i*2+1];
201 for ($j=0; $j<$count; ++$j) {
202 last if $pos + $size > $len;
203 my @t = unpack("x$pos CNn", $$dict{_stream});
204 # add entry if object was free
205 $xrefFree{$start+$j} = [ $t[1], $t[2], 'f' ] if $t[0] == 0;
206 $pos += $size; # step to next entry
207 }
208 }
209 }
210 return \%xrefFree;
211}
212
213#------------------------------------------------------------------------------
214# Write PDF object
215# Inputs: 0) output file or scalar ref, 1) PDF object
216# Returns: true on success
217# Notes: inserts white space before object, but none afterward
218sub WriteObject($$)
219{
220 my ($outfile, $obj) = @_;
221 if (ref $obj eq 'SCALAR') {
222 Write($outfile, ' ', $$obj) or return 0;
223 } elsif (ref $obj eq 'ARRAY') {
224 # write array
225 Write($outfile, @$obj > 10 ? $/ : ' ', '[') or return 0;
226 my $item;
227 foreach $item (@$obj) {
228 WriteObject($outfile, $item) or return 0;
229 }
230 Write($outfile, ' ]') or return 0;
231 } elsif (ref $obj eq 'HASH') {
232 # write dictionary
233 my $tag;
234 Write($outfile, $/, '<<') or return 0;
235 # prepare object as required if it has a stream
236 if ($$obj{_stream}) {
237 # encrypt stream if necessary (must be done before determining Length)
238 CryptStream($obj, $keyExt) if $$obj{_decrypted};
239 # write "Length" entry in dictionary
240 $$obj{Length} = length $$obj{_stream};
241 push @{$$obj{_tags}}, 'Length';
242 # delete Filter-related entries since we don't yet write filtered streams
243 delete $$obj{Filter};
244 delete $$obj{DecodeParms};
245 delete $$obj{DL};
246 }
247 # don't write my internal entries
248 my %wrote = %myDictTags;
249 # write tags in original order, adding new ones later alphabetically
250 foreach $tag (@{$$obj{_tags}}, sort keys %$obj) {
251 # ignore already-written or missing entries
252 next if $wrote{$tag} or not defined $$obj{$tag};
253 Write($outfile, $/, "/$tag") or return 0;
254 WriteObject($outfile, $$obj{$tag}) or return 0;
255 $wrote{$tag} = 1;
256 }
257 Write($outfile, $/, '>>') or return 0;
258 if ($$obj{_stream}) {
259 # write object stream
260 # (a single 0x0d may not follow 'stream', so use 0x0d+0x0a here to be sure)
261 Write($outfile, $/, "stream\x0d\x0a") or return 0;
262 Write($outfile, $$obj{_stream}, $/, 'endstream') or return 0;
263 }
264 } else {
265 # write string, number, name or object reference
266 Write($outfile, ' ', $obj);
267 }
268 return 1;
269}
270
271#------------------------------------------------------------------------------
272# Write PDF File
273# Inputs: 0) ExifTool object reference, 1) dirInfo reference
274# Returns: 1 on success, 0 if not valid PDF file, -1 on write error
275# Notes: dictionary structure: Main --+--> Info
276# +--> Root --> Metadata
277sub WritePDF($$)
278{
279 my ($et, $dirInfo) = @_;
280 my $raf = $$dirInfo{RAF};
281 my $outfile = $$dirInfo{OutFile};
282 my ($buff, %capture, %newXRef, %newObj, $objRef);
283 my ($out, $id, $gen, $obj);
284
285 # make sure this is a PDF file
286 my $pos = $raf->Tell();
287 $raf->Read($buff, 1024) >= 8 or return 0;
288 $buff =~ /^(\s*)%PDF-(\d+\.\d+)/ or return 0;
289 $$et{PDFBase} = length $1;
290 $raf->Seek($pos, 0);
291
292 # create a new ExifTool object and use it to read PDF and XMP information
293 my $newTool = new Image::ExifTool;
294 $newTool->Options(List => 1);
295 $newTool->Options(Password => $et->Options('Password'));
296 $$newTool{PDF_CAPTURE} = \%capture;
297 my $info = $newTool->ImageInfo($raf, 'XMP', 'PDF:*', 'Error', 'Warning');
298 # not a valid PDF file unless we got a version number
299 # (note: can't just check $$info{PDFVersion} due to possibility of XMP-pdf:PDFVersion)
300 my $vers = $newTool->GetInfo('PDF:PDFVersion');
301 # take highest version number if multiple versions in an incremental save
302 ($pdfVer) = sort { $b <=> $a } values %$vers;
303 $pdfVer or $et->Error('Missing PDF:PDFVersion'), return 0;
304 # check version number
305 if ($pdfVer > 1.7) {
306 $et->Warn("The PDF $pdfVer specification is not freely available", 1);
307 # (so writing by ExifTool is based on trial and error)
308 }
309 # fail if we had any serious errors while extracting information
310 if ($capture{Error} or $$info{Error}) {
311 $et->Error($capture{Error} || $$info{Error});
312 return 1;
313 }
314 # make sure we have everything we need to rewrite this file
315 foreach $obj (qw(Main Root xref)) {
316 next if $capture{$obj};
317 # any warning we received may give a clue about why this object is missing
318 $et->Error($$info{Warning}) if $$info{Warning};
319 $et->Error("Can't find $obj object");
320 return 1;
321 }
322 $et->InitWriteDirs(\%pdfMap, 'XMP');
323
324 # copy file up to start of previous exiftool update or end of file
325 # (comment, startxref & EOF with 11-digit offsets and 2-byte newlines is 63 bytes)
326 $raf->Seek(-64,2) and $raf->Read($buff,64) and $raf->Seek(0,0) or return -1;
327 my $rtn = 1;
328 my $prevUpdate;
329 # (now $endComment is before "startxref", but pre-7.41 we wrote it after the EOF)
330 if ($buff =~ /$endComment(\d+)\s+(startxref\s+\d+\s+%%EOF\s+)?$/s) {
331 $prevUpdate = $1;
332 # rewrite the file up to the original EOF
333 Image::ExifTool::CopyBlock($raf, $outfile, $prevUpdate + $$et{PDFBase}) or $rtn = -1;
334 # verify that we are now at the start of an ExifTool update
335 unless ($raf->Read($buff, length $beginComment) and $buff eq $beginComment) {
336 $et->Error('Previous ExifTool update is corrupted');
337 return $rtn;
338 }
339 $raf->Seek($prevUpdate+$$et{PDFBase}, 0) or $rtn = -1;
340 if ($$et{DEL_GROUP}{'PDF-update'}) {
341 $et->VPrint(0, " Reverted previous ExifTool updates\n");
342 ++$$et{CHANGED};
343 return $rtn;
344 }
345 } elsif ($$et{DEL_GROUP}{'PDF-update'}) {
346 $et->Error('File contains no previous ExifTool update');
347 return $rtn;
348 } else {
349 # rewrite the whole file
350 while ($raf->Read($buff, 65536)) {
351 Write($outfile, $buff) or $rtn = -1;
352 }
353 }
354 $out = $et->Options('TextOut') if $et->Options('Verbose');
355#
356# create our new PDF objects to write
357#
358 my $xref = $capture{xref};
359 my $mainDict = $capture{Main};
360 my $metaRef = $capture{Root}->{Metadata};
361 my $nextObject;
362
363 # start by finding reference for info object in case it was deleted
364 # in a previous edit so we can re-use it here if adding PDF Info
365 my $prevInfoRef;
366 if ($prevUpdate) {
367 unless ($capture{Prev}) {
368 $et->Error("Can't locate trailer dictionary prior to last edit");
369 return $rtn;
370 }
371 $prevInfoRef = $capture{Prev}->{Info};
372 # start from previous size so the xref table doesn't continue
373 # to grow if we repeatedly add and delete the Metadata object
374 $nextObject = $capture{Prev}->{Size};
375 # don't re-use Meta reference if object was added in a previous update
376 undef $metaRef if $metaRef and $$metaRef=~/^(\d+)/ and $1 >= $nextObject;
377 } else {
378 $prevInfoRef = $$mainDict{Info};
379 $nextObject = $$mainDict{Size};
380 }
381
382 # delete entire PDF group if specified
383 my $infoChanged = 0;
384 if ($$et{DEL_GROUP}{PDF} and $capture{Info}) {
385 delete $capture{Info};
386 $info = { XMP => $$info{XMP} }; # remove extracted PDF tags
387 print $out " Deleting PDF Info dictionary\n" if $out;
388 ++$infoChanged;
389 }
390
391 # create new Info dictionary if necessary
392 $capture{Info} = { _tags => [ ] } unless $capture{Info};
393 my $infoDict = $capture{Info};
394
395 # must pre-determine Info reference to be used in encryption
396 my $infoRef = $prevInfoRef || \ "$nextObject 0 R";
397 $keyExt = $$infoRef;
398
399 # must encrypt all values in dictionary if they came from an encrypted stream
400 CryptObject($infoDict) if $$infoDict{_needCrypt};
401
402 # must set line separator before calling WritePDFValue()
403 local $/ = $capture{newline};
404
405 # rewrite PDF Info tags
406 my $newTags = $et->GetNewTagInfoHash(\%Image::ExifTool::PDF::Info);
407 my $tagID;
408 foreach $tagID (sort keys %$newTags) {
409 my $tagInfo = $$newTags{$tagID};
410 my $nvHash = $et->GetNewValueHash($tagInfo);
411 my (@vals, $deleted);
412 my $tag = $$tagInfo{Name};
413 my $val = $$info{$tag};
414 my $tagKey = $tag;
415 unless (defined $val) {
416 # must check for tag key with copy number
417 ($tagKey) = grep /^$tag/, keys %$info;
418 $val = $$info{$tagKey} if $tagKey;
419 }
420 if (defined $val) {
421 my @oldVals;
422 if (ref $val eq 'ARRAY') {
423 @oldVals = @$val;
424 $val = shift @oldVals;
425 }
426 for (;;) {
427 if ($et->IsOverwriting($nvHash, $val) > 0) {
428 $deleted = 1;
429 $et->VerboseValue("- PDF:$tag", $val);
430 ++$infoChanged;
431 } else {
432 push @vals, $val;
433 }
434 last unless @oldVals;
435 $val = shift @oldVals;
436 }
437 # don't write this out if we deleted all values
438 delete $$infoDict{$tagID} unless @vals;
439 } elsif ($$nvHash{EditOnly}) {
440 next;
441 }
442 # decide whether we want to write this tag
443 # (native PDF information is always preferred, so don't check IsCreating)
444 next unless $deleted or $$tagInfo{List} or not exists $$infoDict{$tagID};
445
446 # add new values to existing ones
447 my @newVals = $et->GetNewValue($nvHash);
448 if (@newVals) {
449 push @vals, @newVals;
450 ++$infoChanged;
451 if ($out) {
452 foreach $val (@newVals) {
453 $et->VerboseValue("+ PDF:$tag", $val);
454 }
455 }
456 }
457 unless (@vals) {
458 # remove this entry from the Info dictionary if no values remain
459 delete $$infoDict{$tagID};
460 next;
461 }
462 # format value(s) for writing to PDF file
463 my $writable = $$tagInfo{Writable} || $Image::ExifTool::PDF::Info{WRITABLE};
464 if (not $$tagInfo{List}) {
465 $val = WritePDFValue($et, shift(@vals), $writable);
466 } elsif ($$tagInfo{List} eq 'array') {
467 foreach $val (@vals) {
468 $val = WritePDFValue($et, $val, $writable);
469 defined $val or undef(@vals), last;
470 }
471 $val = @vals ? \@vals : undef;
472 } else {
473 $val = WritePDFValue($et, join($et->Options('ListSep'), @vals), $writable);
474 }
475 if (defined $val) {
476 $$infoDict{$tagID} = $val;
477 ++$infoChanged;
478 } else {
479 $et->Warn("Error converting $$tagInfo{Name} value");
480 }
481 }
482 if ($infoChanged) {
483 $$et{CHANGED} += $infoChanged;
484 } elsif ($prevUpdate) {
485 # must still write Info dictionary if it was previously updated
486 my $oldPos = LocateObject($xref, $$infoRef);
487 $infoChanged = 1 if $oldPos and $oldPos > $prevUpdate;
488 }
489
490 # create new Info dictionary if necessary
491 if ($infoChanged) {
492 # increment object count if we used a new object here
493 if (scalar(keys %{$capture{Info}}) > 1) {
494 $newObj{$$infoRef} = $capture{Info};# save to write later
495 $$mainDict{Info} = $infoRef; # add reference to trailer dictionary
496 ++$nextObject unless $prevInfoRef;
497 } else {
498 # remove Info from Main (trailer) dictionary
499 delete $$mainDict{Info};
500 # write free entry in xref table if Info existed prior to all edits
501 $newObj{$$infoRef} = '' if $prevInfoRef;
502 }
503 }
504
505 # rewrite XMP
506 my %xmpInfo = (
507 DataPt => $$info{XMP},
508 Parent => 'PDF',
509 );
510 my $xmpTable = Image::ExifTool::GetTagTable('Image::ExifTool::XMP::Main');
511 my $oldChanged = $$et{CHANGED};
512 my $newXMP = $et->WriteDirectory(\%xmpInfo, $xmpTable);
513 $newXMP = $$info{XMP} ? ${$$info{XMP}} : '' unless defined $newXMP;
514
515 # WriteDirectory() will increment CHANGED erroneously if non-existent
516 # XMP is deleted as a block -- so check for this
517 unless ($newXMP or $$info{XMP}) {
518 $$et{CHANGED} = $oldChanged;
519 $et->VPrint(0, " (XMP not changed -- still empty)\n");
520 }
521 my ($metaChanged, $rootChanged);
522
523 if ($$et{CHANGED} != $oldChanged and defined $newXMP) {
524 $metaChanged = 1;
525 } elsif ($prevUpdate and $capture{Root}->{Metadata}) {
526 # must still write Metadata dictionary if it was previously updated
527 my $oldPos = LocateObject($xref, ${$capture{Root}->{Metadata}});
528 $metaChanged = 1 if $oldPos and $oldPos > $prevUpdate;
529 }
530 if ($metaChanged) {
531 if ($newXMP) {
532 unless ($metaRef) {
533 # allocate new PDF object
534 $metaRef = \ "$nextObject 0 R";
535 ++$nextObject;
536 $capture{Root}->{Metadata} = $metaRef;
537 $rootChanged = 1; # set flag to replace Root dictionary
538 }
539 # create the new metadata dictionary to write later
540 $newObj{$$metaRef} = {
541 Type => '/Metadata',
542 Subtype => '/XML',
543 # Length => length $newXMP, (set by WriteObject)
544 _tags => [ qw(Type Subtype) ],
545 _stream => $newXMP,
546 _decrypted => 1, # (this will be ignored if EncryptMetadata is false)
547 };
548 } elsif ($capture{Root}->{Metadata}) {
549 # free existing metadata object
550 $newObj{${$capture{Root}->{Metadata}}} = '';
551 delete $capture{Root}->{Metadata};
552 $rootChanged = 1; # set flag to replace Root dictionary
553 }
554 }
555 # add new Root dictionary if necessary
556 my $rootRef = $$mainDict{Root};
557 unless ($rootRef) {
558 $et->Error("Can't find Root dictionary");
559 return $rtn;
560 }
561 if (not $rootChanged and $prevUpdate) {
562 # must still write Root dictionary if it was previously updated
563 my $oldPos = LocateObject($xref, $$rootRef);
564 $rootChanged = 1 if $oldPos and $oldPos > $prevUpdate;
565 }
566 $newObj{$$rootRef} = $capture{Root} if $rootChanged;
567#
568# write incremental update if anything was changed
569#
570 if ($$et{CHANGED}) {
571 # remember position of original EOF
572 my $oldEOF = Tell($outfile) - $$et{PDFBase};
573 Write($outfile, $beginComment) or $rtn = -1;
574
575 # write new objects
576 foreach $objRef (sort keys %newObj) {
577 $objRef =~ /^(\d+) (\d+)/ or $rtn = -1, last;
578 ($id, $gen) = ($1, $2);
579 if (not $newObj{$objRef}) {
580 ++$gen if $gen < 65535;
581 # write free entry in xref table
582 $newXRef{$id} = [ 0, $gen, 'f' ];
583 next;
584 }
585 # create new entry for xref table
586 $newXRef{$id} = [ Tell($outfile) - $$et{PDFBase} + length($/), $gen, 'n' ];
587 $keyExt = "$id $gen obj"; # (must set for stream encryption)
588 Write($outfile, $/, $keyExt) or $rtn = -1;
589 WriteObject($outfile, $newObj{$objRef}) or $rtn = -1;
590 Write($outfile, $/, 'endobj') or $rtn = -1;
591 }
592
593 # Prev points to old xref table
594 $$mainDict{Prev} = $capture{startxref} unless $prevUpdate;
595
596 # add xref entry for head of free-object list
597 $newXRef{0} = [ 0, 65535, 'f' ];
598
599 # must insert free xref entries from previous exiftool update if applicable
600 if ($prevUpdate) {
601 my $mainFree;
602 # extract free entries from our previous Main xref stream
603 if ($$mainDict{Type} and $$mainDict{Type} eq '/XRef') {
604 $mainFree = GetFreeEntries($xref->{dicts}->[0]);
605 } else {
606 # free entries from Main xref table already captured for us
607 $mainFree = $capture{mainFree};
608 }
609 foreach $id (sort { $a <=> $b } keys %$mainFree) {
610 $newXRef{$id} = $$mainFree{$id} unless $newXRef{$id};
611 }
612 }
613
614 # connect linked list of free object in our xref table
615 my $prevFree = 0;
616 foreach $id (sort { $b <=> $a } keys %newXRef) { # (reverse sort)
617 next unless $newXRef{$id}->[2] eq 'f'; # skip if not free
618 # no need to add free entry for objects added by us
619 # in previous edits then freed again
620 if ($id >= $nextObject) {
621 delete $newXRef{$id}; # Note: deleting newXRef entry!
622 next;
623 }
624 $newXRef{$id}->[0] = $prevFree;
625 $prevFree = $id;
626 }
627
628 # prepare our main dictionary for writing
629 $$mainDict{Size} = $nextObject; # update number of objects
630 # must change the ID if it exists
631 if (ref $$mainDict{ID} eq 'ARRAY' and @{$$mainDict{ID}} > 1) {
632 # increment first byte since this is an easy change to make
633 $id = $mainDict->{ID}->[1];
634 if ($id =~ /^<([0-9a-f]{2})/i) {
635 my $byte = unpack('H2',chr((hex($1) + 1) & 0xff));
636 substr($id, 1, 2) = $byte;
637 } elsif ($id =~ /^\((.)/s and $1 ne '\\' and $1 ne ')' and $1 ne '(') {
638 my $ch = chr((ord($1) + 1) & 0xff);
639 # avoid generating characters that could cause problems
640 $ch = 'a' if $ch =~ /[()\\\x00-\x08\x0a-\x1f\x7f\xff]/;
641 substr($id, 1, 1) = $ch;
642 }
643 $mainDict->{ID}->[1] = $id;
644 }
645
646 # remember position of xref table in file (we will write this next)
647 my $startxref = Tell($outfile) - $$et{PDFBase} + length($/);
648
649 # must write xref as a stream in xref-stream-only files
650 if ($$mainDict{Type} and $$mainDict{Type} eq '/XRef') {
651
652 # create entry for the xref stream object itself
653 $newXRef{$nextObject++} = [ Tell($outfile) - $$et{PDFBase} + length($/), 0, 'n' ];
654 $$mainDict{Size} = $nextObject;
655 # create xref stream and Index entry
656 $$mainDict{W} = [ 1, 4, 2 ]; # int8u, int32u, int16u ('CNn')
657 $$mainDict{Index} = [ ];
658 $$mainDict{_stream} = '';
659 my @ids = sort { $a <=> $b } keys %newXRef;
660 while (@ids) {
661 my $startID = $ids[0];
662 for (;;) {
663 $id = shift @ids;
664 my ($pos, $gen, $type) = @{$newXRef{$id}};
665 if ($pos > 0xffffffff) {
666 $et->Error('Huge files not yet supported');
667 last;
668 }
669 $$mainDict{_stream} .= pack('CNn', $type eq 'f' ? 0 : 1, $pos, $gen);
670 last if not @ids or $ids[0] != $id + 1;
671 }
672 # add Index entries for this section of the xref stream
673 push @{$$mainDict{Index}}, $startID, $id - $startID + 1;
674 }
675 # write the xref stream object
676 $keyExt = "$id 0 obj"; # (set anyway, but xref stream should NOT be encrypted)
677 Write($outfile, $/, $keyExt) or $rtn = -1;
678 WriteObject($outfile, $mainDict) or $rtn = -1;
679 Write($outfile, $/, 'endobj') or $rtn = -1;
680
681 } else {
682
683 # write new xref table
684 Write($outfile, $/, 'xref', $/) or $rtn = -1;
685 # lines must be exactly 20 bytes, so pad newline if necessary
686 my $endl = (length($/) == 1 ? ' ' : '') . $/;
687 my @ids = sort { $a <=> $b } keys %newXRef;
688 while (@ids) {
689 my $startID = $ids[0];
690 $buff = '';
691 for (;;) {
692 $id = shift @ids;
693 $buff .= sprintf("%.10d %.5d %s%s", @{$newXRef{$id}}, $endl);
694 last if not @ids or $ids[0] != $id + 1;
695 }
696 # write this (contiguous-numbered object) section of the xref table
697 Write($outfile, $startID, ' ', $id - $startID + 1, $/, $buff) or $rtn = -1;
698 }
699
700 # write main (trailer) dictionary
701 Write($outfile, 'trailer') or $rtn = -1;
702 WriteObject($outfile, $mainDict) or $rtn = -1;
703 }
704 # write trailing comment (marker to allow edits to be reverted)
705 Write($outfile, $/, $endComment, $oldEOF, $/) or $rtn = -1;
706
707 # write pointer to main xref table and EOF marker
708 Write($outfile, 'startxref', $/, $startxref, $/, '%%EOF', $/) or $rtn = -1;
709
710 } elsif ($prevUpdate) {
711
712 # nothing new changed, so copy over previous incremental update
713 $raf->Seek($prevUpdate+$$et{PDFBase}, 0) or $rtn = -1;
714 while ($raf->Read($buff, 65536)) {
715 Write($outfile, $buff) or $rtn = -1;
716 }
717 }
718 if ($rtn > 0 and $$et{CHANGED} and ($$et{DEL_GROUP}{PDF} or $$et{DEL_GROUP}{XMP})) {
719 $et->Warn('ExifTool PDF edits are reversible. Deleted tags may be recovered!', 1);
720 }
721 undef $newTool;
722 undef %capture;
723 return $rtn;
724}
725
726
7271; # end
728
729__END__
730
731=head1 NAME
732
733Image::ExifTool::WritePDF.pl - Write PDF meta information
734
735=head1 SYNOPSIS
736
737These routines are autoloaded by Image::ExifTool::PDF.
738
739=head1 DESCRIPTION
740
741This file contains routines to write PDF metadata.
742
743=head1 NOTES
744
745When writing a PDF, exiftool does not modify the existing data. Instead,
746the PDF file is appended with an incremental update which can easily be
747removed to revert the file (by using ExifTool to delete the special
748C<PDF-update> pseudo group).
749
750=head1 AUTHOR
751
752Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com)
753
754This library is free software; you can redistribute it and/or modify it
755under the same terms as Perl itself.
756
757=head1 REFERENCES
758
759=over 4
760
761=item L<http://partners.adobe.com/public/developer/pdf/index_reference.html>
762
763=back
764
765=head1 SEE ALSO
766
767L<Image::ExifTool::PDF(3pm)|Image::ExifTool::PDF>,
768L<Image::ExifTool(3pm)|Image::ExifTool>
769
770=cut
Note: See TracBrowser for help on using the repository browser.