1 | #!/usr/bin/perl
|
---|
2 | #Jessica Thurston
|
---|
3 | #usage: perl amend_pdf.pl filename
|
---|
4 | #This program takes a pdf file 'filename'
|
---|
5 | #and modifys it to include outline entries
|
---|
6 | #(bookmarks).
|
---|
7 | #This is achieved using a pdf library which
|
---|
8 | #parses the pdf file into objects allowing
|
---|
9 | #this script to more easily manipulate the pdf
|
---|
10 | #objects and then modify the file by means
|
---|
11 | #of incremental updates. (modifys the necessary
|
---|
12 | #pdf objects and appends them to the end of
|
---|
13 | #the file, then updates the xreference table and
|
---|
14 | #trailer). This library can be downloaded at
|
---|
15 | # http://www.sanface.com/PDF-lib/
|
---|
16 | #A file url.txt stored in the same
|
---|
17 | #directory is read containing two columns of data:
|
---|
18 | #title of related pdf document url of related pdf document
|
---|
19 | #The pdf document is modified so that the title of the
|
---|
20 | #related pdf documents are displayed as bookmarks and
|
---|
21 | #when the user clicks on it the web browser opens at
|
---|
22 | #the url stored in the same column of url.txt.
|
---|
23 | #
|
---|
24 | #This program was designed to be used with the Greenstone
|
---|
25 | #digital library in the following ways:
|
---|
26 | #-When a user has built a collection containing
|
---|
27 | # pdf documents, they have a choice of either displaying
|
---|
28 | # the page in the web browser in gml form or saving the
|
---|
29 | # original pdf document to view in an acrobat pdf reader.
|
---|
30 | #-The related document file url.txt can be downloaded
|
---|
31 | # in the same way if the user has included in the format
|
---|
32 | # string for that page (ie search list) in the collection
|
---|
33 | # configuration file [urllink] related documents [/urllink].
|
---|
34 | # This will create a html link 'related documents' which
|
---|
35 | # when clicked on will enable the user to save a file url.txt
|
---|
36 | # containing the related documents for that particular document.
|
---|
37 | #-This file has been written upon running the perl script
|
---|
38 | # relationPDF.pl which will also amend the gml documents to
|
---|
39 | # include the 'urllink' information enabling the user to
|
---|
40 | # include the related document links in config file format
|
---|
41 | # strings.
|
---|
42 | #-So once this related doc info has been included the user
|
---|
43 | # can download both the pdf file (doc.pdf) and the related
|
---|
44 | # document file for that particluar document (url.txt).
|
---|
45 | #-perl amend_pdf.pl doc.pdf is then run in the command line
|
---|
46 | # modifying the pdf document to the desired effect.
|
---|
47 | #bugs:
|
---|
48 | #-cannot be run more than once on the same file
|
---|
49 | #-doesn't make use of free objects (free objects
|
---|
50 | # are quite unlikely though, I didn't come across
|
---|
51 | # any while looking through pdfs)
|
---|
52 | #-many other things I haven't bothered testing
|
---|
53 |
|
---|
54 | use Carp;
|
---|
55 | use Getopt::Long;
|
---|
56 | use PDF;
|
---|
57 |
|
---|
58 | $objects = 0;
|
---|
59 | $offsets = 1;
|
---|
60 |
|
---|
61 |
|
---|
62 | #for each command line argument add the outlines
|
---|
63 | #contained in url.txt
|
---|
64 | #foreach (@ARGV) {
|
---|
65 | # create_outlines($_);
|
---|
66 | #}
|
---|
67 |
|
---|
68 | if(defined $ARGV[0]){
|
---|
69 | $pdf_document = $ARGV[0];
|
---|
70 | } else {
|
---|
71 | print STDERR "You must supply pdf document filename\n";
|
---|
72 | return;
|
---|
73 | }
|
---|
74 | if(defined $ARGV[1]){
|
---|
75 | $related_docs = $ARGV[1];
|
---|
76 | } else {
|
---|
77 | print STDERR "You must supply related document filename\n";
|
---|
78 | return;
|
---|
79 | }
|
---|
80 | create_outlines($pdf_document);
|
---|
81 |
|
---|
82 | exit(1);
|
---|
83 |
|
---|
84 |
|
---|
85 | #this function first checks to see is the file is indeed
|
---|
86 | #a pdf file - if not then exits. If it is a pdf then it
|
---|
87 | #checks to see if it is encrypted. If it is then exits.
|
---|
88 | #Next we check if the outlines dictionary exists in the
|
---|
89 | #document catalog.
|
---|
90 | #-If the outline entry exists but there are no actual outlines
|
---|
91 | # present then call add_outlines but pass on the existing
|
---|
92 | # outline dictionary.
|
---|
93 | #-otherwise call modify_outlines to modify the existing outline
|
---|
94 | # structure to include new outlines.
|
---|
95 | #If outlines dictionary doesn't exist call add_outlines.
|
---|
96 | sub create_outlines {
|
---|
97 |
|
---|
98 | my $filename = shift;
|
---|
99 |
|
---|
100 | #parse pdf filename
|
---|
101 | my @filelist = split("/", $filename);
|
---|
102 | my $file = pop @filelist;
|
---|
103 |
|
---|
104 | print STDERR "pdf file to be manipulated: $file\n";
|
---|
105 | print STDERR "pdf file to be saved: tmp_$file\n";
|
---|
106 |
|
---|
107 | #make a copy of that file in this directory
|
---|
108 | print STDERR "cp $filename $file\n";
|
---|
109 | `cp $filename $file\n`;
|
---|
110 | `cp $filename tmp_$file\n`;
|
---|
111 |
|
---|
112 | #create and parse a new 'pdf' object
|
---|
113 | my $PDFfile = PDF->new($file);
|
---|
114 |
|
---|
115 | #if it is a pdf file
|
---|
116 | if ($PDFfile->{"Header"}){
|
---|
117 |
|
---|
118 | if($PDFfile->IscryptPDF){
|
---|
119 | #an encrypted file so cannot continue with
|
---|
120 | #adding the related doc outlines to it
|
---|
121 | print STDERR "file \"$file\" is encrypted \n";
|
---|
122 | return;
|
---|
123 | }
|
---|
124 |
|
---|
125 | #display some relevant information about the document
|
---|
126 | print STDERR "Author: ", $PDFfile->GetInfo ("Author"), "\n";
|
---|
127 | print STDERR "Title: ", $PDFfile->GetInfo ("Title"), "\n";
|
---|
128 | print STDERR "Subject: ", $PDFfile->GetInfo ("Subject"), "\n";
|
---|
129 | print STDERR "Updated: ", $PDFfile->{"Updated"}, "\n";
|
---|
130 |
|
---|
131 | #if the pdf document already includes an outline dictionary
|
---|
132 | if (defined $PDFfile->{"Catalog"}{"/Outlines"}) {
|
---|
133 |
|
---|
134 | #get the outline object from the indirect ref
|
---|
135 | my $outline_data = $PDFfile->GetObject($PDFfile->{"Catalog"}{"/Outlines"});
|
---|
136 | #obtain the number of existing outlines
|
---|
137 | $PDFfile->{"Outlines"}{"/Count"} = $outline_data->{"/Count"};
|
---|
138 | print STDERR "number of existing outlines: $PDFfile->{\"Outlines\"}{\"/Count\"}\n";
|
---|
139 |
|
---|
140 | #this means that the pdf file had an outline dictionary but
|
---|
141 | #did not actually include any outlines.
|
---|
142 | if($PDFfile->{"Outlines"}{"/Count"} == 0){
|
---|
143 | #obtain object number of outline dictionary and pass to add_outlines
|
---|
144 | my $dictionary = split(/\s/, $PDFfile->{"Catalog"}{"/Outlines"});
|
---|
145 |
|
---|
146 | add_outlines($PDFfile, $file, $dictionary);
|
---|
147 | } else {
|
---|
148 | #collect other outline data to pass to modify_outlines
|
---|
149 | $PDFfile->{"Outlines"}{"/First"} = $outline_data->{"/First"};
|
---|
150 | $PDFfile->{"Outlines"}{"/Last"} = $outline_data->{"/Last"};
|
---|
151 |
|
---|
152 | #modify last outline entry
|
---|
153 | modify_outlines($outline_data, $PDFfile, $file);
|
---|
154 | }
|
---|
155 | } else { #there was no outline dictionary thus no outlines so add some
|
---|
156 | print STDERR "no bookmarks in \"$file\" \n";
|
---|
157 | add_outlines($PDFfile, $file, 0);
|
---|
158 | }
|
---|
159 | } else { #the file was not a pdf file
|
---|
160 | print STDERR "$file is not a pdf file!!\n";
|
---|
161 | }
|
---|
162 |
|
---|
163 |
|
---|
164 | #copy back the file we modified and the saved copy
|
---|
165 | my $copypath = join("/", @filelist);
|
---|
166 | print STDERR "cp $file $filename\n";
|
---|
167 | print STDERR "cp tmp_$file $copypath/tmp_$file\n";
|
---|
168 | `cp $file $filename\n`;
|
---|
169 | `cp tmp_$file $copypath/tmp_$file\n`;
|
---|
170 |
|
---|
171 | #remove the copy of that file in this directory
|
---|
172 | #print STDERR "rm $file\n";
|
---|
173 | `rm $file\n`;
|
---|
174 | `rm tmp_$file\n`;
|
---|
175 | }
|
---|
176 |
|
---|
177 |
|
---|
178 | #add_outlines function recieves following params
|
---|
179 | #PDF file information, filename of pdf file, outline dictionary
|
---|
180 | #(if outline dict. is 0 that means it must be created).
|
---|
181 | #First the function decides if we are to create new dictionary
|
---|
182 | #or not. Next read in the url file. Open up the pdf document.
|
---|
183 | #Modify the catalog so that it includes the outline dictionary
|
---|
184 | #(this is done even if the catalog doesn't need modifying
|
---|
185 | # so that the objects in the objects/offset table will not
|
---|
186 | # be all mucked up when it comes time to write the xreference
|
---|
187 | # table)
|
---|
188 | #Then append outline dictionary to end of file (either modified
|
---|
189 | #or created). Then append the top heirarchy related document
|
---|
190 | #outline. Then the new file url bookmarks. Call xref_table
|
---|
191 | #to write new xreference table. Call trailer to write new trailer.
|
---|
192 |
|
---|
193 | #table[0][0] - document catalog
|
---|
194 | #table[0][1] - document catalog offset
|
---|
195 | #table[1][0] - outline dictionary
|
---|
196 | #table[1][1] - outline dictionary offset
|
---|
197 | #table[2][0] - related document outline
|
---|
198 | #table[2][1] - related document outline offset
|
---|
199 | #table[3][0] etc - url outlines, read from file
|
---|
200 | #table[3][1] etc - url outlines offset, read from file
|
---|
201 |
|
---|
202 | sub add_outlines (*\$){
|
---|
203 |
|
---|
204 | my $PDFfile = shift;
|
---|
205 | my $file = shift;
|
---|
206 |
|
---|
207 | my @table;
|
---|
208 | $table[1][$objects] = shift;
|
---|
209 |
|
---|
210 | #read in file of url's
|
---|
211 | my @urls = read_file();
|
---|
212 | my $url_num = $#urls + 1;
|
---|
213 |
|
---|
214 | #if outline dictionary was not present in catalog
|
---|
215 | if ($table[1][$objects] == 0){ #get new object number for new dictionary
|
---|
216 | $table[1][$objects] = $PDFfile->{"Trailer"}{"/Size"}; #outline dictionary to be created
|
---|
217 | $table[2][$objects] = $table[1][$objects] + 1; #related document outline
|
---|
218 | } else { #else get object number for related doc outline to be appended
|
---|
219 | $table[2][$objects] = $PDFfile->{"Trailer"}{"/Size"}; #related document outline
|
---|
220 | }
|
---|
221 |
|
---|
222 | #open up pdf file for appending
|
---|
223 | open(FILE, ">> $file") or croak "can't open $file: $!";
|
---|
224 | binmode \*FILE;
|
---|
225 |
|
---|
226 | #obtain object number and offset for the document catalog
|
---|
227 | $table[0][$objects] = split(/\s/, $PDFfile->{"Trailer"}{"/Root"});
|
---|
228 | $table[0][$offsets] = tell \*FILE;
|
---|
229 |
|
---|
230 | #print the modified or original catalog back to the file (appended)
|
---|
231 | print FILE "$table[0][$objects] 0 obj
|
---|
232 | ";
|
---|
233 | print FILE "<<
|
---|
234 | ";
|
---|
235 | print FILE "/Pages $PDFfile->{\"Catalog\"}{\"/Pages\"}
|
---|
236 | ";
|
---|
237 | print FILE "/Outlines $table[1][$objects] 0 R
|
---|
238 | "; #only line actually added
|
---|
239 | print FILE "/Type /Catalog
|
---|
240 | ";
|
---|
241 | print FILE "/DefaultGray $PDFfile->{\"Catalog\"}{\"/DefaultGray\"}
|
---|
242 | "
|
---|
243 | if (defined ($PDFfile->{"Catalog"}{"/DefaultGray"}));
|
---|
244 | print FILE "/DefaultRGB $PDFfile->{\"Catalog\"}{\"/DefaultRGB\"}
|
---|
245 | "
|
---|
246 | if (defined ($PDFfile->{"Catalog"}{"/DefaultRGB"}));
|
---|
247 | print FILE "/PageLabels $PDFfile->{\"Catalog\"}{\"/PageLabels\"}
|
---|
248 | "
|
---|
249 | if (defined ($PDFfile->{"Catalog"}{"/PageLabels"}));
|
---|
250 | print FILE "/Names $PDFfile->{\"Catalog\"}{\"/Names\"}
|
---|
251 | "
|
---|
252 | if (defined ($PDFfile->{"Catalog"}{"/Names"}));
|
---|
253 | print FILE "/Dests $PDFfile->{\"Catalog\"}{\"/Dests\"}
|
---|
254 | "
|
---|
255 | if (defined ($PDFfile->{"Catalog"}{"/Dests"}));
|
---|
256 | print FILE "/ViewerPreferences $PDFfile->{\"Catalog\"}{\"/ViewerPreferences\"}
|
---|
257 | "
|
---|
258 | if (defined ($PDFfile->{"Catalog"}{"/ViewerPreferences"}));
|
---|
259 | print FILE "/PageLayout $PDFfile->{\"Catalog\"}{\"/PageLayout\"}
|
---|
260 | "
|
---|
261 | if (defined ($PDFfile->{"Catalog"}{"/PageLayout"}));
|
---|
262 | print FILE "/PageMode $PDFfile->{\"Catalog\"}{\"/PageMode\"}
|
---|
263 | "
|
---|
264 | if (defined ($PDFfile->{"Catalog"}{"/PageMode"}));
|
---|
265 | print FILE "/Threads $PDFfile->{\"Catalog\"}{\"/Threads\"}
|
---|
266 | "
|
---|
267 | if (defined ($PDFfile->{"Catalog"}{"/Threads"}));
|
---|
268 | print FILE "/OpenAction $PDFfile->{\"Catalog\"}{\"/OpenAction\"}
|
---|
269 | "
|
---|
270 | if (defined ($PDFfile->{"Catalog"}{"/OpenAction"}));
|
---|
271 | print FILE "/URI $PDFfile->{\"Catalog\"}{\"/URI\"}
|
---|
272 | "
|
---|
273 | if (defined ($PDFfile->{"Catalog"}{"/URI"}));
|
---|
274 | print FILE "/Acroform $PDFfile->{\"Catalog\"}{\"/Acroform\"}
|
---|
275 | "
|
---|
276 | if (defined ($PDFfile->{"Catalog"}{"/Acroform"}));
|
---|
277 | print FILE "/StructTreeRoot $PDFfile->{\"Catalog\"}{\"/StructTreeRoot\"}
|
---|
278 | "
|
---|
279 | if (defined ($PDFfile->{"Catalog"}{"/StructTreeRoot"}));
|
---|
280 | print FILE "/SpiderInfo $PDFfile->{\"Catalog\"}{\"/SpiderInfo\"}
|
---|
281 | "
|
---|
282 | if (defined ($PDFfile->{"Catalog"}{"/SpiderInfo"}));
|
---|
283 | print FILE ">>
|
---|
284 | ";
|
---|
285 | print FILE "endobj
|
---|
286 | ";
|
---|
287 |
|
---|
288 | #obtain offset for outline dictionary
|
---|
289 | $table[1][$offsets] = tell \*FILE;
|
---|
290 |
|
---|
291 | #append newly created outline dictionary
|
---|
292 | print FILE "$table[1][$objects] 0 obj
|
---|
293 | ";
|
---|
294 | print FILE "<<
|
---|
295 | ";
|
---|
296 | print FILE "/Type /Outlines
|
---|
297 | ";
|
---|
298 | print FILE "/Count ", $url_num + 1, "
|
---|
299 | ";
|
---|
300 | print FILE "/First $table[2][$objects] 0 R
|
---|
301 | ";
|
---|
302 | print FILE "/Last $table[2][$objects] 0 R
|
---|
303 | ";
|
---|
304 | print FILE ">>
|
---|
305 | ";
|
---|
306 | print FILE "endobj
|
---|
307 | ";
|
---|
308 |
|
---|
309 |
|
---|
310 | #get the related document outline object num and offset
|
---|
311 | my $obj = $table[2][$objects];
|
---|
312 | $table[2][$offsets] = tell \*FILE;
|
---|
313 |
|
---|
314 | #append the top heirarchy related document outline to file
|
---|
315 | print FILE "$table[2][$objects] 0 obj
|
---|
316 | ";
|
---|
317 | print FILE "<<
|
---|
318 | ";
|
---|
319 | print FILE "/Title (Related Documents)
|
---|
320 | ";
|
---|
321 | print FILE "/Parent $table[1][$objects] 0 R
|
---|
322 | ";
|
---|
323 | print FILE "/Count ", $url_num, "
|
---|
324 | ";
|
---|
325 | print FILE "/First ", $obj + 1, " 0 R
|
---|
326 | ";
|
---|
327 | print FILE "/Last ", $table[2][$objects] + $url_num, " 0 R
|
---|
328 | ";
|
---|
329 | print FILE ">>
|
---|
330 | ";
|
---|
331 | print FILE "endobj
|
---|
332 | ";
|
---|
333 |
|
---|
334 | my $ind = 3;
|
---|
335 | $obj++;
|
---|
336 |
|
---|
337 |
|
---|
338 | #store the object nums and offsets of the new related document
|
---|
339 | #outlines and write them to the file (must be outline with
|
---|
340 | #an action eg go to specific url)
|
---|
341 | for $i ( 0 .. $#urls ) {
|
---|
342 | $table[$ind][$offsets] = tell \*FILE;
|
---|
343 | $table[$ind][$objects] = $obj;
|
---|
344 | print FILE "$obj 0 obj
|
---|
345 | ";
|
---|
346 | print FILE "<<
|
---|
347 | ";
|
---|
348 | print FILE "/Title ($urls[$i][0])
|
---|
349 | ";
|
---|
350 | print FILE "/Parent $table[2][$objects] 0 R
|
---|
351 | ";
|
---|
352 | print FILE "/Next ", $obj + 1, " 0 R
|
---|
353 | " if (($obj + 1) <= ($table[2][$objects] + $url_num));
|
---|
354 | print FILE "/Prev ", $obj - 1, " 0 R
|
---|
355 | " if(($obj - 1) != ($table[2][$objects]));
|
---|
356 | print FILE "/A << /Type /Action
|
---|
357 | ";
|
---|
358 | print FILE "/S /URI
|
---|
359 | ";
|
---|
360 | print FILE "/URI ($urls[$i][1])
|
---|
361 | ";
|
---|
362 | print FILE ">>
|
---|
363 | ";
|
---|
364 | print FILE ">>
|
---|
365 | ";
|
---|
366 | print FILE "endobj
|
---|
367 | ";
|
---|
368 | $obj++;
|
---|
369 | $ind++;
|
---|
370 | }
|
---|
371 |
|
---|
372 |
|
---|
373 | #append new X-reference table
|
---|
374 | my $xref_offset = tell \*FILE;
|
---|
375 | xref_table(\*FILE, $url_num, @table);
|
---|
376 |
|
---|
377 | #print trailer
|
---|
378 | trailer(\*FILE, $PDFfile, $obj);
|
---|
379 | print FILE "$xref_offset
|
---|
380 | ";
|
---|
381 | print FILE "%%EOF
|
---|
382 | ";
|
---|
383 |
|
---|
384 | #close FILE;
|
---|
385 | }
|
---|
386 |
|
---|
387 | #modify_outlines function recieves following params
|
---|
388 | #PDF file information, outline data, pdf filename
|
---|
389 | #First the function obtains the object data for the
|
---|
390 | #last outline entry. Next read in the url file.
|
---|
391 | #Open up the pdf document.Then append the modified
|
---|
392 | #outline dictionary to end of file. Next append the
|
---|
393 | #modified last outline. Then append the
|
---|
394 | #top heirarchy related document outline. Then the
|
---|
395 | #new file url bookmarks. Call xref_table
|
---|
396 | #to write new xreference table.
|
---|
397 | #Call trailer to write new trailer.
|
---|
398 |
|
---|
399 | #table[0][0] - outline dictionary
|
---|
400 | #table[0][1] - outline dictionary offset
|
---|
401 | #table[1][0] - last outline
|
---|
402 | #table[1][1] - last outline offset
|
---|
403 | #table[2][0] - related document outline
|
---|
404 | #table[2][1] - related document outline offset
|
---|
405 | #table[3][0] etc - url outlines, read from file
|
---|
406 | #table[3][1] etc - url outlines offset, read from file
|
---|
407 |
|
---|
408 | sub modify_outlines (*\$){
|
---|
409 |
|
---|
410 | my ($outline_data, $PDFfile, $file) = @_;
|
---|
411 |
|
---|
412 | #collect the data for the last outline (which must be modified)
|
---|
413 | $PDFfile->{"Outlines"}{"/Last"} = $outline_data->{"/Last"};
|
---|
414 | my $last_data = $PDFfile->GetObject($PDFfile->{"Outlines"}{"/Last"});
|
---|
415 |
|
---|
416 | #read in file of url's
|
---|
417 | my @urls = read_file();
|
---|
418 | my $url_num = $#urls + 1;
|
---|
419 |
|
---|
420 | my @table;
|
---|
421 |
|
---|
422 | #This number is the number to use for the next created object
|
---|
423 | #eg the related doc heirarchy
|
---|
424 | $table[2][$objects] = $PDFfile->{"Trailer"}{"/Size"};
|
---|
425 |
|
---|
426 | #open up the pdf file for appending
|
---|
427 | open(FILE, ">> $file") or croak "can't open $file: $!";
|
---|
428 | binmode \*FILE;
|
---|
429 |
|
---|
430 | #store the object number and offset of the outline dictionary
|
---|
431 | $table[0][$objects] = split(/\s/, $PDFfile->{"Catalog"}{"/Outlines"});
|
---|
432 | $table[0][$offsets] = tell \*FILE;
|
---|
433 |
|
---|
434 | #write the outline dictionary back to the file (appending)
|
---|
435 | print FILE "$table[0][$objects] 0 obj
|
---|
436 | ";
|
---|
437 | print FILE "<<
|
---|
438 | ";
|
---|
439 | print FILE "/Type /Outlines
|
---|
440 | ";
|
---|
441 | print FILE "/Count ", $PDFfile->{"Outlines"}{"/Count"} + 1, "
|
---|
442 | "
|
---|
443 | if (defined ($PDFfile->{"Outlines"}{"/Count"}));
|
---|
444 | print FILE "/First $PDFfile->{\"Outlines\"}{\"/First\"}
|
---|
445 | "
|
---|
446 | if (defined ($PDFfile->{"Outlines"}{"/First"}));
|
---|
447 | print FILE "/Last $table[2][$objects] 0 R
|
---|
448 | ";
|
---|
449 | print FILE ">>
|
---|
450 | ";
|
---|
451 | print FILE "endobj
|
---|
452 | ";
|
---|
453 |
|
---|
454 | #store the last outline entry object num and file offset
|
---|
455 | my @last_entry = split(/\s/, $PDFfile->{"Outlines"}{"/Last"});
|
---|
456 | $table[1][$objects] = $last_entry[0];
|
---|
457 | $table[1][$offsets] = tell \*FILE;
|
---|
458 |
|
---|
459 | #append modified last outline entry
|
---|
460 | print FILE "$table[1][$objects] 0 obj
|
---|
461 | ";
|
---|
462 | print FILE "<<
|
---|
463 | ";
|
---|
464 | print FILE "/Title $last_data->{\"/Title\"}
|
---|
465 | ";
|
---|
466 | print FILE "/Dest $last_data->{\"/Dest\"}
|
---|
467 | " if(defined $last_data->{"/Dest"});
|
---|
468 | print FILE "/Parent $last_data->{\"/Parent\"}
|
---|
469 | ";
|
---|
470 | print FILE "/Prev $last_data->{\"/Prev\"}
|
---|
471 | ";
|
---|
472 | print FILE "/Next $table[2][$objects] 0 R
|
---|
473 | ";
|
---|
474 | print FILE "/First $last_data->{\"/First\"}
|
---|
475 | " if(defined $last_data->{"/First"});
|
---|
476 | print FILE "/Last $last_data->{\"/Last\"}
|
---|
477 | " if(defined $last_data->{"/Last"});
|
---|
478 | print FILE "/Count $last_data->{\"/Count\"}
|
---|
479 | "if(defined $last_data->{"/Count"});
|
---|
480 | print FILE "/A $last_data->{\"/A\"}
|
---|
481 | " if(defined $last_data->{"/A"});
|
---|
482 | print FILE "/SE $last_data->{\"/SE\"}
|
---|
483 | " if(defined $last_data->{"/SE"});
|
---|
484 | print FILE ">>
|
---|
485 | ";
|
---|
486 | print FILE "endobj
|
---|
487 | ";
|
---|
488 |
|
---|
489 | #store the object num and offset of the related
|
---|
490 | #document top level outline
|
---|
491 | my $obj = $table[2][$objects] + 1;
|
---|
492 | $table[2][$offsets] = tell \*FILE;
|
---|
493 |
|
---|
494 | #append related document top level outline
|
---|
495 | print FILE "$table[2][$objects] 0 obj
|
---|
496 | ";
|
---|
497 | print FILE "<<
|
---|
498 | ";
|
---|
499 | print FILE "/Title (Related Documents)
|
---|
500 | ";
|
---|
501 | print FILE "/Parent $last_data->{\"/Parent\"}
|
---|
502 | ";
|
---|
503 | print FILE "/Count ", $url_num, "
|
---|
504 | ";
|
---|
505 | print FILE "/First $obj 0 R
|
---|
506 | ";
|
---|
507 | print FILE "/Last ", $table[2][$objects] + $url_num , " 0 R
|
---|
508 | ";
|
---|
509 | print FILE ">>
|
---|
510 | ";
|
---|
511 | print FILE "endobj
|
---|
512 | ";
|
---|
513 |
|
---|
514 |
|
---|
515 | my $ind = 3;
|
---|
516 |
|
---|
517 | #store the object nums and offsets of the new related document
|
---|
518 | #outlines and write them to the file (must be outline with
|
---|
519 | #an action eg go to specific url)
|
---|
520 | for $i ( 0 .. $#urls ) {
|
---|
521 | $table[$ind][$objects] = $obj;
|
---|
522 | $table[$ind][$offsets] = tell \*FILE;
|
---|
523 | print FILE "$obj 0 obj
|
---|
524 | ";
|
---|
525 | print FILE "<<
|
---|
526 | ";
|
---|
527 | print FILE "/Title ($urls[$i][0])
|
---|
528 | ";
|
---|
529 | print FILE "/Parent $table[2][$objects] 0 R
|
---|
530 | ";
|
---|
531 | print FILE "/Next ", $obj + 1, " 0 R
|
---|
532 | " if (($obj + 1) <= ($table[2][$objects] + $url_num));
|
---|
533 | print FILE "/Prev ", $obj - 1, " 0 R
|
---|
534 | " if (($obj - 1) != ($table[2][$objects]));
|
---|
535 | print FILE "/A << /Type /Action
|
---|
536 | ";
|
---|
537 | print FILE "/S /URI
|
---|
538 | ";
|
---|
539 | print FILE "/URI ($urls[$i][1])
|
---|
540 | ";
|
---|
541 | print FILE ">>
|
---|
542 | ";
|
---|
543 | print FILE ">>
|
---|
544 | ";
|
---|
545 | print FILE "endobj
|
---|
546 | ";
|
---|
547 | $obj++;
|
---|
548 | $ind++;
|
---|
549 | }
|
---|
550 |
|
---|
551 | #append new X-reference table
|
---|
552 | my $xref_offset = tell \*FILE;
|
---|
553 | xref_table(\*FILE, $url_num, @table);
|
---|
554 |
|
---|
555 | #print trailer
|
---|
556 | trailer(\*FILE, $PDFfile, $obj);
|
---|
557 | print FILE "$xref_offset
|
---|
558 | ";
|
---|
559 | print FILE "%%EOF
|
---|
560 | ";
|
---|
561 |
|
---|
562 | close FILE;
|
---|
563 |
|
---|
564 | }
|
---|
565 |
|
---|
566 |
|
---|
567 | #This function reads a file 'url.txt' which contains
|
---|
568 | #two columns of data in the following format:
|
---|
569 | #related document title related document url
|
---|
570 | #each array of title, url is stored in an array
|
---|
571 | #(to obtain the title of the first related
|
---|
572 | #document in the file)
|
---|
573 | #eg table[1st related document][title]
|
---|
574 | #(to obtain the url of the second related
|
---|
575 | #document in the file)
|
---|
576 | #eg table[2nd related document][url]
|
---|
577 | #this table is then returned to the calling
|
---|
578 | #function.
|
---|
579 | sub read_file {
|
---|
580 |
|
---|
581 | #create two-dimensional array for urls
|
---|
582 | my @urls;
|
---|
583 |
|
---|
584 | #open up url.txt to start reading
|
---|
585 | open(URL, "$related_docs") or croak "Can't open $related_docs: $!";
|
---|
586 |
|
---|
587 | my $counter = 0;
|
---|
588 |
|
---|
589 | #read in url.txt and store into the arry
|
---|
590 | while (<URL>){
|
---|
591 | chomp;
|
---|
592 | if(/([^\t]*)\t(.*)/){
|
---|
593 | push @urls, [ split ]; #split on /s into arrays of arrays
|
---|
594 | }
|
---|
595 | }
|
---|
596 |
|
---|
597 | close URL;
|
---|
598 |
|
---|
599 | return (@urls);
|
---|
600 |
|
---|
601 | }
|
---|
602 |
|
---|
603 |
|
---|
604 | #this function takes as parameters the filehandle
|
---|
605 | #to the pdf document, the number of related documents
|
---|
606 | #to this pdf doc and a table of object numbers and
|
---|
607 | #their offsets. Using this information it writes
|
---|
608 | #(appends) a new xreference table to the pdf
|
---|
609 | #document.
|
---|
610 |
|
---|
611 | sub xref_table (*\$) {
|
---|
612 |
|
---|
613 | my ($fd, $num, @table) = @_;
|
---|
614 | my $offset;
|
---|
615 |
|
---|
616 | #print the new xref table (append to file)
|
---|
617 | print $fd "xref
|
---|
618 | ";
|
---|
619 | print $fd "0 1
|
---|
620 | ";
|
---|
621 | print $fd "0000000000 65535 f
|
---|
622 | ";
|
---|
623 | print $fd "$table[0][$objects] 1
|
---|
624 | ";
|
---|
625 | $offset = '0'x(10-length($table[0][$offsets])).$table[0][$offsets];
|
---|
626 | print $fd "$offset 00000 n
|
---|
627 | ";
|
---|
628 | print $fd "$table[1][$objects] 1
|
---|
629 | ";
|
---|
630 | $offset = '0'x(10-length($table[1][$offsets])).$table[1][$offsets];
|
---|
631 | print $fd "$offset 00000 n
|
---|
632 | ";
|
---|
633 | print $fd "$table[2][$objects] ", $num + 1, "
|
---|
634 | ";
|
---|
635 | for $i ( 2 .. ($num + 2) ) { #add 2 on because already written 2 to file
|
---|
636 | $offset = '0'x(10-length($table[$i][$offsets])).$table[$i][$offsets];
|
---|
637 | print $fd "$offset 00000 n
|
---|
638 | ";
|
---|
639 | }
|
---|
640 |
|
---|
641 | }
|
---|
642 |
|
---|
643 |
|
---|
644 | #This function recieves as parameters the filehandle
|
---|
645 | #to the pdf document, parsed information about the
|
---|
646 | #document and the new size (last object number + 1)
|
---|
647 | #of the pdf file. Using this information it appends
|
---|
648 | #a new trailer to the end of the pdf document.
|
---|
649 |
|
---|
650 | sub trailer (*\$) {
|
---|
651 |
|
---|
652 | my ($fd, $PDFfile, $new_size) = @_;
|
---|
653 |
|
---|
654 | #append the new trailer to the end of the file
|
---|
655 | print $fd "trailer
|
---|
656 | ";
|
---|
657 | print $fd "<<
|
---|
658 | ";
|
---|
659 | print $fd "/Size ", $new_size, "
|
---|
660 | ";
|
---|
661 | print $fd "/Root $PDFfile->{\"Trailer\"}{\"/Root\"}
|
---|
662 | ";
|
---|
663 | print $fd "/Info $PDFfile->{\"Trailer\"}{\"/Info\"}
|
---|
664 | "
|
---|
665 | if (defined ($PDFfile->{"Trailer"}{"/Info"}));
|
---|
666 | print $fd "/ID [$PDFfile->{\"Trailer\"}{\"/ID\"}[0]$PDFfile->{\"Trailer\"}{\"/ID\"}[1]]
|
---|
667 | "
|
---|
668 | if (defined ($PDFfile->{"Trailer"}{"/ID"}));
|
---|
669 | print $fd "/Prev $PDFfile->{\"Last_XRef_Offset\"}
|
---|
670 | ";
|
---|
671 | print $fd "/Encrypt $PDFfile->{\"Trailer\"}{\"/Encrypt\"}
|
---|
672 | "
|
---|
673 | if (defined ($PDFfile->{"Trailer"}{"/Encrypt"}));
|
---|
674 | print $fd ">>
|
---|
675 | ";
|
---|
676 | print $fd "startxref
|
---|
677 | ";
|
---|
678 |
|
---|
679 | }
|
---|
680 |
|
---|
681 |
|
---|
682 |
|
---|