Last change
on this file since 14121 was 719, checked in by davidb, 25 years ago |
added w3mir package
|
-
Property svn:keywords
set to
Author Date Id Revision
|
File size:
1.3 KB
|
Line | |
---|
1 | # -*- perl -*-
|
---|
2 | #
|
---|
3 | # w3pdfuri.pm -- extract URIs from an Adobe Acrobat PDF file.
|
---|
4 | # version 0.1
|
---|
# Module version.  The name is fully qualified because this statement runs
# before the `package w3pdfuri;` declaration further down: an unqualified
# $VERSION here would set $main::VERSION (clobbering the version of whatever
# script loads this module) and leave $w3pdfuri::VERSION undefined.
$w3pdfuri::VERSION = 0.1;
|
---|
6 | #
|
---|
7 | # Assumptions:
|
---|
8 | # - File starts with %PDF
|
---|
9 | # - All URLS match this RE: ^\/URI\s+\(([^)]*)\)\s*$ (found by inspection of
|
---|
10 | # one (1) pdf file, the reader.pdf in the acrobat distribution).
|
---|
11 | # - pdf files are potentially very large, so we read it from disk
|
---|
12 | # record by record.
|
---|
13 | # - Record separator is ^M
|
---|
14 | #
|
---|
15 | # History:
|
---|
16 | # - 19/02/97 janl: Version 0.1, seems to work with PDF-1.2
|
---|
17 | #
|
---|
18 |
|
---|
19 | package w3pdfuri;
|
---|
20 |
|
---|
21 | use strict;
|
---|
22 |
|
---|
# list($file) -- extract the URIs referenced by a PDF file.
#
# Parameters:
#   $file - path of the PDF file to scan.
#
# Returns:
#   The list of strings captured from every line of the form
#       /URI (<text>)
#   in file order.  Returns an empty list (after a warning on STDERR) when
#   the file cannot be opened or does not begin with "%PDF-".
#
# The file is read in CR-terminated records so that a large PDF is not
# necessarily held in memory all at once.  Lines inside a record may also be
# terminated by LF or CRLF; the multi-line match below finds URIs in those
# files as well.  The caller's $_ is left untouched.
sub list ($) {
    my ($file) = @_;

    my @urls = ();

    # Three-argument open with a lexical handle: characters such as '>', '|'
    # or surrounding whitespace in $file can never be taken as an open mode.
    my $pdf;
    unless (open($pdf, '<', $file)) {
        warn "Could not open $file for input: $!\n";
        return;
    }

    # PDF is a binary format; disable any line-ending translation.
    binmode($pdf);

    local $/ = "\r";

    # The first record must carry the PDF signature.  An empty file yields
    # undef here and is rejected the same way as any other non-PDF file.
    my $record = <$pdf>;
    unless (defined($record) && $record =~ /^%PDF-/) {
        warn "$file is not a PDF file.\n";
        close($pdf);
        return;
    }

    # Scan every record, the first one included: in an LF-only file the text
    # after the signature line is part of that first record.
    while (defined $record) {
        chomp($record);

        # /m lets ^ and $ anchor at embedded newlines, so a record beginning
        # with the LF of a CRLF pair, or one holding several LF-terminated
        # lines, is searched line by line; /g collects every hit.
        while ($record =~ m~^/URI\s+\(([^)]*)\)\s*$~mg) {
            push(@urls, $1);
        }

        $record = <$pdf>;
    }

    close($pdf);

    return @urls;
}
|
---|
50 |
|
---|
51 | 1;
|
---|
52 |
|
---|
53 | __END__
|
---|
54 |
|
---|
55 | # Test code. It follows __END__, so perl never compiles or runs it.
|
---|
56 | print "reader.pdf: ",
|
---|
57 | join(",",list("/local/lib/acrobat/Reader/help/reader.pdf")),"\n";
|
---|
58 | print "Acrobat.pdf: ",
|
---|
59 | join(",",list("/local/lib/acrobat/Reader/Acrobat.pdf")),"\n";
|
---|
60 | print "License.pdf: ",
|
---|
61 | join(",",list("/local/lib/acrobat/Reader/License.pdf")),"\n";
|
---|
62 | print "MapTypes.pdf: ",
|
---|
63 | join(",",list("/local/lib/acrobat/Reader/MapTypes.pdf")),"\n";
|
---|
64 | list("/etc/services");
|
---|
Note:
See
TracBrowser
for help on using the repository browser.