source: gsdl/tags/gsdl-2_71-distribution/gsdl/packages/w3mir/w3mir-1.0.8/w3pdfuri.pm@ 14121

Last change on this file since 14121 was 719, checked in by davidb, 25 years ago

added w3mir package

  • Property svn:keywords set to Author Date Id Revision
File size: 1.3 KB
Line 
1# -*- perl -*-
2#
3# w3pdfuri.pm -- extract URIs from an Adobe Acrobat PDF file.
4# version 0.1
5$VERSION=0.1;
6#
7# Assumptions:
8# - File starts with %PDF
9# - All URLs match this RE: ^\/URI\s+\(([^)]*)\)\s*$ (found by inspection of
10# one (1) PDF file, the reader.pdf in the Acrobat distribution).
11# - pdf files are potentially very large, so we read it from disk
12# record by record.
13# - Record separator is ^M
14#
15# History:
16# - 19/02/97 janl: Version 0.1, seems to work with PDF-1.2
17#
18
19package w3pdfuri;
20
21use strict;
22
# list($file) -- return the URIs found in the PDF file named $file.
#
# Arguments:
#   $file - path of the file to scan.
#
# Returns:
#   The URI strings in the order they appear in the file (possibly none).
#   If the file cannot be opened, or does not start with "%PDF-", a
#   warning is issued and a bare "return" is executed (empty list in list
#   context, undef in scalar context).
#
# Notes:
#   - A URI is recognised only as a whole line of the form "/URI (...)";
#     the text between the parentheses is returned verbatim.  Escaped or
#     nested parentheses inside the string are not handled.
#   - Input is read in records separated by "\r", so a CR-terminated file
#     is never held in memory as a whole.  Line feeds inside a record are
#     treated as line breaks too, so CRLF and LF-only files are scanned
#     as well (an LF-only file arrives as one single record).
#   - The ($) prototype is kept so that existing call sites parse exactly
#     as they did before.
sub list ($) {
    my ($file) = @_;

    my @urls = ();
    local $/ = "\r";

    # Three-argument open with a lexical handle: the file name can no
    # longer be misread as an open mode or a pipe, and no global bareword
    # handle is left behind.
    my $pdf;
    unless (open($pdf, '<', $file)) {
        warn "Could not open $file for input: $!\n";
        return;
    }

    # PDF is a binary format; disable any platform newline translation.
    binmode($pdf);

    # The first record must carry the PDF signature.  An empty file yields
    # undef here and is reported as "not a PDF" like any other non-PDF.
    # A lexical is used instead of $_ so the caller's $_ stays untouched.
    my $record = <$pdf>;
    unless (defined($record) && $record =~ /^%PDF-/) {
        warn "$file is not a PDF file.\n";
        close($pdf);
        return;
    }

    # Scan every record, the first one included: in an LF-only file the
    # first record is the whole document.
    while (defined $record) {
        chomp $record;

        # /m lets ^ and $ anchor at embedded line feeds, which is what
        # makes CRLF ("\n" leads each record) and LF-only input work;
        # /g collects every matching line within the record.
        while ($record =~ m~^/URI\s+\(([^)]*)\)\s*$~mg) {
            push(@urls, $1);
        }

        $record = <$pdf>;
    }

    close($pdf);

    return @urls;
}
50
511;
52
53__END__
54
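55# Test code (inert: it follows __END__, so perl never compiles or runs it;
55# the paths below are specific to the original author's machine)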
56print "reader.pdf: ",
57 join(",",list("/local/lib/acrobat/Reader/help/reader.pdf")),"\n";
58print "Acrobat.pdf: ",
59 join(",",list("/local/lib/acrobat/Reader/Acrobat.pdf")),"\n";
60print "License.pdf: ",
61 join(",",list("/local/lib/acrobat/Reader/License.pdf")),"\n";
62print "MapTypes.pdf: ",
63 join(",",list("/local/lib/acrobat/Reader/MapTypes.pdf")),"\n";
64list("/etc/services");
Note: See TracBrowser for help on using the repository browser.