source: main/trunk/greenstone2/perllib/cpan/URI/Heuristic.pm@ 27174

Last change on this file since 27174 was 27174, checked in by davidb, 11 years ago

Perl modules from CPAN that are used in supporting activate.pl, but not part of the Perl core. Only PMs included.

File size: 6.4 KB
Line 
1package URI::Heuristic;
2
3=head1 NAME
4
5URI::Heuristic - Expand URI using heuristics
6
7=head1 SYNOPSIS
8
9 use URI::Heuristic qw(uf_uristr);
10 $u = uf_uristr("perl"); # http://www.perl.com
11 $u = uf_uristr("www.sol.no/sol"); # http://www.sol.no/sol
12 $u = uf_uristr("aas"); # http://www.aas.no
13 $u = uf_uristr("ftp.funet.fi"); # ftp://ftp.funet.fi
14 $u = uf_uristr("/etc/passwd"); # file:/etc/passwd
15
16=head1 DESCRIPTION
17
18This module provides functions that expand strings into real absolute
19URIs using some built-in heuristics. Strings that already represent
20absolute URIs (i.e. that start with a C<scheme:> part) are never modified
21and are returned unchanged. The main use of these functions is to
22allow abbreviated URIs similar to what many web browsers allow for URIs
23typed in by the user.
24
25The following functions are provided:
26
27=over 4
28
29=item uf_uristr($str)
30
31Tries to make the argument string
32into a proper absolute URI string. The "uf_" prefix stands for "User
33Friendly". Under MacOS, it assumes that any string with a common URL
34scheme (http, ftp, etc.) is a URL rather than a local path. So don't name
35your volumes after common URL schemes and expect uf_uristr() to construct
36valid file: URL's on those volumes for you, because it won't.
37
38=item uf_uri($str)
39
40Works the same way as uf_uristr() but
41returns a C<URI> object.
42
43=back
44
45=head1 ENVIRONMENT
46
47If the hostname portion of a URI does not contain any dots, then
48certain qualified guesses are made. These guesses are governed by
49the following environment variables:
50
51=over 10
52
53=item COUNTRY
54
55The two-letter country code (ISO 3166) for your location. If
56the domain name of your host ends with two letters, then it is taken
57to be the default country. See also L<Locale::Country>.
58
59=item HTTP_ACCEPT_LANGUAGE, LC_ALL, LANG
60
61If COUNTRY is not set, these standard environment variables are
62examined and country (not language) information possibly found in them
63is used as the default country.
64
65=item URL_GUESS_PATTERN
66
67Contains a space-separated list of URL patterns to try. The string
68"ACME" is for some reason used as a placeholder for the host name in
69the URL provided. Example:
70
71 URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com"
72 export URL_GUESS_PATTERN
73
74Specifying URL_GUESS_PATTERN disables any guessing rules based on
75country. An empty URL_GUESS_PATTERN disables any guessing that
76involves host name lookups.
77
78=back
79
80=head1 COPYRIGHT
81
82Copyright 1997-1998, Gisle Aas
83
84This library is free software; you can redistribute it and/or
85modify it under the same terms as Perl itself.
86
87=cut
88
89use strict;
90
91use vars qw(@EXPORT_OK $VERSION $MY_COUNTRY %LOCAL_GUESSING $DEBUG);
92
93require Exporter;
94*import = \&Exporter::import;
95@EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr);
96$VERSION = "4.20";
97
98sub MY_COUNTRY() {
99 for ($MY_COUNTRY) {
100 return $_ if defined;
101
102 # First try the environment.
103 $_ = $ENV{COUNTRY};
104 return $_ if defined;
105
106 # Try the country part of LC_ALL and LANG from environment
107 my @srcs = ($ENV{LC_ALL}, $ENV{LANG});
108 # ...and HTTP_ACCEPT_LANGUAGE before those if present
109 if (my $httplang = $ENV{HTTP_ACCEPT_LANGUAGE}) {
110 # TODO: q-value processing/ordering
111 for $httplang (split(/\s*,\s*/, $httplang)) {
112 if ($httplang =~ /^\s*([a-zA-Z]+)[_-]([a-zA-Z]{2})\s*$/) {
113 unshift(@srcs, "${1}_${2}");
114 last;
115 }
116 }
117 }
118 for (@srcs) {
119 next unless defined;
120 return lc($1) if /^[a-zA-Z]+_([a-zA-Z]{2})(?:[.@]|$)/;
121 }
122
123 # Last bit of domain name. This may access the network.
124 require Net::Domain;
125 my $fqdn = Net::Domain::hostfqdn();
126 $_ = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/;
127 return $_ if defined;
128
129 # Give up. Defined but false.
130 return ($_ = 0);
131 }
132}
133
134%LOCAL_GUESSING =
135(
136 'us' => [qw(www.ACME.gov www.ACME.mil)],
137 'gb' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)],
138 'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)],
139 'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)],
140 # send corrections and new entries to <[email protected]>
141);
142# Backwards compatibility; uk != United Kingdom in ISO 3166
143$LOCAL_GUESSING{uk} = $LOCAL_GUESSING{gb};
144
145
146sub uf_uristr ($)
147{
148 local($_) = @_;
149 print STDERR "uf_uristr: resolving $_\n" if $DEBUG;
150 return unless defined;
151
152 s/^\s+//;
153 s/\s+$//;
154
155 if (/^(www|web|home)[a-z0-9-]*(?:\.|$)/i) {
156 $_ = "http://$_";
157
158 } elsif (/^(ftp|gopher|news|wais|https|http)[a-z0-9-]*(?:\.|$)/i) {
159 $_ = lc($1) . "://$_";
160
161 } elsif ($^O ne "MacOS" &&
162 (m,^/, || # absolute file name
163 m,^\.\.?/, || # relative file name
164 m,^[a-zA-Z]:[/\\],) # dosish file name
165 )
166 {
167 $_ = "file:$_";
168
169 } elsif ($^O eq "MacOS" && m/:/) {
170 # potential MacOS file name
171 unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) {
172 require URI::file;
173 my $a = URI::file->new($_)->as_string;
174 $_ = ($a =~ m/^file:/) ? $a : "file:$a";
175 }
176 } elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) {
177 $_ = "mailto:$_";
178
179 } elsif (!/^[a-zA-Z][a-zA-Z0-9.+\-]*:/) { # no scheme specified
180 if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) {
181 my $host = $1;
182
183 my $scheme = "http";
184 if (/^:(\d+)\b/) {
185 # Some more or less well known ports
186 if ($1 =~ /^[56789]?443$/) {
187 $scheme = "https";
188 } elsif ($1 eq "21") {
189 $scheme = "ftp";
190 }
191 }
192
193 if ($host !~ /\./ && $host ne "localhost") {
194 my @guess;
195 if (exists $ENV{URL_GUESS_PATTERN}) {
196 @guess = map { s/\bACME\b/$host/; $_ }
197 split(' ', $ENV{URL_GUESS_PATTERN});
198 } else {
199 if (MY_COUNTRY()) {
200 my $special = $LOCAL_GUESSING{MY_COUNTRY()};
201 if ($special) {
202 my @special = @$special;
203 push(@guess, map { s/\bACME\b/$host/; $_ }
204 @special);
205 } else {
206 push(@guess, "www.$host." . MY_COUNTRY());
207 }
208 }
209 push(@guess, map "www.$host.$_",
210 "com", "org", "net", "edu", "int");
211 }
212
213
214 my $guess;
215 for $guess (@guess) {
216 print STDERR "uf_uristr: gethostbyname('$guess.')..."
217 if $DEBUG;
218 if (gethostbyname("$guess.")) {
219 print STDERR "yes\n" if $DEBUG;
220 $host = $guess;
221 last;
222 }
223 print STDERR "no\n" if $DEBUG;
224 }
225 }
226 $_ = "$scheme://$host$_";
227
228 } else {
229 # pure junk, just return it unchanged...
230
231 }
232 }
233 print STDERR "uf_uristr: ==> $_\n" if $DEBUG;
234
235 $_;
236}
237
238sub uf_uri ($)
239{
240 require URI;
241 URI->new(uf_uristr($_[0]));
242}
243
244# legacy
245*uf_urlstr = \*uf_uristr;
246
247sub uf_url ($)
248{
249 require URI::URL;
250 URI::URL->new(uf_uristr($_[0]));
251}
252
2531;
Note: See TracBrowser for help on using the repository browser.