source: main/trunk/greenstone2/perllib/cpan/HTTP/Negotiate.pm@ 27181

Last change on this file since 27181 was 27181, checked in by davidb, 11 years ago

Latest libwww-perl (v6x) isn't as self-sufficeint as earlier (v5.x) in terms of supporting Perl modules. Dropping back to to this earlier version so activate.pl runs smoothly when system-installed Perl on Unix system does not have the LWP and related modules installed

File size: 15.7 KB
Line 
1package HTTP::Negotiate;
2
3$VERSION = "5.835";
4sub Version { $VERSION; }
5
6require 5.002;
7require Exporter;
8@ISA = qw(Exporter);
9@EXPORT = qw(choose);
10
11require HTTP::Headers;
12
13$DEBUG = 0;
14
15sub choose ($;$)
16{
17 my($variants, $request) = @_;
18 my(%accept);
19
20 unless (defined $request) {
21 # Create a request object from the CGI environment variables
22 $request = HTTP::Headers->new;
23 $request->header('Accept', $ENV{HTTP_ACCEPT})
24 if $ENV{HTTP_ACCEPT};
25 $request->header('Accept-Charset', $ENV{HTTP_ACCEPT_CHARSET})
26 if $ENV{HTTP_ACCEPT_CHARSET};
27 $request->header('Accept-Encoding', $ENV{HTTP_ACCEPT_ENCODING})
28 if $ENV{HTTP_ACCEPT_ENCODING};
29 $request->header('Accept-Language', $ENV{HTTP_ACCEPT_LANGUAGE})
30 if $ENV{HTTP_ACCEPT_LANGUAGE};
31 }
32
33 # Get all Accept values from the request. Build a hash initialized
34 # like this:
35 #
36 # %accept = ( type => { 'audio/*' => { q => 0.2, mbx => 20000 },
37 # 'audio/basic' => { q => 1 },
38 # },
39 # language => { 'no' => { q => 1 },
40 # }
41 # );
42
43 $request->scan(sub {
44 my($key, $val) = @_;
45
46 my $type;
47 if ($key =~ s/^Accept-//) {
48 $type = lc($key);
49 }
50 elsif ($key eq "Accept") {
51 $type = "type";
52 }
53 else {
54 return;
55 }
56
57 $val =~ s/\s+//g;
58 my $default_q = 1;
59 for my $name (split(/,/, $val)) {
60 my(%param, $param);
61 if ($name =~ s/;(.*)//) {
62 for $param (split(/;/, $1)) {
63 my ($pk, $pv) = split(/=/, $param, 2);
64 $param{lc $pk} = $pv;
65 }
66 }
67 $name = lc $name;
68 if (defined $param{'q'}) {
69 $param{'q'} = 1 if $param{'q'} > 1;
70 $param{'q'} = 0 if $param{'q'} < 0;
71 }
72 else {
73 $param{'q'} = $default_q;
74
75 # This makes sure that the first ones are slightly better off
76 # and therefore more likely to be chosen.
77 $default_q -= 0.0001;
78 }
79 $accept{$type}{$name} = \%param;
80 }
81 });
82
83 # Check if any of the variants specify a language. We do this
84 # because it influences how we treat those without (they default to
85 # 0.5 instead of 1).
86 my $any_lang = 0;
87 for $var (@$variants) {
88 if ($var->[5]) {
89 $any_lang = 1;
90 last;
91 }
92 }
93
94 if ($DEBUG) {
95 print "Negotiation parameters in the request\n";
96 for $type (keys %accept) {
97 print " $type:\n";
98 for $name (keys %{$accept{$type}}) {
99 print " $name\n";
100 for $pv (keys %{$accept{$type}{$name}}) {
101 print " $pv = $accept{$type}{$name}{$pv}\n";
102 }
103 }
104 }
105 }
106
107 my @Q = (); # This is where we collect the results of the
108 # quality calculations
109
110 # Calculate quality for all the variants that are available.
111 for (@$variants) {
112 my($id, $qs, $ct, $enc, $cs, $lang, $bs) = @$_;
113 $qs = 1 unless defined $qs;
114 $ct = '' unless defined $ct;
115 $bs = 0 unless defined $bs;
116 $lang = lc($lang) if $lang; # lg tags are always case-insensitive
117 if ($DEBUG) {
118 print "\nEvaluating $id (ct='$ct')\n";
119 printf " qs = %.3f\n", $qs;
120 print " enc = $enc\n" if $enc && !ref($enc);
121 print " enc = @$enc\n" if $enc && ref($enc);
122 print " cs = $cs\n" if $cs;
123 print " lang = $lang\n" if $lang;
124 print " bs = $bs\n" if $bs;
125 }
126
127 # Calculate encoding quality
128 my $qe = 1;
129 # If the variant has no assigned Content-Encoding, or if no
130 # Accept-Encoding field is present, then the value assigned
131 # is "qe=1". If *all* of the variant's content encodings
132 # are listed in the Accept-Encoding field, then the value
133 # assigned is "qw=1". If *any* of the variant's content
134 # encodings are not listed in the provided Accept-Encoding
135 # field, then the value assigned is "qe=0"
136 if (exists $accept{'encoding'} && $enc) {
137 my @enc = ref($enc) ? @$enc : ($enc);
138 for (@enc) {
139 print "Is encoding $_ accepted? " if $DEBUG;
140 unless(exists $accept{'encoding'}{$_}) {
141 print "no\n" if $DEBUG;
142 $qe = 0;
143 last;
144 }
145 else {
146 print "yes\n" if $DEBUG;
147 }
148 }
149 }
150
151 # Calculate charset quality
152 my $qc = 1;
153 # If the variant's media-type has no charset parameter,
154 # or the variant's charset is US-ASCII, or if no Accept-Charset
155 # field is present, then the value assigned is "qc=1". If the
156 # variant's charset is listed in the Accept-Charset field,
157 # then the value assigned is "qc=1. Otherwise, if the variant's
158 # charset is not listed in the provided Accept-Encoding field,
159 # then the value assigned is "qc=0".
160 if (exists $accept{'charset'} && $cs && $cs ne 'us-ascii' ) {
161 $qc = 0 unless $accept{'charset'}{$cs};
162 }
163
164 # Calculate language quality
165 my $ql = 1;
166 if ($lang && exists $accept{'language'}) {
167 my @lang = ref($lang) ? @$lang : ($lang);
168 # If any of the variant's content languages are listed
169 # in the Accept-Language field, the the value assigned is
170 # the largest of the "q" parameter values for those language
171 # tags.
172 my $q = undef;
173 for (@lang) {
174 next unless exists $accept{'language'}{$_};
175 my $this_q = $accept{'language'}{$_}{'q'};
176 $q = $this_q unless defined $q;
177 $q = $this_q if $this_q > $q;
178 }
179 if(defined $q) {
180 $DEBUG and print " -- Exact language match at q=$q\n";
181 }
182 else {
183 # If there was no exact match and at least one of
184 # the Accept-Language field values is a complete
185 # subtag prefix of the content language tag(s), then
186 # the "q" parameter value of the largest matching
187 # prefix is used.
188 $DEBUG and print " -- No exact language match\n";
189 my $selected = undef;
190 for $al (keys %{ $accept{'language'} }) {
191 if (index($al, "$lang-") == 0) {
192 # $lang starting with $al isn't enough, or else
193 # Accept-Language: hu (Hungarian) would seem
194 # to accept a document in hup (Hupa)
195 $DEBUG and print " -- $al ISA $lang\n";
196 $selected = $al unless defined $selected;
197 $selected = $al if length($al) > length($selected);
198 }
199 else {
200 $DEBUG and print " -- $lang isn't a $al\n";
201 }
202 }
203 $q = $accept{'language'}{$selected}{'q'} if $selected;
204
205 # If none of the variant's content language tags or
206 # tag prefixes are listed in the provided
207 # Accept-Language field, then the value assigned
208 # is "ql=0.001"
209 $q = 0.001 unless defined $q;
210 }
211 $ql = $q;
212 }
213 else {
214 $ql = 0.5 if $any_lang && exists $accept{'language'};
215 }
216
217 my $q = 1;
218 my $mbx = undef;
219 # If no Accept field is given, then the value assigned is "q=1".
220 # If at least one listed media range matches the variant's media
221 # type, then the "q" parameter value assigned to the most specific
222 # of those matched is used (e.g. "text/html;version=3.0" is more
223 # specific than "text/html", which is more specific than "text/*",
224 # which in turn is more specific than "*/*"). If not media range
225 # in the provided Accept field matches the variant's media type,
226 # then the value assigned is "q=0".
227 if (exists $accept{'type'} && $ct) {
228 # First we clean up our content-type
229 $ct =~ s/\s+//g;
230 my $params = "";
231 $params = $1 if $ct =~ s/;(.*)//;
232 my($type, $subtype) = split("/", $ct, 2);
233 my %param = ();
234 for $param (split(/;/, $params)) {
235 my($pk,$pv) = split(/=/, $param, 2);
236 $param{$pk} = $pv;
237 }
238
239 my $sel_q = undef;
240 my $sel_mbx = undef;
241 my $sel_specificness = 0;
242
243 ACCEPT_TYPE:
244 for $at (keys %{ $accept{'type'} }) {
245 print "Consider $at...\n" if $DEBUG;
246 my($at_type, $at_subtype) = split("/", $at, 2);
247 # Is it a match on the type
248 next if $at_type ne '*' && $at_type ne $type;
249 next if $at_subtype ne '*' && $at_subtype ne $subtype;
250 my $specificness = 0;
251 $specificness++ if $at_type ne '*';
252 $specificness++ if $at_subtype ne '*';
253 # Let's see if content-type parameters also match
254 while (($pk, $pv) = each %param) {
255 print "Check if $pk = $pv is true\n" if $DEBUG;
256 next unless exists $accept{'type'}{$at}{$pk};
257 next ACCEPT_TYPE
258 unless $accept{'type'}{$at}{$pk} eq $pv;
259 print "yes it is!!\n" if $DEBUG;
260 $specificness++;
261 }
262 print "Hurray, type match with specificness = $specificness\n"
263 if $DEBUG;
264
265 if (!defined($sel_q) || $sel_specificness < $specificness) {
266 $sel_q = $accept{'type'}{$at}{'q'};
267 $sel_mbx = $accept{'type'}{$at}{'mbx'};
268 $sel_specificness = $specificness;
269 }
270 }
271 $q = $sel_q || 0;
272 $mbx = $sel_mbx;
273 }
274
275 my $Q;
276 if (!defined($mbx) || $mbx >= $bs) {
277 $Q = $qs * $qe * $qc * $ql * $q;
278 }
279 else {
280 $Q = 0;
281 print "Variant's size is too large ==> Q=0\n" if $DEBUG;
282 }
283
284 if ($DEBUG) {
285 $mbx = "undef" unless defined $mbx;
286 printf "Q=%.4f", $Q;
287 print " (q=$q, mbx=$mbx, qe=$qe, qc=$qc, ql=$ql, qs=$qs)\n";
288 }
289
290 push(@Q, [$id, $Q, $bs]);
291 }
292
293
294 @Q = sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @Q;
295
296 return @Q if wantarray;
297 return undef unless @Q;
298 return undef if $Q[0][1] == 0;
299 $Q[0][0];
300}
301
3021;
303
304__END__
305
306
307=head1 NAME
308
309HTTP::Negotiate - choose a variant to serve
310
311=head1 SYNOPSIS
312
313 use HTTP::Negotiate qw(choose);
314
315 # ID QS Content-Type Encoding Char-Set Lang Size
316 $variants =
317 [['var1', 1.000, 'text/html', undef, 'iso-8859-1', 'en', 3000],
318 ['var2', 0.950, 'text/plain', 'gzip', 'us-ascii', 'no', 400],
319 ['var3', 0.3, 'image/gif', undef, undef, undef, 43555],
320 ];
321
322 @preferred = choose($variants, $request_headers);
323 $the_one = choose($variants);
324
325=head1 DESCRIPTION
326
327This module provides a complete implementation of the HTTP content
328negotiation algorithm specified in F<draft-ietf-http-v11-spec-00.ps>
329chapter 12. Content negotiation allows for the selection of a
330preferred content representation based upon attributes of the
331negotiable variants and the value of the various Accept* header fields
332in the request.
333
334The variants are ordered by preference by calling the function
335choose().
336
337The first parameter is reference to an array of the variants to
338choose among.
339Each element in this array is an array with the values [$id, $qs,
340$content_type, $content_encoding, $charset, $content_language,
341$content_length] whose meanings are described
342below. The $content_encoding and $content_language can be either a
343single scalar value or an array reference if there are several values.
344
345The second optional parameter is either a HTTP::Headers or a HTTP::Request
346object which is searched for "Accept*" headers. If this
347parameter is missing, then the accept specification is initialized
348from the CGI environment variables HTTP_ACCEPT, HTTP_ACCEPT_CHARSET,
349HTTP_ACCEPT_ENCODING and HTTP_ACCEPT_LANGUAGE.
350
351In an array context, choose() returns a list of [variant
352identifier, calculated quality, size] tuples. The values are sorted by
353quality, highest quality first. If the calculated quality is the same
354for two variants, then they are sorted by size (smallest first). I<E.g.>:
355
356 (['var1', 1, 2000], ['var2', 0.3, 512], ['var3', 0.3, 1024]);
357
358Note that also zero quality variants are included in the return list
359even if these should never be served to the client.
360
361In a scalar context, it returns the identifier of the variant with the
362highest score or C<undef> if none have non-zero quality.
363
364If the $HTTP::Negotiate::DEBUG variable is set to TRUE, then a lot of
365noise is generated on STDOUT during evaluation of choose().
366
367=head1 VARIANTS
368
369A variant is described by a list of the following values. If the
370attribute does not make sense or is unknown for a variant, then use
371C<undef> instead.
372
373=over 3
374
375=item identifier
376
377This is a string that you use as the name for the variant. This
378identifier for the preferred variants returned by choose().
379
380=item qs
381
382This is a number between 0.000 and 1.000 that describes the "source
383quality". This is what F<draft-ietf-http-v11-spec-00.ps> says about this
384value:
385
386Source quality is measured by the content provider as representing the
387amount of degradation from the original source. For example, a
388picture in JPEG form would have a lower qs when translated to the XBM
389format, and much lower qs when translated to an ASCII-art
390representation. Note, however, that this is a function of the source
391- an original piece of ASCII-art may degrade in quality if it is
392captured in JPEG form. The qs values should be assigned to each
393variant by the content provider; if no qs value has been assigned, the
394default is generally "qs=1".
395
396=item content-type
397
398This is the media type of the variant. The media type does not
399include a charset attribute, but might contain other parameters.
400Examples are:
401
402 text/html
403 text/html;version=2.0
404 text/plain
405 image/gif
406 image/jpg
407
408=item content-encoding
409
410This is one or more content encodings that has been applied to the
411variant. The content encoding is generally used as a modifier to the
412content media type. The most common content encodings are:
413
414 gzip
415 compress
416
417=item content-charset
418
419This is the character set used when the variant contains text.
420The charset value should generally be C<undef> or one of these:
421
422 us-ascii
423 iso-8859-1 ... iso-8859-9
424 iso-2022-jp
425 iso-2022-jp-2
426 iso-2022-kr
427 unicode-1-1
428 unicode-1-1-utf-7
429 unicode-1-1-utf-8
430
431=item content-language
432
433This describes one or more languages that are used in the variant.
434Language is described like this in F<draft-ietf-http-v11-spec-00.ps>: A
435language is in this context a natural language spoken, written, or
436otherwise conveyed by human beings for communication of information to
437other human beings. Computer languages are explicitly excluded.
438
439The language tags are defined by RFC 3066. Examples
440are:
441
442 no Norwegian
443 en International English
444 en-US US English
445 en-cockney
446
447=item content-length
448
449This is the number of bytes used to represent the content.
450
451=back
452
453=head1 ACCEPT HEADERS
454
455The following Accept* headers can be used for describing content
456preferences in a request (This description is an edited extract from
457F<draft-ietf-http-v11-spec-00.ps>):
458
459=over 3
460
461=item Accept
462
463This header can be used to indicate a list of media ranges which are
464acceptable as a response to the request. The "*" character is used to
465group media types into ranges, with "*/*" indicating all media types
466and "type/*" indicating all subtypes of that type.
467
468The parameter q is used to indicate the quality factor, which
469represents the user's preference for that range of media types. The
470parameter mbx gives the maximum acceptable size of the response
471content. The default values are: q=1 and mbx=infinity. If no Accept
472header is present, then the client accepts all media types with q=1.
473
474For example:
475
476 Accept: audio/*;q=0.2;mbx=200000, audio/basic
477
478would mean: "I prefer audio/basic (of any size), but send me any audio
479type if it is the best available after an 80% mark-down in quality and
480its size is less than 200000 bytes"
481
482
483=item Accept-Charset
484
485Used to indicate what character sets are acceptable for the response.
486The "us-ascii" character set is assumed to be acceptable for all user
487agents. If no Accept-Charset field is given, the default is that any
488charset is acceptable. Example:
489
490 Accept-Charset: iso-8859-1, unicode-1-1
491
492
493=item Accept-Encoding
494
495Restricts the Content-Encoding values which are acceptable in the
496response. If no Accept-Encoding field is present, the server may
497assume that the client will accept any content encoding. An empty
498Accept-Encoding means that no content encoding is acceptable. Example:
499
500 Accept-Encoding: compress, gzip
501
502
503=item Accept-Language
504
505This field is similar to Accept, but restricts the set of natural
506languages that are preferred in a response. Each language may be
507given an associated quality value which represents an estimate of the
508user's comprehension of that language. For example:
509
510 Accept-Language: no, en-gb;q=0.8, de;q=0.55
511
512would mean: "I prefer Norwegian, but will accept British English (with
51380% comprehension) or German (with 55% comprehension).
514
515=back
516
517
518=head1 COPYRIGHT
519
520Copyright 1996,2001 Gisle Aas.
521
522This library is free software; you can redistribute it and/or
523modify it under the same terms as Perl itself.
524
525=head1 AUTHOR
526
527Gisle Aas <[email protected]>
528
529=cut
Note: See TracBrowser for help on using the repository browser.