1 | package HTTP::Headers::Util;
|
---|
2 |
|
---|
3 | use strict;
|
---|
4 | use vars qw($VERSION @ISA @EXPORT_OK);
|
---|
5 |
|
---|
6 | $VERSION = "6.03";
|
---|
7 |
|
---|
8 | require Exporter;
|
---|
9 | @ISA=qw(Exporter);
|
---|
10 |
|
---|
11 | @EXPORT_OK=qw(split_header_words _split_header_words join_header_words);
|
---|
12 |
|
---|
13 |
|
---|
14 |
|
---|
15 | sub split_header_words {
|
---|
16 | my @res = &_split_header_words;
|
---|
17 | for my $arr (@res) {
|
---|
18 | for (my $i = @$arr - 2; $i >= 0; $i -= 2) {
|
---|
19 | $arr->[$i] = lc($arr->[$i]);
|
---|
20 | }
|
---|
21 | }
|
---|
22 | return @res;
|
---|
23 | }
|
---|
24 |
|
---|
25 | sub _split_header_words
|
---|
26 | {
|
---|
27 | my(@val) = @_;
|
---|
28 | my @res;
|
---|
29 | for (@val) {
|
---|
30 | my @cur;
|
---|
31 | while (length) {
|
---|
32 | if (s/^\s*(=*[^\s=;,]+)//) { # 'token' or parameter 'attribute'
|
---|
33 | push(@cur, $1);
|
---|
34 | # a quoted value
|
---|
35 | if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
|
---|
36 | my $val = $1;
|
---|
37 | $val =~ s/\\(.)/$1/g;
|
---|
38 | push(@cur, $val);
|
---|
39 | # some unquoted value
|
---|
40 | }
|
---|
41 | elsif (s/^\s*=\s*([^;,\s]*)//) {
|
---|
42 | my $val = $1;
|
---|
43 | $val =~ s/\s+$//;
|
---|
44 | push(@cur, $val);
|
---|
45 | # no value, a lone token
|
---|
46 | }
|
---|
47 | else {
|
---|
48 | push(@cur, undef);
|
---|
49 | }
|
---|
50 | }
|
---|
51 | elsif (s/^\s*,//) {
|
---|
52 | push(@res, [@cur]) if @cur;
|
---|
53 | @cur = ();
|
---|
54 | }
|
---|
55 | elsif (s/^\s*;// || s/^\s+//) {
|
---|
56 | # continue
|
---|
57 | }
|
---|
58 | else {
|
---|
59 | die "This should not happen: '$_'";
|
---|
60 | }
|
---|
61 | }
|
---|
62 | push(@res, \@cur) if @cur;
|
---|
63 | }
|
---|
64 | @res;
|
---|
65 | }
|
---|
66 |
|
---|
67 |
|
---|
68 | sub join_header_words
|
---|
69 | {
|
---|
70 | @_ = ([@_]) if @_ && !ref($_[0]);
|
---|
71 | my @res;
|
---|
72 | for (@_) {
|
---|
73 | my @cur = @$_;
|
---|
74 | my @attr;
|
---|
75 | while (@cur) {
|
---|
76 | my $k = shift @cur;
|
---|
77 | my $v = shift @cur;
|
---|
78 | if (defined $v) {
|
---|
79 | if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) {
|
---|
80 | $v =~ s/([\"\\])/\\$1/g; # escape " and \
|
---|
81 | $k .= qq(="$v");
|
---|
82 | }
|
---|
83 | else {
|
---|
84 | # token
|
---|
85 | $k .= "=$v";
|
---|
86 | }
|
---|
87 | }
|
---|
88 | push(@attr, $k);
|
---|
89 | }
|
---|
90 | push(@res, join("; ", @attr)) if @attr;
|
---|
91 | }
|
---|
92 | join(", ", @res);
|
---|
93 | }
|
---|
94 |
|
---|
95 |
|
---|
96 | 1;
|
---|
97 |
|
---|
98 | __END__
|
---|
99 |
|
---|
100 | =head1 NAME
|
---|
101 |
|
---|
102 | HTTP::Headers::Util - Header value parsing utility functions
|
---|
103 |
|
---|
104 | =head1 SYNOPSIS
|
---|
105 |
|
---|
106 | use HTTP::Headers::Util qw(split_header_words);
|
---|
107 | @values = split_header_words($h->header("Content-Type"));
|
---|
108 |
|
---|
109 | =head1 DESCRIPTION
|
---|
110 |
|
---|
111 | This module provides a few functions that helps parsing and
|
---|
112 | construction of valid HTTP header values. None of the functions are
|
---|
113 | exported by default.
|
---|
114 |
|
---|
115 | The following functions are available:
|
---|
116 |
|
---|
117 | =over 4
|
---|
118 |
|
---|
119 |
|
---|
120 | =item split_header_words( @header_values )
|
---|
121 |
|
---|
122 | This function will parse the header values given as argument into a
|
---|
123 | list of anonymous arrays containing key/value pairs. The function
|
---|
124 | knows how to deal with ",", ";" and "=" as well as quoted values after
|
---|
125 | "=". A list of space separated tokens are parsed as if they were
|
---|
126 | separated by ";".
|
---|
127 |
|
---|
128 | If the @header_values passed as argument contains multiple values,
|
---|
129 | then they are treated as if they were a single value separated by
|
---|
130 | comma ",".
|
---|
131 |
|
---|
132 | This means that this function is useful for parsing header fields that
|
---|
133 | follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
|
---|
134 | the requirement for tokens).
|
---|
135 |
|
---|
136 | headers = #header
|
---|
137 | header = (token | parameter) *( [";"] (token | parameter))
|
---|
138 |
|
---|
139 | token = 1*<any CHAR except CTLs or separators>
|
---|
140 | separators = "(" | ")" | "<" | ">" | "@"
|
---|
141 | | "," | ";" | ":" | "\" | <">
|
---|
142 | | "/" | "[" | "]" | "?" | "="
|
---|
143 | | "{" | "}" | SP | HT
|
---|
144 |
|
---|
145 | quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
|
---|
146 | qdtext = <any TEXT except <">>
|
---|
147 | quoted-pair = "\" CHAR
|
---|
148 |
|
---|
149 | parameter = attribute "=" value
|
---|
150 | attribute = token
|
---|
151 | value = token | quoted-string
|
---|
152 |
|
---|
153 | Each I<header> is represented by an anonymous array of key/value
|
---|
154 | pairs. The keys will be all be forced to lower case.
|
---|
155 | The value for a simple token (not part of a parameter) is C<undef>.
|
---|
156 | Syntactically incorrect headers will not necessarily be parsed as you
|
---|
157 | would want.
|
---|
158 |
|
---|
159 | This is easier to describe with some examples:
|
---|
160 |
|
---|
161 | split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz');
|
---|
162 | split_header_words('text/html; charset="iso-8859-1"');
|
---|
163 | split_header_words('Basic realm="\\"foo\\\\bar\\""');
|
---|
164 |
|
---|
165 | will return
|
---|
166 |
|
---|
167 | [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
|
---|
168 | ['text/html' => undef, charset => 'iso-8859-1']
|
---|
169 | [basic => undef, realm => "\"foo\\bar\""]
|
---|
170 |
|
---|
171 | If you don't want the function to convert tokens and attribute keys to
|
---|
172 | lower case you can call it as C<_split_header_words> instead (with a
|
---|
173 | leading underscore).
|
---|
174 |
|
---|
175 | =item join_header_words( @arrays )
|
---|
176 |
|
---|
177 | This will do the opposite of the conversion done by split_header_words().
|
---|
178 | It takes a list of anonymous arrays as arguments (or a list of
|
---|
179 | key/value pairs) and produces a single header value. Attribute values
|
---|
180 | are quoted if needed.
|
---|
181 |
|
---|
182 | Example:
|
---|
183 |
|
---|
184 | join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
|
---|
185 | join_header_words("text/plain" => undef, charset => "iso-8859/1");
|
---|
186 |
|
---|
187 | will both return the string:
|
---|
188 |
|
---|
189 | text/plain; charset="iso-8859/1"
|
---|
190 |
|
---|
191 | =back
|
---|
192 |
|
---|
193 | =head1 COPYRIGHT
|
---|
194 |
|
---|
195 | Copyright 1997-1998, Gisle Aas
|
---|
196 |
|
---|
197 | This library is free software; you can redistribute it and/or
|
---|
198 | modify it under the same terms as Perl itself.
|
---|
199 |
|
---|