Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: for-distributions/trunk/bin/windows/perl/lib/Encode/MIME/Header.pm@ 14489

Last change on this file since 14489 was 14489, checked in by oranfry, 17 years ago
upgrading to perl 5.8
File size: 5.9 KB

Line
1	package Encode::MIME::Header;
2	use strict;
3	# use warnings;
4	our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
5	use Encode qw(find_encoding encode_utf8 decode_utf8);
6	use MIME::Base64;
7	use Carp;
8
9	my %seed =
10	(
11	decode_b => '1', # decodes 'B' encoding ?
12	decode_q => '1', # decodes 'Q' encoding ?
13	encode => 'B', # encode with 'B' or 'Q' ?
14	bpl => 75, # bytes per line
15	);
16
17	$Encode::Encoding{'MIME-Header'} =
18	bless {
19	%seed,
20	Name => 'MIME-Header',
21	} => __PACKAGE__;
22
23	$Encode::Encoding{'MIME-B'} =
24	bless {
25	%seed,
26	decode_q => 0,
27	Name => 'MIME-B',
28	} => __PACKAGE__;
29
30	$Encode::Encoding{'MIME-Q'} =
31	bless {
32	%seed,
33	decode_q => 1,
34	encode => 'Q',
35	Name => 'MIME-Q',
36	} => __PACKAGE__;
37
38	use base qw(Encode::Encoding);
39
40	sub needs_lines { 1 }
41	sub perlio_ok{ 0 };
42
43	sub decode($$;$){
44	use utf8;
45	my ($obj, $str, $chk) = @_;
46	# zap spaces between encoded words
47	$str =~ s/\?=\s+=\?/\?==\?/gos;
48	# multi-line header to single line
49	$str =~ s/(:?\r\|\n\|\r\n)[ \t]//gos;
50	$str =~
51	s{
52	=\? # begin encoded word
53	([0-9A-Za-z\-_]+) # charset (encoding)
54	\?([QqBb])\? # delimiter
55	(.*?) # Base64-encodede contents
56	\?= # end encoded word
57	}{
58	if (uc($2) eq 'B'){
59	$obj->{decode_b} or croak qq(MIME "B" unsupported);
60	decode_b($1, $3);
61	}elsif(uc($2) eq 'Q'){
62	$obj->{decode_q} or croak qq(MIME "Q" unsupported);
63	decode_q($1, $3);
64	}else{
65	croak qq(MIME "$2" encoding is nonexistent!);
66	}
67	}egox;
68	$_[1] = '' if $chk;
69	return $str;
70	}
71
72	sub decode_b{
73	my $enc = shift;
74	my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
75	my $db64 = decode_base64(shift);
76	return $d->name eq 'utf8' ?
77	Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
78	}
79
80	sub decode_q{
81	my ($enc, $q) = @_;
82	my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
83	$q =~ s/_/ /go;
84	$q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
85	return $d->name eq 'utf8' ?
86	Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ);
87	}
88
89	my $especials =
90	join('\|' =>
91	map {quotemeta(chr($_))}
92	unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
93
94	my $re_encoded_word =
95	qr{
96	(?:
97	=\? # begin encoded word
98	(?:[0-9A-Za-z\-_]+) # charset (encoding)
99	\?(?:[QqBb])\? # delimiter
100	(?:.*?) # Base64-encodede contents
101	\?= # end encoded word
102	)
103	}xo;
104
105	my $re_especials = qr{$re_encoded_word\|$especials}xo;
106
107	sub encode($$;$){
108	my ($obj, $str, $chk) = @_;
109	my @line = ();
110	for my $line (split /\r\|\n\|\r\n/o, $str){
111	my (@word, @subline);
112	for my $word (split /($re_especials)/o, $line){
113	if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
114	push @word, $obj->_encode($word);
115	}else{
116	push @word, $word;
117	}
118	}
119	my $subline = '';
120	for my $word (@word){
121	use bytes ();
122	if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
123	push @subline, $subline;
124	$subline = '';
125	}
126	$subline .= $word;
127	}
128	$subline and push @subline, $subline;
129	push @line, join("\n " => @subline);
130	}
131	$_[1] = '' if $chk;
132	return join("\n", @line);
133	}
134
135	use constant HEAD => '=?UTF-8?';
136	use constant TAIL => '?=';
137	use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
138
139	sub _encode{
140	my ($o, $str) = @_;
141	my $enc = $o->{encode};
142	my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
143	# to coerce a floating-point arithmetics, the following contains
144	# .0 in numbers -- dankogai
145	$llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
146	my @result = ();
147	my $chunk = '';
148	while(length(my $chr = substr($str, 0, 1, ''))){
149	use bytes ();
150	if (bytes::length($chunk) + bytes::length($chr) > $llen){
151	push @result, SINGLE->{$enc}($chunk);
152	$chunk = '';
153	}
154	$chunk .= $chr;
155	}
156	$chunk and push @result, SINGLE->{$enc}($chunk);
157	return @result;
158	}
159
160	sub _encode_b{
161	HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
162	}
163
164	sub _encode_q{
165	my $chunk = shift;
166	$chunk =~ s{
167	([^0-9A-Za-z])
168	}{
169	join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
170	}egox;
171	return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
172	}
173
174	1;
175	__END__
176
177	=head1 NAME
178
179	Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
180
181	=head1 SYNOPSIS
182
183	use Encode qw/encode decode/;
184	$utf8 = decode('MIME-Header', $header);
185	$header = encode('MIME-Header', $utf8);
186
187	=head1 ABSTRACT
188
189	This module implements RFC 2047 Mime Header Encoding. There are 3
190	variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
191	difference is described below
192
193	decode() encode()
194	----------------------------------------------
195	MIME-Header Both B and Q =?UTF-8?B?....?=
196	MIME-B B only; Q croaks =?UTF-8?B?....?=
197	MIME-Q Q only; B croaks =?UTF-8?Q?....?=
198
199	=head1 DESCRIPTION
200
201	When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
202	is extracted and decoded for I<X> encoding (B for Base64, Q for
203	Quoted-Printable). Then the decoded chunk is fed to
204	decode(I<encoding>). So long as I<encoding> is supported by Encode,
205	any source encoding is fine.
206
207	When you encode, it just encodes UTF-8 string with I<X> encoding then
208	quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
209	encode are left as is and long lines are folded within 76 bytes per
210	line.
211
212	=head1 BUGS
213
214	It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
215	and =?ISO-8859-1?= but that makes the implementation too complicated.
216	These days major mail agents all support =?UTF-8? so I think it is
217	just good enough.
218
219	Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
220	Makamaka. Thre are still too many MUAs especially cellular phone
221	handsets which does not grok UTF-8.
222
223	=head1 SEE ALSO
224
225	L<Encode>
226
227	RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
228	locations.
229
230	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: