1 | package Encode::MIME::Header;
|
---|
2 | use strict;
|
---|
3 | # use warnings;
|
---|
4 | our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
|
---|
5 | use Encode qw(find_encoding encode_utf8 decode_utf8);
|
---|
6 | use MIME::Base64;
|
---|
7 | use Carp;
|
---|
8 |
|
---|
9 | my %seed =
|
---|
10 | (
|
---|
11 | decode_b => '1', # decodes 'B' encoding ?
|
---|
12 | decode_q => '1', # decodes 'Q' encoding ?
|
---|
13 | encode => 'B', # encode with 'B' or 'Q' ?
|
---|
14 | bpl => 75, # bytes per line
|
---|
15 | );
|
---|
16 |
|
---|
17 | $Encode::Encoding{'MIME-Header'} =
|
---|
18 | bless {
|
---|
19 | %seed,
|
---|
20 | Name => 'MIME-Header',
|
---|
21 | } => __PACKAGE__;
|
---|
22 |
|
---|
23 | $Encode::Encoding{'MIME-B'} =
|
---|
24 | bless {
|
---|
25 | %seed,
|
---|
26 | decode_q => 0,
|
---|
27 | Name => 'MIME-B',
|
---|
28 | } => __PACKAGE__;
|
---|
29 |
|
---|
30 | $Encode::Encoding{'MIME-Q'} =
|
---|
31 | bless {
|
---|
32 | %seed,
|
---|
33 | decode_q => 1,
|
---|
34 | encode => 'Q',
|
---|
35 | Name => 'MIME-Q',
|
---|
36 | } => __PACKAGE__;
|
---|
37 |
|
---|
38 | use base qw(Encode::Encoding);
|
---|
39 |
|
---|
40 | sub needs_lines { 1 }
|
---|
41 | sub perlio_ok{ 0 };
|
---|
42 |
|
---|
43 | sub decode($$;$){
|
---|
44 | use utf8;
|
---|
45 | my ($obj, $str, $chk) = @_;
|
---|
46 | # zap spaces between encoded words
|
---|
47 | $str =~ s/\?=\s+=\?/\?==\?/gos;
|
---|
48 | # multi-line header to single line
|
---|
49 | $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
|
---|
50 | $str =~
|
---|
51 | s{
|
---|
52 | =\? # begin encoded word
|
---|
53 | ([0-9A-Za-z\-_]+) # charset (encoding)
|
---|
54 | \?([QqBb])\? # delimiter
|
---|
55 | (.*?) # Base64-encodede contents
|
---|
56 | \?= # end encoded word
|
---|
57 | }{
|
---|
58 | if (uc($2) eq 'B'){
|
---|
59 | $obj->{decode_b} or croak qq(MIME "B" unsupported);
|
---|
60 | decode_b($1, $3);
|
---|
61 | }elsif(uc($2) eq 'Q'){
|
---|
62 | $obj->{decode_q} or croak qq(MIME "Q" unsupported);
|
---|
63 | decode_q($1, $3);
|
---|
64 | }else{
|
---|
65 | croak qq(MIME "$2" encoding is nonexistent!);
|
---|
66 | }
|
---|
67 | }egox;
|
---|
68 | $_[1] = '' if $chk;
|
---|
69 | return $str;
|
---|
70 | }
|
---|
71 |
|
---|
72 | sub decode_b{
|
---|
73 | my $enc = shift;
|
---|
74 | my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
|
---|
75 | my $db64 = decode_base64(shift);
|
---|
76 | return $d->name eq 'utf8' ?
|
---|
77 | Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
|
---|
78 | }
|
---|
79 |
|
---|
80 | sub decode_q{
|
---|
81 | my ($enc, $q) = @_;
|
---|
82 | my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
|
---|
83 | $q =~ s/_/ /go;
|
---|
84 | $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
|
---|
85 | return $d->name eq 'utf8' ?
|
---|
86 | Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ);
|
---|
87 | }
|
---|
88 |
|
---|
89 | my $especials =
|
---|
90 | join('|' =>
|
---|
91 | map {quotemeta(chr($_))}
|
---|
92 | unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
|
---|
93 |
|
---|
94 | my $re_encoded_word =
|
---|
95 | qr{
|
---|
96 | (?:
|
---|
97 | =\? # begin encoded word
|
---|
98 | (?:[0-9A-Za-z\-_]+) # charset (encoding)
|
---|
99 | \?(?:[QqBb])\? # delimiter
|
---|
100 | (?:.*?) # Base64-encodede contents
|
---|
101 | \?= # end encoded word
|
---|
102 | )
|
---|
103 | }xo;
|
---|
104 |
|
---|
105 | my $re_especials = qr{$re_encoded_word|$especials}xo;
|
---|
106 |
|
---|
107 | sub encode($$;$){
|
---|
108 | my ($obj, $str, $chk) = @_;
|
---|
109 | my @line = ();
|
---|
110 | for my $line (split /\r|\n|\r\n/o, $str){
|
---|
111 | my (@word, @subline);
|
---|
112 | for my $word (split /($re_especials)/o, $line){
|
---|
113 | if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
|
---|
114 | push @word, $obj->_encode($word);
|
---|
115 | }else{
|
---|
116 | push @word, $word;
|
---|
117 | }
|
---|
118 | }
|
---|
119 | my $subline = '';
|
---|
120 | for my $word (@word){
|
---|
121 | use bytes ();
|
---|
122 | if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
|
---|
123 | push @subline, $subline;
|
---|
124 | $subline = '';
|
---|
125 | }
|
---|
126 | $subline .= $word;
|
---|
127 | }
|
---|
128 | $subline and push @subline, $subline;
|
---|
129 | push @line, join("\n " => @subline);
|
---|
130 | }
|
---|
131 | $_[1] = '' if $chk;
|
---|
132 | return join("\n", @line);
|
---|
133 | }
|
---|
134 |
|
---|
135 | use constant HEAD => '=?UTF-8?';
|
---|
136 | use constant TAIL => '?=';
|
---|
137 | use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
|
---|
138 |
|
---|
139 | sub _encode{
|
---|
140 | my ($o, $str) = @_;
|
---|
141 | my $enc = $o->{encode};
|
---|
142 | my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
|
---|
143 | # to coerce a floating-point arithmetics, the following contains
|
---|
144 | # .0 in numbers -- dankogai
|
---|
145 | $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
|
---|
146 | my @result = ();
|
---|
147 | my $chunk = '';
|
---|
148 | while(length(my $chr = substr($str, 0, 1, ''))){
|
---|
149 | use bytes ();
|
---|
150 | if (bytes::length($chunk) + bytes::length($chr) > $llen){
|
---|
151 | push @result, SINGLE->{$enc}($chunk);
|
---|
152 | $chunk = '';
|
---|
153 | }
|
---|
154 | $chunk .= $chr;
|
---|
155 | }
|
---|
156 | $chunk and push @result, SINGLE->{$enc}($chunk);
|
---|
157 | return @result;
|
---|
158 | }
|
---|
159 |
|
---|
160 | sub _encode_b{
|
---|
161 | HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
|
---|
162 | }
|
---|
163 |
|
---|
164 | sub _encode_q{
|
---|
165 | my $chunk = shift;
|
---|
166 | $chunk =~ s{
|
---|
167 | ([^0-9A-Za-z])
|
---|
168 | }{
|
---|
169 | join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
|
---|
170 | }egox;
|
---|
171 | return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
|
---|
172 | }
|
---|
173 |
|
---|
174 | 1;
|
---|
175 | __END__
|
---|
176 |
|
---|
177 | =head1 NAME
|
---|
178 |
|
---|
179 | Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
|
---|
180 |
|
---|
181 | =head1 SYNOPSIS
|
---|
182 |
|
---|
183 | use Encode qw/encode decode/;
|
---|
184 | $utf8 = decode('MIME-Header', $header);
|
---|
185 | $header = encode('MIME-Header', $utf8);
|
---|
186 |
|
---|
187 | =head1 ABSTRACT
|
---|
188 |
|
---|
189 | This module implements RFC 2047 Mime Header Encoding. There are 3
|
---|
190 | variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
|
---|
191 | difference is described below
|
---|
192 |
|
---|
193 | decode() encode()
|
---|
194 | ----------------------------------------------
|
---|
195 | MIME-Header Both B and Q =?UTF-8?B?....?=
|
---|
196 | MIME-B B only; Q croaks =?UTF-8?B?....?=
|
---|
197 | MIME-Q Q only; B croaks =?UTF-8?Q?....?=
|
---|
198 |
|
---|
199 | =head1 DESCRIPTION
|
---|
200 |
|
---|
201 | When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
|
---|
202 | is extracted and decoded for I<X> encoding (B for Base64, Q for
|
---|
203 | Quoted-Printable). Then the decoded chunk is fed to
|
---|
204 | decode(I<encoding>). So long as I<encoding> is supported by Encode,
|
---|
205 | any source encoding is fine.
|
---|
206 |
|
---|
207 | When you encode, it just encodes UTF-8 string with I<X> encoding then
|
---|
208 | quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
|
---|
209 | encode are left as is and long lines are folded within 76 bytes per
|
---|
210 | line.
|
---|
211 |
|
---|
212 | =head1 BUGS
|
---|
213 |
|
---|
214 | It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
|
---|
215 | and =?ISO-8859-1?= but that makes the implementation too complicated.
|
---|
216 | These days major mail agents all support =?UTF-8? so I think it is
|
---|
217 | just good enough.
|
---|
218 |
|
---|
219 | Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
|
---|
220 | Makamaka. Thre are still too many MUAs especially cellular phone
|
---|
221 | handsets which does not grok UTF-8.
|
---|
222 |
|
---|
223 | =head1 SEE ALSO
|
---|
224 |
|
---|
225 | L<Encode>
|
---|
226 |
|
---|
227 | RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
|
---|
228 | locations.
|
---|
229 |
|
---|
230 | =cut
|
---|