| 234 | | # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) |
|---|
| 235 | | if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { |
|---|
| 236 | | my $original_value=$value; |
|---|
| 237 | | my $encoded=$value; |
|---|
| 238 | | $value=""; |
|---|
| 239 | | # we should ignore spaces between consecutive encoded-texts |
|---|
| 240 | | $encoded =~ s@\?=\s+=\?@\?==\?@g; |
|---|
| 241 | | while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { |
|---|
| 242 | | my ($charset, $encoding, $data)=($2,$3,$4); |
|---|
| 243 | | my ($decoded_data); |
|---|
| 244 | | $value.="$1"; # any leading chars |
|---|
| 245 | | $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends |
|---|
| 246 | | chomp $data; |
|---|
| 247 | | $encoding =~ tr/BQ/bq/; |
|---|
| 248 | | if ($encoding eq "q") { # quoted printable |
|---|
| 249 | | $data =~ s/_/\ /g; # from rfc2047 (sec 4.2.2) |
|---|
| 250 | | $decoded_data=qp_decode($data); |
|---|
| 251 | | # qp_decode adds \n, which is default for body text |
|---|
| 252 | | chomp($decoded_data); |
|---|
| 253 | | } else { # base 64 |
|---|
| 254 | | $decoded_data=base64_decode($data); |
|---|
| 255 | | } |
|---|
| 256 | | $self->convert2unicode($charset, \$decoded_data); |
|---|
| 257 | | $value .= $decoded_data; |
|---|
| 258 | | } # end of while loop |
|---|
| 259 | | |
|---|
| 260 | | # get any trailing characters |
|---|
| 261 | | $self->convert2unicode($default_header_encoding, \$encoded); |
|---|
| 262 | | $value.=$encoded; |
|---|
| 263 | | |
|---|
| 264 | | if ($value =~ /^\s*$/) { # we couldn't extract anything... |
|---|
| 265 | | $self->convert2unicode($default_header_encoding, |
|---|
| 266 | | \$original_value); |
|---|
| 267 | | $value=$original_value; |
|---|
| 268 | | } |
|---|
| 269 | | } # end of if =?...?= |
|---|
| 270 | | |
|---|
| 271 | | # In the absence of other charset information, assume the |
|---|
| 272 | | # header is the default (usually "iso_8859_1") and convert to unicode. |
|---|
| 273 | | else { |
|---|
| 274 | | $self->convert2unicode($default_header_encoding, \$value); |
|---|
| 275 | | } |
|---|
| | 234 | # decode header values, using either =?<charset>?[BQ]?<data>?= (rfc2047) or default_header_encoding |
|---|
| | 235 | $self->decode_header_value($default_header_encoding, \$value); |
|---|
# Decode a mail header value in place.
#
# Words with non-ascii characters in header values must be encoded in the
# following manner: =?<charset>?[BQ]?<data>?= (rfc2047).  Every such
# encoded-word found in the value is decoded (base64 or quoted-printable)
# and handed to convert2unicode together with its declared charset.  All
# text outside encoded-words is handed to convert2unicode together with
# $default_header_encoding.
#
# Arguments:
#   $default_header_encoding - charset assumed for text that is not inside
#                              an encoded-word (usually "iso_8859_1")
#   $textref                 - reference to the header value; the referenced
#                              string is updated in place
#
# Returns nothing.

sub decode_header_value {
    my $self = shift(@_);
    my ($default_header_encoding, $textref) = @_;

    # Nothing to do for a missing or empty value.  Test definedness/length
    # rather than truth so that a header consisting of "0" is still processed.
    return unless (defined($$textref) && length($$textref));

    my $value = $$textref;

    # In the absence of other charset information, assume the header is
    # the default (usually "iso_8859_1") and convert to unicode.
    if ($value !~ /=\?.*\?[BbQq]\?.*\?=/) {
        $self->convert2unicode($default_header_encoding, $textref);
        return;
    }

    # decode headers stored using =?<charset>?[BQ]?<data>?= (rfc2047)
    my $original_value = $value;
    my $encoded = $value;
    $value = "";

    # we should ignore spaces between consecutive encoded-texts
    $encoded =~ s/\?=\s+=\?/?==?/g;

    # Peel encoded-words off the front of $encoded one at a time.  The /s
    # modifier lets the leading-text capture span newlines; without it, text
    # preceding a newline would be skipped here and wrongly emitted after
    # the decoded words as "trailing characters".
    while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//is) {
        my ($leading_chars, $charset, $encoding, $data) = ($1, $2, $3, $4);
        my $decoded_data;

        # plain text in front of the encoded-word uses the default charset
        $self->convert2unicode($default_header_encoding, \$leading_chars);
        $value .= $leading_chars;

        # rfc2231 (sec 5) allows "charset*language"; keep only the charset
        $charset =~ s/\*.*\z//s;

        # strip whitespace (including any newline) from both ends
        $data =~ s/^\s*//;
        $data =~ s/\s*$//;

        if (lc($encoding) eq "q") { # quoted printable
            $data =~ s/_/ /g; # from rfc2047 (sec 4.2.2)
            $decoded_data = qp_decode($data);
            # qp_decode adds \n, which is default for body text
            chomp($decoded_data);
        } else { # base 64
            $decoded_data = base64_decode($data);
        }
        $self->convert2unicode($charset, \$decoded_data);
        $value .= $decoded_data;
    } # end of while loop

    # get any trailing characters
    $self->convert2unicode($default_header_encoding, \$encoded);
    $value .= $encoded;

    if ($value =~ /^\s*$/) { # we couldn't extract anything...
        # fall back to the untouched header, treated as the default charset
        $self->convert2unicode($default_header_encoding, \$original_value);
        $value = $original_value;
    }

    $$textref = $value;
    return;
}
|---|