Changeset 16345
- Timestamp:
- 2008-07-10T14:41:03+12:00 (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/branches/2.80-fixed/perllib/plugins/EMAILPlug.pm
r12169 r16345 232 232 $value =~ s/^\s+//; 233 233 $value =~ s/\s+$//; 234 # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) 235 if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { 236 my $original_value=$value; 237 my $encoded=$value; 238 $value=""; 239 # we should ignore spaces between consecutive encoded-texts 240 $encoded =~ s@\?=\s+=\?@\?==\?@g; 241 while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { 242 my ($charset, $encoding, $data)=($2,$3,$4); 243 my ($decoded_data); 244 $value.="$1"; # any leading chars 245 $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends 246 chomp $data; 247 $encoding =~ tr/BQ/bq/; 248 if ($encoding eq "q") { # quoted printable 249 $data =~ s/_/\ /g; # from rfc2047 (sec 4.2.2) 250 $decoded_data=qp_decode($data); 251 # qp_decode adds \n, which is default for body text 252 chomp($decoded_data); 253 } else { # base 64 254 $decoded_data=base64_decode($data); 255 } 256 $self->convert2unicode($charset, \$decoded_data); 257 $value .= $decoded_data; 258 } # end of while loop 259 260 # get any trailing characters 261 $self->convert2unicode($default_header_encoding, \$encoded); 262 $value.=$encoded; 263 264 if ($value =~ /^\s*$/) { # we couldn't extract anything... 265 $self->convert2unicode($default_header_encoding, 266 \$original_value); 267 $value=$original_value; 268 } 269 } # end of if =?...?= 270 271 # In the absense of other charset information, assume the 272 # header is the default (usually "iso_8859_1") and convert to unicode. 273 else { 274 $self->convert2unicode($default_header_encoding, \$value); 275 } 234 # decode header values, using either =?<charset>?[BQ]?<data>?= (rfc2047) or default_header_encoding 235 $self->decode_header_value($default_header_encoding, \$value); 276 236 277 237 # Store the metadata … … 407 367 } elsif ($mimetype ne "text/plain") { 408 368 $self->{'doc_obj'} = $doc_obj; # in case we need to associate files... 409 $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$ $textref);369 $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$default_header_encoding,$$textref); 410 370 } else { # mimetype eq text/plain 411 371 … … 507 467 sub text_from_mime_message { 508 468 my $self = shift(@_); 509 my ($mimetype,$mimeinfo,$ text)=(@_);469 my ($mimetype,$mimeinfo,$default_header_encoding,$text)=(@_); 510 470 my $outhandle=$self->{'outhandle'}; 511 471 # Check for multiparts - $mimeinfo will be a boundary … … 597 557 } 598 558 599 $text .= $self->process_multipart_part($message_part, 559 $text .= $self->process_multipart_part($default_header_encoding, 560 $message_part, 600 561 $is_first_part); 601 562 } # foreach message part. … … 618 579 my $msg_text; 619 580 if ($mimetype =~ m@multipart/@) { 620 $msg_text = text_from_mime_message($self, $mimetype, $mimeinfo, 581 $msg_text = text_from_mime_message($self, 582 $mimetype, $mimeinfo, 583 $default_header_encoding, 621 584 $text); 622 585 } else { … … 655 618 sub process_multipart_part { 656 619 my $self = shift; 620 my $default_header_encoding = shift; 657 621 my $message_part = shift; 658 622 my $is_first_part = shift; 659 623 660 624 my $return_text=""; 661 625 my $part_header=$message_part; … … 691 655 $filename=$1; 692 656 $filename =~ s@\r?\s*$@@; # remove trailing space, if any 657 # decode the filename 658 $self->decode_header_value($default_header_encoding, \$filename); 693 659 } 694 660 … … 745 711 $self->text_from_mime_message($message_content_type, 746 712 $message_content_info, 713 $default_header_encoding, 747 714 $message_part_body); 748 715 } else { … … 770 737 my $tmptext= $self->text_from_mime_message($part_content_type, 771 738 $part_content_info, 739 $default_header_encoding, 772 740 $part_body); 773 741 $return_text.=$tmptext; … … 792 760 $encoding=$1; $encoding =~ tr/A-Z/a-z/; 793 761 } 794 my $tmpdir= $ENV{'GSDLHOME'} . "/tmp";762 my $tmpdir=&util::filename_cat($ENV{'GSDLHOME'}, "tmp"); 795 763 my $save_filename=$filename; 796 797 764 # make sure we don't clobber files with same name; 798 765 # need to keep state between .mbx files … … 807 774 $assoc_files->{$filename}=1; 808 775 } 809 open (SAVE, ">$tmpdir/$save_filename") || 810 warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!"; 776 my $tmp_filename = &util::filename_cat($tmpdir, $save_filename); 777 open (SAVE, ">$tmp_filename") || 778 warn "EMAILPlug: Can't save attachment as $tmp_filename: $!"; 779 binmode(SAVE); # needed on Windows 811 780 my $part_text = $message_part; 812 781 $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header … … 820 789 close SAVE; 821 790 my $doc_obj=$self->{'doc_obj'}; 822 $doc_obj->associate_file("$tmp dir/$save_filename",791 $doc_obj->associate_file("$tmp_filename", 823 792 "$save_filename", 824 793 $part_content_type # mimetype … … 1004 973 } 1005 974 975 # words with non ascii characters in header values must be encoded in the 976 # following manner =?<charset>?[BQ]?<data>?= (rfc2047) 977 978 sub decode_header_value { 979 my $self = shift(@_); 980 my ($default_header_encoding, $textref) = @_; 981 982 if (!$$textref) { 983 # nothing to do! 984 return; 985 } 986 my $value = $$textref; 987 # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) 988 if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { 989 my $original_value=$value; 990 my $encoded=$value; 991 $value=""; 992 # we should ignore spaces between consecutive encoded-texts 993 $encoded =~ s@\?=\s+=\?@\?==\?@g; 994 while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { 995 my ($charset, $encoding, $data)=($2,$3,$4); 996 my ($decoded_data); 997 my $leading_chars = "$1"; 998 $self->convert2unicode($default_header_encoding, \$leading_chars); 999 $value.=$leading_chars; 1000 1001 $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends 1002 chomp $data; 1003 $encoding =~ tr/BQ/bq/; 1004 if ($encoding eq "q") { # quoted printable 1005 $data =~ s/_/\ /g; # from rfc2047 (sec 4.2.2) 1006 $decoded_data=qp_decode($data); 1007 # qp_decode adds \n, which is default for body text 1008 chomp($decoded_data); 1009 } else { # base 64 1010 $decoded_data=base64_decode($data); 1011 } 1012 $self->convert2unicode($charset, \$decoded_data); 1013 $value .= $decoded_data; 1014 } # end of while loop 1015 1016 # get any trailing characters 1017 $self->convert2unicode($default_header_encoding, \$encoded); 1018 $value.=$encoded; 1019 1020 if ($value =~ /^\s*$/) { # we couldn't extract anything... 1021 $self->convert2unicode($default_header_encoding, 1022 \$original_value); 1023 $value=$original_value; 1024 } 1025 $$textref = $value; 1026 } # end of if =?...?= 1027 1028 # In the absense of other charset information, assume the 1029 # header is the default (usually "iso_8859_1") and convert to unicode. 1030 else { 1031 $self->convert2unicode($default_header_encoding, $textref); 1032 } 1033 1034 } 1006 1035 1007 1036
Note:
See TracChangeset
for help on using the changeset viewer.