Changeset 16341
- Timestamp:
- 2008-07-10T14:26:51+12:00 (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/EmailPlugin.pm
r16104 r16341 62 62 # * RFC 1806 - Content Dispositions (ie inline/attachment) 63 63 64 # 12/05/02 Added usage datastructure - John Thompson 64 65 65 package EmailPlugin; 66 66 … … 108 108 'args' => $arguments }; 109 109 110 # Create a new EmailPlugin object with which to parse a file.111 # Accomplished by creating a new SplitTextFile and using bless to112 # turn it into an EmailPlugin.113 114 110 sub new { 115 111 my ($class) = shift (@_); … … 123 119 124 120 $self->{'assoc_filenames'} = {}; # to save attach names so we don't clobber 121 $self->{'tmp_file_paths'} = (); # list of tmp files to delete after processing is finished 125 122 126 123 # this might not actually be true at read-time, but after processing … … 226 223 $value =~ s/^\s+//; 227 224 $value =~ s/\s+$//; 228 # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) 229 if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { 230 my $original_value=$value; 231 my $encoded=$value; 232 $value=""; 233 # we should ignore spaces between consecutive encoded-texts 234 $encoded =~ s@\?=\s+=\?@\?==\?@g; 235 while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { 236 my ($charset, $encoding, $data)=($2,$3,$4); 237 my ($decoded_data); 238 $value.="$1"; # any leading chars 239 $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends 240 chomp $data; 241 $encoding =~ tr/BQ/bq/; 242 if ($encoding eq "q") { # quoted printable 243 $data =~ s/_/\ /g; # from rfc2047 (sec 4.2.2) 244 $decoded_data=qp_decode($data); 245 # qp_decode adds \n, which is default for body text 246 chomp($decoded_data); 247 } else { # base 64 248 $decoded_data=base64_decode($data); 249 } 250 $self->convert2unicode($charset, \$decoded_data); 251 $value .= $decoded_data; 252 } # end of while loop 253 254 # get any trailing characters 255 $self->convert2unicode($default_header_encoding, \$encoded); 256 $value.=$encoded; 257 258 if ($value =~ /^\s*$/) { # we couldn't extract anything... 259 $self->convert2unicode($default_header_encoding, 260 \$original_value); 261 $value=$original_value; 262 } 263 } # end of if =?...?= 264 265 # In the absense of other charset information, assume the 266 # header is the default (usually "iso_8859_1") and convert to unicode. 267 else { 268 $self->convert2unicode($default_header_encoding, \$value); 269 } 225 # decode header values, using either =?<charset>?[BQ]?<data>?= (rfc2047) or default_header_encoding 226 $self->decode_header_value($default_header_encoding, \$value); 270 227 271 228 # Store the metadata … … 407 364 } elsif ($mimetype ne "text/plain") { 408 365 $self->{'doc_obj'} = $doc_obj; # in case we need to associate files... 409 $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$ $textref);366 $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$default_header_encoding,$$textref); 410 367 } else { # mimetype eq text/plain 411 368 … … 457 414 } 458 415 416 # delete any temp files that we have created 417 sub clean_up_after_doc_obj_processing { 418 my $self = shift(@_); 419 420 foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}}) { 421 if (-e $tmp_file_path) { 422 &util::rm($tmp_file_path); 423 } 424 } 425 426 } 459 427 460 428 # Convert a text string into HTML. … … 507 475 sub text_from_mime_message { 508 476 my $self = shift(@_); 509 my ($mimetype,$mimeinfo,$ text)=(@_);477 my ($mimetype,$mimeinfo,$default_header_encoding,$text)=(@_); 510 478 my $outhandle=$self->{'outhandle'}; 511 479 # Check for multiparts - $mimeinfo will be a boundary … … 597 565 } 598 566 599 $text .= $self->process_multipart_part($message_part, 567 $text .= $self->process_multipart_part($default_header_encoding, 568 $message_part, 600 569 $is_first_part); 601 570 } # foreach message part. … … 618 587 my $msg_text; 619 588 if ($mimetype =~ m@multipart/@) { 620 $msg_text = text_from_mime_message($self, $mimetype, $mimeinfo, 589 $msg_text = $self->text_from_mime_message($mimetype, $mimeinfo, 590 $default_header_encoding, 621 591 $text); 622 592 } else { … … 655 625 sub process_multipart_part { 656 626 my $self = shift; 627 my $default_header_encoding = shift; 657 628 my $message_part = shift; 658 629 my $is_first_part = shift; … … 691 662 $filename=$1; 692 663 $filename =~ s@\r?\s*$@@; # remove trailing space, if any 664 # decode the filename 665 $self->decode_header_value($default_header_encoding, \$filename); 666 693 667 } 694 668 … … 745 719 $self->text_from_mime_message($message_content_type, 746 720 $message_content_info, 721 $default_header_encoding, 747 722 $message_part_body); 748 723 } else { … … 770 745 my $tmptext= $self->text_from_mime_message($part_content_type, 771 746 $part_content_info, 747 $default_header_encoding, 772 748 $part_body); 773 749 $return_text.=$tmptext; … … 792 768 $encoding=$1; $encoding =~ tr/A-Z/a-z/; 793 769 } 794 my $tmpdir= $ENV{'GSDLHOME'} . "/tmp";770 my $tmpdir=&util::filename_cat($ENV{'GSDLHOME'}, "tmp"); 795 771 my $save_filename=$filename; 796 772 … … 807 783 $assoc_files->{$filename}=1; 808 784 } 809 open (SAVE, ">$tmpdir/$save_filename") || 810 warn "EmailPlugin: Can't save attachment as $tmpdir/$save_filename: $!"; 785 my $tmp_filename = &util::filename_cat($tmpdir, $save_filename); 786 open (SAVE, ">$tmp_filename") || 787 warn "EMAILPlug: Can't save attachment as $tmp_filename: $!"; 788 binmode(SAVE); # needed on Windows 811 789 my $part_text = $message_part; 812 790 $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header … … 820 798 close SAVE; 821 799 my $doc_obj=$self->{'doc_obj'}; 822 $doc_obj->associate_file("$tmp dir/$save_filename",800 $doc_obj->associate_file("$tmp_filename", 823 801 "$save_filename", 824 802 $part_content_type # mimetype 825 803 ); 826 # clean up tmp area...827 # Can't do this as it hasn't been copied/linked yet!!!828 # &util::rm("$tmpdir/$save_filename"); 804 # add this file to the list of tmp files for deleting later 805 push(@{$self->{'tmp_file_paths'}}, $tmp_filename); 806 829 807 my $outhandle=$self->{'outhandle'}; 830 808 print $outhandle "EmailPlugin: saving attachment \"$filename\"\n"; # … … 1002 980 } 1003 981 return 0; 982 } 983 984 # words with non ascii characters in header values must be encoded in the 985 # following manner =?<charset>?[BQ]?<data>?= (rfc2047) 986 987 sub decode_header_value { 988 my $self = shift(@_); 989 my ($default_header_encoding, $textref) = @_; 990 991 if (!$$textref) { 992 # nothing to do! 993 return; 994 } 995 my $value = $$textref; 996 # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) 997 if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { 998 my $original_value=$value; 999 my $encoded=$value; 1000 $value=""; 1001 # we should ignore spaces between consecutive encoded-texts 1002 $encoded =~ s@\?=\s+=\?@\?==\?@g; 1003 while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { 1004 my ($charset, $encoding, $data)=($2,$3,$4); 1005 my ($decoded_data); 1006 my $leading_chars = "$1"; 1007 $self->convert2unicode($default_header_encoding, \$leading_chars); 1008 $value.=$leading_chars; 1009 1010 $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends 1011 chomp $data; 1012 $encoding =~ tr/BQ/bq/; 1013 if ($encoding eq "q") { # quoted printable 1014 $data =~ s/_/\ /g; # from rfc2047 (sec 4.2.2) 1015 $decoded_data=qp_decode($data); 1016 # qp_decode adds \n, which is default for body text 1017 chomp($decoded_data); 1018 } else { # base 64 1019 $decoded_data=base64_decode($data); 1020 } 1021 $self->convert2unicode($charset, \$decoded_data); 1022 $value .= $decoded_data; 1023 } # end of while loop 1024 1025 # get any trailing characters 1026 $self->convert2unicode($default_header_encoding, \$encoded); 1027 $value.=$encoded; 1028 1029 if ($value =~ /^\s*$/) { # we couldn't extract anything... 1030 $self->convert2unicode($default_header_encoding, 1031 \$original_value); 1032 $value=$original_value; 1033 } 1034 $$textref = $value; 1035 } # end of if =?...?= 1036 1037 # In the absense of other charset information, assume the 1038 # header is the default (usually "iso_8859_1") and convert to unicode. 1039 else { 1040 $self->convert2unicode($default_header_encoding, $textref); 1041 } 1042 1004 1043 } 1005 1044
Note:
See TracChangeset
for help on using the changeset viewer.