Changeset 16341

Show
Ignore:
Timestamp:
10.07.2008 14:26:51 (11 years ago)
Author:
kjdon
Message:

save attachments in binary mode so they work on windows. Use filename_cat instead of hard coding forward slash in paths. added code for deleting tmp files. decode the filename header value - may be encoded like any other header value

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/EmailPlugin.pm

    r16104 r16341  
    6262#       * RFC 1806 - Content Dispositions (ie inline/attachment) 
    6363 
    64 # 12/05/02 Added usage datastructure - John Thompson 
     64 
    6565package EmailPlugin; 
    6666 
     
    108108        'args'     => $arguments }; 
    109109 
    110 # Create a new EmailPlugin object with which to parse a file. 
    111 # Accomplished by creating a new SplitTextFile and using bless to  
    112 # turn it into an EmailPlugin. 
    113  
    114110sub new { 
    115111    my ($class) = shift (@_); 
     
    123119 
    124120    $self->{'assoc_filenames'} = {}; # to save attach names so we don't clobber 
     121    $self->{'tmp_file_paths'} = (); # list of tmp files to delete after processing is finished 
    125122 
    126123    # this might not actually be true at read-time, but after processing 
     
    226223    $value =~ s/^\s+//; 
    227224    $value =~ s/\s+$//; 
    228     # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) 
    229     if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { 
    230         my $original_value=$value; 
    231         my $encoded=$value; 
    232         $value=""; 
    233         # we should ignore spaces between consecutive encoded-texts 
    234         $encoded =~ s@\?=\s+=\?@\?==\?@g; 
    235         while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { 
    236         my ($charset, $encoding, $data)=($2,$3,$4); 
    237         my ($decoded_data); 
    238         $value.="$1"; # any leading chars 
    239         $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends 
    240         chomp $data; 
    241         $encoding =~ tr/BQ/bq/; 
    242         if ($encoding eq "q") { # quoted printable 
    243             $data =~ s/_/\ /g;  # from rfc2047 (sec 4.2.2) 
    244             $decoded_data=qp_decode($data); 
    245             # qp_decode adds \n, which is default for body text 
    246             chomp($decoded_data); 
    247         } else { # base 64 
    248             $decoded_data=base64_decode($data); 
    249         } 
    250         $self->convert2unicode($charset, \$decoded_data); 
    251         $value .= $decoded_data; 
    252           } # end of while loop 
    253          
    254         # get any trailing characters 
    255         $self->convert2unicode($default_header_encoding, \$encoded); 
    256         $value.=$encoded; 
    257  
    258         if ($value =~ /^\s*$/) { # we couldn't extract anything... 
    259           $self->convert2unicode($default_header_encoding, 
    260                      \$original_value); 
    261           $value=$original_value; 
    262         } 
    263         } # end of if =?...?= 
    264  
    265         # In the absense of other charset information, assume the 
    266         # header is the default (usually "iso_8859_1") and convert to unicode. 
    267         else { 
    268         $self->convert2unicode($default_header_encoding, \$value); 
    269     } 
     225    # decode header values, using either =?<charset>?[BQ]?<data>?= (rfc2047) or default_header_encoding 
     226    $self->decode_header_value($default_header_encoding, \$value); 
    270227     
    271228    # Store the metadata 
     
    407364    } elsif ($mimetype ne "text/plain") { 
    408365    $self->{'doc_obj'} = $doc_obj; # in case we need to associate files... 
    409     $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$$textref); 
     366    $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$default_header_encoding,$$textref); 
    410367    } else { # mimetype eq text/plain 
    411368 
     
    457414} 
    458415 
     416# delete any temp files that we have created 
     417sub clean_up_after_doc_obj_processing { 
     418    my $self = shift(@_); 
     419     
     420    foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}}) { 
     421        if (-e $tmp_file_path) { 
     422            &util::rm($tmp_file_path); 
     423        } 
     424    } 
     425     
     426} 
    459427 
    460428# Convert a text string into HTML. 
     
    507475sub text_from_mime_message { 
    508476    my $self = shift(@_); 
    509     my ($mimetype,$mimeinfo,$text)=(@_); 
     477    my ($mimetype,$mimeinfo,$default_header_encoding,$text)=(@_); 
    510478    my $outhandle=$self->{'outhandle'}; 
    511479    # Check for multiparts - $mimeinfo will be a boundary 
     
    597565        } 
    598566 
    599         $text .= $self->process_multipart_part($message_part, 
     567        $text .= $self->process_multipart_part($default_header_encoding, 
     568                               $message_part, 
    600569                               $is_first_part); 
    601570        } # foreach message part. 
     
    618587        my $msg_text; 
    619588        if ($mimetype =~ m@multipart/@) { 
    620         $msg_text = text_from_mime_message($self, $mimetype, $mimeinfo, 
     589        $msg_text = $self->text_from_mime_message($mimetype, $mimeinfo, 
     590                           $default_header_encoding, 
    621591                           $text); 
    622592        } else { 
     
    655625sub process_multipart_part { 
    656626    my $self = shift; 
     627    my $default_header_encoding = shift; 
    657628    my $message_part = shift; 
    658629    my $is_first_part = shift; 
     
    691662    $filename=$1; 
    692663    $filename =~ s@\r?\s*$@@; # remove trailing space, if any 
     664    # decode the filename 
     665    $self->decode_header_value($default_header_encoding, \$filename); 
     666 
    693667    } 
    694668     
     
    745719            $self->text_from_mime_message($message_content_type, 
    746720                          $message_content_info, 
     721                          $default_header_encoding, 
    747722                          $message_part_body); 
    748723        } else { 
     
    770745    my $tmptext= $self->text_from_mime_message($part_content_type, 
    771746                           $part_content_info, 
     747                           $default_header_encoding, 
    772748                           $part_body); 
    773749    $return_text.=$tmptext; 
     
    792768        $encoding=$1; $encoding =~ tr/A-Z/a-z/; 
    793769        } 
    794         my $tmpdir=$ENV{'GSDLHOME'} . "/tmp"; 
     770        my $tmpdir=&util::filename_cat($ENV{'GSDLHOME'}, "tmp"); 
    795771        my $save_filename=$filename; 
    796772         
     
    807783        $assoc_files->{$filename}=1; 
    808784        } 
    809         open (SAVE, ">$tmpdir/$save_filename") || 
    810         warn "EmailPlugin: Can't save attachment as $tmpdir/$save_filename: $!"; 
     785        my $tmp_filename = &util::filename_cat($tmpdir, $save_filename); 
     786        open (SAVE, ">$tmp_filename") || 
     787        warn "EMAILPlug: Can't save attachment as $tmp_filename: $!"; 
     788        binmode(SAVE); # needed on Windows 
    811789        my $part_text = $message_part; 
    812790        $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header 
     
    820798        close SAVE; 
    821799        my $doc_obj=$self->{'doc_obj'}; 
    822         $doc_obj->associate_file("$tmpdir/$save_filename", 
     800        $doc_obj->associate_file("$tmp_filename", 
    823801                     "$save_filename", 
    824802                     $part_content_type # mimetype 
    825803                     ); 
    826         # clean up tmp area... 
    827         # Can't do this as it hasn't been copied/linked yet!!! 
    828 #           &util::rm("$tmpdir/$save_filename"); 
     804        # add this file to the list of tmp files for deleting later 
     805        push(@{$self->{'tmp_file_paths'}}, $tmp_filename); 
     806 
    829807        my $outhandle=$self->{'outhandle'}; 
    830808        print $outhandle "EmailPlugin: saving attachment \"$filename\"\n"; # 
     
    1002980    } 
    1003981    return 0; 
     982} 
     983 
     984# words with non ascii characters in header values must be encoded in the  
     985# following manner =?<charset>?[BQ]?<data>?= (rfc2047) 
     986 
     987sub decode_header_value { 
     988    my $self = shift(@_); 
     989    my ($default_header_encoding, $textref) = @_; 
     990     
     991    if (!$$textref) { 
     992    # nothing to do! 
     993    return; 
     994    } 
     995    my $value = $$textref; 
     996    # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) 
     997    if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { 
     998    my $original_value=$value; 
     999    my $encoded=$value; 
     1000    $value=""; 
     1001    # we should ignore spaces between consecutive encoded-texts 
     1002    $encoded =~ s@\?=\s+=\?@\?==\?@g; 
     1003    while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { 
     1004        my ($charset, $encoding, $data)=($2,$3,$4); 
     1005        my ($decoded_data); 
     1006        my $leading_chars = "$1"; 
     1007        $self->convert2unicode($default_header_encoding, \$leading_chars); 
     1008        $value.=$leading_chars;  
     1009         
     1010        $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends 
     1011        chomp $data; 
     1012        $encoding =~ tr/BQ/bq/; 
     1013        if ($encoding eq "q") { # quoted printable 
     1014        $data =~ s/_/\ /g;  # from rfc2047 (sec 4.2.2) 
     1015        $decoded_data=qp_decode($data); 
     1016        # qp_decode adds \n, which is default for body text 
     1017        chomp($decoded_data); 
     1018        } else { # base 64 
     1019        $decoded_data=base64_decode($data); 
     1020        } 
     1021        $self->convert2unicode($charset, \$decoded_data); 
     1022        $value .= $decoded_data; 
     1023    } # end of while loop 
     1024     
     1025    # get any trailing characters 
     1026    $self->convert2unicode($default_header_encoding, \$encoded); 
     1027    $value.=$encoded; 
     1028     
     1029    if ($value =~ /^\s*$/) { # we couldn't extract anything... 
     1030        $self->convert2unicode($default_header_encoding, 
     1031                   \$original_value); 
     1032        $value=$original_value; 
     1033    } 
     1034    $$textref = $value; 
     1035    } # end of if =?...?= 
     1036     
     1037    # In the absense of other charset information, assume the 
     1038    # header is the default (usually "iso_8859_1") and convert to unicode. 
     1039    else { 
     1040    $self->convert2unicode($default_header_encoding, $textref); 
     1041    }    
     1042     
    10041043} 
    10051044