Changeset 16345

Show
Ignore:
Timestamp:
10.07.2008 14:41:03 (11 years ago)
Author:
kjdon
Message:

save attachments in binary mode so they work on windows. Use filename_cat instead of hard coding forward slash in paths. added code for deleting tmp files. decode the filename header value - may be encoded like any other header value

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/branches/2.80-fixed/perllib/plugins/EMAILPlug.pm

    r12169 r16345  
    232232    $value =~ s/^\s+//; 
    233233    $value =~ s/\s+$//; 
    234     # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) 
    235     if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { 
    236         my $original_value=$value; 
    237         my $encoded=$value; 
    238         $value=""; 
    239         # we should ignore spaces between consecutive encoded-texts 
    240         $encoded =~ s@\?=\s+=\?@\?==\?@g; 
    241         while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { 
    242         my ($charset, $encoding, $data)=($2,$3,$4); 
    243         my ($decoded_data); 
    244         $value.="$1"; # any leading chars 
    245         $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends 
    246         chomp $data; 
    247         $encoding =~ tr/BQ/bq/; 
    248         if ($encoding eq "q") { # quoted printable 
    249             $data =~ s/_/\ /g;  # from rfc2047 (sec 4.2.2) 
    250             $decoded_data=qp_decode($data); 
    251             # qp_decode adds \n, which is default for body text 
    252             chomp($decoded_data); 
    253         } else { # base 64 
    254             $decoded_data=base64_decode($data); 
    255         } 
    256         $self->convert2unicode($charset, \$decoded_data); 
    257         $value .= $decoded_data; 
    258           } # end of while loop 
    259          
    260         # get any trailing characters 
    261         $self->convert2unicode($default_header_encoding, \$encoded); 
    262         $value.=$encoded; 
    263  
    264         if ($value =~ /^\s*$/) { # we couldn't extract anything... 
    265           $self->convert2unicode($default_header_encoding, 
    266                      \$original_value); 
    267           $value=$original_value; 
    268         } 
    269         } # end of if =?...?= 
    270  
    271         # In the absense of other charset information, assume the 
    272         # header is the default (usually "iso_8859_1") and convert to unicode. 
    273         else { 
    274         $self->convert2unicode($default_header_encoding, \$value); 
    275     } 
     234    # decode header values, using either =?<charset>?[BQ]?<data>?= (rfc2047) or default_header_encoding 
     235    $self->decode_header_value($default_header_encoding, \$value); 
    276236     
    277237    # Store the metadata 
     
    407367    } elsif ($mimetype ne "text/plain") { 
    408368    $self->{'doc_obj'} = $doc_obj; # in case we need to associate files... 
    409     $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$$textref); 
     369    $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$default_header_encoding,$$textref); 
    410370    } else { # mimetype eq text/plain 
    411371 
     
    507467sub text_from_mime_message { 
    508468    my $self = shift(@_); 
    509     my ($mimetype,$mimeinfo,$text)=(@_); 
     469    my ($mimetype,$mimeinfo,$default_header_encoding,$text)=(@_); 
    510470    my $outhandle=$self->{'outhandle'}; 
    511471    # Check for multiparts - $mimeinfo will be a boundary 
     
    597557        } 
    598558 
    599         $text .= $self->process_multipart_part($message_part, 
     559        $text .= $self->process_multipart_part($default_header_encoding, 
     560                               $message_part, 
    600561                               $is_first_part); 
    601562        } # foreach message part. 
     
    618579        my $msg_text; 
    619580        if ($mimetype =~ m@multipart/@) { 
    620         $msg_text = text_from_mime_message($self, $mimetype, $mimeinfo, 
     581        $msg_text = text_from_mime_message($self,  
     582                           $mimetype, $mimeinfo, 
     583                           $default_header_encoding, 
    621584                           $text); 
    622585        } else { 
     
    655618sub process_multipart_part { 
    656619    my $self = shift; 
     620    my $default_header_encoding = shift; 
    657621    my $message_part = shift; 
    658622    my $is_first_part = shift; 
    659  
     623     
    660624    my $return_text=""; 
    661625    my $part_header=$message_part; 
     
    691655    $filename=$1; 
    692656    $filename =~ s@\r?\s*$@@; # remove trailing space, if any 
     657    # decode the filename 
     658    $self->decode_header_value($default_header_encoding, \$filename); 
    693659    } 
    694660     
     
    745711            $self->text_from_mime_message($message_content_type, 
    746712                          $message_content_info, 
     713                          $default_header_encoding, 
    747714                          $message_part_body); 
    748715        } else { 
     
    770737    my $tmptext= $self->text_from_mime_message($part_content_type, 
    771738                           $part_content_info, 
     739                           $default_header_encoding, 
    772740                           $part_body); 
    773741    $return_text.=$tmptext; 
     
    792760        $encoding=$1; $encoding =~ tr/A-Z/a-z/; 
    793761        } 
    794         my $tmpdir=$ENV{'GSDLHOME'} . "/tmp"; 
     762        my $tmpdir=&util::filename_cat($ENV{'GSDLHOME'}, "tmp"); 
    795763        my $save_filename=$filename; 
    796          
    797764        # make sure we don't clobber files with same name; 
    798765        # need to keep state between .mbx files 
     
    807774        $assoc_files->{$filename}=1; 
    808775        } 
    809         open (SAVE, ">$tmpdir/$save_filename") || 
    810         warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!"; 
     776        my $tmp_filename = &util::filename_cat($tmpdir, $save_filename); 
     777        open (SAVE, ">$tmp_filename") || 
     778        warn "EMAILPlug: Can't save attachment as $tmp_filename: $!"; 
     779        binmode(SAVE); # needed on Windows 
    811780        my $part_text = $message_part; 
    812781        $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header 
     
    820789        close SAVE; 
    821790        my $doc_obj=$self->{'doc_obj'}; 
    822         $doc_obj->associate_file("$tmpdir/$save_filename", 
     791        $doc_obj->associate_file("$tmp_filename", 
    823792                     "$save_filename", 
    824793                     $part_content_type # mimetype 
     
    1004973} 
    1005974 
     975# words with non ascii characters in header values must be encoded in the  
     976# following manner =?<charset>?[BQ]?<data>?= (rfc2047) 
     977 
     978sub decode_header_value { 
     979    my $self = shift(@_); 
     980    my ($default_header_encoding, $textref) = @_; 
     981     
     982    if (!$$textref) { 
     983    # nothing to do! 
     984    return; 
     985    } 
     986    my $value = $$textref; 
     987    # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) 
     988    if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { 
     989    my $original_value=$value; 
     990    my $encoded=$value; 
     991    $value=""; 
     992    # we should ignore spaces between consecutive encoded-texts 
     993    $encoded =~ s@\?=\s+=\?@\?==\?@g; 
     994    while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { 
     995        my ($charset, $encoding, $data)=($2,$3,$4); 
     996        my ($decoded_data); 
     997        my $leading_chars = "$1"; 
     998        $self->convert2unicode($default_header_encoding, \$leading_chars); 
     999        $value.=$leading_chars;  
     1000         
     1001        $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends 
     1002        chomp $data; 
     1003        $encoding =~ tr/BQ/bq/; 
     1004        if ($encoding eq "q") { # quoted printable 
     1005        $data =~ s/_/\ /g;  # from rfc2047 (sec 4.2.2) 
     1006        $decoded_data=qp_decode($data); 
     1007        # qp_decode adds \n, which is default for body text 
     1008        chomp($decoded_data); 
     1009        } else { # base 64 
     1010        $decoded_data=base64_decode($data); 
     1011        } 
     1012        $self->convert2unicode($charset, \$decoded_data); 
     1013        $value .= $decoded_data; 
     1014    } # end of while loop 
     1015     
     1016    # get any trailing characters 
     1017    $self->convert2unicode($default_header_encoding, \$encoded); 
     1018    $value.=$encoded; 
     1019     
     1020    if ($value =~ /^\s*$/) { # we couldn't extract anything... 
     1021        $self->convert2unicode($default_header_encoding, 
     1022                   \$original_value); 
     1023        $value=$original_value; 
     1024    } 
     1025    $$textref = $value; 
     1026    } # end of if =?...?= 
     1027     
     1028    # In the absense of other charset information, assume the 
     1029    # header is the default (usually "iso_8859_1") and convert to unicode. 
     1030    else { 
     1031    $self->convert2unicode($default_header_encoding, $textref); 
     1032    }    
     1033     
     1034} 
    10061035 
    10071036