Changeset 16345


Ignore:
Timestamp:
07/10/08 14:41:03 (13 years ago)
Author:
kjdon
Message:

save attachments in binary mode so they work on windows. Use filename_cat instead of hard coding forward slash in paths. added code for deleting tmp files. decode the filename header value - may be encoded like any other header value

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/branches/2.80-fixed/perllib/plugins/EMAILPlug.pm

    r12169 r16345  
    232232    $value =~ s/^\s+//;
    233233    $value =~ s/\s+$//;
    234     # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047)
    235     if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) {
    236         my $original_value=$value;
    237         my $encoded=$value;
    238         $value="";
    239         # we should ignore spaces between consecutive encoded-texts
    240         $encoded =~ s@\?=\s+=\?@\?==\?@g;
    241         while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) {
    242         my ($charset, $encoding, $data)=($2,$3,$4);
    243         my ($decoded_data);
    244         $value.="$1"; # any leading chars
    245         $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends
    246         chomp $data;
    247         $encoding =~ tr/BQ/bq/;
    248         if ($encoding eq "q") { # quoted printable
    249             $data =~ s/_/\ /g;  # from rfc2047 (sec 4.2.2)
    250             $decoded_data=qp_decode($data);
    251             # qp_decode adds \n, which is default for body text
    252             chomp($decoded_data);
    253         } else { # base 64
    254             $decoded_data=base64_decode($data);
    255         }
    256         $self->convert2unicode($charset, \$decoded_data);
    257         $value .= $decoded_data;
    258           } # end of while loop
    259        
    260         # get any trailing characters
    261         $self->convert2unicode($default_header_encoding, \$encoded);
    262         $value.=$encoded;
    263 
    264         if ($value =~ /^\s*$/) { # we couldn't extract anything...
    265           $self->convert2unicode($default_header_encoding,
    266                      \$original_value);
    267           $value=$original_value;
    268         }
    269         } # end of if =?...?=
    270 
    271         # In the absense of other charset information, assume the
    272         # header is the default (usually "iso_8859_1") and convert to unicode.
    273         else {
    274         $self->convert2unicode($default_header_encoding, \$value);
    275     }
     234    # decode header values, using either =?<charset>?[BQ]?<data>?= (rfc2047) or default_header_encoding
     235    $self->decode_header_value($default_header_encoding, \$value);
    276236   
    277237    # Store the metadata
     
    407367    } elsif ($mimetype ne "text/plain") {
    408368    $self->{'doc_obj'} = $doc_obj; # in case we need to associate files...
    409     $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$$textref);
     369    $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$default_header_encoding,$$textref);
    410370    } else { # mimetype eq text/plain
    411371
     
    507467sub text_from_mime_message {
    508468    my $self = shift(@_);
    509     my ($mimetype,$mimeinfo,$text)=(@_);
     469    my ($mimetype,$mimeinfo,$default_header_encoding,$text)=(@_);
    510470    my $outhandle=$self->{'outhandle'};
    511471    # Check for multiparts - $mimeinfo will be a boundary
     
    597557        }
    598558
    599         $text .= $self->process_multipart_part($message_part,
     559        $text .= $self->process_multipart_part($default_header_encoding,
     560                               $message_part,
    600561                               $is_first_part);
    601562        } # foreach message part.
     
    618579        my $msg_text;
    619580        if ($mimetype =~ m@multipart/@) {
    620         $msg_text = text_from_mime_message($self, $mimetype, $mimeinfo,
     581        $msg_text = text_from_mime_message($self,
     582                           $mimetype, $mimeinfo,
     583                           $default_header_encoding,
    621584                           $text);
    622585        } else {
     
    655618sub process_multipart_part {
    656619    my $self = shift;
     620    my $default_header_encoding = shift;
    657621    my $message_part = shift;
    658622    my $is_first_part = shift;
    659 
     623   
    660624    my $return_text="";
    661625    my $part_header=$message_part;
     
    691655    $filename=$1;
    692656    $filename =~ s@\r?\s*$@@; # remove trailing space, if any
     657    # decode the filename
     658    $self->decode_header_value($default_header_encoding, \$filename);
    693659    }
    694660   
     
    745711            $self->text_from_mime_message($message_content_type,
    746712                          $message_content_info,
     713                          $default_header_encoding,
    747714                          $message_part_body);
    748715        } else {
     
    770737    my $tmptext= $self->text_from_mime_message($part_content_type,
    771738                           $part_content_info,
     739                           $default_header_encoding,
    772740                           $part_body);
    773741    $return_text.=$tmptext;
     
    792760        $encoding=$1; $encoding =~ tr/A-Z/a-z/;
    793761        }
    794         my $tmpdir=$ENV{'GSDLHOME'} . "/tmp";
     762        my $tmpdir=&util::filename_cat($ENV{'GSDLHOME'}, "tmp");
    795763        my $save_filename=$filename;
    796        
    797764        # make sure we don't clobber files with same name;
    798765        # need to keep state between .mbx files
     
    807774        $assoc_files->{$filename}=1;
    808775        }
    809         open (SAVE, ">$tmpdir/$save_filename") ||
    810         warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!";
     776        my $tmp_filename = &util::filename_cat($tmpdir, $save_filename);
     777        open (SAVE, ">$tmp_filename") ||
     778        warn "EMAILPlug: Can't save attachment as $tmp_filename: $!";
     779        binmode(SAVE); # needed on Windows
    811780        my $part_text = $message_part;
    812781        $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header
     
    820789        close SAVE;
    821790        my $doc_obj=$self->{'doc_obj'};
    822         $doc_obj->associate_file("$tmpdir/$save_filename",
     791        $doc_obj->associate_file("$tmp_filename",
    823792                     "$save_filename",
    824793                     $part_content_type # mimetype
     
    1004973}
    1005974
     975# words with non ascii characters in header values must be encoded in the
     976# following manner =?<charset>?[BQ]?<data>?= (rfc2047)
     977
     978sub decode_header_value {
     979    my $self = shift(@_);
     980    my ($default_header_encoding, $textref) = @_;
     981   
     982    if (!$$textref) {
     983    # nothing to do!
     984    return;
     985    }
     986    my $value = $$textref;
     987    # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047)
     988    if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) {
     989    my $original_value=$value;
     990    my $encoded=$value;
     991    $value="";
     992    # we should ignore spaces between consecutive encoded-texts
     993    $encoded =~ s@\?=\s+=\?@\?==\?@g;
     994    while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) {
     995        my ($charset, $encoding, $data)=($2,$3,$4);
     996        my ($decoded_data);
     997        my $leading_chars = "$1";
     998        $self->convert2unicode($default_header_encoding, \$leading_chars);
     999        $value.=$leading_chars;
     1000       
     1001        $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends
     1002        chomp $data;
     1003        $encoding =~ tr/BQ/bq/;
     1004        if ($encoding eq "q") { # quoted printable
     1005        $data =~ s/_/\ /g;  # from rfc2047 (sec 4.2.2)
     1006        $decoded_data=qp_decode($data);
     1007        # qp_decode adds \n, which is default for body text
     1008        chomp($decoded_data);
     1009        } else { # base 64
     1010        $decoded_data=base64_decode($data);
     1011        }
     1012        $self->convert2unicode($charset, \$decoded_data);
     1013        $value .= $decoded_data;
     1014    } # end of while loop
     1015   
     1016    # get any trailing characters
     1017    $self->convert2unicode($default_header_encoding, \$encoded);
     1018    $value.=$encoded;
     1019   
     1020    if ($value =~ /^\s*$/) { # we couldn't extract anything...
     1021        $self->convert2unicode($default_header_encoding,
     1022                   \$original_value);
     1023        $value=$original_value;
     1024    }
     1025    $$textref = $value;
     1026    } # end of if =?...?=
     1027   
     1028    # In the absense of other charset information, assume the
     1029    # header is the default (usually "iso_8859_1") and convert to unicode.
     1030    else {
     1031    $self->convert2unicode($default_header_encoding, $textref);
     1032    }   
     1033   
     1034}
    10061035
    10071036
Note: See TracChangeset for help on using the changeset viewer.