Changeset 16341


Ignore:
Timestamp:
2008-07-10T14:26:51+12:00 (16 years ago)
Author:
kjdon
Message:

save attachments in binary mode so they work on windows. Use filename_cat instead of hard coding forward slash in paths. added code for deleting tmp files. decode the filename header value - may be encoded like any other header value

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/EmailPlugin.pm

    r16104 r16341  
    6262#       * RFC 1806 - Content Dispositions (ie inline/attachment)
    6363
    64 # 12/05/02 Added usage datastructure - John Thompson
     64
    6565package EmailPlugin;
    6666
     
    108108        'args'     => $arguments };
    109109
    110 # Create a new EmailPlugin object with which to parse a file.
    111 # Accomplished by creating a new SplitTextFile and using bless to
    112 # turn it into an EmailPlugin.
    113 
    114110sub new {
    115111    my ($class) = shift (@_);
     
    123119
    124120    $self->{'assoc_filenames'} = {}; # to save attach names so we don't clobber
     121    $self->{'tmp_file_paths'} = (); # list of tmp files to delete after processing is finished
    125122
    126123    # this might not actually be true at read-time, but after processing
     
    226223    $value =~ s/^\s+//;
    227224    $value =~ s/\s+$//;
    228     # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047)
    229     if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) {
    230         my $original_value=$value;
    231         my $encoded=$value;
    232         $value="";
    233         # we should ignore spaces between consecutive encoded-texts
    234         $encoded =~ s@\?=\s+=\?@\?==\?@g;
    235         while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) {
    236         my ($charset, $encoding, $data)=($2,$3,$4);
    237         my ($decoded_data);
    238         $value.="$1"; # any leading chars
    239         $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends
    240         chomp $data;
    241         $encoding =~ tr/BQ/bq/;
    242         if ($encoding eq "q") { # quoted printable
    243             $data =~ s/_/\ /g;  # from rfc2047 (sec 4.2.2)
    244             $decoded_data=qp_decode($data);
    245             # qp_decode adds \n, which is default for body text
    246             chomp($decoded_data);
    247         } else { # base 64
    248             $decoded_data=base64_decode($data);
    249         }
    250         $self->convert2unicode($charset, \$decoded_data);
    251         $value .= $decoded_data;
    252           } # end of while loop
    253        
    254         # get any trailing characters
    255         $self->convert2unicode($default_header_encoding, \$encoded);
    256         $value.=$encoded;
    257 
    258         if ($value =~ /^\s*$/) { # we couldn't extract anything...
    259           $self->convert2unicode($default_header_encoding,
    260                      \$original_value);
    261           $value=$original_value;
    262         }
    263         } # end of if =?...?=
    264 
    265         # In the absense of other charset information, assume the
    266         # header is the default (usually "iso_8859_1") and convert to unicode.
    267         else {
    268         $self->convert2unicode($default_header_encoding, \$value);
    269     }
     225    # decode header values, using either =?<charset>?[BQ]?<data>?= (rfc2047) or default_header_encoding
     226    $self->decode_header_value($default_header_encoding, \$value);
    270227   
    271228    # Store the metadata
     
    407364    } elsif ($mimetype ne "text/plain") {
    408365    $self->{'doc_obj'} = $doc_obj; # in case we need to associate files...
    409     $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$$textref);
     366    $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$default_header_encoding,$$textref);
    410367    } else { # mimetype eq text/plain
    411368
     
    457414}
    458415
     416# delete any temp files that we have created
     417sub clean_up_after_doc_obj_processing {
     418    my $self = shift(@_);
     419   
     420    foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}}) {
     421        if (-e $tmp_file_path) {
     422            &util::rm($tmp_file_path);
     423        }
     424    }
     425   
     426}
    459427
    460428# Convert a text string into HTML.
     
    507475sub text_from_mime_message {
    508476    my $self = shift(@_);
    509     my ($mimetype,$mimeinfo,$text)=(@_);
     477    my ($mimetype,$mimeinfo,$default_header_encoding,$text)=(@_);
    510478    my $outhandle=$self->{'outhandle'};
    511479    # Check for multiparts - $mimeinfo will be a boundary
     
    597565        }
    598566
    599         $text .= $self->process_multipart_part($message_part,
     567        $text .= $self->process_multipart_part($default_header_encoding,
     568                               $message_part,
    600569                               $is_first_part);
    601570        } # foreach message part.
     
    618587        my $msg_text;
    619588        if ($mimetype =~ m@multipart/@) {
    620         $msg_text = text_from_mime_message($self, $mimetype, $mimeinfo,
     589        $msg_text = $self->text_from_mime_message($mimetype, $mimeinfo,
     590                           $default_header_encoding,
    621591                           $text);
    622592        } else {
     
    655625sub process_multipart_part {
    656626    my $self = shift;
     627    my $default_header_encoding = shift;
    657628    my $message_part = shift;
    658629    my $is_first_part = shift;
     
    691662    $filename=$1;
    692663    $filename =~ s@\r?\s*$@@; # remove trailing space, if any
     664    # decode the filename
     665    $self->decode_header_value($default_header_encoding, \$filename);
     666
    693667    }
    694668   
     
    745719            $self->text_from_mime_message($message_content_type,
    746720                          $message_content_info,
     721                          $default_header_encoding,
    747722                          $message_part_body);
    748723        } else {
     
    770745    my $tmptext= $self->text_from_mime_message($part_content_type,
    771746                           $part_content_info,
     747                           $default_header_encoding,
    772748                           $part_body);
    773749    $return_text.=$tmptext;
     
    792768        $encoding=$1; $encoding =~ tr/A-Z/a-z/;
    793769        }
    794         my $tmpdir=$ENV{'GSDLHOME'} . "/tmp";
     770        my $tmpdir=&util::filename_cat($ENV{'GSDLHOME'}, "tmp");
    795771        my $save_filename=$filename;
    796772       
     
    807783        $assoc_files->{$filename}=1;
    808784        }
    809         open (SAVE, ">$tmpdir/$save_filename") ||
    810         warn "EmailPlugin: Can't save attachment as $tmpdir/$save_filename: $!";
     785        my $tmp_filename = &util::filename_cat($tmpdir, $save_filename);
     786        open (SAVE, ">$tmp_filename") ||
     787        warn "EMAILPlug: Can't save attachment as $tmp_filename: $!";
     788        binmode(SAVE); # needed on Windows
    811789        my $part_text = $message_part;
    812790        $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header
     
    820798        close SAVE;
    821799        my $doc_obj=$self->{'doc_obj'};
    822         $doc_obj->associate_file("$tmpdir/$save_filename",
     800        $doc_obj->associate_file("$tmp_filename",
    823801                     "$save_filename",
    824802                     $part_content_type # mimetype
    825803                     );
    826         # clean up tmp area...
    827         # Can't do this as it hasn't been copied/linked yet!!!
    828 #           &util::rm("$tmpdir/$save_filename");
     804        # add this file to the list of tmp files for deleting later
     805        push(@{$self->{'tmp_file_paths'}}, $tmp_filename);
     806
    829807        my $outhandle=$self->{'outhandle'};
    830808        print $outhandle "EmailPlugin: saving attachment \"$filename\"\n"; #
     
    1002980    }
    1003981    return 0;
     982}
     983
     984# words with non ascii characters in header values must be encoded in the
     985# following manner =?<charset>?[BQ]?<data>?= (rfc2047)
     986
     987sub decode_header_value {
     988    my $self = shift(@_);
     989    my ($default_header_encoding, $textref) = @_;
     990   
     991    if (!$$textref) {
     992    # nothing to do!
     993    return;
     994    }
     995    my $value = $$textref;
     996    # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047)
     997    if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) {
     998    my $original_value=$value;
     999    my $encoded=$value;
     1000    $value="";
     1001    # we should ignore spaces between consecutive encoded-texts
     1002    $encoded =~ s@\?=\s+=\?@\?==\?@g;
     1003    while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) {
     1004        my ($charset, $encoding, $data)=($2,$3,$4);
     1005        my ($decoded_data);
     1006        my $leading_chars = "$1";
     1007        $self->convert2unicode($default_header_encoding, \$leading_chars);
     1008        $value.=$leading_chars;
     1009       
     1010        $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends
     1011        chomp $data;
     1012        $encoding =~ tr/BQ/bq/;
     1013        if ($encoding eq "q") { # quoted printable
     1014        $data =~ s/_/\ /g;  # from rfc2047 (sec 4.2.2)
     1015        $decoded_data=qp_decode($data);
     1016        # qp_decode adds \n, which is default for body text
     1017        chomp($decoded_data);
     1018        } else { # base 64
     1019        $decoded_data=base64_decode($data);
     1020        }
     1021        $self->convert2unicode($charset, \$decoded_data);
     1022        $value .= $decoded_data;
     1023    } # end of while loop
     1024   
     1025    # get any trailing characters
     1026    $self->convert2unicode($default_header_encoding, \$encoded);
     1027    $value.=$encoded;
     1028   
     1029    if ($value =~ /^\s*$/) { # we couldn't extract anything...
     1030        $self->convert2unicode($default_header_encoding,
     1031                   \$original_value);
     1032        $value=$original_value;
     1033    }
     1034    $$textref = $value;
     1035    } # end of if =?...?=
     1036   
     1037    # In the absense of other charset information, assume the
     1038    # header is the default (usually "iso_8859_1") and convert to unicode.
     1039    else {
     1040    $self->convert2unicode($default_header_encoding, $textref);
     1041    }   
     1042   
    10041043}
    10051044
Note: See TracChangeset for help on using the changeset viewer.