Context Navigation

← Previous Changeset
Next Changeset →

Changeset 16345

Timestamp:

2008-07-10T14:41:03+12:00 (16 years ago)

Author:

kjdon

Message:

Save attachments in binary mode so they work on Windows. Use filename_cat instead of hard-coding forward slashes in paths. Added code for deleting tmp files. Decode the filename header value, since it may be encoded like any other header value.

File:

: 1 edited

gsdl/branches/2.80-fixed/perllib/plugins/EMAILPlug.pm (modified) (13 diffs)

Legend:

: Unmodified
: Added
: Removed

gsdl/branches/2.80-fixed/perllib/plugins/EMAILPlug.pm

-              r12169
+              r16345
     $value =~ s/^\s+//;
     $value =~ s/\s+$//;
+    # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047)
+    if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) {
+        my $original_value=$value;
+        my $encoded=$value;
+        $value="";
+        # we should ignore spaces between consecutive encoded-texts
+        $encoded =~ s@\?=\s+=\?@\?==\?@g;
+        while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) {
+        my ($charset, $encoding, $data)=($2,$3,$4);
+        my ($decoded_data);
+        $value.="$1"; # any leading chars
+        $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends
+        chomp $data;
+        $encoding =~ tr/BQ/bq/;
+        if ($encoding eq "q") { # quoted printable
+            $data =~ s/_/\ /g;  # from rfc2047 (sec 4.2.2)
+            $decoded_data=qp_decode($data);
+            # qp_decode adds \n, which is default for body text
+            chomp($decoded_data);
+        } else { # base 64
+            $decoded_data=base64_decode($data);
+        }
+        $self->convert2unicode($charset, \$decoded_data);
+        $value .= $decoded_data;
+          } # end of while loop
+        # get any trailing characters
+        $self->convert2unicode($default_header_encoding, \$encoded);
+        $value.=$encoded;
+        if ($value =~ /^\s*$/) { # we couldn't extract anything...
+          $self->convert2unicode($default_header_encoding,
+                     \$original_value);
+          $value=$original_value;
+        }
+        } # end of if =?...?=
+        # In the absence of other charset information, assume the
+        # header is the default (usually "iso_8859_1") and convert to unicode.
+        else {
+        $self->convert2unicode($default_header_encoding, \$value);
+    }
+    # decode header values, using either =?<charset>?[BQ]?<data>?= (rfc2047) or default_header_encoding
+    $self->decode_header_value($default_header_encoding, \$value);
     # Store the metadata
 …
     } elsif ($mimetype ne "text/plain") {
     $self->{'doc_obj'} = $doc_obj; # in case we need to associate files...
     $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$$textref);
+    $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$default_header_encoding,$$textref);
     } else { # mimetype eq text/plain
 …
 sub text_from_mime_message {
     my $self = shift(@_);
     my ($mimetype,$mimeinfo,$text)=(@_);
+    my ($mimetype,$mimeinfo,$default_header_encoding,$text)=(@_);
     my $outhandle=$self->{'outhandle'};
     # Check for multiparts - $mimeinfo will be a boundary
 …
+        }
+        $text .= $self->process_multipart_part($message_part,
+        $text .= $self->process_multipart_part($default_header_encoding,
+                               $message_part,
                                $is_first_part);
         } # foreach message part.
 …
         my $msg_text;
         if ($mimetype =~ m@multipart/@) {
+        $msg_text = text_from_mime_message($self, $mimetype, $mimeinfo,
+        $msg_text = text_from_mime_message($self,
+                           $mimetype, $mimeinfo,
+                           $default_header_encoding,
                            $text);
         } else {
 …
 sub process_multipart_part {
     my $self = shift;
+    my $default_header_encoding = shift;
     my $message_part = shift;
     my $is_first_part = shift;
     my $return_text="";
     my $part_header=$message_part;
 …
     $filename=$1;
     $filename =~ s@\r?\s*$@@; # remove trailing space, if any
+    # decode the filename
+    $self->decode_header_value($default_header_encoding, \$filename);
+    }
 …
             $self->text_from_mime_message($message_content_type,
                           $message_content_info,
+                          $default_header_encoding,
                           $message_part_body);
         } else {
 …
     my $tmptext= $self->text_from_mime_message($part_content_type,
                            $part_content_info,
+                           $default_header_encoding,
                            $part_body);
     $return_text.=$tmptext;
 …
         $encoding=$1; $encoding =~ tr/A-Z/a-z/;
+        }
         my $tmpdir=$ENV{'GSDLHOME'} . "/tmp";
+        my $tmpdir=&util::filename_cat($ENV{'GSDLHOME'}, "tmp");
         my $save_filename=$filename;
         # make sure we don't clobber files with same name;
         # need to keep state between .mbx files
 …
         $assoc_files->{$filename}=1;
+        }
+        open (SAVE, ">$tmpdir/$save_filename") ||
+        warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!";
+        my $tmp_filename = &util::filename_cat($tmpdir, $save_filename);
+        open (SAVE, ">$tmp_filename") ||
+        warn "EMAILPlug: Can't save attachment as $tmp_filename: $!";
+        binmode(SAVE); # needed on Windows
         my $part_text = $message_part;
         $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header
 …
         close SAVE;
         my $doc_obj=$self->{'doc_obj'};
         $doc_obj->associate_file("$tmpdir/$save_filename",
+        $doc_obj->associate_file("$tmp_filename",
                      "$save_filename",
                      $part_content_type # mimetype
 …
+}
+# Decodes one mail header value and stores the result back through $textref. Words with non-ASCII characters in header values must be encoded in the
+# following manner =?<charset>?[BQ]?<data>?= (rfc2047); text outside such encoded words is converted using $default_header_encoding.
+sub decode_header_value {
+    my $self = shift(@_);
+    my ($default_header_encoding, $textref) = @_; # $textref is a reference to the header value string
+    if (!$$textref) { # true for any false value: undef, "" or "0"
+    # nothing to do! (the referenced value is left untouched)
+    return;
+    }
+    my $value = $$textref; # work on a copy; written back once decoding is done
+    # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047); the encoding letter may be upper or lower case
+    if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) {
+    my $original_value=$value; # kept for the fallback used when decoding yields only whitespace
+    my $encoded=$value; # working copy; each encoded word (and the text before it) is removed as it is decoded
+    $value="";
+    # we should ignore whitespace between consecutive encoded-texts (collapses "?= =?" into "?==?")
+    $encoded =~ s@\?=\s+=\?@\?==\?@g;
+    while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=//i) { # $1 = text before the encoded word, $2 = charset, $3 = encoding, $4 = data
+        my ($charset, $encoding, $data)=($2,$3,$4);
+        my ($decoded_data);
+        my $leading_chars = "$1"; # copy $1 now, before the substitutions below reset the capture variables
+        $self->convert2unicode($default_header_encoding, \$leading_chars); # text outside encoded words is assumed to use the default encoding
+        $value.=$leading_chars;
+        $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends
+        chomp $data; # NOTE(review): looks redundant after the whitespace strip above - confirm
+        $encoding =~ tr/BQ/bq/; # normalise the encoding letter to lower case
+        if ($encoding eq "q") { # quoted printable
+        $data =~ s/_/\ /g;  # "_" stands for a space in Q encoding, from rfc2047 (sec 4.2.2)
+        $decoded_data=qp_decode($data);
+        # qp_decode adds \n, which is default for body text, so remove it again for a header value
+        chomp($decoded_data);
+        } else { # base 64
+        $decoded_data=base64_decode($data);
+        }
+        $self->convert2unicode($charset, \$decoded_data); # convert from the charset named in the encoded word
+        $value .= $decoded_data;
+    } # end of while loop
+    # get any trailing characters (whatever is left once all encoded words have been removed)
+    $self->convert2unicode($default_header_encoding, \$encoded);
+    $value.=$encoded;
+    if ($value =~ /^\s*$/) { # we couldn't extract anything... fall back to the undecoded header text
+        $self->convert2unicode($default_header_encoding,
+                   \$original_value);
+        $value=$original_value;
+    }
+    $$textref = $value; # hand the decoded value back to the caller through the reference
+    } # end of if =?...?=
+    # In the absence of other charset information, assume the
+    # header is the default (usually "iso_8859_1") and convert to unicode.
+    else {
+    $self->convert2unicode($default_header_encoding, $textref); # presumably converts the caller's string in place via the reference - TODO confirm
+    }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 16345

Legend:

gsdl/branches/2.80-fixed/perllib/plugins/EMAILPlug.pm

Download in other formats: