Ignore:
Timestamp:
2001-07-31T13:58:05+12:00 (23 years ago)
Author:
jrm21
Message:
  1. we escape 'and' chars in headers so greenstone doesn't try to expand it

as metadata.

  1. fixed up FromName and FromAddr if the From: field isn't in the "Name" <addr>

format.

  1. MIME boundaries are allowed spaces in them (didn't read RFC properly...)
  2. Body is now correctly interpreted if in quoted-printable or base64 and

content-type is text/* (not multipart).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/EMAILPlug.pm

    r2662 r2680  
    128128    $Headers =~ s/\r?\n\r?\n(.*)$//s;
    129129    $$textref = $1;
     130    # escape [] so it isn't re-interpreted as metadata
     131    $Headers =~ s/\[/&#91;/g; $Headers =~ s/\]/&#93;/g;
     132
    130133
    131134    # Unfold headers - see rfc822
     
    166169    # Extract the name and e-mail address from the From metadata
    167170    $frommeta = $raw{"From"};
    168     $frommeta =~ m/(.*)<(.*)>/;
    169     my $fromnamemeta=$1;
    170     my $fromaddrmeta=$2;
     171    my $fromnamemeta;
     172    my $fromaddrmeta;
     173
     174    $frommeta =~ s/\s*$//;  # Remove trailing space, if any
     175
     176    if ($frommeta =~ m/(.+)\s*<(.+)>/) {
     177    $fromnamemeta=$1;
     178    $fromaddrmeta=$2;
     179    } elsif ($frommeta =~ m/(.+@.+)\s+\((.*)\)/) {
     180    $fromnamemeta=$2;
     181    $fromaddrmeta=$1;
     182    }
    171183    if (!defined($fromaddrmeta)) {
    172184    $fromaddrmeta=$frommeta;
    173185    }
     186    $fromaddrmeta=~s/<//; $fromaddrmeta=~s/>//;
    174187    $doc_obj->add_utf8_metadata ($cursection, "FromAddr", $fromaddrmeta);
    175188
    176189    if (defined($fromnamemeta)) {
    177190    $fromnamemeta =~ s/\"//g;
    178     $fromnamemeta =~ s/(.*) /$1/;  # Remove trailing space
    179191    }
    180192    else {
     
    220232    my $mimeinfo="";
    221233    # Do MIME and encoding stuff
    222     if ($Headers =~ /^content\-type:\s*([\w\/\-]+)\s*\;?\s*([^\s]+)\s*$/mi)
     234    if ($Headers =~ /^content\-type:\s*([\w\/\-]+)\s*\;?\s*(.+?)\s*$/mi)
    223235    {
    224236        $mimetype=$1;
     
    227239    }
    228240
     241    my $transfer_encoding="7bit";
     242    if ($Headers =~ /^content-transfer-encoding:\s*([^\s]+)\s*$/mi) {
     243    $transfer_encoding=$1;
     244    }
    229245    if ($mimetype ne "text/plain") {
    230     $$textref=text_from_mime_message($mimetype,$mimeinfo,$$textref);
    231     } # end of not text/plain
     246    $$textref=text_from_mime_message($mimetype,$mimeinfo,$$textref,
     247                     $outhandle);
     248    } elsif ($transfer_encoding =~ /quoted\-printable/) {
     249    $$textref=qp_decode($$textref);
     250    } elsif ($transfer_encoding =~ /base64/) {
     251    $$textref=base64_decode($$textref);
     252    }
    232253   
    233254
     
    294315# the textref we are given DOES NOT include the header.
    295316sub text_from_mime_message {
    296     my ($mimetype,$mimeinfo,$text)=(@_);
     317    my ($mimetype,$mimeinfo,$text,$outhandle)=(@_);
    297318
    298319    # Check for multiparts - $mimeinfo will be a boundary
    299320    if ($mimetype =~ /multipart/) {
    300321    $boundary="";
    301     if ($mimeinfo =~ /boundary="?([^\s]+?)"?\s*$/im) {
     322    if ($mimeinfo =~ /boundary=\"?([^"]+)\"?\s*$/im) {
    302323        $boundary=$1;
    303324    }
     
    305326    # message ends with "--$boundary--"
    306327    # RFC says boundary is <70 chars, [A-Za-z'()+_,-./:=?], so escape any
    307     # that perl might want to interpolate.
     328    # that perl might want to interpolate. Also allows spaces...
    308329
    309330    $boundary=~s/\\/\\\\/g;
     
    313334    shift @message_parts;
    314335    my $last=pop @message_parts;
     336    # if our boundaries are a bit dodgy and we only found 1 part...
     337    if (!defined($last)) {$last="";}
    315338    # make sure it is only -- and whitespace
    316339    if ($last !~ /^\-\-\s*$/ms) {
     
    358381        foreach my $message_part (@message_parts) {
    359382        my $part_header=$message_part;
    360         $part_header=~s/\r?\n\r?\n(.*)$//sg;
    361         my $part_body=$1;
     383        my $part_body;
     384        if ($part_header=~s/\r?\n\r?\n(.*)$//sg) {
     385            $part_body=$1;
     386        } else {
     387            # no header... use defaults
     388            $part_body=$message_part;
     389            $part_header="Content-type: text/plain; charset=us-ascii";
     390        }
    362391        $part_header =~ s/\r?\n[\t\ ]+/ /gs; #unfold
    363392        my $part_content_type="";
     
    367396            $part_content_type="message/rfc822";
    368397        }
    369         if ($part_header =~ m@^content\-type:\s*([\w+/\-]+)\s*\;?\s*([^\s]+)@mi) {
     398        if ($part_header =~ m@^content\-type:\s*([\w+/\-]+)\s*\;?\s*([^\s]*)@mi) {
    370399            $part_content_type=$1; $part_content_type =~ tr/A-Z/a-z/;
    371400            $part_content_info=$2;
     
    423452                text_from_mime_message($message_content_type,
    424453                               $message_content_info,
    425                                $message_part_body);
     454                               $message_part_body,
     455                               $outhandle);
    426456            } else {
    427457                $message_part_body=text_from_part($part_body);
     
    443473            $tmptext=text_from_mime_message($part_content_type,
    444474                            $part_content_info,
    445                             $part_body);
     475                            $part_body,
     476                            $outhandle);
    446477            $text.=$tmptext;
    447478        } elsif ($text eq "") {
Note: See TracChangeset for help on using the changeset viewer.