Changeset 9971


Ignore:
Timestamp:
2005-05-26T13:38:19+12:00 (19 years ago)
Author:
jrm21
Message:

text_from_part now takes an optional parameter with the part's headers.
Tidied in a few places.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/EMAILPlug.pm

    r9823 r9971  
    148148# This plugin splits the mbox mail files at lines starting with From<sp>
    149149# It is supposed to be "\n\nFrom ", but this isn't always used.
     150# add \d{4} so that the line ends in a year (in case the text has an
     151# unescaped "From " at the start of a line).
    150152sub get_default_split_exp {
    151     return q^\nFrom .*\n^;
     153    return q^\nFrom .*\d{4}\n^;
    152154   
    153155}
     
    410412
    411413    if ($mimetype eq "text/html") {
    412     $$textref= $self->text_from_part("$Headers\n$$textref");
     414    $$textref= $self->text_from_part($$textref, $Headers);
    413415    } elsif ($mimetype ne "text/plain") {
    414416    $self->{'doc_obj'} = $doc_obj; # in case we need to associate files...
     
    619621    {
    620622        $mimetype=$1;
     623        $mimeinfo=$2;
    621624        $mimetype =~ tr/[A-Z]/[a-z]/;
    622         $mimeinfo=$2;
    623         #if ($mimeinfo =~ /charset=\"([^\"]+)\"/) {
    624         #   $charset = $1;
    625         #}
     625
    626626        my $msg_text;
    627627        if ($mimetype =~ m@multipart/@) {
     
    629629                           $text);
    630630        } else {
    631         $msg_text=$self->text_from_part($text);
     631        $msg_text=$self->text_from_part($text,$msg_header);
    632632        }
    633633
     
    753753                          $message_part_body);
    754754        } else {
    755         $message_part_body= $self->text_from_part($part_body);
     755        $message_part_body=$self->text_from_part($part_body,
     756                            $message_part_headers);
    756757        $rfc822_formatted_body=text_into_html($message_part_body);
    757758        }
     
    863864# Process a MIME part. Return "" if we can't decode it.
    864865# should only be called for parts with type "text/*" ?
     866# Either pass the entire mime part (including the part's header),
     867# or pass the mime part's text and optionally the part's header.
    865868sub text_from_part {
    866869    my $self = shift;
    867870    my $text = shift || '';
    868     my $part_header = $text;
    869 
    870     # check for empty part header (leading blank line)
    871     if ($text =~ /^\s*\r?\n/) {
    872     $part_header="Content-type: text/plain; charset=us-ascii";
    873     } else {
    874     $part_header =~ s/\r?\n\r?\n(.*)$//s;
    875     $text=$1; if (!defined($text)) {$text="";}
    876     }
    877     $part_header =~ s/\r?\n[\t ]+/ /gs; #unfold
    878     $part_header =~ /content\-type:\s*([\w\.\-\/]+).*?charset=\"?([^\;\"\s]+)\"?/is;
    879     my $type=$1;
    880     my $charset=$2;
    881     if (!defined($type)) {$type="";}
    882     if (!defined($charset)) {$charset="ascii";}
     871    my $part_header = shift;
     872
     873    my $type="text/plain"; # default, overridden from part header
     874    my $charset="ascii"; # default, overridden from part header
     875
     876    if (! $part_header) { # no header argument was given. check the body
     877    $part_header = $text;
     878    # check for empty part header (leading blank line)
     879    if ($text =~ /^\s*\r?\n/) {
     880        $part_header="Content-type: text/plain; charset=us-ascii";
     881    } else {
     882        $part_header =~ s/\r?\n\r?\n(.*)$//s;
     883        $text=$1; if (!defined($text)) {$text="";}
     884    }
     885    $part_header =~ s/\r?\n[\t ]+/ /gs; #unfold
     886    }
     887
     888    if ($part_header =~
     889    /content\-type:\s*([\w\.\-\/]+).*?charset=\"?([^\;\"\s]+)\"?/is) {
     890    $type=$1;
     891    $charset=$2;
     892    }
    883893    my $encoding="";
    884894    if ($part_header =~ /^content\-transfer\-encoding:\s*([^\s]+)/mis) {
Note: See TracChangeset for help on using the changeset viewer.