Changeset 6062


Ignore:
Timestamp:
2003-12-01T14:56:55+13:00 (20 years ago)
Author:
jrm21
Message:

"use strict" and picked up quite a few typos.

escape _ into \_ otherwise any greenstone macros in the email get expanded.

moved some stuff into a separate function to ease clarity

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/EMAILPlug.pm

    r5924 r6062  
    8080}
    8181
     82use strict;
     83no strict "refs"; # so we can use a variable as a filehandle for print $out
    8284#  sub print_usage {
    8385#      print STDERR "\n  usage: plugin EMAILPlug [options]\n\n";
     
    199201
    200202    # Get a default encoding for the header - RFC says should be ascii...
    201     my $default_heading_encoding="iso_8859_1";
     203    my $default_header_encoding="iso_8859_1";
    202204
    203205    # We don't know what character set is the user's default...
     
    268270          $self->convert2unicode($default_header_encoding,
    269271                     \$original_value);
    270           $value=original_value;
     272          $value=$original_value;
    271273        }
    272274        } # end of if =?...?=
    273275
    274276        # In the absense of other charset information, assume the
    275         # header is the default (usually "iso_8859_1") and convert it to unicode.
     277        # header is the default (usually "iso_8859_1") and convert to unicode.
    276278        else {
    277279        $self->convert2unicode($default_header_encoding, \$value);
     
    279281   
    280282    # Store the metadata
     283    $value =~ s@_@\\_@g; # protect against GS macro language
    281284    $raw{$name} = $value;
    282285    }
    283286
    284287    # Extract the name and e-mail address from the From metadata
    285     $frommeta = $raw{"From"};
     288    my $frommeta = $raw{"From"};
    286289    my $fromnamemeta;
    287290    my $fromaddrmeta;
     
    302305    # minor attempt to prevent spam-bots from harvesting addresses...
    303306    $fromaddrmeta=~s/@/@/;
     307
    304308    $doc_obj->add_utf8_metadata ($cursection, "FromAddr", $fromaddrmeta);
    305309
     
    382386    $transfer_encoding=$1;
    383387    }
     388
    384389    if ($mimetype eq "text/html") {
    385390    $$textref= $self->text_from_part("$Headers\n$$textref");
    386391    } elsif ($mimetype ne "text/plain") {
    387392    $self->{'doc_obj'} = $doc_obj; # in case we need to associate files...
    388     $$textref= $self->text_from_mime_message($mimetype,$mimeinfo,$$textref,
    389                          $outhandle);
    390     } elsif ($transfer_encoding =~ /quoted\-printable/) {
    391     $$textref=qp_decode($$textref);
     393    $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$$textref);
     394    } else { # mimetype eq text/plain
     395    $$textref = &text_into_html($$textref);
     396    $$textref =~ s@_@\\_@g; # protect against GS macro language
     397
     398    if ($transfer_encoding =~ /quoted\-printable/) {
     399        $$textref=qp_decode($$textref);
     400    } elsif ($transfer_encoding =~ /base64/) {
     401        $$textref=base64_decode($$textref);
     402    }
    392403    $self->convert2unicode($charset, $textref);
    393     } elsif ($transfer_encoding =~ /base64/) {
    394     $$textref=base64_decode($$textref);
    395     $self->convert2unicode($charset, $textref);
    396     } else {
    397     $self->convert2unicode($charset, $textref);
    398     }
     404    }
     405
    399406   
    400407
     
    407414    $Headers =~ s/\[/[/g; $Headers =~ s/\]/]/g;
    408415    $self->convert2unicode($charset, \$Headers);
     416
     417    $Headers =~ s@_@\\_@g; # protect against GS macro language
     418
    409419    $doc_obj->add_utf8_metadata ($cursection, "Headers", $Headers);
    410420
     
    420430
    421431    # Add text to document object
    422     if ($mimetype eq "text/plain") {
    423     $$textref = &text_into_html($$textref);
    424     }
    425432    $$textref = "No message" unless ($$textref =~ /\w/);
     433
    426434    $doc_obj->add_utf8_text($cursection, $$textref);
    427435
     
    460468    # assume URI doesn't finish with a '.'
    461469    $text =~ s@((http|ftp|https)://[\w\-]+(\.[\w\-]+)*/?((&amp;|\.)?[\w\?\=\-_/~]+)*(\#[\w\.\-_]*)?)@<a href=\"$1\">$1<\/a>@g;
    462 
    463     $text =~ s@_@\\_@g; # protect against greenstone macros...
    464470
    465471
     
    481487sub text_from_mime_message {
    482488    my $self = shift(@_);
    483     my ($mimetype,$mimeinfo,$text,$outhandle)=(@_);
    484 
     489    my ($mimetype,$mimeinfo,$text)=(@_);
     490    my $outhandle=$self->{'outhandle'};
    485491    # Check for multiparts - $mimeinfo will be a boundary
    486492    if ($mimetype =~ /multipart/) {
    487     $boundary="";
     493    my $boundary="";
    488494    if ($mimeinfo =~ m@boundary=(\"[^\"]+\"|[^\s]+)\s*$@im) {
    489495        $boundary=$1;
     
    557563        pop @message_parts;
    558564        }
     565        my $is_first_part=1;
    559566        foreach my $message_part (@message_parts) {
    560         my $part_header=$message_part;
    561         my $part_body;
    562         if ($message_part=~ /^\s*\n/) {
    563             # no header... use defaults
    564             $part_body=$message_part;
    565             $part_header="Content-type: text/plain; charset=us-ascii";
    566         } elsif ($part_header=~s/\r?\n\r?\n(.*)$//s) {
    567             $part_body=$1;
    568         } else {
    569             # something's gone wrong...
    570             $part_header="";
    571             $part_body=$message_part;
    572         }
    573        
    574         $part_header =~ s/\r?\n[\t\ ]+/ /gs; #unfold
    575         my $part_content_type="";
    576         my $part_content_info="";
     567        if ($is_first_part && $text ne "") {$is_first_part=0;}
     568
    577569        if ($mimetype eq "multipart/digest") {
    578             # default type - RTFRFC!!
    579             $part_content_type="message/rfc822";
    580         }
    581         if ($part_header =~ m@^content\-type:\s*([\w\.\-/]+)\s*(\;.*)?$@mi) {
    582             $part_content_type=$1; $part_content_type =~ tr/A-Z/a-z/;
    583             $part_content_info=$2;
    584             if (!defined($part_content_info)) {
    585             $part_content_info="";
    586             } else {
    587             $part_content_info =~ s/^\;\s*//;
    588             $part_content_info =~ s/\s*$//;
     570            # default type - RTFRFC!! Set if not already set
     571            $message_part =~ m@^(.*)\n\r?\n@s;
     572            my $part_header=$1;
     573            if ($part_header !~ m@^content-type@mi) {
     574            $message_part="Content-type: message/rfc822\n"
     575                . $message_part; # prepend default type
    589576            }
    590577        }
    591         my $filename="";
    592         if ($part_header =~ m@name=\"?([\w\.\-\\/]+)\"?@mis) {
    593             $filename=$1;
    594         }
    595 
    596         # disposition - either inline or attachment.
    597         # NOT CURRENTLY USED - we display all text types instead...
    598         # $part_header =~ /^content\-disposition:\s*([\w+])/mis;
    599 
    600         # add <<attachment>> to each part except the first...
    601         if ($text ne "") {
    602             $text.="\n<p><hr><strong>&lt;&lt;attachment&gt;&gt;</>";
    603             # add part info header
    604             $text.="<br>Type: $part_content_type<br>\n";
    605             if ($filename ne "") {
    606             $text.="Filename: $filename\n";
    607             }
    608             $text.="</strong></p>\n";
    609         }
    610         if ($part_content_type =~ m@text/@)
    611         {
    612             my $part_text= $self->text_from_part($message_part);
    613             if ($part_content_type !~ m@text/(ht|x)ml@) {
    614             $part_text = text_into_html($part_text);
    615             }
    616             if ($part_text eq "") {
    617             $part_text = '&lt;&lt;empty message&gt;&gt;';
    618             }
    619             $text .= $part_text;
    620         } elsif ($part_content_type =~ m@message/rfc822@) {
    621             # This is a forwarded message
    622             my $message_part_headers=$part_body;
    623             $message_part_headers=~s/\r?\n\r?\n(.*)$//s;
    624             my $message_part_body=$1;
    625             $message_part_headers =~ s/\r?\n[\t\ ]+/ /gs; #unfold
    626 
    627             my $rfc822_formatted_body=""; # put result in here
    628             if ($message_part_headers =~
    629             /^content\-type:\s*([\w\.\-\/]+)\s*(\;.*)?$/ims)
    630             {
    631             # The message header uses MIME flags
    632             my $message_content_type=$1;
    633             my $message_content_info=$2;
    634             if (!defined($message_content_info)) {
    635                 $message_content_info="";
    636             } else {
    637                 $message_content_info =~ s/^\;\s*//;
    638                 $message_content_info =~ s/\s*$//;
    639             }
    640             $message_content_type =~ tr/A-Z/a-z/;
    641             if ($message_content_type =~ /multipart/) {
    642                 $rfc822_formatted_body=
    643                   $self->text_from_mime_message($message_content_type,
    644                                 $message_content_info,
    645                                 $message_part_body,
    646                                 $outhandle);
    647             } else {
    648                 $message_part_body= $self->text_from_part($part_body);
    649                 $rfc822_formatted_body=text_into_html($message_part_body);
    650             }
    651             } else {
    652             # message doesn't use MIME flags
    653             $rfc822_formatted_body=text_into_html($message_part_body);
    654             }
    655             # Add the returned text to the output
    656             # don't put all the headers...
    657 #           $message_part_headers =~ s/^(X\-.*|received|message\-id|return\-path):.*\n//img;
    658             my $brief_headers=get_brief_headers($message_part_headers);
    659             $text.=text_into_html($brief_headers);
    660             $text.="<p>\n";
    661             $text.=$rfc822_formatted_body;
    662             $text.="</p>\n";
    663             # end of message/rfc822
    664         } elsif ($part_content_type =~ /multipart/) {
    665             # recurse again
    666 
    667             $tmptext= $self->text_from_mime_message($part_content_type,
    668                                 $part_content_info,
    669                                 $part_body,
    670                                 $outhandle);
    671             $text.=$tmptext;
    672         } else {
    673             # this part isn't text/* or another message...
    674             if ($text eq "") {
    675             # this is the first part of a multipart, or only part!
    676             $text="\n<p><hr><strong>&lt;&lt;attachment&gt;&gt;</>";
    677             # add part info header
    678             $text.="<br>Type: $part_content_type<br>\n";
    679             $text.="Filename: $filename</strong></p>\n";
    680             }
    681 
    682             # save attachment by default
    683             if (!$self->{'ignore_attachments'}
    684             && $filename ne "") { # this part has a file...
    685             my $encoding="8bit";
    686             if ($part_header =~
    687                 /^content-transfer-encoding:\s*(\w+)/mi ) {
    688                 $encoding=$1; $encoding =~ tr/A-Z/a-z/;
    689             }
    690             my $tmpdir=$ENV{'GSDLHOME'} . "/tmp";
    691             my $save_filename="$filename";
    692 
    693             # make sure we don't clobber files with same name;
    694             # need to keep state between .mbx files
    695             my $assoc_files=$self->{'assoc_filenames'};
    696             if ($assoc_files{$filename}) { # it's been set...
    697                 $assoc_files{$filename}++;
    698                 $filename =~ m/(.+)\.(\w+)$/;
    699                 my ($filestem, $ext)=($1,$2);
    700                 $save_filename="${filestem}_"
    701                 . $assoc_files{$filename} . ".$ext";
    702             } else { # first file with this name
    703                 $assoc_files{$filename}=1;
    704             }
    705             open (SAVE, ">$tmpdir/$save_filename") ||
    706                 warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!";
    707             $part_text = $message_part;
    708             $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header
    709             if ($encoding eq "base64") {
    710                 print SAVE base64_decode($part_text);
    711             } elsif ($encoding eq "quoted-printable") {
    712                 print SAVE qp_decode($part_text);
    713             } else { # 7bit, 8bit, binary, etc...
    714                 print SAVE $part_text;
    715             }
    716             close SAVE;
    717             my $doc_obj=$self->{'doc_obj'};
    718             $doc_obj->associate_file("$tmpdir/$save_filename",
    719                          "$save_filename",
    720                          $part_content_type # mimetype
    721                          );
    722             # clean up tmp area...
    723             # Can't do this as it hasn't been copied/linked yet!!!
    724 #           &util::rm("$tmpdir/$save_filename");
    725             print $outhandle "EMAILPlug: saving attachment \"$filename\"\n"; #
    726            
    727             # be nice if "download" was a translatable macro :(
    728             $text .="<a href=\"_httpdocimg_/$save_filename\">download</a>";
    729             } # end of save attachment
    730         } # end of !text/message part
     578
     579        $text .= $self->process_multipart_part($message_part,
     580                               $is_first_part);
    731581        } # foreach message part.
    732582    } else {
     
    745595        $mimetype =~ tr/[A-Z]/[a-z]/;
    746596        $mimeinfo=$2;
    747         if ($mimeinfo =~ /charset=\"([^\"]+)\"/) {
    748         $charset = $1;
    749         }
     597        #if ($mimeinfo =~ /charset=\"([^\"]+)\"/) {
     598        #   $charset = $1;
     599        #}
    750600        my $msg_text;
    751601        if ($mimetype =~ m@multipart/@) {
    752602        $msg_text = text_from_mime_message($self, $mimetype, $mimeinfo,
    753                            $text, $outhandle);
    754         } else {$msg_text=$text;}
     603                           $text);
     604        } else {$msg_text=text_from_part($text);}
    755605
    756606        my $brief_header=text_into_html(get_brief_headers($msg_header));
     
    765615
    766616    return $text;
     617}
     618
     619
     620
     621
     622sub process_multipart_part {
     623    my $self = shift;
     624    my $message_part = shift;
     625    my $is_first_part = shift;
     626
     627    my $return_text="";
     628    my $part_header=$message_part;
     629    my $part_body;
     630    if ($message_part=~ /^\s*\n/) {
     631    # no header... use defaults
     632    $part_body=$message_part;
     633    $part_header="Content-type: text/plain; charset=us-ascii";
     634    } elsif ($part_header=~s/\r?\n\r?\n(.*)$//s) {
     635    $part_body=$1;
     636    } else {
     637    # something's gone wrong...
     638    $part_header="";
     639    $part_body=$message_part;
     640    }
     641   
     642    $part_header =~ s/\r?\n[\t\ ]+/ /gs; #unfold
     643    my $part_content_type="";
     644    my $part_content_info="";
     645
     646    if ($part_header =~ m@^content\-type:\s*([\w\.\-/]+)\s*(\;.*)?$@mi) {
     647    $part_content_type=$1; $part_content_type =~ tr/A-Z/a-z/;
     648    $part_content_info=$2;
     649    if (!defined($part_content_info)) {
     650        $part_content_info="";
     651    } else {
     652        $part_content_info =~ s/^\;\s*//;
     653        $part_content_info =~ s/\s*$//;
     654    }
     655    }
     656    my $filename="";
     657    if ($part_header =~ m@name=\"?([^\"]+)\"?@mis) {
     658    $filename=$1;
     659    }
     660   
     661    # disposition - either inline or attachment.
     662    # NOT CURRENTLY USED - we display all text types instead...
     663    # $part_header =~ /^content\-disposition:\s*([\w+])/mis;
     664   
     665    # add <<attachment>> to each part except the first...
     666    if (!$is_first_part) {
     667    $return_text.="\n<p><hr><strong>&lt;&lt;attachment&gt;&gt;";
     668    # add part info header
     669    my $header_text="<br>Type: $part_content_type<br>\n";
     670    if ($filename ne "") {
     671        $header_text.="Filename: $filename\n";
     672    }
     673    $header_text =~ s@_@\\_@g;
     674    $return_text.=$header_text . "</strong></p>\n<p>\n";
     675    }
     676
     677    if ($part_content_type =~ m@text/@)
     678    {
     679    my $part_text= $self->text_from_part($message_part);
     680    if ($part_content_type !~ m@text/(ht|x)ml@) {
     681        $part_text = text_into_html($part_text);
     682    }
     683    if ($part_text eq "") {
     684        $part_text = ' ';
     685    }
     686    $return_text .= $part_text;
     687    } elsif ($part_content_type =~ m@message/rfc822@) {
     688    # This is a forwarded message
     689    my $message_part_headers=$part_body;
     690    $message_part_headers=~s/\r?\n\r?\n(.*)$//s;
     691    my $message_part_body=$1;
     692    $message_part_headers =~ s/\r?\n[\t\ ]+/ /gs; #unfold
     693   
     694    my $rfc822_formatted_body=""; # put result in here
     695    if ($message_part_headers =~
     696        /^content\-type:\s*([\w\.\-\/]+)\s*(\;.*)?$/ims)
     697    {
     698        # The message header uses MIME flags
     699        my $message_content_type=$1;
     700        my $message_content_info=$2;
     701        if (!defined($message_content_info)) {
     702        $message_content_info="";
     703        } else {
     704        $message_content_info =~ s/^\;\s*//;
     705        $message_content_info =~ s/\s*$//;
     706        }
     707        $message_content_type =~ tr/A-Z/a-z/;
     708        if ($message_content_type =~ /multipart/) {
     709        $rfc822_formatted_body=
     710            $self->text_from_mime_message($message_content_type,
     711                          $message_content_info,
     712                          $message_part_body);
     713        } else {
     714        $message_part_body= $self->text_from_part($part_body);
     715        $rfc822_formatted_body=text_into_html($message_part_body);
     716        }
     717    } else {
     718        # message doesn't use MIME flags
     719        $rfc822_formatted_body=text_into_html($message_part_body);
     720        $rfc822_formatted_body =~ s@_@\\_@g;
     721    }
     722    # Add the returned text to the output
     723    # don't put all the headers...
     724#   $message_part_headers =~ s/^(X\-.*|received|message\-id|return\-path):.*\n//img;
     725    my $brief_headers=get_brief_headers($message_part_headers);
     726    $return_text.=text_into_html($brief_headers);
     727    $return_text.="</p><p>\n";
     728    $return_text.=$rfc822_formatted_body;
     729    $return_text.="</p>\n";
     730    # end of message/rfc822
     731    } elsif ($part_content_type =~ /multipart/) {
     732    # recurse again
     733   
     734    my $tmptext= $self->text_from_mime_message($part_content_type,
     735                           $part_content_info,
     736                           $part_body);
     737    $return_text.=$tmptext;
     738    } else {
     739    # this part isn't text/* or another message...
     740    if ($is_first_part) {
     741        # this is the first part of a multipart, or only part!
     742        $return_text="\n<p><hr><strong>&lt;&lt;attachment&gt;&gt;";
     743        # add part info header
     744        my $header_text="<br>Type: $part_content_type<br>\n";
     745        $header_text.="Filename: $filename</strong></p>\n<p>\n";
     746        $header_text =~ s@_@\\_@g;
     747        $return_text.=$header_text;
     748    }
     749   
     750    # save attachment by default
     751    if (!$self->{'ignore_attachments'}
     752        && $filename ne "") { # this part has a file...
     753        my $encoding="8bit";
     754        if ($part_header =~
     755        /^content-transfer-encoding:\s*(\w+)/mi ) {
     756        $encoding=$1; $encoding =~ tr/A-Z/a-z/;
     757        }
     758        my $tmpdir=$ENV{'GSDLHOME'} . "/tmp";
     759        my $save_filename=$filename;
     760       
     761        # make sure we don't clobber files with same name;
     762        # need to keep state between .mbx files
     763        my $assoc_files=$self->{'assoc_filenames'};
     764        if ($assoc_files->{$filename}) { # it's been set...
     765        $assoc_files->{$filename}++;
     766        $filename =~ m/(.+)\.(\w+)$/;
     767        my ($filestem, $ext)=($1,$2);
     768        $save_filename="${filestem}_"
     769            . $assoc_files->{$filename} . ".$ext";
     770        } else { # first file with this name
     771        $assoc_files->{$filename}=1;
     772        }
     773        open (SAVE, ">$tmpdir/$save_filename") ||
     774        warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!";
     775        my $part_text = $message_part;
     776        $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header
     777        if ($encoding eq "base64") {
     778        print SAVE base64_decode($part_text);
     779        } elsif ($encoding eq "quoted-printable") {
     780        print SAVE qp_decode($part_text);
     781        } else { # 7bit, 8bit, binary, etc...
     782        print SAVE $part_text;
     783        }
     784        close SAVE;
     785        my $doc_obj=$self->{'doc_obj'};
     786        $doc_obj->associate_file("$tmpdir/$save_filename",
     787                     "$save_filename",
     788                     $part_content_type # mimetype
     789                     );
     790        # clean up tmp area...
     791        # Can't do this as it hasn't been copied/linked yet!!!
     792#           &util::rm("$tmpdir/$save_filename");
     793        my $outhandle=$self->{'outhandle'};
     794        print $outhandle "EMAILPlug: saving attachment \"$filename\"\n"; #
     795       
     796        # be nice if "download" was a translatable macro :(
     797        $return_text .="<a href=\"_httpdocimg_/$save_filename\">download</a>";
     798    } # end of save attachment
     799    } # end of !text/message part
     800
     801
     802    return $return_text;
    767803}
    768804
     
    785821
    786822# Process a MIME part. Return "" if we can't decode it.
     823# should only be called for parts with type "text/*" ?
    787824sub text_from_part {
    788825    my $self = shift;
     
    833870    # convert to unicode
    834871    $self->convert2unicode($charset, \$text);
     872   
     873    $text =~ s@_@\\_@g; # protect against GS macro language
    835874    return $text;
    836875}
     876
     877
    837878
    838879
Note: See TracChangeset for help on using the changeset viewer.