Changeset 6062
- Timestamp:
- 2003-12-01T14:56:55+13:00 (20 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r5924 r6062 80 80 } 81 81 82 use strict; 83 no strict "refs"; # so we can use a variable as a filehandle for print $out 82 84 # sub print_usage { 83 85 # print STDERR "\n usage: plugin EMAILPlug [options]\n\n"; … … 199 201 200 202 # Get a default encoding for the header - RFC says should be ascii... 201 my $default_head ing_encoding="iso_8859_1";203 my $default_header_encoding="iso_8859_1"; 202 204 203 205 # We don't know what character set is the user's default... … … 268 270 $self->convert2unicode($default_header_encoding, 269 271 \$original_value); 270 $value= original_value;272 $value=$original_value; 271 273 } 272 274 } # end of if =?...?= 273 275 274 276 # In the absense of other charset information, assume the 275 # header is the default (usually "iso_8859_1") and convert itto unicode.277 # header is the default (usually "iso_8859_1") and convert to unicode. 276 278 else { 277 279 $self->convert2unicode($default_header_encoding, \$value); … … 279 281 280 282 # Store the metadata 283 $value =~ s@_@\\_@g; # protect against GS macro language 281 284 $raw{$name} = $value; 282 285 } 283 286 284 287 # Extract the name and e-mail address from the From metadata 285 $frommeta = $raw{"From"};288 my $frommeta = $raw{"From"}; 286 289 my $fromnamemeta; 287 290 my $fromaddrmeta; … … 302 305 # minor attempt to prevent spam-bots from harvesting addresses... 303 306 $fromaddrmeta=~s/@/@/; 307 304 308 $doc_obj->add_utf8_metadata ($cursection, "FromAddr", $fromaddrmeta); 305 309 … … 382 386 $transfer_encoding=$1; 383 387 } 388 384 389 if ($mimetype eq "text/html") { 385 390 $$textref= $self->text_from_part("$Headers\n$$textref"); 386 391 } elsif ($mimetype ne "text/plain") { 387 392 $self->{'doc_obj'} = $doc_obj; # in case we need to associate files... 388 $$textref= $self->text_from_mime_message($mimetype,$mimeinfo,$$textref, 389 $outhandle); 390 } elsif ($transfer_encoding =~ /quoted\-printable/) { 391 $$textref=qp_decode($$textref); 393 $$textref=$self->text_from_mime_message($mimetype,$mimeinfo,$$textref); 394 } else { # mimetype eq text/plain 395 $$textref = &text_into_html($$textref); 396 $$textref =~ s@_@\\_@g; # protect against GS macro language 397 398 if ($transfer_encoding =~ /quoted\-printable/) { 399 $$textref=qp_decode($$textref); 400 } elsif ($transfer_encoding =~ /base64/) { 401 $$textref=base64_decode($$textref); 402 } 392 403 $self->convert2unicode($charset, $textref); 393 } elsif ($transfer_encoding =~ /base64/) { 394 $$textref=base64_decode($$textref); 395 $self->convert2unicode($charset, $textref); 396 } else { 397 $self->convert2unicode($charset, $textref); 398 } 404 } 405 399 406 400 407 … … 407 414 $Headers =~ s/\[/[/g; $Headers =~ s/\]/]/g; 408 415 $self->convert2unicode($charset, \$Headers); 416 417 $Headers =~ s@_@\\_@g; # protect against GS macro language 418 409 419 $doc_obj->add_utf8_metadata ($cursection, "Headers", $Headers); 410 420 … … 420 430 421 431 # Add text to document object 422 if ($mimetype eq "text/plain") {423 $$textref = &text_into_html($$textref);424 }425 432 $$textref = "No message" unless ($$textref =~ /\w/); 433 426 434 $doc_obj->add_utf8_text($cursection, $$textref); 427 435 … … 460 468 # assume URI doesn't finish with a '.' 461 469 $text =~ s@((http|ftp|https)://[\w\-]+(\.[\w\-]+)*/?((&|\.)?[\w\?\=\-_/~]+)*(\#[\w\.\-_]*)?)@<a href=\"$1\">$1<\/a>@g; 462 463 $text =~ s@_@\\_@g; # protect against greenstone macros...464 470 465 471 … … 481 487 sub text_from_mime_message { 482 488 my $self = shift(@_); 483 my ($mimetype,$mimeinfo,$text ,$outhandle)=(@_);484 489 my ($mimetype,$mimeinfo,$text)=(@_); 490 my $outhandle=$self->{'outhandle'}; 485 491 # Check for multiparts - $mimeinfo will be a boundary 486 492 if ($mimetype =~ /multipart/) { 487 $boundary="";493 my $boundary=""; 488 494 if ($mimeinfo =~ m@boundary=(\"[^\"]+\"|[^\s]+)\s*$@im) { 489 495 $boundary=$1; … … 557 563 pop @message_parts; 558 564 } 565 my $is_first_part=1; 559 566 foreach my $message_part (@message_parts) { 560 my $part_header=$message_part; 561 my $part_body; 562 if ($message_part=~ /^\s*\n/) { 563 # no header... use defaults 564 $part_body=$message_part; 565 $part_header="Content-type: text/plain; charset=us-ascii"; 566 } elsif ($part_header=~s/\r?\n\r?\n(.*)$//s) { 567 $part_body=$1; 568 } else { 569 # something's gone wrong... 570 $part_header=""; 571 $part_body=$message_part; 572 } 573 574 $part_header =~ s/\r?\n[\t\ ]+/ /gs; #unfold 575 my $part_content_type=""; 576 my $part_content_info=""; 567 if ($is_first_part && $text ne "") {$is_first_part=0;} 568 577 569 if ($mimetype eq "multipart/digest") { 578 # default type - RTFRFC!! 579 $part_content_type="message/rfc822"; 580 } 581 if ($part_header =~ m@^content\-type:\s*([\w\.\-/]+)\s*(\;.*)?$@mi) { 582 $part_content_type=$1; $part_content_type =~ tr/A-Z/a-z/; 583 $part_content_info=$2; 584 if (!defined($part_content_info)) { 585 $part_content_info=""; 586 } else { 587 $part_content_info =~ s/^\;\s*//; 588 $part_content_info =~ s/\s*$//; 570 # default type - RTFRFC!! Set if not already set 571 $message_part =~ m@^(.*)\n\r?\n@s; 572 my $part_header=$1; 573 if ($part_header !~ m@^content-type@mi) { 574 $message_part="Content-type: message/rfc822\n" 575 . $message_part; # prepend default type 589 576 } 590 577 } 591 my $filename=""; 592 if ($part_header =~ m@name=\"?([\w\.\-\\/]+)\"?@mis) { 593 $filename=$1; 594 } 595 596 # disposition - either inline or attachment. 597 # NOT CURRENTLY USED - we display all text types instead... 598 # $part_header =~ /^content\-disposition:\s*([\w+])/mis; 599 600 # add <<attachment>> to each part except the first... 601 if ($text ne "") { 602 $text.="\n<p><hr><strong><<attachment>></>"; 603 # add part info header 604 $text.="<br>Type: $part_content_type<br>\n"; 605 if ($filename ne "") { 606 $text.="Filename: $filename\n"; 607 } 608 $text.="</strong></p>\n"; 609 } 610 if ($part_content_type =~ m@text/@) 611 { 612 my $part_text= $self->text_from_part($message_part); 613 if ($part_content_type !~ m@text/(ht|x)ml@) { 614 $part_text = text_into_html($part_text); 615 } 616 if ($part_text eq "") { 617 $part_text = '<<empty message>>'; 618 } 619 $text .= $part_text; 620 } elsif ($part_content_type =~ m@message/rfc822@) { 621 # This is a forwarded message 622 my $message_part_headers=$part_body; 623 $message_part_headers=~s/\r?\n\r?\n(.*)$//s; 624 my $message_part_body=$1; 625 $message_part_headers =~ s/\r?\n[\t\ ]+/ /gs; #unfold 626 627 my $rfc822_formatted_body=""; # put result in here 628 if ($message_part_headers =~ 629 /^content\-type:\s*([\w\.\-\/]+)\s*(\;.*)?$/ims) 630 { 631 # The message header uses MIME flags 632 my $message_content_type=$1; 633 my $message_content_info=$2; 634 if (!defined($message_content_info)) { 635 $message_content_info=""; 636 } else { 637 $message_content_info =~ s/^\;\s*//; 638 $message_content_info =~ s/\s*$//; 639 } 640 $message_content_type =~ tr/A-Z/a-z/; 641 if ($message_content_type =~ /multipart/) { 642 $rfc822_formatted_body= 643 $self->text_from_mime_message($message_content_type, 644 $message_content_info, 645 $message_part_body, 646 $outhandle); 647 } else { 648 $message_part_body= $self->text_from_part($part_body); 649 $rfc822_formatted_body=text_into_html($message_part_body); 650 } 651 } else { 652 # message doesn't use MIME flags 653 $rfc822_formatted_body=text_into_html($message_part_body); 654 } 655 # Add the returned text to the output 656 # don't put all the headers... 657 # $message_part_headers =~ s/^(X\-.*|received|message\-id|return\-path):.*\n//img; 658 my $brief_headers=get_brief_headers($message_part_headers); 659 $text.=text_into_html($brief_headers); 660 $text.="<p>\n"; 661 $text.=$rfc822_formatted_body; 662 $text.="</p>\n"; 663 # end of message/rfc822 664 } elsif ($part_content_type =~ /multipart/) { 665 # recurse again 666 667 $tmptext= $self->text_from_mime_message($part_content_type, 668 $part_content_info, 669 $part_body, 670 $outhandle); 671 $text.=$tmptext; 672 } else { 673 # this part isn't text/* or another message... 674 if ($text eq "") { 675 # this is the first part of a multipart, or only part! 676 $text="\n<p><hr><strong><<attachment>></>"; 677 # add part info header 678 $text.="<br>Type: $part_content_type<br>\n"; 679 $text.="Filename: $filename</strong></p>\n"; 680 } 681 682 # save attachment by default 683 if (!$self->{'ignore_attachments'} 684 && $filename ne "") { # this part has a file... 685 my $encoding="8bit"; 686 if ($part_header =~ 687 /^content-transfer-encoding:\s*(\w+)/mi ) { 688 $encoding=$1; $encoding =~ tr/A-Z/a-z/; 689 } 690 my $tmpdir=$ENV{'GSDLHOME'} . "/tmp"; 691 my $save_filename="$filename"; 692 693 # make sure we don't clobber files with same name; 694 # need to keep state between .mbx files 695 my $assoc_files=$self->{'assoc_filenames'}; 696 if ($assoc_files{$filename}) { # it's been set... 697 $assoc_files{$filename}++; 698 $filename =~ m/(.+)\.(\w+)$/; 699 my ($filestem, $ext)=($1,$2); 700 $save_filename="${filestem}_" 701 . $assoc_files{$filename} . ".$ext"; 702 } else { # first file with this name 703 $assoc_files{$filename}=1; 704 } 705 open (SAVE, ">$tmpdir/$save_filename") || 706 warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!"; 707 $part_text = $message_part; 708 $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header 709 if ($encoding eq "base64") { 710 print SAVE base64_decode($part_text); 711 } elsif ($encoding eq "quoted-printable") { 712 print SAVE qp_decode($part_text); 713 } else { # 7bit, 8bit, binary, etc... 714 print SAVE $part_text; 715 } 716 close SAVE; 717 my $doc_obj=$self->{'doc_obj'}; 718 $doc_obj->associate_file("$tmpdir/$save_filename", 719 "$save_filename", 720 $part_content_type # mimetype 721 ); 722 # clean up tmp area... 723 # Can't do this as it hasn't been copied/linked yet!!! 724 # &util::rm("$tmpdir/$save_filename"); 725 print $outhandle "EMAILPlug: saving attachment \"$filename\"\n"; # 726 727 # be nice if "download" was a translatable macro :( 728 $text .="<a href=\"_httpdocimg_/$save_filename\">download</a>"; 729 } # end of save attachment 730 } # end of !text/message part 578 579 $text .= $self->process_multipart_part($message_part, 580 $is_first_part); 731 581 } # foreach message part. 732 582 } else { … … 745 595 $mimetype =~ tr/[A-Z]/[a-z]/; 746 596 $mimeinfo=$2; 747 if ($mimeinfo =~ /charset=\"([^\"]+)\"/) {748 $charset = $1;749 }597 #if ($mimeinfo =~ /charset=\"([^\"]+)\"/) { 598 # $charset = $1; 599 #} 750 600 my $msg_text; 751 601 if ($mimetype =~ m@multipart/@) { 752 602 $msg_text = text_from_mime_message($self, $mimetype, $mimeinfo, 753 $text , $outhandle);754 } else {$msg_text= $text;}603 $text); 604 } else {$msg_text=text_from_part($text);} 755 605 756 606 my $brief_header=text_into_html(get_brief_headers($msg_header)); … … 765 615 766 616 return $text; 617 } 618 619 620 621 622 sub process_multipart_part { 623 my $self = shift; 624 my $message_part = shift; 625 my $is_first_part = shift; 626 627 my $return_text=""; 628 my $part_header=$message_part; 629 my $part_body; 630 if ($message_part=~ /^\s*\n/) { 631 # no header... use defaults 632 $part_body=$message_part; 633 $part_header="Content-type: text/plain; charset=us-ascii"; 634 } elsif ($part_header=~s/\r?\n\r?\n(.*)$//s) { 635 $part_body=$1; 636 } else { 637 # something's gone wrong... 638 $part_header=""; 639 $part_body=$message_part; 640 } 641 642 $part_header =~ s/\r?\n[\t\ ]+/ /gs; #unfold 643 my $part_content_type=""; 644 my $part_content_info=""; 645 646 if ($part_header =~ m@^content\-type:\s*([\w\.\-/]+)\s*(\;.*)?$@mi) { 647 $part_content_type=$1; $part_content_type =~ tr/A-Z/a-z/; 648 $part_content_info=$2; 649 if (!defined($part_content_info)) { 650 $part_content_info=""; 651 } else { 652 $part_content_info =~ s/^\;\s*//; 653 $part_content_info =~ s/\s*$//; 654 } 655 } 656 my $filename=""; 657 if ($part_header =~ m@name=\"?([^\"]+)\"?@mis) { 658 $filename=$1; 659 } 660 661 # disposition - either inline or attachment. 662 # NOT CURRENTLY USED - we display all text types instead... 663 # $part_header =~ /^content\-disposition:\s*([\w+])/mis; 664 665 # add <<attachment>> to each part except the first... 666 if (!$is_first_part) { 667 $return_text.="\n<p><hr><strong><<attachment>>"; 668 # add part info header 669 my $header_text="<br>Type: $part_content_type<br>\n"; 670 if ($filename ne "") { 671 $header_text.="Filename: $filename\n"; 672 } 673 $header_text =~ s@_@\\_@g; 674 $return_text.=$header_text . "</strong></p>\n<p>\n"; 675 } 676 677 if ($part_content_type =~ m@text/@) 678 { 679 my $part_text= $self->text_from_part($message_part); 680 if ($part_content_type !~ m@text/(ht|x)ml@) { 681 $part_text = text_into_html($part_text); 682 } 683 if ($part_text eq "") { 684 $part_text = ' '; 685 } 686 $return_text .= $part_text; 687 } elsif ($part_content_type =~ m@message/rfc822@) { 688 # This is a forwarded message 689 my $message_part_headers=$part_body; 690 $message_part_headers=~s/\r?\n\r?\n(.*)$//s; 691 my $message_part_body=$1; 692 $message_part_headers =~ s/\r?\n[\t\ ]+/ /gs; #unfold 693 694 my $rfc822_formatted_body=""; # put result in here 695 if ($message_part_headers =~ 696 /^content\-type:\s*([\w\.\-\/]+)\s*(\;.*)?$/ims) 697 { 698 # The message header uses MIME flags 699 my $message_content_type=$1; 700 my $message_content_info=$2; 701 if (!defined($message_content_info)) { 702 $message_content_info=""; 703 } else { 704 $message_content_info =~ s/^\;\s*//; 705 $message_content_info =~ s/\s*$//; 706 } 707 $message_content_type =~ tr/A-Z/a-z/; 708 if ($message_content_type =~ /multipart/) { 709 $rfc822_formatted_body= 710 $self->text_from_mime_message($message_content_type, 711 $message_content_info, 712 $message_part_body); 713 } else { 714 $message_part_body= $self->text_from_part($part_body); 715 $rfc822_formatted_body=text_into_html($message_part_body); 716 } 717 } else { 718 # message doesn't use MIME flags 719 $rfc822_formatted_body=text_into_html($message_part_body); 720 $rfc822_formatted_body =~ s@_@\\_@g; 721 } 722 # Add the returned text to the output 723 # don't put all the headers... 724 # $message_part_headers =~ s/^(X\-.*|received|message\-id|return\-path):.*\n//img; 725 my $brief_headers=get_brief_headers($message_part_headers); 726 $return_text.=text_into_html($brief_headers); 727 $return_text.="</p><p>\n"; 728 $return_text.=$rfc822_formatted_body; 729 $return_text.="</p>\n"; 730 # end of message/rfc822 731 } elsif ($part_content_type =~ /multipart/) { 732 # recurse again 733 734 my $tmptext= $self->text_from_mime_message($part_content_type, 735 $part_content_info, 736 $part_body); 737 $return_text.=$tmptext; 738 } else { 739 # this part isn't text/* or another message... 740 if ($is_first_part) { 741 # this is the first part of a multipart, or only part! 742 $return_text="\n<p><hr><strong><<attachment>>"; 743 # add part info header 744 my $header_text="<br>Type: $part_content_type<br>\n"; 745 $header_text.="Filename: $filename</strong></p>\n<p>\n"; 746 $header_text =~ s@_@\\_@g; 747 $return_text.=$header_text; 748 } 749 750 # save attachment by default 751 if (!$self->{'ignore_attachments'} 752 && $filename ne "") { # this part has a file... 753 my $encoding="8bit"; 754 if ($part_header =~ 755 /^content-transfer-encoding:\s*(\w+)/mi ) { 756 $encoding=$1; $encoding =~ tr/A-Z/a-z/; 757 } 758 my $tmpdir=$ENV{'GSDLHOME'} . "/tmp"; 759 my $save_filename=$filename; 760 761 # make sure we don't clobber files with same name; 762 # need to keep state between .mbx files 763 my $assoc_files=$self->{'assoc_filenames'}; 764 if ($assoc_files->{$filename}) { # it's been set... 765 $assoc_files->{$filename}++; 766 $filename =~ m/(.+)\.(\w+)$/; 767 my ($filestem, $ext)=($1,$2); 768 $save_filename="${filestem}_" 769 . $assoc_files->{$filename} . ".$ext"; 770 } else { # first file with this name 771 $assoc_files->{$filename}=1; 772 } 773 open (SAVE, ">$tmpdir/$save_filename") || 774 warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!"; 775 my $part_text = $message_part; 776 $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header 777 if ($encoding eq "base64") { 778 print SAVE base64_decode($part_text); 779 } elsif ($encoding eq "quoted-printable") { 780 print SAVE qp_decode($part_text); 781 } else { # 7bit, 8bit, binary, etc... 782 print SAVE $part_text; 783 } 784 close SAVE; 785 my $doc_obj=$self->{'doc_obj'}; 786 $doc_obj->associate_file("$tmpdir/$save_filename", 787 "$save_filename", 788 $part_content_type # mimetype 789 ); 790 # clean up tmp area... 791 # Can't do this as it hasn't been copied/linked yet!!! 792 # &util::rm("$tmpdir/$save_filename"); 793 my $outhandle=$self->{'outhandle'}; 794 print $outhandle "EMAILPlug: saving attachment \"$filename\"\n"; # 795 796 # be nice if "download" was a translatable macro :( 797 $return_text .="<a href=\"_httpdocimg_/$save_filename\">download</a>"; 798 } # end of save attachment 799 } # end of !text/message part 800 801 802 return $return_text; 767 803 } 768 804 … … 785 821 786 822 # Process a MIME part. Return "" if we can't decode it. 823 # should only be called for parts with type "text/*" ? 787 824 sub text_from_part { 788 825 my $self = shift; … … 833 870 # convert to unicode 834 871 $self->convert2unicode($charset, \$text); 872 873 $text =~ s@_@\\_@g; # protect against GS macro language 835 874 return $text; 836 875 } 876 877 837 878 838 879
Note:
See TracChangeset
for help on using the changeset viewer.