Changeset 3630 for trunk/gsdl/perllib
- Timestamp:
- 2002-12-10T17:58:13+13:00 (22 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r3627 r3630 7 7 # University of Waikato, New Zealand. 8 8 # 9 # Copyright (C) 1999-200 1New Zealand Digital Library Project9 # Copyright (C) 1999-2002 New Zealand Digital Library Project 10 10 # 11 11 # This program is free software; you can redistribute it and/or modify … … 83 83 print STDERR "\n usage: plugin EMAILPlug [options]\n\n"; 84 84 print STDERR " options:\n"; 85 print STDERR " - process_expA perl regular expression used to split files\n";85 print STDERR " -split_exp A perl regular expression used to split files\n"; 86 86 print STDERR " containing many messages into individual documents.\n\n"; 87 print STDERR " -no_attachments Do not save message attachments.\n\n"; 87 88 88 89 } … … 93 94 'type' => "string", 94 95 'reqd' => "no", 95 'deft' => q^(?i)\.hb$^} , 96 { 'name' => "block_exp", 96 'deft' => q@([\\/]\d+|\.(mbx|email|eml))$@ 97 }, 98 { 'name' => "no_attachments", 99 'desc' => "Do not save message attachments.", 100 'type' => "flag", 101 'reqd' => "no" 102 }, 103 { 'name' => "block_exp", 97 104 'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.", 98 105 'type' => "string", 99 106 'reqd' => "no", 100 'deft' => q^ \.jpg$^}107 'deft' => q^^} 101 108 ]; 102 109 … … 121 128 if (!parsargv::parse(\@_, 122 129 q^split_exp/.*/^, \$self->{'split_exp'}, 130 q^no_attachments^, \$self->{'ignore_attachments'}, 123 131 "allow_extra_options")) { 124 132 print STDERR "\nIncorrect options passed to $class."; … … 126 134 die "\n"; 127 135 } 136 $self->{'assoc_filenames'} = {}; # to save attach names so we don't clobber 137 128 138 # this might not actually be true at read-time, but after processing 129 139 # it should all be utf8. … … 351 361 # more than one parameter given to Content-type. 352 362 # eg: Content-type: text/plain; charset="us-ascii"; format="flowed" 353 if ($Headers =~ /^content\-type:\s*([\w\/\-]+)\s*(\;\s*.*)\s*$/mi)363 if ($Headers =~ m@^content\-type:\s*([\w\.\-/]+)\s*(\;\s*.+)?\s*$@mi) 354 364 { 355 365 $mimetype=$1; … … 379 389 $$textref= $self->text_from_part("$Headers\n$$textref"); 380 390 } elsif ($mimetype ne "text/plain") { 391 $self->{'doc_obj'} = $doc_obj; # in case we need to associate files... 381 392 $$textref= $self->text_from_mime_message($mimetype,$mimeinfo,$$textref, 382 393 $outhandle); … … 556 567 $part_body=$message_part; 557 568 $part_header="Content-type: text/plain; charset=us-ascii"; 558 } elsif ($part_header=~s/\r?\n\r?\n(.*)$//s g) {569 } elsif ($part_header=~s/\r?\n\r?\n(.*)$//s) { 559 570 $part_body=$1; 560 571 } else { … … 571 582 $part_content_type="message/rfc822"; 572 583 } 573 if ($part_header =~ m@^content\-type:\s*([\w +/\-]+)\s*\;?\s*(.*?)\s*$@mi) {584 if ($part_header =~ m@^content\-type:\s*([\w\.\-/]+)\s*\;?\s*(.*?)\s*$@mi) { 574 585 $part_content_type=$1; $part_content_type =~ tr/A-Z/a-z/; 575 586 $part_content_info=$2; … … 594 605 $text.="</strong></p>\n"; 595 606 } 596 597 607 if ($part_content_type =~ m@text/@) 598 608 { … … 608 618 # This is a forwarded message 609 619 my $message_part_headers=$part_body; 610 $message_part_headers =~ s/\r?\n[\t\ ]+/ /gs; #unfold611 620 $message_part_headers=~s/\r?\n\r?\n(.*)$//s; 612 621 my $message_part_body=$1; 622 $message_part_headers =~ s/\r?\n[\t\ ]+/ /gs; #unfold 613 623 614 624 my $rfc822_formatted_body=""; # put result in here 615 625 if ($message_part_headers =~ 616 /^content\-type:\s*([\w\ /\-]+)\s*\;?\s*(.*?)\s*$/ims)626 /^content\-type:\s*([\w\.\-\/]+)\s*\;?\s*(.*?)\s*$/ims) 617 627 { 618 628 # The message header uses MIME flags … … 639 649 # Add the returned text to the output 640 650 # don't put all the headers... 641 $message_part_headers =~ s/^(X\-.*|received|message\-id|return\-path):.*\n//img; 642 $text.=text_into_html($message_part_headers); 651 # $message_part_headers =~ s/^(X\-.*|received|message\-id|return\-path):.*\n//img; 652 my $brief_headers=get_brief_headers($message_part_headers); 653 $text.=text_into_html($brief_headers); 643 654 $text.="<p>\n"; 644 655 $text.=$rfc822_formatted_body; 656 $text.="</p>\n"; 645 657 # end of message/rfc822 646 658 } elsif ($part_content_type =~ /multipart/) { … … 652 664 $outhandle); 653 665 $text.=$tmptext; 654 } elsif ($text eq "") { 655 # we can't do anything with this part, but if it's the first 656 # part then make sure it is mentioned.. 666 } else { 667 # this part isn't text/* or another message... 668 if ($text eq "") { 669 # this is the first part of a multipart, or only part! 670 $text="\n<p><hr><strong><<attachment>></>"; 671 # add part info header 672 $text.="<br>Type: $part_content_type<br>\n"; 673 $text.="Filename: $filename</strong></p>\n"; 674 } 675 676 # save attachment by default 677 if (!$self->{'ignore_attachments'} 678 && $filename ne "") { # this part has a file... 679 my $encoding="8bit"; 680 if ($part_header =~ 681 /^content-transfer-encoding:\s*(\w+)/mi ) { 682 $encoding=$1; $encoding =~ tr/A-Z/a-z/; 683 } 684 my $tmpdir=$ENV{'GSDLHOME'} . "/tmp"; 685 my $save_filename="$filename"; 686 687 # make sure we don't clobber files with same name; 688 # need to keep state between .mbx files 689 my $assoc_files=$self->{'assoc_filenames'}; 690 if ($assoc_files{$filename}) { # it's been set... 691 $assoc_files{$filename}++; 692 $filename =~ m/(.+)\.(\w+)$/; 693 my ($filestem, $ext)=($1,$2); 694 $save_filename="${filestem}_" 695 . $assoc_files{$filename} . ".$ext"; 696 } else { # first file with this name 697 $assoc_files{$filename}=1; 698 } 699 open (SAVE, ">$tmpdir/$save_filename") || 700 warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!"; 701 $part_text=$message_part; 702 $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header 703 if ($encoding eq "base64") { 704 print SAVE base64_decode($part_text); 705 } elsif ($encoding eq "quoted-printable") { 706 print SAVE qp_decode($part_text); 707 } else { # 7bit, 8bit, binary, etc... 708 print SAVE $part_text; 709 } 710 close SAVE; 711 my $doc_obj=$self->{'doc_obj'}; 712 $doc_obj->associate_file("$tmpdir/$save_filename", 713 "$save_filename", 714 $part_content_type # mimetype 715 ); 716 # clean up tmp area... 717 # Can't do this as it hasn't been copied/linked yet!!! 718 # &util::rm("$tmpdir/$save_filename"); 719 print $outhandle "EMAILPlug: saving attachment \"$filename\"\n"; # 657 720 658 $text.="\n<p><hr><strong><<attachment>></>"; 659 # add part info header 660 $text.="<br>Type: $part_content_type<br>\n"; 661 if ($filename ne "") { 662 $text.="Filename: $filename\n"; 663 } 664 $text.="</strong></p>\n"; 665 } 721 # be nice if "download" was a translatable macro :( 722 $text .="<a href=\"_httpdocimg_/$save_filename\">download</a>"; 723 } # end of save attachment 724 } # end of !text/message part 666 725 } # foreach message part. 667 726 } else { … … 675 734 $text = $1; 676 735 677 if ($msg_header =~ /^content\-type:\s*([\w\ /\-]+)\s*\;?\s*(.+?)\s*$/mi)736 if ($msg_header =~ /^content\-type:\s*([\w\.\-\/]+)\s*\;?\s*(.+?)\s*$/mi) 678 737 { 679 738 $mimetype=$1; … … 689 748 } else {$msg_text=$text;} 690 749 691 my $brief_header=""; 692 if ($msg_header =~ /^(From:.*)$/im) {$brief_header.="$1<br>";} 693 if ($msg_header =~ /^(To:.*)$/im) {$brief_header.="$1<br>";} 694 if ($msg_header =~ /^(Cc:.*)$/im) {$brief_header.="$1<br>";} 695 if ($msg_header =~ /^(Subject:.*)$/im) {$brief_header.="$1<br>";} 696 if ($msg_header =~ /^(Date:.*)$/im) {$brief_header.="$1<br>";} 750 my $brief_header=text_into_html(get_brief_headers($msg_header)); 697 751 $text= "\n<b><<attached message>></b><br>"; 698 752 $text.= "<table><tr><td width=\"5%\"> </td>\n"; … … 708 762 709 763 710 711 764 # Return only the "important" headers from a set of message headers 765 sub get_brief_headers { 766 my $msg_header = shift; 767 my $brief_header = ""; 768 769 # Order matters! 770 if ($msg_header =~ /^(From:.*)$/im) {$brief_header.="$1\n";} 771 if ($msg_header =~ /^(To:.*)$/im) {$brief_header.="$1\n";} 772 if ($msg_header =~ /^(Cc:.*)$/im) {$brief_header.="$1\n";} 773 if ($msg_header =~ /^(Subject:.*)$/im) {$brief_header.="$1\n";} 774 if ($msg_header =~ /^(Date:.*)$/im) {$brief_header.="$1\n";} 775 776 return $brief_header; 777 } 712 778 713 779 … … 726 792 } 727 793 $part_header =~ s/\r?\n[\t ]+/ /gs; #unfold 728 $part_header =~ /content\-type:\s*([\w\ /]+).*?charset=\"?([^\;\"\s]+)\"?/is;794 $part_header =~ /content\-type:\s*([\w\.\-\/]+).*?charset=\"?([^\;\"\s]+)\"?/is; 729 795 my $type=$1; 730 796 my $charset=$2; … … 743 809 } elsif ($encoding !~ /[78]bit/) { # leave 7/8 bit as is. 744 810 # rfc2045 also allows binary, which we ignore (for now). 745 # maybe this shouldn't go to stderr, but anyway...746 print STDERR "EMAILPlug: unknownencoding: $encoding\n";811 my $outhandle=$self->{'outhandle'}; 812 print $outhandle "EMAILPlug: unknown transfer encoding: $encoding\n"; 747 813 return ""; 748 814 }
Note:
See TracChangeset
for help on using the changeset viewer.