Changeset 10834 for trunk/gsdl/perllib/plugins/EMAILPlug.pm
- Timestamp: 2005-11-02T17:33:24+13:00 (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r10827 r10834 547 547 { 548 548 # Use the HTML version 549 $part_text = $self->text_from_part($message_part);549 $part_text = $self->text_from_part($message_part); 550 550 $mimetype="text/html"; 551 551 last; … … 646 646 $id =~ s![<>\[\]]!!g; # remove [ ] < and > 647 647 $id =~ s![_&]!-!g; # replace symbols that might cause problems 648 $id =~ s!\.!-!g; # . means section to greenstone doc ids! 648 649 $id =~ s!@!-!g; # replace @ symbol, to avoid spambots 649 650 return $id; … … 864 865 my $part_header = shift; 865 866 867 866 868 my $type="text/plain"; # default, overridden from part header 867 869 my $charset=undef; # convert2unicode() will guess if necessary … … 901 903 } 902 904 } 905 903 906 if ($type eq "text/html") { 904 907 # only get stuff between <body> tags, or <html> tags. … … 914 917 # convert to unicode 915 918 $self->convert2unicode($charset, \$text); 916 917 919 $text =~ s@_@\\_@g; # protect against GS macro language 918 920 return $text; … … 974 976 } 975 977 978 # returns 0 if valid utf-8, 1 if invalid 979 sub is_utf8 { 980 my $self = shift; 981 my $textref = shift; 982 983 $$textref =~ m/^/g; # to set \G 984 my $badbytesfound=0; 985 while ($$textref =~ m!\G.*?([\x80-\xff]+)!sg) { 986 my $highbytes=$1; 987 my $highbyteslength=length($highbytes); 988 # replace any non utf8 compliant bytes 989 $highbytes =~ /^/g; # set pos() 990 while ($highbytes =~ 991 m!\G (?: [\xc0-\xdf][\x80-\xbf] | # 2 byte utf-8 992 [\xe0-\xef][\x80-\xbf]{2} | # 3 byte 993 [\xf0-\xf7][\x80-\xbf]{3} | # 4 byte 994 [\xf8-\xfb][\x80-\xbf]{4} | # 5 byte 995 [\xfc-\xfd][\x80-\xbf]{5} # 6 byte 996 )*([\x80-\xff])? !xg 997 ) { 998 my $badbyte=$1; 999 if (!defined $badbyte) {next} # hit end of string 1000 return 1; 1001 } 1002 } 1003 return 0; 1004 } 1005 1006 1007 976 1008 sub convert2unicode { 977 1009 my $self = shift(@_); … … 985 1017 if (! 
defined $charset) { 986 1018 # check if we have valid utf-8 987 if ($$textref =~ /^(?: [\0-\x7f] | # ascii 988 [\xc0-\xdf][\x80-\xbf] | # 2 byte utf-8 989 [\xe0-\xef][\x80-\xbf]{2} | # 3 byte 990 [\xf0-\xf7][\x80-\xbf]{3} | # 4 byte 991 [\xf8-\xfb][\x80-\xbf]{4} | # 5 byte 992 [\xfc-\xfd][\x80-\xbf]{5} | # 6 byte 993 )+ /x) { 994 $charset = "utf8"; 995 } 996 1019 if ($self->is_utf8($textref)) { $charset = "utf8" } 997 1020 998 1021 # default to latin
Note: See TracChangeset for help on using the changeset viewer.