Changeset 3073
- Timestamp:
- 2002-04-03T15:44:42+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r2918 r3073 279 279 my $mimeinfo=""; 280 280 my $charset = "iso_8859_1"; 281 # Do MIME and encoding stuff 282 if ($Headers =~ /^content\-type:\s*([\w\/\-]+)\s*\;?\s*(.+?)\s*$/mi) 281 # Do MIME and encoding stuff. Allow \s in mimeinfo in case there is 282 # more than one parameter given to Content-type. 283 # eg: Content-type: text/plain; charset="us-ascii"; format="flowed" 284 if ($Headers =~ /^content\-type:\s*([\w\/\-]+)\s*(\;\s*.*)\s*$/mi) 283 285 { 284 286 $mimetype=$1; 285 287 $mimetype =~ tr/[A-Z]/[a-z]/; 288 289 if ($mimetype eq "text") { # for pre-RFC2045 messages (c. 1996) 290 $mimetype = "text/plain"; 291 } 292 286 293 $mimeinfo=$2; 287 if ($mimeinfo =~ /charset=\"([^\"]+)\"/) { 294 if (!defined $mimeinfo) { 295 $mimeinfo=""; 296 } else { # strip leading and trailing stuff 297 $mimeinfo =~ s/^\;\s*//; 298 $mimeinfo =~ s/\s*$//; 299 } 300 if ($mimeinfo =~ /charset=\"([^\"]+)\"/i) { 288 301 $charset = $1; 289 302 } … … 325 338 $Title .= "<br>From: " . text_into_html($raw{'From'}); 326 339 $Title .= "<br>Date: " . text_into_html($raw{'DateText'}); 340 $Title =~ s/\[/&#91;/g; $Title =~ s/\]/&#93;/g; 327 341 328 342 $doc_obj->add_utf8_metadata ($cursection, "Title", $Title); … … 760 774 $charset = "iso_8859_1" if ($charset eq "us_ascii" || $charset eq "ascii"); 761 775 776 if ($charset eq "iso_8859_1") { 777 # test if the mailer lied, and it has win1252 chars in it... 778 # 1252 has characters between 0x80 and 0x9f, 8859-1 doesn't 779 if ($$textref =~ m/[\x80-\x9f]/) { 780 my $outhandle = $self->{'outhandle'}; 781 print $outhandle "EMAILPlug: Headers claim ISO charset but MS "; 782 print $outhandle "codepage 1252 detected.\n"; 783 $charset = "windows_1252"; 784 } 785 } 762 786 $$textref=&unicode::unicode2utf8(&unicode::convert2unicode($charset,$textref)); 763 787 }
Note: See TracChangeset for help on using the changeset viewer.