Changeset 2847
- Timestamp:
- 2001-11-23T16:14:39+13:00 (22 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r2781 r2847 180 180 while ($encoded =~ s/(.*?)=\?([^\?]*)\?([bq])\?([^\?]+)\?=\s*//i) { 181 181 my ($charset, $encoding, $data)=($2,$3,$4); 182 my $decoded_data;182 my ($decoded_data); 183 183 $value.="$1"; # any leading chars 184 184 $data=~s/^\s*//; $data=~s/\s*$//; # strip whitespace from ends … … 191 191 $decoded_data=base64_decode($data); 192 192 } 193 if (defined($charset)) { 194 $charset=~tr/A-Z/a-z/; 195 $charset=~s/\-/_/g; 196 $charset=~s/gb2312/gb/; 197 # assumes EUC-KR, not ISO-2022 !? 198 $charset=~s/ks_c_5601_1987/korean/; 199 } else {$charset="ascii";} 200 if ($charset eq "ascii" || $charset eq "us_ascii") { 201 # technically possible to have this explicitly... 202 $value.=$decoded_data; 203 } else { 204 my $utf8_text=&unicode::unicode2utf8 205 ( 206 &unicode::convert2unicode($charset,\$decoded_data) 207 ); 208 $value.=$utf8_text; 209 } 210 } # end of while loop 211 $value.=$encoded; # get any trailing characters 193 $self->convert2unicode($charset, \$decoded_data); 194 $value .= $decoded_data; 195 } # end of while loop 196 197 # get any trailing characters 198 $self->convert2unicode("iso_8859_1", \$encoded); 199 $value.=$encoded; 200 212 201 if ($value =~ /^\s*$/) { # we couldn't extract anything... 213 $value=original_value; 202 $self->convert2unicode("iso_8859_1", \$original_value); 203 $value=original_value; 214 204 } 215 205 } # end of if =?...?= … … 289 279 my $mimetype="text/plain"; 290 280 my $mimeinfo=""; 281 my $charset = "iso_8859_1"; 291 282 # Do MIME and encoding stuff 292 283 if ($Headers =~ /^content\-type:\s*([\w\/\-]+)\s*\;?\s*(.+?)\s*$/mi) … … 295 286 $mimetype =~ tr/[A-Z]/[a-z]/; 296 287 $mimeinfo=$2; 288 if ($mimeinfo =~ /charset=\"([^\"]+)\"/) { 289 $charset = $1; 290 } 297 291 } 298 292 … … 302 296 } 303 297 if ($mimetype ne "text/plain") { 304 $$textref= text_from_mime_message($mimetype,$mimeinfo,$$textref,305 $outhandle);298 $$textref= $self->text_from_mime_message($mimetype,$mimeinfo,$$textref, 299 $outhandle); 306 300 } elsif ($transfer_encoding =~ /quoted\-printable/) { 307 301 $$textref=qp_decode($$textref); 308 302 } elsif ($transfer_encoding =~ /base64/) { 309 303 $$textref=base64_decode($$textref); 304 } else { 305 $self->convert2unicode($charset, $textref); 310 306 } 311 307 … … 379 375 # the textref we are given DOES NOT include the header. 380 376 sub text_from_mime_message { 377 my $self = shift(@_); 381 378 my ($mimetype,$mimeinfo,$text,$outhandle)=(@_); 382 379 … … 419 416 { 420 417 # Use the HTML version 421 $part_text= text_from_part($message_part);418 $part_text= $self->text_from_part($message_part); 422 419 $mimetype="text/html"; 423 420 last; … … 429 426 { 430 427 # Use the plain version 431 $part_text= text_from_part($message_part);428 $part_text= $self->text_from_part($message_part); 432 429 if ($part_text =~/[^\s]/) { 433 430 $part_text="<pre>".$part_text."</pre>"; … … 439 436 } 440 437 if ($part_text eq "") { # use first part 441 $part_text= text_from_part(shift @message_parts);438 $part_text= $self->text_from_part(shift @message_parts); 442 439 } 443 440 if ($part_text eq "") { # we couldn't get anything!!! … … 498 495 if ($part_content_type =~ m@text/@) 499 496 { 500 my $part_text= text_from_part($message_part);497 my $part_text= $self->text_from_part($message_part); 501 498 if ($part_content_type !~ m@text/(ht|x)ml@) { 502 499 $part_text=text_into_html($part_text); … … 526 523 if ($message_content_type =~ /multipart/) { 527 524 $rfc822_formatted_body= 528 529 530 531 525 $self->text_from_mime_message($message_content_type, 526 $message_content_info, 527 $message_part_body, 528 $outhandle); 532 529 } else { 533 $message_part_body= text_from_part($part_body);530 $message_part_body= $self->text_from_part($part_body); 534 531 $rfc822_formatted_body=text_into_html($message_part_body); 535 532 } … … 548 545 # recurse again 549 546 550 $tmptext= text_from_mime_message($part_content_type,551 $part_content_info,552 $part_body,553 $outhandle);547 $tmptext= $self->text_from_mime_message($part_content_type, 548 $part_content_info, 549 $part_body, 550 $outhandle); 554 551 $text.=$tmptext; 555 552 } elsif ($text eq "") { … … 581 578 # Process a MIME part. Return "" if we can't decode it. 582 579 sub text_from_part { 580 my $self = shift(@_); 583 581 my $text=shift; 584 582 my $part_header=$text; … … 625 623 } 626 624 # convert to unicode 627 # first get our character encoding name in the right form. 628 $charset=~tr/A-Z/a-z/; 629 $charset=~s/\-/_/g; 630 if ($charset ne "us_ascii" && $charset ne "ascii") { 631 $charset=~s/gb2312/gb/; 632 # assumes EUC-KR, not ISO-2022 !? 633 $charset=~s/ks_c_5601_1987/korean/; 634 my @unicode_array=&unicode::convert2unicode($charset,\$text); 635 my $utf8_text=&unicode::unicode2utf8(@unicode_array); 636 $text=$utf8_text; 637 } 625 $self->convert2unicode($charset, \$text); 638 626 return $text; 639 627 } … … 708 696 } 709 697 698 sub convert2unicode { 699 my $self = shift(@_); 700 my ($charset, $textref) = @_; 701 702 # first get our character encoding name in the right form. 703 $charset = "iso_8859_1" unless defined $charset; 704 $charset=~tr/A-Z/a-z/; 705 $charset=~s/\-/_/g; 706 $charset=~s/gb2312/gb/; 707 # assumes EUC-KR, not ISO-2022 !? 708 $charset=~s/ks_c_5601_1987/korean/; 709 710 # It appears that we can't always trust ascii text so we'll treat it 711 # as iso-8859-1 (letting characters above 0x80 through without 712 # converting them to utf-8 will result in invalid XML documents 713 # which can't be parsed at build time). 714 $charset = "iso_8859_1" if ($charset eq "us_ascii" || $charset eq "ascii"); 715 716 $$textref=&unicode::unicode2utf8(&unicode::convert2unicode($charset,$textref)); 717 } 718 710 719 711 720 # Perl packages have to return true if they are run.
Note:
See TracChangeset
for help on using the changeset viewer.