Changeset 3132 for trunk/gsdl/perllib/plugins/EMAILPlug.pm
- Timestamp:
- 2002-05-22T17:27:41+12:00 (22 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r3111 r3132 140 140 $$textref = $1; 141 141 142 # See if headers include non-ascii - RFC says whole header should be ascii.143 # not yet implemented, as we don't know what character set is the144 # user's default... We can do textcat to guess, or we can just choose145 # one of the charset fields later in the document (if there are any...)146 # if ($Headers =~ /([[:^ascii:]])/) {147 # }148 149 142 # Unfold headers - see rfc822 150 143 $Headers =~ s/\r?\n[\t\ ]+/ /gs; … … 155 148 $raw{$name} = "No $name value"; 156 149 } 150 151 # Get a default encoding for the header - RFC says should be ascii... 152 my $default_heading_encoding="iso_8859_1"; 153 154 # We don't know what character set is the user's default... 155 # We could use textcat to guess... for now we'll look at mime content-type 156 # if ($Headers =~ /([[:^ascii:]])/) { 157 # } 158 if ($Headers =~ /^Content\-type:.*charset=\"?([a-z0-9\-_]+)/mi) { 159 $default_header_encoding=$1; 160 $default_header_encoding =~ s@\-@_@g; 161 $default_header_encoding =~ tr/A-Z/a-z/; 162 } 163 157 164 158 165 # Examine each line of the headers … … 179 186 180 187 # decode headers if stored using =?<charset>?[BQ]?<data>?= (rfc2047) 181 if ($value =~ /=\? /) {188 if ($value =~ /=\?.*\?[BbQq]\?.*\?=/) { 182 189 my $original_value=$value; 183 190 my $encoded=$value; … … 201 208 $value .= $decoded_data; 202 209 } # end of while loop 203 210 204 211 # get any trailing characters 205 $self->convert2unicode( "iso_8859_1", \$encoded);212 $self->convert2unicode($default_header_encoding, \$encoded); 206 213 $value.=$encoded; 207 214 208 215 if ($value =~ /^\s*$/) { # we couldn't extract anything... 209 $self->convert2unicode("iso_8859_1", \$original_value); 216 $self->convert2unicode($default_header_encoding, 217 \$original_value); 210 218 $value=original_value; 211 219 } 212 } # end of if =?...?= 220 } else { # end of if =?...?= 221 $self->convert2unicode($default_header_encoding, \$value); 222 } 223 224 213 225 214 226 # Store the metadata … … 283 295 my $mimetype="text/plain"; 284 296 my $mimeinfo=""; 285 my $charset = "iso_8859_1";297 my $charset = $default_header_encoding; 286 298 # Do MIME and encoding stuff. Allow \s in mimeinfo in case there is 287 299 # more than one parameter given to Content-type. … … 375 387 376 388 # Convert problem characters into HTML symbols 377 $text =~ s/&/&/g o;378 $text =~ s/</</g o;379 $text =~ s/>/>/g o;380 $text =~ s/\"/"/g o;389 $text =~ s/&/&/g; 390 $text =~ s/</</g; 391 $text =~ s/>/>/g; 392 $text =~ s/\"/"/g; 381 393 382 394 # convert email addresses and URIs into links … … 392 404 393 405 # Clean up whitespace and convert \n charaters to <BR> or <P> 394 $text =~ s/ +/ /g o;395 $text =~ s/\s*$// o;396 $text =~ s/^\s*// o;397 $text =~ s/\n/\n< BR>/go;398 $text =~ s/< BR>\s*<BR>/<P>/go;406 $text =~ s/ +/ /g; 407 $text =~ s/\s*$//g; 408 $text =~ s/^\s*//g; 409 $text =~ s/\n/\n<br>/g; 410 $text =~ s/<br>\s*<br>/<p>/gi; 399 411 400 412 return $text; … … 644 656 # Process a MIME part. Return "" if we can't decode it. 645 657 sub text_from_part { 646 my $self = shift (@_);647 my $text=shift ;658 my $self = shift; 659 my $text=shift || ''; 648 660 my $part_header=$text; 649 661 # check for empty part header (leading blank line)
Note:
See TracChangeset
for help on using the changeset viewer.