Changeset 10827


Ignore:
Timestamp:
2005-11-02T15:50:56+13:00 (18 years ago)
Author:
jrm21
Message:

1) include %xx bits when making hrefs out of urls
2) test if text is valid utf-8 first instead of just defaulting to iso-8859-1 latin charset

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/EMAILPlug.pm

    r10254 r10827  
    487 487    # assume hostnames are \.\w\- only, then might have a trailing '/.*'
    488 488    # assume URI doesn't finish with a '.'
    489     $text =~ s@((http|ftp|https)://[\w\-]+(\.[\w\-]+)*/?((&amp;|\.)?[\w\?\=\-_/~]+)*(\#[\w\.\-_]*)?)@<a href=\"$1\">$1<\/a>@g;
     489    $text =~ s@((http|ftp|https)://[\w\-]+(\.[\w\-]+)*/?((&amp;|\.|\%[a-f0-9]{2})?[\w\?\=\-_/~]+)*(\#[\w\.\-_]*)?)@<a href=\"$1\">$1<\/a>@gi;
    490 490
    491 491
     
    700 700    $return_text.="\n<p><hr><strong>&lt;&lt;attachment&gt;&gt;";
    701 701    # add part info header
    702     my $header_text="<br>Type: $part_content_type<br>\n";
     702    my $header_text = "<br>Type: $part_content_type<br>\n";
    703 703    if ($filename ne "") {
    704         $header_text.="Filename: $filename\n";
     704        $header_text .= "Filename: $filename\n";
    705 705    }
    706 706    $header_text =~ s@_@\\_@g;
    707     $return_text.=$header_text . "</strong></p>\n<p>\n";
     707    $return_text .= $header_text . "</strong></p>\n<p>\n";
    708 708    }
    709 709
    710 710    if ($part_content_type =~ m@text/@)
    711 711    {
    712     my $part_text= $self->text_from_part($message_part);
     712    # $message_part includes the mime part headers
     713    my $part_text = $self->text_from_part($message_part);
    713 714    if ($part_content_type !~ m@text/(ht|x)ml@) {
    714 715        $part_text = text_into_html($part_text);
     
    864 865
    865 866    my $type="text/plain"; # default, overridden from part header
    866     my $charset="ascii"; # default, overridden from part header
     867    my $charset=undef;     # convert2unicode() will guess if necessary
    867 868
    868 869    if (! $part_header) { # no header argument was given. check the body
     
    982 983  }
    983 984
     985  if (! defined $charset) {
     986      # check if we have valid utf-8
     987      if ($$textref =~ /^(?: [\0-\x7f]          | # ascii
     988              [\xc0-\xdf][\x80-\xbf]    | # 2 byte utf-8
     989              [\xe0-\xef][\x80-\xbf]{2} | # 3 byte
     990              [\xf0-\xf7][\x80-\xbf]{3} | # 4 byte
     991              [\xf8-\xfb][\x80-\xbf]{4} | # 5 byte
     992              [\xfc-\xfd][\x80-\xbf]{5} | # 6 byte
     993              )+ /x) {
     994      $charset = "utf8";
     995      }
     996
     997
     998      # default to latin
     999      $charset = "iso_8859_1" if ! defined($charset);
     1000  }
     1001
    984 1002  # first get our character encoding name in the right form.
    985   $charset = "iso_8859_1" unless defined $charset;
    986 1003  $charset =~ tr/A-Z/a-z/; # lowercase
    987 1004  $charset =~ s/\-/_/g;
Note: See TracChangeset for help on using the changeset viewer.