Ignore:
Timestamp:
2008-08-13T16:05:10+12:00 (16 years ago)
Author:
ak19
Message:

Now removes comments only within the head tag when working out the encoding

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/ReadTextFile.pm

    r16753 r16765  
    340340    (exists $self->{'converted_to'} && $self->{'converted_to'} eq 'HTML')){
    341341
    342     # remove comments, including multiline ones, so that we don't match on
     342    # remove comments in head, including multiline ones, so that we don't match on
    343343    # inactive tags (those that are nested inside comments)
    344     $text =~ s/<!--.*?-->//sg;
     344    my ($head) = ($text =~ m/<head>(.*)<\/head>/si);
     345    $head =~ s/<!--.*?-->//sg;
    345346
    346347    # remove <title>stuff</title> -- as titles tend often to be in English
     
    352353        $best_encoding = $1;
    353354    }
    354     # check the meta http-equiv charset tag unless it is commented out
    355     elsif ($text =~ m/<meta http-equiv.*content-type.*charset=(.+?)\"/i) {             
     355    # check the meta http-equiv charset tag
     356    elsif ($head =~ m/<meta http-equiv.*content-type.*charset=(.+?)\"/si) {           
    356357        $best_encoding = $1;
    357358#       print STDERR "**** meta tag found, encoding is: $best_encoding\n";
Note: See TracChangeset for help on using the changeset viewer.