Changeset 22636

Show
Ignore:
Timestamp:
16.08.2010 11:41:22 (9 years ago)
Author:
davidb
Message:

Using -utf8 as an option to html-tidy leads to the wrong encoding for HTML docs in the demo collection. Using '-raw' works better.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r22594 r22636  
    1577 1577 
    1578 1578 
    1579     my ($language, $encoding) = $self->textcat_get_language_encoding ($input_filename); 
    1580     #print STDERR "*** encoding = $encoding\n"; 
     1579 ###    my ($language, $encoding) = $self->textcat_get_language_encoding ($input_filename); 
     1580 ###    #print STDERR "*** encoding = $encoding\n"; 
    1581 1581 
    1582 1582    # run html-tidy on the tmp file to make it a proper XML file 
     
    1587 1587    my $tidy_cmd = "tidy"; 
    1588 1588    $tidy_cmd .= " -q" if ($self->{'verbosity'} <= 2); 
    1589     $tidy_cmd .= " -utf8 -wrap 0 -asxml \"$tmp_filename\""; 
     1589 ##    $tidy_cmd .= " -utf8 -wrap 0 -asxml \"$tmp_filename\""; 
     1590    $tidy_cmd .= " -raw -wrap 0 -asxml \"$tmp_filename\""; 
    1590 1591    if ($self->{'verbosity'} <= 2) { 
    1591 1592    if ($ENV{'GSDLOS'} =~ m/^windows/i) {