Changeset 23835

Show
Ignore:
Timestamp:
29.03.2011 20:19:34 (8 years ago)
Author:
ak19
Message:

Dr Bainbridge fixed an interlinking failure on Mac OS that occurred when filenames (and therefore links to files on the system) contain characters that don't occur in English. The problem was that the URL obtained from the href in the HTML page did not match the URL-encoded, normalised (decomposed) URL stored in the doc.xml and the gdb database. The latter is the right form in which to store the URL, since it refers accurately to the file as it exists on the system. So the former was changed by applying normalization with decomposition (Normalization Form D) to the href link in the text, at the appropriate point in replace_href_links of the HTMLPlugin.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r23760 r23835  
    447447#    my $utf8_file = $self->filename_to_utf8_metadata($file); 
    448448#    $utf8_file =~ s/&\#095;/_/g; 
    449 #    variable below used to be utf8_file     
     449#    variable below used to be utf8_file 
     450 
    450451    my $url_encoded_file = &unicode::raw_filename_to_url_encoded($tailname); 
    451452    my $utf8_url_encoded_file = &unicode::raw_filename_to_utf8_url_encoded($tailname); 
     
    796797    my $self = shift (@_); 
    797798    my ($front, $link, $back, $base_dir, $file, $doc_obj, $section) = @_; 
    798      
     799 
    799800    # remove quotes from link at start and end if necessary 
    800801    if ($link=~/^[\"\']/) { 
     
    821822    # is taken in to account 
    822823    my ($href, $hash_part, $rl) = $self->format_link ($link, $base_dir, $file); 
    823   
     824 
    824825    # href may use '\'s where '/'s should be on Windows 
    825826    $href =~ s/\\/\//g; 
     
    853854        # If web page didn't give encoding, then default to utf8 
    854855        my $content_encoding= $self->{'content_encoding'} || "utf8"; 
    855      
     856 
    856857        if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) { 
    857858        print STDERR "**** Encoding with '$content_encoding', href: $href\n"; 
    858859        } 
    859860 
     861        # on Darwin, the unicode filenames are stored on the file 
     862        # system in decomposed form, so any href link (including when  
     863        # URL-encoded) should refer to the decomposed name of the file 
     864        if ($ENV{'GSDLOS'} =~ /^darwin$/i) { 
     865        $href = normalize('D', $href); # Normalization Form D (decomposition)  
     866        } 
     867 
    860868        $href = encode($content_encoding,$href); 
    861869    } 
    862870 
    863     $href = &unicode::raw_filename_to_utf8_url_encoded($href); 
     871    $href = &unicode::raw_filename_to_utf8_url_encoded($href);   
    864872    $href = &unicode::filename_to_url($href); 
    865873