Ignore:
Timestamp:
2011-03-29T20:19:34+13:00 (13 years ago)
Author:
ak19
Message:

Dr Bainbridge fixed an interlinking failure on Mac OS that occurred when filenames (and therefore links to files on the system) contain characters that don't occur in English. The problem was that the URL obtained from the href in the HTML page did not match the URL-encoded, normalised (decomposed) form of the URL stored in doc.xml and the gdb database. The stored form is the right one to keep, since it refers accurately to the file as it exists on the file system. The href side was therefore changed instead: the href link in the text is now normalised with decomposition (Normalization Form D) at the appropriate point in replace_href_links of the HTMLPlugin.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/HTMLPlugin.pm

    r23760 r23835  
    447447#    my $utf8_file = $self->filename_to_utf8_metadata($file);
    448448#    $utf8_file =~ s/&\#095;/_/g;
    449 #    variable below used to be utf8_file   
     449#    variable below used to be utf8_file
     450
    450451    my $url_encoded_file = &unicode::raw_filename_to_url_encoded($tailname);
    451452    my $utf8_url_encoded_file = &unicode::raw_filename_to_utf8_url_encoded($tailname);
     
    796797    my $self = shift (@_);
    797798    my ($front, $link, $back, $base_dir, $file, $doc_obj, $section) = @_;
    798    
     799
    799800    # remove quotes from link at start and end if necessary
    800801    if ($link=~/^[\"\']/) {
     
    821822    # is taken in to account
    822823    my ($href, $hash_part, $rl) = $self->format_link ($link, $base_dir, $file);
    823  
     824
    824825    # href may use '\'s where '/'s should be on Windows
    825826    $href =~ s/\\/\//g;
     
    853854        # If web page didn't give encoding, then default to utf8
    854855        my $content_encoding= $self->{'content_encoding'} || "utf8";
    855    
     856
    856857        if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
    857858        print STDERR "**** Encoding with '$content_encoding', href: $href\n";
    858859        }
    859860
     861        # on Darwin, the unicode filenames are stored on the file
     862        # system in decomposed form, so any href link (including when
     863        # URL-encoded) should refer to the decomposed name of the file
     864        if ($ENV{'GSDLOS'} =~ /^darwin$/i) {
     865        $href = normalize('D', $href); # Normalization Form D (decomposition)
     866        }
     867
    860868        $href = encode($content_encoding,$href);
    861869    }
    862870
    863     $href = &unicode::raw_filename_to_utf8_url_encoded($href);
     871    $href = &unicode::raw_filename_to_utf8_url_encoded($href); 
    864872    $href = &unicode::filename_to_url($href);
    865873
Note: See TracChangeset for help on using the changeset viewer.