Changeset 29763

Show
Ignore:
Timestamp:
26.02.2015 12:30:42 (5 years ago)
Author:
ak19
Message:

On macOS, accented characters in filenames are stored in decomposed form, i.e. the base letter followed by a separate combining accent. Convert the filename to canonical composed form (NFC) before looking it up in the extra metadata table.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm

    r29760 r29763  
    4444use Encode::Locale; 
    4545use Encode; 
     46use Unicode::Normalize; 
    4647 
    4748BEGIN { 
     
    545546    #my $unicode_subfile = &Encode::decode("utf8", $raw_subfile); 
    546547    my $unicode_subfile = &Encode::decode(locale => $raw_subfile); 
    547      
     548 
     549    # Normalise the filename to canonical composition - on mac, filenames use decomposed form for accented chars 
     550    $unicode_subfile = NFC($unicode_subfile); 
     551 
    548552    my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile); 
    549553    my $raw_full_filename  
     
    598602 
    599603    ### $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8 
    600     ###print STDERR "subfile = $subfile, raw_subfile = $raw_subfile\n"; 
    601     ###print STDERR &unicode::debug_unicode_string("subfile = $subfile, raw_subfile = $raw_subfile\n"); 
     604       print STDERR "****** subfile = $subfile, raw_subfile = $raw_subfile\n"; 
     605    print STDERR &unicode::debug_unicode_string("subfile = $subfile, raw_subfile = $raw_subfile\n"); 
    602606    # instead of using the subfile, we need unicode aware string 
    603607    ###my $lookup_name = decode("utf8", $raw_subfile); 
     
    607611        foreach my $filespec (@extrametakeys) { 
    608612        ## use the url-encoded filename to do the filename comparison 
    609         print STDERR "comparing against filespec $filespec ("; 
     613        print STDERR "### comparing against filespec $filespec ("; 
    610614        print STDERR &unicode::debug_unicode_string("$filespec"); 
    611615        print STDERR ")\n";