Changeset 29745

Show
Ignore:
Timestamp:
16.02.2015 14:41:02 (5 years ago)
Author:
kjdon
Message:

Using Encode::decode to make the filenames 'unicode aware'. For file_block_read(), this is only done so that the print statement looks nice. But for read(), we use the filename to look up entries in the extrametadata hash, which holds the metadata coming from metadata.xml. This is unicode aware, so our lookup name needs to be unicode aware too. Some debug output and extra code have been left in; these need to be removed once Windows testing is done.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm

    r28489 r29745  
    332332 
    333333    # Recursively read each $raw_subfile 
    334     print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2); 
     334    print $outhandle "DirectoryPlugin block recurring: ". Encode::decode("utf8", $raw_file_subfile) ."\n" if ($verbosity > 2); 
    335335     
    336336    #$count += &plugin::file_block_read ($pluginfo, $this_file_base_dir, 
     
    407407    } 
    408408    @dir = sort readdir (DIR); 
    409     map { $_ = &unicode::raw_filename_to_url_encoded($_) } @dir; 
     409    map { $_ = &unicode::raw_filename_to_url_encoded($_); print STDERR "****$_\n"; } @dir; 
    410410    closedir (DIR); 
    411411 
     
    541541    my $this_file_base_dir = $base_dir; 
    542542    my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile); 
     543    my $unicode_subfile = &Encode::decode("utf8", $raw_subfile); 
    543544 
    544545    my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile); 
     
    550551        next; 
    551552    } 
    552     #print STDERR "processing $raw_full_filename\n"; 
     553    print STDERR "processing $raw_full_filename\n"; 
    553554    # Follow Windows shortcuts 
    554555    if ($raw_subfile =~ m/(?i)\.lnk$/ && (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin"))) { 
     
    593594    } 
    594595 
    595     # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8 
    596  
     596    ### $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8 
     597    ###print STDERR "subfile = $subfile, raw_subfile = $raw_subfile\n"; 
     598    ###print STDERR &unicode::debug_unicode_string("subfile = $subfile, raw_subfile = $raw_subfile\n"); 
     599    # instead of using the subfile, we need unicode aware string 
     600    ###my $lookup_name = decode("utf8", $raw_subfile); 
     601    ####print STDERR "lookup nmae = $lookup_name\n"; 
    597602    # Next add metadata read in XML files (if it is supplied) 
    598603    if ($additionalmetadata == 1) { 
    599604        foreach my $filespec (@extrametakeys) { 
    600605        ## use the url-encoded filename to do the filename comparison 
    601  
    602         if ($subfile =~ /^$filespec$/) { 
    603             print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"  
     606        print STDERR "comparing against filespec $filespec\n"; 
     607        print STDERR &unicode::debug_unicode_string("comparing against filespec $filespec\n"); 
     608        if ($unicode_subfile =~ /^$filespec$/) { 
     609        ###if ($lookup_name =~ /^$filespec$/) { 
     610            print $outhandle "File \"$unicode_subfile\" matches filespec \"$filespec\"\n"  
    604611            if ($verbosity > 2); 
    605612            my $mdref = &extrametautil::getmetadata(\%extrametadata, $filespec); 
     
    634641            if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) { 
    635642            # filename has been around for longer than inf_timestamp 
    636             print $outhandle "**** Skipping $subfile\n" if ($verbosity >3); 
     643            print $outhandle "**** Skipping $unicode_subfile\n" if ($verbosity >3); 
    637644            next; 
    638645            } 
     
    652659 
    653660    # Recursively read each $subfile 
    654     print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2); 
     661    print $outhandle "DirectoryPlugin recurring: $unicode_subfile\n" if ($verbosity > 2); 
    655662     
    656663    $count += &plugin::read ($pluginfo, $this_file_base_dir,