Changeset 29745 for main


Ignore:
Timestamp:
2015-02-16T14:41:02+13:00 (9 years ago)
Author:
kjdon
Message:

Use Encode::decode to make the filenames Unicode-aware. In file_block_read() this is done only so that the print statement looks nice. In read(), however, the filename is used to look up metadata that came from metadata.xml in the extrametadata hash; that hash is Unicode-aware, so the lookup name needs to be Unicode-aware too. Some debug output and extra code have been left in and need to be removed once Windows testing is done.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/DirectoryPlugin.pm

    r28489 r29745  
    332332
    333333    # Recursively read each $raw_subfile
    334     print $outhandle "DirectoryPlugin block recurring: $raw_file_subfile\n" if ($verbosity > 2);
     334    print $outhandle "DirectoryPlugin block recurring: ". Encode::decode("utf8", $raw_file_subfile) ."\n" if ($verbosity > 2);
    335335   
    336336    #$count += &plugin::file_block_read ($pluginfo, $this_file_base_dir,
     
    407407    }
    408408    @dir = sort readdir (DIR);
    409     map { $_ = &unicode::raw_filename_to_url_encoded($_) } @dir;
     409    map { $_ = &unicode::raw_filename_to_url_encoded($_); print STDERR "****$_\n"; } @dir;
    410410    closedir (DIR);
    411411
     
    541541    my $this_file_base_dir = $base_dir;
    542542    my $raw_subfile = &unicode::url_encoded_to_raw_filename($subfile);
     543    my $unicode_subfile = &Encode::decode("utf8", $raw_subfile);
    543544
    544545    my $raw_file_subfile = &FileUtils::filenameConcatenate($file, $raw_subfile);
     
    550551        next;
    551552    }
    552     #print STDERR "processing $raw_full_filename\n";
     553    print STDERR "processing $raw_full_filename\n";
    553554    # Follow Windows shortcuts
    554555    if ($raw_subfile =~ m/(?i)\.lnk$/ && (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin"))) {
     
    593594    }
    594595
    595     # $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8
    596 
     596    ### $subfile by this point is url-encoded => all ASCII chars => no need to encode as UTF8
     597    ###print STDERR "subfile = $subfile, raw_subfile = $raw_subfile\n";
     598    ###print STDERR &unicode::debug_unicode_string("subfile = $subfile, raw_subfile = $raw_subfile\n");
     599    # instead of using the subfile, we need unicode aware string
     600    ###my $lookup_name = decode("utf8", $raw_subfile);
     601    ####print STDERR "lookup nmae = $lookup_name\n";
    597602    # Next add metadata read in XML files (if it is supplied)
    598603    if ($additionalmetadata == 1) {
    599604        foreach my $filespec (@extrametakeys) {
    600605        ## use the url-encoded filename to do the filename comparison
    601 
    602         if ($subfile =~ /^$filespec$/) {
    603             print $outhandle "File \"$subfile\" matches filespec \"$filespec\"\n"
     606        print STDERR "comparing against filespec $filespec\n";
     607        print STDERR &unicode::debug_unicode_string("comparing against filespec $filespec\n");
     608        if ($unicode_subfile =~ /^$filespec$/) {
     609        ###if ($lookup_name =~ /^$filespec$/) {
     610            print $outhandle "File \"$unicode_subfile\" matches filespec \"$filespec\"\n"
    604611            if ($verbosity > 2);
    605612            my $mdref = &extrametautil::getmetadata(\%extrametadata, $filespec);
     
    634641            if (!$block_hash->{'reindex_files'}->{$raw_full_filename}) {
    635642            # filename has been around for longer than inf_timestamp
    636             print $outhandle "**** Skipping $subfile\n" if ($verbosity >3);
     643            print $outhandle "**** Skipping $unicode_subfile\n" if ($verbosity >3);
    637644            next;
    638645            }
     
    652659
    653660    # Recursively read each $subfile
    654     print $outhandle "DirectoryPlugin recurring: $subfile\n" if ($verbosity > 2);
     661    print $outhandle "DirectoryPlugin recurring: $unicode_subfile\n" if ($verbosity > 2);
    655662   
    656663    $count += &plugin::read ($pluginfo, $this_file_base_dir,
Note: See TracChangeset for help on using the changeset viewer.