Changeset 31445

Show
Ignore:
Timestamp:
27.02.2017 14:37:11 (3 years ago)
Author:
ak19
Message:

Added a method guess_filesystem_encoding. Use this to try to work out which encoding to use when turning import folder subdirs into Perl native Unicode strings.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r31438 r31445  
    836836 
    837837 
     838sub guess_filesystem_encoding 
     839{ 
     840   my $self = shift (@_);  
     841    # Look to file system to provide a character encoding 
     842   my $deduced_filename_encoding = ""; 
     843    # If Windows NTFS, then -- assuming we work with long file names got through 
     844    # Win32::GetLongFilePath() -- then the underlying file system is UTF16 
     845 
     846    if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) { 
     847        # Can do better than working with the DOS character encoding returned by locale      
     848        $deduced_filename_encoding = "unicode"; 
     849    } 
     850    else { 
     851        # Unix of some form or other 
     852 
     853        # See if we can determine the file system encoding through locale 
     854        $deduced_filename_encoding = $self->locale_encoding(); 
     855         
     856    } 
     857        print STDERR "guessing filesystem encoding is $deduced_filename_encoding\n"; 
     858        return $deduced_filename_encoding; 
     859} 
     860 
     861 
    838862 
    839863sub deduce_filename_encoding 
     
    11921216    return undef unless $self->can_process_this_file($filename_full_path); 
    11931217     
     1218    #print STDERR "**** BEFORE READ INTO DOC OBJ: $file\n"; 
    11941219    my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_); 
    1195      
     1220    #print STDERR "**** AFTER READ INTO DOC OBJ: $file\n"; 
     1221     
    11961222    if ((defined $process_status) && ($process_status == 1)) { 
    11971223