Changeset 31445


Ignore:
Timestamp:
2017-02-27T14:37:11+13:00 (7 years ago)
Author:
ak19
Message:

Added a method guess_filesystem_encoding. Use this to try to work out which encoding to use when turning import folder subdirs into Perl native Unicode strings.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r31438 r31445  
    836836
    837837
     838sub guess_filesystem_encoding
     839{
     840   my $self = shift (@_);
     841    # Look to file system to provide a character encoding
     842   my $deduced_filename_encoding = "";
     843    # If Windows NTFS, then -- assuming we work with long file names got through
     844    # Win32::GetLongFilePath() -- then the underlying file system is UTF16
     845
     846    if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
     847        # Can do better than working with the DOS character encoding returned by locale     
     848        $deduced_filename_encoding = "unicode";
     849    }
     850    else {
     851        # Unix of some form or other
     852
     853        # See if we can determine the file system encoding through locale
     854        $deduced_filename_encoding = $self->locale_encoding();
     855       
     856    }
     857        print STDERR "guessing filesystem encoding is $deduced_filename_encoding\n";
     858        return $deduced_filename_encoding;
     859}
     860
     861
    838862
    839863sub deduce_filename_encoding
     
    11921216    return undef unless $self->can_process_this_file($filename_full_path);
    11931217   
     1218    #print STDERR "**** BEFORE READ INTO DOC OBJ: $file\n";
    11941219    my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_);
    1195    
     1220    #print STDERR "**** AFTER READ INTO DOC OBJ: $file\n";
     1221   
    11961222    if ((defined $process_status) && ($process_status == 1)) {
    11971223   
Note: See TracChangeset for help on using the changeset viewer.