Ignore:
Timestamp:
2010-11-28T23:24:22+13:00 (13 years ago)
Author:
davidb
Message:

Modifications to the code to handle filename encoding issues encountered when testing under Windows

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/BasePlugin.pm

    r23347 r23352  
    565565
    566566    elsif ($filename_encoding eq "auto-filesystem-encoding")
    567     {
     567    {   
    568568    # try locale
    569569    $filename_encoding = $self->locale_encoding();
     
    740740
    741741# uses locale
    742 sub get_filesystem_encoding {
     742sub get_filesystem_encoding
     743{
    743744
    744745    my $self = shift(@_);
     
    748749
    749750    eval {
     751    # Works for Windows as well, returning the DOS code page in use
    750752    use POSIX qw(locale_h);
    751753   
     
    788790   
    789791    }
     792
    790793    return $filesystem_encoding;
    791794}
     
    840843   
    841844    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
    842         # See if we can determine the file system encoding through locale
    843         $deduced_filename_encoding = $self->locale_encoding();
    844 
    845         # if locale shows us filesystem is utf8, check to see filename is consistent
    846         # => if not, then we have an "alien" filename on our hands
    847 
    848         if ($deduced_filename_encoding =~ m/^utf-?8$/i) {
    849             if (!&unicode::check_is_utf8($file)) {
    850                 # "alien" filename, so revert
    851                 $deduced_filename_encoding = undef;
    852             }
     845
     846    # Look to file system to provide a character encoding
     847
     848    # If Windows NTFS -- assuming we work with long file names obtained through
     849    # Win32::GetLongPathName() -- then the underlying file system is UTF-16
     850
     851    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
     852        # Can do better than working with the DOS character encoding returned by locale     
     853        $deduced_filename_encoding = "unicode";
     854    }
     855    else {
     856        # Unix of some form or other
     857
     858        # See if we can determine the file system encoding through locale
     859        $deduced_filename_encoding = $self->locale_encoding();
     860   
     861        # if locale shows us filesystem is utf8, check to see filename is consistent
     862        # => if not, then we have an "alien" filename on our hands
     863       
     864        if ($deduced_filename_encoding =~ m/^utf-?8$/i) {
     865        if (!&unicode::check_is_utf8($file)) {
     866            # "alien" filename, so revert
     867            $deduced_filename_encoding = undef;
    853868        }
    854     }
    855    
     869        }
     870    }
     871    }
    856872   
    857873#    if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
     
    884900sub set_Source_metadata {
    885901    my $self = shift (@_); 
    886     my ($doc_obj, $raw_file, $filename_encoding) = @_;
     902    my ($doc_obj, $raw_filename, $filename_encoding) = @_;
    887903
    888904    # 1. Sets the filename (Source) for display encoded as Unicode if possible,
     
    890906    # 2. Sets the url ref (SourceFile) to the URL encoded version
    891907    #    of filename for generated files
     908   
     909    my ($unused_full_rf, $raw_file) = &util::get_full_filenames("", $raw_filename);
    892910
    893911    my $top_section = $doc_obj->get_top_section();
    894    
     912
     913    my $octet_file = $raw_file;
     914
    895915    # UTF-8 version of filename
    896     if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
    897         print STDERR "****** Setting Source Metadata given: $raw_file\n";
    898     }
     916    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     917    print STDERR "****** Setting Source Metadata given: $octet_file\n";
     918    }
     919   
     920    # Deal with (on Windows) raw filenames that are in their
     921    # abbreviated DOS form
     922
     923    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
     924    if ((defined $filename_encoding) && ($filename_encoding eq "unicode")) {
     925        if (-e $raw_filename) {
     926        require Win32;
     927       
     928##      print STDERR "**** raw filename before LPN: $raw_filename\n";
     929        my $unicode_filename = Win32::GetLongPathName($raw_filename);
     930       
     931        my $unused_full_uf;
     932        ($unused_full_uf, $octet_file) = &util::get_full_filenames("", $unicode_filename);
     933
     934##      print STDERR "**** raw filename after LPN: $raw_filename\n";       
     935        }
     936    }
     937    }
    899938
    900939    my $url_encoded_filename;
    901940    if (defined $filename_encoding) {
    902         # => Generate a pretty print version of filename that is mapped to Unicode
    903        
    904         # Use filename_encoding to map raw filename to a Perl unicode-aware string
    905         $url_encoded_filename = decode($filename_encoding,$raw_file);       
     941    # => Generate a pretty print version of filename that is mapped to Unicode
     942   
     943    # Use filename_encoding to map raw filename to a Perl unicode-aware string
     944    $url_encoded_filename = decode($filename_encoding,$octet_file);     
    906945    }
    907946    else {
    908         # otherwise generate %xx encoded version of filename for char > 127
    909         $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file);
    910     }
    911    
    912     if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
    913         print STDERR "***** saving Source as:             $url_encoded_filename\n";
    914     }
    915 
     947    # otherwise generate %xx encoded version of filename for char > 127
     948    $url_encoded_filename = &unicode::raw_filename_to_url_encoded($octet_file);
     949    }
     950   
     951    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     952    print STDERR "***** saving Source as:             $url_encoded_filename\n";
     953    }
     954   
    916955   
    917956    # Source is the UTF8 display name - not necessarily the name of the file on the system
    918957    $doc_obj->set_utf8_metadata_element($top_section, "Source", $url_encoded_filename);
    919 
     958   
    920959    my $renamed_raw_file = &util::rename_file($raw_file, $self->{'file_rename_method'});
    921960    # If using URL encoding, then SourceFile is the url-reference to url-encoded
     
    926965                    $renamed_raw_url);
    927966
    928     if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
    929         print STDERR "***** saving SourceFile as:         $renamed_raw_url\n";
    930     }
     967    if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
     968    print STDERR "***** saving SourceFile as:         $renamed_raw_url\n";
     969    }
    931970}
    932971   
     
    9881027 
    9891028
    990     my $plugin_filename_encoding = $self->{'filename_encoding'};
     1029    my $plugin_filename_encoding = $self->{'filename_encoding'};
    9911030    my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
    992     $self->set_Source_metadata($doc_obj,$filename_no_path,$filename_encoding);
     1031    $self->set_Source_metadata($doc_obj,$filename_full_path,$filename_encoding,$filename_full_path);
    9931032
    9941033    # plugin specific stuff - what args do we need here??
Note: See TracChangeset for help on using the changeset viewer.