Changeset 29816 for main


Ignore:
Timestamp:
2015-03-30T13:42:14+13:00 (9 years ago)
Author:
kjdon
Message:

removing debug and commented out test lines

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/util.pm

    r29810 r29816  
    154154    require Win32;
    155155
    156     print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
    157 
    158156    my $unicode_filename = decode("utf8",$utf8_filename);
    159157    $real_filename = Win32::GetShortPathName($unicode_filename);
     
    171169       
    172170    my $unicode_filename = $raw_file;
    173     print STDERR "&&&&&&&&&&&&&& raw file = $unicode_filename ".&unicode::debug_unicode_string($unicode_filename)."\n";
    174171    if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
    175         print STDERR "windows, upgrading...\n";
    176         $unicode_filename = &util::get_dirsep_tail(&util::upgrade_if_dos_filename(&FileUtils::filenameConcatenate($directory, $raw_file), 0));
    177            
     172        # Try turning a short version to the long version
     173        # If there are "funny" characters in the file name that can't be represented in the local ANSI encoding, then we will have a short weird version, e.g. E74~1.txt
     174        $unicode_filename = &util::get_dirsep_tail(&util::upgrade_if_dos_filename(&FileUtils::filenameConcatenate($directory, $raw_file), 0));
     175       
     176       
     177        if ($unicode_filename eq $raw_file) {
     178        # This means the original filename *was* able to be encoded in the local ANSI file encoding (eg windows_1252), so now we turn it back to perl's unicode
    178179       
    179             if ($unicode_filename eq $raw_file) {
    180                 print STDERR "long = original\n";
    181                 $unicode_filename = &Encode::decode(locale_fs => $unicode_filename);
    182                 } else {
    183                     print STDERR "long not equal original\n";
    184                     #$unicode_filename = &Encode::decode("UTF-16le" => $unicode_filename);
    185                     }
     180        $unicode_filename = &Encode::decode(locale_fs => $unicode_filename);
     181        }
     182        # Otherwise, we did have one of the funny filenames. getLongPathName (used in upgrade_if_dos_filename) will return unicode, so we don't need to do anything more.
     183       
    186184                   
    187185    } else {
    188         print STDERR "not windows, not upgrading. decoding in utf8\n";
    189         $unicode_filename = &Encode::decode("utf-8", $unicode_filename);
     186        # we had a utf-8 string, turn it into perl internal unicode
     187        $unicode_filename = &Encode::decode("utf-8", $unicode_filename);
    190188   
    191189       
    192         }
    193     #
    194     #$unicode_filename = &Encode::decode("ascii", $unicode_filename);
    195     #$unicode_filename = &Encode::decode(locale_fs => $unicode_filename);
    196     print STDERR "after decoding = $unicode_filename ".&unicode::debug_unicode_string($unicode_filename)."\n";
    197     #Does the filenaem have url encoded chars in it?
    198         if (&unicode::is_url_encoded($unicode_filename)) {
    199             $unicode_filename = &unicode::url_decode($unicode_filename);
    200             }
    201     print STDERR "after url decoding: $unicode_filename\n";
     190    }
     191    #Does the filename have url encoded chars in it?
     192    if (&unicode::is_url_encoded($unicode_filename)) {
     193        $unicode_filename = &unicode::url_decode($unicode_filename);
     194    }
     195   
    202196    # Normalise the filename to canonical composition - on mac, filenames use decomposed form for accented chars
    203197    if ($ENV{'GSDLOS'} =~ m/^darwin$/i) {
    204        $unicode_filename = normalize('C', $unicode_filename); # Composed form 'C'
    205        }
     198        $unicode_filename = normalize('C', $unicode_filename); # Composed form 'C'
     199    }
    206200    return $unicode_filename;
    207201
Note: See TracChangeset for help on using the changeset viewer.