Changeset 29816

Show
Ignore:
Timestamp:
30.03.2015 13:42:14 (5 years ago)
Author:
kjdon
Message:

removing debug and commented out test lines

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/util.pm

    r29810 r29816  
    154154    require Win32; 
    155155 
    156     print STDERR "***** utf8 filename = $utf8_filename\n\n\n"; 
    157  
    158156    my $unicode_filename = decode("utf8",$utf8_filename); 
    159157    $real_filename = Win32::GetShortPathName($unicode_filename); 
     
    171169         
    172170    my $unicode_filename = $raw_file; 
    173     print STDERR "&&&&&&&&&&&&&& raw file = $unicode_filename ".&unicode::debug_unicode_string($unicode_filename)."\n"; 
    174171    if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) { 
    175         print STDERR "windows, upgrading...\n"; 
    176         $unicode_filename = &util::get_dirsep_tail(&util::upgrade_if_dos_filename(&FileUtils::filenameConcatenate($directory, $raw_file), 0)); 
    177              
     172        # Try turning a short version to the long version 
     173        # If the file name contains "funny" characters that can't be represented in the ANSI code page, we will have been given a short, weird-looking version, e.g. E74~1.txt 
     174        $unicode_filename = &util::get_dirsep_tail(&util::upgrade_if_dos_filename(&FileUtils::filenameConcatenate($directory, $raw_file), 0)); 
     175         
     176         
     177        if ($unicode_filename eq $raw_file) { 
     178        # This means the original filename *was* able to be encoded in the local ANSI file encoding (eg windows_1252), so now we turn it back to perl's unicode 
    178179         
    179             if ($unicode_filename eq $raw_file) { 
    180                 print STDERR "long = original\n"; 
    181                 $unicode_filename = &Encode::decode(locale_fs => $unicode_filename); 
    182                 } else { 
    183                     print STDERR "long not equal original\n"; 
    184                     #$unicode_filename = &Encode::decode("UTF-16le" => $unicode_filename); 
    185                     } 
     180        $unicode_filename = &Encode::decode(locale_fs => $unicode_filename); 
     181        } 
     182        # else: this means we did have one of the funny filenames. The getLongPathName (used in upgrade_if_dos_filename) will return unicode, so we don't need to do anything more. 
     183         
    186184                     
    187185    } else { 
    188         print STDERR "not windows, not upgrading. decoding in utf8\n"; 
    189         $unicode_filename = &Encode::decode("utf-8", $unicode_filename); 
     186        # we had a utf-8 string, turn it into perl internal unicode 
     187        $unicode_filename = &Encode::decode("utf-8", $unicode_filename); 
    190188     
    191189         
    192         } 
    193     # 
    194     #$unicode_filename = &Encode::decode("ascii", $unicode_filename); 
    195     #$unicode_filename = &Encode::decode(locale_fs => $unicode_filename); 
    196     print STDERR "after decoding = $unicode_filename ".&unicode::debug_unicode_string($unicode_filename)."\n"; 
    197     #Does the filenaem have url encoded chars in it? 
    198         if (&unicode::is_url_encoded($unicode_filename)) { 
    199             $unicode_filename = &unicode::url_decode($unicode_filename); 
    200             } 
    201     print STDERR "after url decoding: $unicode_filename\n"; 
     190    } 
     191    #Does the filename have url encoded chars in it? 
     192    if (&unicode::is_url_encoded($unicode_filename)) { 
     193        $unicode_filename = &unicode::url_decode($unicode_filename); 
     194    } 
     195     
    202196    # Normalise the filename to canonical composition - on Mac, filenames use decomposed form for accented chars 
    203197    if ($ENV{'GSDLOS'} =~ m/^darwin$/i) { 
    204        $unicode_filename = normalize('C', $unicode_filename); # Composed form 'C' 
    205        } 
     198        $unicode_filename = normalize('C', $unicode_filename); # Composed form 'C' 
     199    } 
    206200    return $unicode_filename; 
    207201