Ignore:
Timestamp:
2010-12-01T11:40:36+13:00 (13 years ago)
Author:
davidb
Message:

Additional routines (and few upgraded) to help support Greenstone working with filenames under Windows when then go beyond Latin-1 and start turning up in their DOS abbreviated form (e.g. Test~1.txt)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/util.pm

    r23314 r23362  
    2626package util;
    2727
     28use strict;
     29
     30use Encode;
    2831use File::Copy;
    2932use File::Basename;
    30 
    31 use strict;
    32 
    3333
    3434# removes files (but not directories)
     
    513513}
    514514
     515# Primarily for filenames generated by processing
     516# content of HTML files (which are mapped to UTF-8 internally)
     517#
     518# To turn this into an octet string that really exists on the file
     519# system:
     520# 1. don't need to do anything special for Unix-based systems
     521#   (as underlying file system is byte-code)
     522# 2. need to map to short DOS filenames for Windows
     523
     524sub utf8_to_real_filename
     525{
     526    my ($utf8_filename) = @_;
     527
     528    my $real_filename;
     529
     530    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
     531    require Win32;
     532    my $unicode_filename = decode("utf8",$utf8_filename);
     533    $real_filename = Win32::GetShortPathName($unicode_filename);
     534    }
     535    else {
     536    $real_filename = $utf8_filename;
     537    }
     538
     539    return $real_filename;
     540}
     541
     542
     543sub fd_exists
     544{
     545    my $filename_full_path = shift @_;
     546    my $test_op = shift @_ || "-e";
     547
     548    # By default tests for existance of file or directory (-e)
     549    # Can be made more specific by providing second parameter (e.g. -f or -d)
     550
     551    my $exists = 0;
     552
     553    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
     554    require Win32;
     555    my $filename_short_path = Win32::GetShortPathName($filename_full_path);
     556    if (!defined $filename_short_path) {
     557        # Was probably still in UTF8 form (not what is needed on Windows)
     558        my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
     559        if (defined $unicode_filename_full_path) {
     560        $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
     561        }
     562    }
     563    $filename_full_path = $filename_short_path;
     564    }
     565
     566    if (defined $filename_full_path) {
     567    $exists = eval "($test_op \$filename_full_path)";
     568    }
     569
     570    return $exists;
     571}
     572
     573sub file_exists
     574{
     575    my ($filename_full_path) = @_;
     576
     577    return fd_exists($filename_full_path,"-f");
     578}
     579
     580sub dir_exists
     581{
     582    my ($filename_full_path) = @_;
     583
     584    return fd_exists($filename_full_path,"-d");
     585}
    515586
    516587
     
    9341005
    9351006
     1007
    9361008# returns 1 if filename1 and filename2 point to the same
    9371009# file or directory
     
    9531025}
    9541026
     1027
     1028sub filename_within_directory
     1029{
     1030    my ($filename,$within_dir) = @_;
     1031   
     1032    my $dirsep = &util::get_dirsep();
     1033    if ($within_dir !~ m/$dirsep$/) {
     1034    $within_dir .= $dirsep;
     1035    }
     1036   
     1037    $within_dir =~ s/\\/\\\\/g; # escape DOS style file separator
     1038   
     1039    if ($filename =~ m/^$within_dir(.*)$/) {
     1040    $filename = $1;
     1041    }
     1042   
     1043    return $filename;
     1044}
     1045
    9551046sub filename_within_collection
    9561047{
     
    9601051   
    9611052    if (defined $collect_dir) {
    962     my $dirsep = &util::get_dirsep();
    963     if ($collect_dir !~ m/$dirsep$/) {
    964         $collect_dir .= $dirsep;
    965     }
    966    
    967     $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
    968    
     1053
    9691054    # if from within GSDLCOLLECTDIR, then remove directory prefix
    9701055    # so source_filename is realative to it.  This is done to aid
     
    9761061    # GSDLCOLLECTDIR subsequently needs to be put back on to turn
    9771062    # it back into a full pathname.
    978    
    979     if ($filename =~ /^$collect_dir(.*)$/) {
    980         $filename = $1;
    981     }
     1063
     1064    $filename = filename_within_directory($filename,$collect_dir);
    9821065    }
    9831066   
    9841067    return $filename;
    9851068}
     1069
     1070sub prettyprint_file
     1071{
     1072    my ($base_dir,$file) = @_;
     1073
     1074    my $filename_full_path = &util::filename_cat($base_dir,$file);
     1075
     1076    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
     1077    require Win32;
     1078
     1079    # For some reason base_dir in the form c:/a/b/c
     1080    # This leads to confusion later on, so turn it back into
     1081    # the more usual Windows form
     1082    $base_dir =~ s/\//\\/g;
     1083    my $long_base_dir = Win32::GetLongPathName($base_dir);
     1084    my $long_full_path = Win32::GetLongPathName($filename_full_path);
     1085
     1086    $file = filename_within_directory($long_full_path,$long_base_dir);
     1087    }
     1088
     1089    return $file;
     1090}
     1091
     1092
     1093sub upgrade_if_dos_filename
     1094{
     1095    my ($filename_full_path) = @_;
     1096
     1097    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
     1098    require Win32;
     1099    # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
     1100    # to its long (Windows) version
     1101    $filename_full_path = Win32::GetLongPathName($filename_full_path);
     1102    # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
     1103    $filename_full_path =~ s/^(.)/\l$1/;
     1104    }
     1105
     1106    return $filename_full_path;
     1107}
     1108
    9861109
    9871110sub filename_is_absolute
Note: See TracChangeset for help on using the changeset viewer.