Ignore:
Timestamp:
2013-08-28T21:02:52+12:00 (11 years ago)
Author:
ak19
Message:

Added new subroutines that convert absolute paths to Greenstone locations into paths containing a placeholder for the longest Greenstone prefix matched (the path to the collection folder, the collect folder, the site folder, or else the GSDLHOME/GSDL3HOME folder). The reverse function has also been added. These subroutines are not yet called from the places that need them, but they have been tested and work when called from a testing script.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/util.pm

    r28176 r28177  
    12311231   
    12321232    elsif (defined $site && $site) { # site non-empty, so get default collect dir for GS3
    1233     if (defined $ENV{'GSDL3HOME'}) {
    1234         return &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites",$site,"collect");
    1235     } else {
    1236         return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites",$site,"collect");
    1237     }
     1233    return &FileUtils::filenameConcatenate(&util::get_site_dir($site),"collect");
    12381234    }
    12391235   
     
    12411237    return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"collect");
    12421238    }
     1239}
     1240
     1241# returns path to current GS3 site.
     1242# Returns "" if $site parameter is empty string, undefined or otherwise false
     1243sub get_site_dir {
     1244    my ($site) = @_;
     1245   
     1246    if (defined $ENV{'GSDL3HOME'}) {
     1247    return &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites",$site); # web folder
     1248    }
     1249    elsif (defined $ENV{'GSDL3SRCHOME'}) {
     1250    return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites",$site);
     1251    }
     1252    else { # site has no meaning, return "" (covers the case of this subroutine having been called for a GS2 situation)
     1253    return "";
     1254    }
     1255}
     1256
     1257# returns path to GSDL3HOME if GS3, or GSDLHOME if GS2
     1258sub get_gs_home {
     1259    my ($site) = @_;
     1260
     1261    if (defined $site && $site) { # not undefined or empty string, so GS3 case
     1262    if (defined $ENV{'GSDL3HOME'}) {
     1263        return $ENV{'GSDL3HOME'};
     1264    } else {
     1265        return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web");
     1266    }
     1267    } else {
     1268    return $ENV{'GSDLHOME'};
     1269    }
     1270}
     1271
     1272sub is_abs_path {
     1273    my ($path) = @_;
     1274
     1275    return 1 if($path =~ m@^/@); # full paths begin with forward slash on linux/mac
     1276    return 1 if($path =~ m@^([a-zA-Z]\:|\\)@); # full paths begin with drive letter colon for Win or \ for volume, http://stackoverflow.com/questions/13011013/get-only-volume-name-from-filepath
     1277
     1278    return 0;
     1279
     1280    # File::Spec loads the module appropriate for the OS, but abs paths in archiveinf-doc.xml may be generated on another OS to this one
     1281    #return File::Spec->file_name_is_absolute( $path );   
     1282}
     1283
     1284# returns an array of the FULL PATHS to the specific collection given, to the general collect_dir,
     1285# to the site directory if gs3 and to the greenstone home folder (GSDLHOME or GSDL3HOME).
     1286# These elements are returned in this ORDER, in order to allow matching from longest to shortest path.
     1287sub get_common_gs_paths {
     1288
     1289    my ($site, $qualified_collection_name) = @_;
     1290   
     1291    my $collect_dir = &util::get_working_collect_dir($site);
     1292    my $collection_path = "";
     1293    if($qualified_collection_name ne "") {
     1294    $collection_path = &util::resolve_collection_dir($collect_dir, $qualified_collection_name, $site);
     1295    }
     1296   
     1297    my $site_dir = &util::get_site_dir($site); # returns "" for GS2
     1298    my $gs_home = &util::get_gs_home(); # GSDLHOME or GSDL3HOME folder
     1299   
     1300    return ($collection_path, $collect_dir, $site_dir, $gs_home); # list in this order: from longest to shortest path
     1301    #my @gs_paths = ($collection_path, $collect_dir, $site_dir, $gs_home); # list in this order: from longest to shortest path
     1302    #return @gs_paths;
     1303}
     1304
     1305# This subroutine is for improving portability of Greenstone collections from one OS to another,
     1306# to be used to convert absolute paths going into db files into paths with placeholders instead.
     1307# This sub works with util::get_common_gs_paths and takes a path to a greenstone file and, if it's
     1308# an absolute path, then it will replace the longest matching greenstone-path prefix of the given
     1309# path with a placeholder to match.
     1310# The Greenstone-path prefixes that can be matched are the following common Greenstone paths:
     1311# the path to the current (specific) collection, the path to the general GS collect directory,
     1312# the path to the site directory if GS3, else the path to the GSDLHOME/GSDL3HOME folder.
     1313# The longest matching prefix will be replaced with the equivalent placeholder:
     1314# @THISCOLLECTPATH@, else @COLLECTHOME@, else @SITEHOME@, else @GSDLHOME@.
     1315sub abspath_to_placeholders {
     1316    my $path = shift(@_); # path to convert from absolute to one with placeholders
     1317
     1318    my ($collection_path, $collect_dir, $site_dir, $gs_home) = @_;
     1319
     1320    return $path unless is_abs_path($path); # path is relative
     1321   
     1322    # now we know we're dealing with absolute paths and have to replace gs prefixes with placeholders
     1323
     1324    my @gs_paths = ($collection_path, $collect_dir, $site_dir, $gs_home); # list in this order: from longest to shortest path
     1325
     1326    my %placeholder_map = ($gs_home => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
     1327               $site_dir => '@SITEHOME@',
     1328               $collect_dir => '@COLLECTHOME@',
     1329               $collection_path => '@THISCOLLECTPATH@'
     1330    );
     1331
     1332
     1333    # The sequence of elements in @gs_paths matters   
     1334    # Need to loop starting from the *longest* matching path (the path to the specific collection)
     1335    # to the shortest matching path (the path to gsdlhome/gsdl3home folder):
     1336
     1337    foreach my $gs_path (@gs_paths) {
     1338    my $re_path =  &util::filename_to_regex($gs_path); # escape for regex
     1339
     1340    next if($re_path eq ""); # site can be empty
     1341
     1342    if($path =~ m/^$re_path/) { # case sensitive or not for OS?
     1343
     1344        my $placeholder = $placeholder_map{$gs_path}; # get the placeholder to replace the matched path with
     1345
     1346        $path =~ s/^$re_path/$placeholder/; #case sensitive or not?
     1347        #$path =~ s/^[\\\/]//; # remove gs_path's trailing separator left behind at the start of the path
     1348        last; # done
     1349    }
     1350    }
     1351   
     1352    return $path;
     1353}
     1354
     1355# Function that does the reverse of the util::abspath_to_placeholders subroutine
     1356# Once again, call this with the values returned from util::get_common_gs_paths
     1357sub placeholders_to_abspath {
     1358    my $path = shift(@_); # path that can contain placeholders to convert to resolved absolute path
     1359
     1360    my ($collection_path, $collect_dir, $site_dir, $gs_home) = @_;
     1361
     1362    return $path if($path !~ m/@/); # path contains no placeholders
     1363   
     1364    # replace placeholders with gs prefixes
     1365
     1366    my @placeholders = ('@THISCOLLECTPATH@', '@COLLECTHOME@', '@SITEHOME@', '@GSDLHOME@'); # order of paths not crucial in this case,
     1367                       # but listed here from longest to shortest once placeholders are have been resolved
     1368
     1369    my %placeholder_to_gspath_map = ('@GSDLHOME@' => $gs_home, # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
     1370               '@SITEHOME@' => $site_dir,
     1371               '@COLLECTHOME@' => $collect_dir,
     1372               '@THISCOLLECTPATH@' => $collection_path
     1373    );
     1374
     1375
     1376    foreach my $placeholder (@placeholders) {
     1377    my $gs_path = $placeholder_to_gspath_map{$placeholder};
     1378
     1379    next if($gs_path eq ""); # no value for sitehome for GS2
     1380
     1381    #my $sep = ($ENV{'GSDLOS'} eq "windows") ? "\\" : "/";
     1382    #$gs_path = $gs_path . "$sep" if ($gs_path !~ m/$sep$/); # add in any trailing separator to the path prefix
     1383
     1384    if($path =~ m/^$placeholder/) {
     1385        $path =~ s/^$placeholder/$gs_path/;
     1386        last; # done
     1387    }
     1388    }
     1389   
     1390    return $path;
    12431391}
    12441392
Note: See TracChangeset for help on using the changeset viewer.