Changeset 28177

Show
Ignore:
Timestamp:
28.08.2013 21:02:52 (6 years ago)
Author:
ak19
Message:

Added new subroutines that convert absolute paths to Greenstone locations into paths in which the longest matching Greenstone prefix (the path to the collection folder, the collect folder, the site folder, or else the GSDLHOME/GSDL3HOME folder) is replaced by a placeholder. The reverse function has also been added. These subroutines are not yet used where they are needed, but they have been tested to work when called from a testing script.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/util.pm

    r28176 r28177  
    12311231     
    12321232    elsif (defined $site && $site) { # site non-empty, so get default collect dir for GS3 
    1233     if (defined $ENV{'GSDL3HOME'}) { 
    1234         return &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites",$site,"collect"); 
    1235     } else { 
    1236         return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites",$site,"collect"); 
    1237     } 
     1233    return &FileUtils::filenameConcatenate(&util::get_site_dir($site),"collect"); 
    12381234    } 
    12391235     
     
    12411237    return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"collect"); 
    12421238    } 
     1239} 
     1240 
     1241# returns path to current GS3 site. 
     1242# Returns "" if $site parameter is empty string, undefined or otherwise false 
     1243sub get_site_dir { 
     1244    my ($site) = @_; 
     1245     
     1246    if (defined $ENV{'GSDL3HOME'}) { 
     1247    return &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites",$site); # web folder 
     1248    }  
     1249    elsif (defined $ENV{'GSDL3SRCHOME'}) { 
     1250    return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites",$site); 
     1251    } 
     1252    else { # site has no meaning, return "" (covers the case of this subroutine having been called for a GS2 situation) 
     1253    return ""; 
     1254    } 
     1255} 
     1256 
     1257# returns path to GSDL3HOME if GS3, or GSDLHOME if GS2 
     1258sub get_gs_home { 
     1259    my ($site) = @_; 
     1260 
     1261    if (defined $site && $site) { # not undefined or empty string, so GS3 case 
     1262    if (defined $ENV{'GSDL3HOME'}) { 
     1263        return $ENV{'GSDL3HOME'}; 
     1264    } else { 
     1265        return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web"); 
     1266    } 
     1267    } else { 
     1268    return $ENV{'GSDLHOME'}; 
     1269    } 
     1270} 
     1271 
     1272sub is_abs_path { 
     1273    my ($path) = @_; 
     1274 
     1275    return 1 if($path =~ m@^/@); # full paths begin with forward slash on linux/mac 
     1276    return 1 if($path =~ m@^([a-zA-Z]\:|\\)@); # full paths begin with drive letter colon for Win or \ for volume, http://stackoverflow.com/questions/13011013/get-only-volume-name-from-filepath 
     1277 
     1278    return 0; 
     1279 
     1280    # File::Spec loads the module appropriate for the OS, but abs paths in archiveinf-doc.xml may be generated on another OS to this one 
     1281    #return File::Spec->file_name_is_absolute( $path );     
     1282} 
     1283 
     1284# returns an array of the FULL PATHS to the specific collection given, to the general collect_dir, 
     1285# to the site directory if gs3 and to the greenstone home folder (GSDLHOME or GSDL3HOME).  
     1286# These elements are returned in this ORDER, in order to allow matching from longest to shortest path. 
     1287sub get_common_gs_paths { 
     1288 
     1289    my ($site, $qualified_collection_name) = @_; 
     1290     
     1291    my $collect_dir = &util::get_working_collect_dir($site); 
     1292    my $collection_path = ""; 
     1293    if($qualified_collection_name ne "") { 
     1294    $collection_path = &util::resolve_collection_dir($collect_dir, $qualified_collection_name, $site); 
     1295    } 
     1296     
     1297    my $site_dir = &util::get_site_dir($site); # returns "" for GS2 
     1298    my $gs_home = &util::get_gs_home(); # GSDLHOME or GSDL3HOME folder 
     1299     
     1300    return ($collection_path, $collect_dir, $site_dir, $gs_home); # list in this order: from longest to shortest path 
     1301    #my @gs_paths = ($collection_path, $collect_dir, $site_dir, $gs_home); # list in this order: from longest to shortest path 
     1302    #return @gs_paths; 
     1303} 
     1304 
     1305# This subroutine is for improving portability of Greenstone collections from one OS to another, 
     1306# to be used to convert absolute paths going into db files into paths with placeholders instead. 
     1307# This sub works with util::get_common_gs_paths and takes a path to a greenstone file and, if it's 
     1308# an absolute path, then it will replace the longest matching greenstone-path prefix of the given  
     1309# path with a placeholder to match. 
     1310# The Greenstone-path prefixes that can be matched are the following common Greenstone paths:  
     1311# the path to the current (specific) collection, the path to the general GS collect directory,  
     1312# the path to the site directory if GS3, else the path to the GSDLHOME/GSDL3HOME folder. 
     1313# The longest matching prefix will be replaced with the equivalent placeholder:  
     1314# @THISCOLLECTPATH@, else @COLLECTHOME@, else @SITEHOME@, else @GSDLHOME@. 
     1315sub abspath_to_placeholders { 
     1316    my $path = shift(@_); # path to convert from absolute to one with placeholders 
     1317 
     1318    my ($collection_path, $collect_dir, $site_dir, $gs_home) = @_; 
     1319 
     1320    return $path unless is_abs_path($path); # path is relative 
     1321     
     1322    # now we know we're dealing with absolute paths and have to replace gs prefixes with placeholders 
     1323 
     1324    my @gs_paths = ($collection_path, $collect_dir, $site_dir, $gs_home); # list in this order: from longest to shortest path 
     1325 
     1326    my %placeholder_map = ($gs_home => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array 
     1327               $site_dir => '@SITEHOME@', 
     1328               $collect_dir => '@COLLECTHOME@', 
     1329               $collection_path => '@THISCOLLECTPATH@' 
     1330    ); 
     1331 
     1332 
     1333    # The sequence of elements in @gs_paths matters     
     1334    # Need to loop starting from the *longest* matching path (the path to the specific collection)  
     1335    # to the shortest matching path (the path to gsdlhome/gsdl3home folder): 
     1336 
     1337    foreach my $gs_path (@gs_paths) {  
     1338    my $re_path =  &util::filename_to_regex($gs_path); # escape for regex 
     1339 
     1340    next if($re_path eq ""); # site can be empty 
     1341 
     1342    if($path =~ m/^$re_path/) { # case sensitive or not for OS? 
     1343 
     1344        my $placeholder = $placeholder_map{$gs_path}; # get the placeholder to replace the matched path with 
     1345 
     1346        $path =~ s/^$re_path/$placeholder/; #case sensitive or not? 
     1347        #$path =~ s/^[\\\/]//; # remove gs_path's trailing separator left behind at the start of the path 
     1348        last; # done 
     1349    } 
     1350    } 
     1351     
     1352    return $path; 
     1353} 
     1354 
     1355# Function that does the reverse of the util::abspath_to_placeholders subroutine 
     1356# Once again, call this with the values returned from util::get_common_gs_paths 
     1357sub placeholders_to_abspath { 
     1358    my $path = shift(@_); # path that can contain placeholders to convert to resolved absolute path 
     1359 
     1360    my ($collection_path, $collect_dir, $site_dir, $gs_home) = @_; 
     1361 
     1362    return $path if($path !~ m/@/); # path contains no placeholders 
     1363     
     1364    # replace placeholders with gs prefixes  
     1365 
     1366    my @placeholders = ('@THISCOLLECTPATH@', '@COLLECTHOME@', '@SITEHOME@', '@GSDLHOME@'); # order of paths not crucial in this case,  
     1367                       # but listed here from longest to shortest once placeholders are have been resolved 
     1368 
     1369    my %placeholder_to_gspath_map = ('@GSDLHOME@' => $gs_home, # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array 
     1370               '@SITEHOME@' => $site_dir, 
     1371               '@COLLECTHOME@' => $collect_dir, 
     1372               '@THISCOLLECTPATH@' => $collection_path 
     1373    ); 
     1374 
     1375 
     1376    foreach my $placeholder (@placeholders) {  
     1377    my $gs_path = $placeholder_to_gspath_map{$placeholder}; 
     1378 
     1379    next if($gs_path eq ""); # no value for sitehome for GS2 
     1380 
     1381    #my $sep = ($ENV{'GSDLOS'} eq "windows") ? "\\" : "/"; 
     1382    #$gs_path = $gs_path . "$sep" if ($gs_path !~ m/$sep$/); # add in any trailing separator to the path prefix 
     1383 
     1384    if($path =~ m/^$placeholder/) { 
     1385        $path =~ s/^$placeholder/$gs_path/; 
     1386        last; # done 
     1387    } 
     1388    } 
     1389     
     1390    return $path; 
    12431391} 
    12441392