- Timestamp:
- 2013-02-26T11:38:47+13:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/perllib/util.pm
r24626 r26961 46 46 # and are regular files 47 47 foreach my $file (@files) { 48 49 if (&util::isHDFS($file)) 50 { 51 &util::executeHDFSCommand('rm', $file); 52 } 53 else 54 { 48 55 if (!-e $file) { 49 56 print STDERR "util::rm $file does not exist\n"; … … 53 60 push (@filefiles, $file); 54 61 } 55 } 56 62 } 63 } 64 57 65 # remove the files 58 66 my $numremoved = unlink @filefiles; … … 96 104 # recursively remove the files 97 105 foreach my $file (@files_array) { 106 107 # HDFS support 108 if (&util::isHDFS($file)) 109 { 110 # HDFS doesn't really lend itself to a choosy delete, unless you want 111 # it to be really, really, unbearably slow. 112 &util::executeHDFSCommand('rmr', $file); 113 next; 114 } 115 116 98 117 $file =~ s/[\/\\]+$//; # remove trailing slashes 99 118 … … 133 152 134 153 # recursive removal 135 sub rm_r { 136 my (@files) = @_; 137 138 # use the more general (but reterospectively written function 139 # filtered_rm_r function() 140 141 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions 154 sub rm_r 155 { 156 my (@files) = @_; 157 # use the more general (but reterospectively written function 158 # filtered_rm_r function() 159 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions 142 160 } 143 161 … … 149 167 my $dest = pop (@_); 150 168 my (@srcfiles) = @_; 169 170 # moving a file within or into HDFS 171 if (&util::isHDFS($dest)) 172 { 173 foreach my $src (@srcfiles) 174 { 175 if (&util::isHDFS($src)) 176 { 177 &util::executeHDFSCommand('mv', $src, $dest); 178 } 179 else 180 { 181 &util::executeHDFSCommand('put', $src, $dest); 182 &util::rm_r($src); 183 } 184 } 185 return; 186 } 151 187 152 188 # remove trailing slashes from source and destination files … … 166 202 # move the files 167 203 foreach my $file (@srcfiles) { 204 205 # moving a file out of HDFS 206 if (&util::isHDFS($file)) 207 { 208 &util::executeHDFSCommand('get', $file, $dest); 209 &util::rm_r($file); 210 next; 211 } 212 168 213 my $tempdest = $dest; 169 214 if (-d $tempdest) { … … 174 219 print STDERR "util::mv $file does not exist\n"; 175 220 } else { 176 rename ($file, $tempdest); 177 } 178 } 221 if(!rename ($file, $tempdest)) { 222 print STDERR "**** Failed to rename $file to $tempdest\n"; 223 &File::Copy::copy($file, $tempdest); 224 &rm($file); 225 } 226 elsif(-e $file) { # rename (partially) succeeded) but srcfile still exists after rename 227 #print STDERR "*** srcfile $file still exists after rename to $tempdest\n"; 228 if(!-e $tempdest) { 229 print STDERR "@@@@ ERROR: $tempdest does not exist\n"; 230 } 231 # Sometimes the rename operation fails (as does File::Copy::move). 232 # This turns out to be because the files are hardlinked. 233 # Need to do a copy-delete in this case, however, the copy step is not necessary: 234 # the srcfile got renamed into tempdest, but srcfile itself still exists, delete it. 235 #&File::Copy::copy($file, $tempdest); 236 237 &rm($file); 238 } 239 } 240 } 241 } 242 243 # Move the contents of source directory into target directory 244 # (as opposed to merely replacing target dir with the src dir) 245 # This can overwrite any files with duplicate names in the target 246 # but other files and folders in the target will continue to exist 247 sub mv_dir_contents { 248 my ($src_dir, $dest_dir) = @_; 249 250 # Obtain listing of all files within src_dir 251 # Note that readdir lists relative paths, as well as . and .. 252 opendir(DIR, "$src_dir"); 253 my @files= readdir(DIR); 254 close(DIR); 255 256 my @full_path_files = (); 257 foreach my $file (@files) { 258 # process all except . and .. 259 unless($file eq "." || $file eq "..") { 260 261 my $dest_subdir = &filename_cat($dest_dir, $file); # $file is still a relative path 262 263 # construct absolute paths 264 $file = &filename_cat($src_dir, $file); # $file is now an absolute path 265 266 # Recurse on directories which have an equivalent in target dest_dir 267 # If $file is a directory that already exists in target $dest_dir, 268 # then a simple move operation will fail (definitely on Windows). 269 if(-d $file && -d $dest_subdir) { 270 #print STDERR "**** $file is a directory also existing in target, its contents to be copied to $dest_subdir\n"; 271 &mv_dir_contents($file, $dest_subdir); 272 273 # now all content is moved across, delete empty dir in source folder 274 if(&is_dir_empty($file)) { 275 if (!rmdir $file) { 276 print STDERR "ERROR. util::mv_dir_contents couldn't remove directory $file\n"; 277 } 278 } else { # error 279 print STDERR "ERROR. util::mv_dir_contents: subfolder $file still non-empty after moving contents to $dest_subdir\n"; 280 } 281 } else { # process files and any directories that don't already exist with a simple move 282 push(@full_path_files, $file); 283 } 284 } 285 } 286 287 if(!&dir_exists($dest_dir)) { # create target toplevel folder or subfolders if they don't exist 288 &mk_dir($dest_dir); 289 } 290 291 #print STDERR "@@@@@ Copying files |".join(",", @full_path_files)."| to: $dest_dir\n"; 292 293 if(@full_path_files) { # if non-empty, there's something to copy across 294 &mv(@full_path_files, $dest_dir); 295 } 179 296 } 180 297 181 298 182 299 # copies a file or a group of files 183 sub cp { 184 my $dest = pop (@_); 185 my (@srcfiles) = @_; 186 187 # remove trailing slashes from source and destination files 188 $dest =~ s/[\\\/]+$//; 189 map {$_ =~ s/[\\\/]+$//;} @srcfiles; 190 191 # a few sanity checks 192 if (scalar (@srcfiles) == 0) { 193 print STDERR "util::cp no destination directory given\n"; 194 return; 195 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) { 196 print STDERR "util::cp if multiple source files are given the ". 197 "destination must be a directory\n"; 198 return; 199 } 200 201 # copy the files 202 foreach my $file (@srcfiles) { 203 my $tempdest = $dest; 204 if (-d $tempdest) { 205 my ($filename) = $file =~ /([^\\\/]+)$/; 206 $tempdest .= "/$filename"; 207 } 208 if (!-e $file) { 209 print STDERR "util::cp $file does not exist\n"; 210 } elsif (!-f $file) { 211 print STDERR "util::cp $file is not a plain file\n"; 212 } else { 213 &File::Copy::copy ($file, $tempdest); 214 } 215 } 300 sub cp 301 { 302 my $dest = pop (@_); 303 my (@srcfiles) = @_; 304 305 # remove trailing slashes from source and destination files 306 $dest =~ s/[\\\/]+$//; 307 map {$_ =~ s/[\\\/]+$//;} @srcfiles; 308 309 # a few sanity checks 310 if (scalar (@srcfiles) == 0) 311 { 312 print STDERR "util::cp no destination directory given\n"; 313 return 0; 314 } 315 elsif ((scalar (@srcfiles) > 1) && (!&util::dir_exists($dest))) 316 { 317 print STDERR "util::cp if multiple source files are given the destination must be a directory\n"; 318 return 0; 319 } 320 321 # copying a file into or within HDFS 322 if (&util::isHDFS($dest)) 323 { 324 foreach my $src (@srcfiles) 325 { 326 &util::executeHDFSCommand('put', $src, $dest); 327 &util::rm_r($src); 328 } 329 return; 330 } 331 332 # copy the files 333 foreach my $file (@srcfiles) 334 { 335 my $tempdest = $dest; 336 if (&util::dir_exists($tempdest)) 337 { 338 my ($filename) = $file =~ /([^\\\/]+)$/; 339 $tempdest .= "/$filename"; 340 } 341 if (!&util::file_exists($file)) 342 { 343 if (&util::dir_exists($file)) 344 { 345 print STDERR "util::cp $file is not a plain file\n"; 346 } 347 else 348 { 349 print STDERR "util::cp $file does not exist\n"; 350 } 351 } 352 elsif (&util::isHDFS($file)) 353 { 354 &util::executeHDFSCommand('get', $file, $dest); 355 } 356 else 357 { 358 &File::Copy::copy ($file, $tempdest); 359 } 360 } 216 361 } 217 362 … … 393 538 } 394 539 395 sub mk_dir { 396 my ($dir) = @_; 397 540 # /** @function mk_dir() 541 # * Extend mkdir to allow it to silently fail in the case where code is 542 # * 'competing' to create a directory first. 543 # * @param $dir the full path of the directory to create 544 # * @param $can_fail 1 if the mkdir can fail silently (optional) 545 # * @return 1 on success, 0 on failure 546 # */ 547 sub mk_dir 548 { 549 my ($dir, $can_fail) = @_; 550 my $mkdir_ok = 0; 551 if (&util::isHDFS($dir)) 552 { 553 # unhelpfully HDFS mkdir returns 0 on success, -1 on failure 554 my $result = &util::executeHDFSCommand('mkdir', $dir); 555 if ($result == 0) 556 { 557 $mkdir_ok = 1; 558 } 559 } 560 else 561 { 398 562 my $store_umask = umask(0002); 399 563 my $mkdir_ok = mkdir ($dir, 0777); 400 564 umask($store_umask); 401 402 if (!$mkdir_ok) 403 { 404 print STDERR "util::mk_dir could not create directory $dir\n"; 405 return; 406 } 565 } 566 # only output an error if this call wasn't marked as can_fail 567 if (!$mkdir_ok && (!defined $can_fail || !$can_fail)) 568 { 569 print STDERR "util::mk_dir could not create directory: $dir\n error: $!\n"; 570 } 571 return $mkdir_ok; 407 572 } 408 573 … … 410 575 # on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently 411 576 # slightly faster (surprisingly) - Stefan. 412 sub mk_all_dir { 413 my ($dir) = @_; 414 415 # use / for the directory separator, remove duplicate and 416 # trailing slashes 417 $dir=~s/[\\\/]+/\//g; 418 $dir=~s/[\\\/]+$//; 419 420 # make sure the cache directory exists 421 my $dirsofar = ""; 422 my $first = 1; 423 foreach my $dirname (split ("/", $dir)) { 424 $dirsofar .= "/" unless $first; 425 $first = 0; 426 427 $dirsofar .= $dirname; 428 429 next if $dirname =~ /^(|[a-z]:)$/i; 430 if (!-e $dirsofar) 431 { 432 my $store_umask = umask(0002); 433 my $mkdir_ok = mkdir ($dirsofar, 0777); 434 umask($store_umask); 435 if (!$mkdir_ok) 436 { 437 print STDERR "util::mk_all_dir could not create directory $dirsofar\n"; 438 return; 439 } 440 } 441 } 577 sub mk_all_dir 578 { 579 my ($dir) = @_; 580 581 ###rint "-> util::mk_all_dir($dir)\n"; 582 583 # support for HDFS 584 if (&util::isHDFS($dir)) 585 { 586 # HDFS's version of mkdir does it recursively anyway 587 my $result = &util::executeHDFSCommand('mkdir', $dir); 588 return ($result == 0); 589 } 590 591 # use / for the directory separator, remove duplicate and 592 # trailing slashes 593 $dir=~s/[\\\/]+/\//g; 594 $dir=~s/[\\\/]+$//; 595 596 # ensure the directory doesn't already exist 597 if (-e $dir) 598 { 599 return 0; 600 } 601 602 # make sure the cache directory exists 603 my $dirsofar = ""; 604 my $first = 1; 605 foreach my $dirname (split ("/", $dir)) 606 { 607 $dirsofar .= "/" unless $first; 608 $first = 0; 609 610 $dirsofar .= $dirname; 611 612 next if $dirname =~ /^(|[a-z]:)$/i; 613 if (!-e $dirsofar) 614 { 615 my $store_umask = umask(0002); 616 my $mkdir_ok = mkdir ($dirsofar, 0777); 617 umask($store_umask); 618 if (!$mkdir_ok) 619 { 620 print STDERR "util::mk_all_dir could not create directory $dirsofar\n"; 621 return 0; 622 } 623 } 624 } 625 return (-e $dir); 442 626 } 443 627 … … 446 630 my ($src, $dest, $verbosity) = @_; 447 631 632 print "&util::hard_link( $src, $dest, $verbosity)\n"; 633 448 634 # remove trailing slashes from source and destination files 449 635 $src =~ s/[\\\/]+$//; … … 452 638 ## print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n"; 453 639 # a few sanity checks 454 if ( -e $dest) {640 if (&util::file_exists($dest)) { 455 641 # destination file already exists 456 642 return; 457 643 } 458 elsif (! -e $src) {644 elsif (!&util::file_exists($src)) { 459 645 print STDERR "util::hard_link source file \"$src\" does not exist\n"; 460 646 return 1; 461 647 } 462 elsif ( -d $src) {648 elsif (!&util::file_exists($src) && &util::dir_exists($src)) { 463 649 print STDERR "util::hard_link source \"$src\" is a directory\n"; 464 650 return 1; … … 466 652 467 653 my $dest_dir = &File::Basename::dirname($dest); 468 mk_all_dir($dest_dir) if (!-e $dest_dir); 469 654 if (!&util::dir_exists($dest_dir)) 655 { 656 mk_all_dir($dest_dir); 657 } 658 659 # HDFS Support - we can't ever link, copy instead 660 if (&util::isHDFS($src)) 661 { 662 if (&util::isHDFS($dest)) 663 { 664 &util::executeHDFSCommand('put', $src, $dest); 665 return 0; 666 } 667 else 668 { 669 &util::executeHDFSCommand('get', $src, $dest); 670 return 0; 671 } 672 } 673 elsif (&util::isHDFS($dest)) 674 { 675 &util::executeHDFSCommand('put', $src, $dest); 676 return 0; 677 } 470 678 471 679 if (!link($src, $dest)) { … … 553 761 } 554 762 555 556 763 sub fd_exists 557 764 { … … 563 770 564 771 my $exists = 0; 772 773 # Support for HDFS [jmt12] 774 if (&util::isHDFS($filename_full_path)) 775 { 776 # very limited test op support for HDFS 777 if ($test_op ne '-d' && $test_op ne '-e' && $test_op ne '-z') 778 { 779 $test_op = '-e'; 780 } 781 my $result = &util::executeHDFSCommand('test ' . $test_op, $filename_full_path); 782 return ($result == 0); 783 } 565 784 566 785 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { … … 587 806 { 588 807 my ($filename_full_path) = @_; 589 808 # if ($filename_full_path =~ /^hdfs:/) 809 # { 810 # print "-> util::file_exists(" . $filename_full_path . ")\n"; 811 # } 590 812 return fd_exists($filename_full_path,"-f"); 591 813 } … … 594 816 { 595 817 my ($filename_full_path) = @_; 596 818 # if ($filename_full_path =~ /^hdfs:/) 819 # { 820 # print "-> util::dir_exists(" . $filename_full_path . ")\n"; 821 # } 597 822 return fd_exists($filename_full_path,"-d"); 598 823 } … … 837 1062 my $filename = shift (@_); 838 1063 839 # need to put single backslash back to double so that regex works 840 if ($ENV{'GSDLOS'} =~ /^windows$/i) { 841 $filename =~ s/\\/\\\\/g; 842 } 1064 # need to make single backslashes double so that regex works 1065 $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i); 1066 1067 # note that the first part of a substitution is a regex, so RE chars need to be escaped, 1068 # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally 1069 $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them 1070 $filename =~ s@\(@\\(@g; # escape brackets 1071 $filename =~ s@\)@\\)@g; # escape brackets 1072 $filename =~ s@\[@\\[@g; # escape brackets 1073 $filename =~ s@\]@\\]@g; # escape brackets 1074 1075 return $filename; 1076 } 1077 1078 sub unregex_filename { 1079 my $filename = shift (@_); 1080 1081 # need to put doubled backslashes for regex back to single 1082 $filename =~ s/\\\./\./g; # remove RE syntax for . 1083 $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "(" 1084 $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")" 1085 $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "[" 1086 $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]" 1087 1088 # \\ goes to \ 1089 # This is the last step in reverse mirroring the order of steps in filename_to_regex() 1090 $filename =~ s/\\\\/\\/g; # remove RE syntax for \ 843 1091 return $filename; 844 1092 } … … 869 1117 $filename =~ s/[\\\/]$//; 870 1118 1119 # restore protocols if present [jmt12] 1120 $filename =~ s/(file|hdfs|https?):\/([^\/])/$1:\/\/$2/g; 1121 871 1122 return $filename; 872 1123 } … … 902 1153 } 903 1154 904 my $oid_warned_about_periods = 0;905 1155 906 1156 sub tidy_up_oid { 907 my ($OID, $verbosity) = @_; 908 if (!defined $verbosity) 909 { 910 $verbosity = 2; 911 } 1157 my ($OID) = @_; 912 1158 if ($OID =~ /\./) { 913 if (!$oid_warned_about_periods) 914 { 915 print STDERR "Warning, identifier $OID contains periods (.), removing them for this and future documents\n"; 916 $oid_warned_about_periods = 1; 917 } 1159 print STDERR "Warning, identifier $OID contains periods (.), removing them\n"; 918 1160 $OID =~ s/\.//g; #remove any periods 919 1161 } … … 935 1177 936 1178 # do not prepend any value/path that's already in the environment variable 937 if ($ENV{'GSDLOS'} =~ /^windows$/i) 938 { 939 my $escaped_val = $val; 940 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex 941 if (!defined($ENV{$var})) { 942 $ENV{$var} = "$val"; 943 } 944 elsif($ENV{$var} !~ m/$escaped_val/) { 945 $ENV{$var} = "$val;".$ENV{$var}; 946 } 947 } 948 else { 949 if (!defined($ENV{$var})) { 950 $ENV{$var} = "$val"; 951 } 952 elsif($ENV{$var} !~ m/$val/) { 953 $ENV{$var} = "$val:".$ENV{$var}; 954 } 1179 1180 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex 1181 if (!defined($ENV{$var})) { 1182 $ENV{$var} = "$val"; 1183 } 1184 elsif($ENV{$var} !~ m/$escaped_val/) { 1185 $ENV{$var} = "$val;".$ENV{$var}; 955 1186 } 956 1187 } … … 958 1189 sub envvar_append { 959 1190 my ($var,$val) = @_; 960 1191 961 1192 # do not append any value/path that's already in the environment variable 962 if ($ENV{'GSDLOS'} =~ /^windows$/i) 963 { 964 my $escaped_val = $val; 965 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex 966 if (!defined($ENV{$var})) { 967 $ENV{$var} = "$val"; 968 } 969 elsif($ENV{$var} !~ m/$escaped_val/) { 970 $ENV{$var} .= ";$val"; 971 } 972 } 973 else { 974 if (!defined($ENV{$var})) { 975 $ENV{$var} = "$val"; 976 } 977 elsif($ENV{$var} !~ m/$val/) { 978 $ENV{$var} .= ":$val"; 979 } 980 } 1193 1194 my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex 1195 if (!defined($ENV{$var})) { 1196 $ENV{$var} = "$val"; 1197 } 1198 elsif($ENV{$var} !~ m/$escaped_val/) { 1199 $ENV{$var} .= ";$val"; 1200 } 981 1201 } 982 1202 … … 1047 1267 } 1048 1268 1049 1269 # If filename is relative to within_dir, returns the relative path of filename to that directory 1270 # with slashes in the filename returned as they were in the original (absolute) filename. 1050 1271 sub filename_within_directory 1051 1272 { … … 1057 1278 } 1058 1279 1059 $within_dir =~ s/\\/\\\\/g; # escape DOS style file separator 1060 1280 $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets 1061 1281 if ($filename =~ m/^$within_dir(.*)$/) { 1062 1282 $filename = $1; … … 1065 1285 return $filename; 1066 1286 } 1287 1288 # If filename is relative to within_dir, returns the relative path of filename to that directory in URL format. 1289 # Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes). 1290 # The subpath returned will also be a URL type filename. 1291 sub filename_within_directory_url_format 1292 { 1293 my ($filename,$within_dir) = @_; 1294 1295 # convert parameters only to / slashes if Windows 1296 1297 my $filename_urlformat = &filepath_to_url_format($filename); 1298 my $within_dir_urlformat = &filepath_to_url_format($within_dir); 1299 1300 #if ($within_dir_urlformat !~ m/\/$/) { 1301 # make sure directory ends with a slash 1302 #$within_dir_urlformat .= "/"; 1303 #} 1304 1305 my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets 1306 1307 #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n"; 1308 1309 # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath) 1310 if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) { 1311 $filename_urlformat = $1; 1312 } 1313 1314 return $filename_urlformat; 1315 } 1316 1317 # Convert parameter to use / slashes if Windows (if on Linux leave any \ as is, 1318 # since on Linux it doesn't represent a file separator but an escape char). 1319 sub filepath_to_url_format 1320 { 1321 my ($filepath) = @_; 1322 if ($ENV{'GSDLOS'} =~ /^windows$/i) { 1323 # Only need to worry about Windows, as Unix style directories already in url-format 1324 # Convert Windows style \ => / 1325 $filepath =~ s@\\@/@g; 1326 } 1327 return $filepath; 1328 } 1329 1330 # regex filepaths on windows may include \\ as path separator. Convert \\ to / 1331 sub filepath_regex_to_url_format 1332 { 1333 my ($filepath) = @_; 1334 if ($ENV{'GSDLOS'} =~ /^windows$/i) { 1335 # Only need to worry about Windows, as Unix style directories already in url-format 1336 # Convert Windows style \\ => / 1337 $filepath =~ s@\\\\@/@g; 1338 } 1339 return $filepath; 1340 1341 } 1342 1343 # Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep) 1344 # and ignores trailing / 1345 # returns (file, dirs) dirs will be empty if no subdirs 1346 sub url_fileparse 1347 { 1348 my ($filepath) = @_; 1349 # remove trailing / 1350 $filepath =~ s@/$@@; 1351 if ($filepath !~ m@/@) { 1352 return ($filepath, ""); 1353 } 1354 my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@; 1355 return ($file, $dirs); 1356 1357 } 1358 1067 1359 1068 1360 sub filename_within_collection … … 1185 1477 { 1186 1478 my ($filename) = @_; 1479 1480 # support for explicit protocol prefixes, for example file: or hdfs:, 1481 # which must be absolute [jmt12] 1482 if ($filename =~ /^\w+:\/\//) 1483 { 1484 return 1; 1485 } 1187 1486 1188 1487 if ($ENV{'GSDLOS'} =~ /^windows$/i) { … … 1502 1801 } 1503 1802 1803 sub os_dir() { 1804 1805 my $gsdlarch = ""; 1806 if(defined $ENV{'GSDLARCH'}) { 1807 $gsdlarch = $ENV{'GSDLARCH'}; 1808 } 1809 return $ENV{'GSDLOS'}.$gsdlarch; 1810 } 1504 1811 1505 1812 # Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file. … … 1519 1826 } else { 1520 1827 $defaultUrlprefix = "/greenstone"; 1521 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");1828 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg"); 1522 1829 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other 1523 1830 } … … 1643 1950 } 1644 1951 1952 # returns the path to the java command in the JRE included with GS (if any), 1953 # quoted to safeguard any spaces in this path, otherwise a simple java 1954 # command is returned which assumes and will try for a system java. 1955 sub get_java_command { 1956 my $java = "java"; 1957 if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line 1958 # after running setup.bat or from GLI which also runs setup.bat 1959 my $java_bin = &util::filename_cat($ENV{'GSDLHOME'},"packages","jre","bin"); 1960 if(-d $java_bin) { 1961 $java = &util::filename_cat($java_bin,"java"); 1962 $java = "\"".$java."\""; # quoted to preserve spaces in path 1963 } 1964 } 1965 return $java; 1966 } 1967 1968 1969 # Given the qualified collection name (colgroup/collection), 1970 # returns the collection and colgroup parts 1971 sub get_collection_parts { 1972 # http://perldoc.perl.org/File/Basename.html 1973 # my($filename, $directories, $suffix) = fileparse($path); 1974 # "$directories contains everything up to and including the last directory separator in the $path 1975 # including the volume (if applicable). The remainder of the $path is the $filename." 1976 #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection); 1977 1978 my $qualified_collection = shift(@_); 1979 1980 # Since activate.pl can be launched from the command-line, including by a user, 1981 # best not to assume colgroup uses URL-style slashes as would be the case with GLI 1982 # Also allow for the accidental inclusion of multiple slashes 1983 my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection); 1984 1985 if(!defined $collection) { 1986 $collection = $colgroup; 1987 $colgroup = ""; 1988 } 1989 return ($collection, $colgroup); 1990 } 1991 1992 # work out the "collectdir/collection" location 1993 sub resolve_collection_dir { 1994 my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode 1995 1996 my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection); 1997 1998 if (defined $collect_dir) { 1999 return &util::filename_cat($collect_dir,$colgroup, $collection); 2000 } 2001 else { 2002 if (defined $site) { 2003 return &util::filename_cat($ENV{'GSDL3HOME'},"sites",$site,"collect",$colgroup, $collection); 2004 } 2005 else { 2006 return &util::filename_cat($ENV{'GSDLHOME'},"collect",$colgroup, $collection); 2007 } 2008 } 2009 } 2010 2011 # ====== John's new fuctions to support HDFS ====== 2012 2013 # /** 2014 # * Executes a HDFS command without caring about the resulting output while 2015 # * still reacting appropriately to failed executions. 2016 # */ 2017 sub executeHDFSCommand 2018 { 2019 my $action = shift(@_); 2020 my $command = 'hadoop fs -' . $action . ' "' . join('" "', @_) . '"'; 2021 my $result = `$command 2>&1`; 2022 my $return_value = $?; 2023 ###rint STDERR "-> util::executeHDFSCommand('" . $command . "') => " . $return_value . "\n"; 2024 return $return_value; 2025 } 2026 2027 # /** @function file_canread() 2028 # */ 2029 sub file_canread 2030 { 2031 my ($filename_full_path) = @_; 2032 # the HDFS support doesn't have '-r' so it will revert to '-e' 2033 return fd_exists($filename_full_path,"-r"); 2034 } 2035 # /** file_canread() **/ 2036 2037 sub file_openfdcommand 2038 { 2039 my ($filename_full_path, $mode) = @_; 2040 ##rint STDERR "-> util::file_openfdcommand('" . $filename_full_path . "', '" . $mode . "')\n"; 2041 # I'll set to read by default, as that is less destructive to precious files 2042 # on your system... 2043 if (!defined $mode) 2044 { 2045 $mode = '<'; 2046 } 2047 my $open_fd_command = $mode . $filename_full_path; 2048 if (&util::isHDFS($filename_full_path)) 2049 { 2050 # currently don't really support append, but might be able to do something 2051 # like: 2052 # hadoop fs -cat /user/username/folder/csv1.csv \ 2053 # /user/username/folder/csv2.csv | hadoop fs -put - \ 2054 # /user/username/folder/output.csv 2055 if ($mode eq '>>' || $mode eq '>') 2056 { 2057 # if the file already exists, put won't clobber it like a proper write 2058 # would, so try to delete it (can fail) 2059 if (&util::file_exists($filename_full_path)) 2060 { 2061 &util::rm($filename_full_path); 2062 } 2063 # then create the command 2064 $open_fd_command = '| ' . &util::generateHDFSCommand('put', '-', $filename_full_path); 2065 } 2066 else 2067 { 2068 $open_fd_command = &util::generateHDFSCommand('cat', $filename_full_path) . ' |'; 2069 } 2070 } 2071 return $open_fd_command; 2072 } 2073 2074 # /** @function file_readdir() 2075 # * Provide a function to return the files within a directory that is aware 2076 # * of protocols other than file:// 2077 # * @param $dirname the full path to the directory 2078 # * @param $dir_ref a reference to an array to populate with files 2079 # */ 2080 sub file_readdir 2081 { 2082 my ($dirname, $dir_ref) = @_; 2083 my $dir_read = 0; 2084 if (&util::isHDFS($dirname)) 2085 { 2086 my $hdfs_command = &util::generateHDFSCommand('ls', $dirname); 2087 my $result = `$hdfs_command 2>&1`; 2088 my @lines = split(/\r?\n/, $result); 2089 foreach my $line (@lines) 2090 { 2091 if ($line =~ /\/([^\/]+)$/) 2092 { 2093 my $file = $1; 2094 push(@{$dir_ref}, $file); 2095 } 2096 } 2097 $dir_read = 1; 2098 } 2099 elsif (opendir(DIR, $dirname)) 2100 { 2101 my @dirs = readdir(DIR); 2102 push(@{$dir_ref}, @dirs); 2103 closedir(DIR); 2104 $dir_read = 1; 2105 } 2106 return $dir_read; 2107 } 2108 # /** file_readdir() **/ 2109 2110 # /** 2111 # */ 2112 sub file_lastmodified 2113 { 2114 my ($filename_full_path) = @_; 2115 my $last_modified = 0; 2116 if (&util::isHDFS($filename_full_path)) 2117 { 2118 my $file_stats = file_stats($filename_full_path); 2119 my $mod_date = $file_stats->{'modification_date'}; 2120 my $mod_time = $file_stats->{'modification_time'}; 2121 # Last modified should be in number of days (as a float) since last 2122 # modified - but I'll just return 0 for now 2123 $last_modified = 0.00; 2124 } 2125 else 2126 { 2127 $last_modified = -M $filename_full_path; 2128 } 2129 return $last_modified; 2130 } 2131 # /** file_lastmodified() **/ 2132 2133 # /** @function file_size 2134 # * Replacement for "-s" in Greenstone buildtime, as we need a version that 2135 # * HDFS aware 2136 # */ 2137 sub file_size 2138 { 2139 my ($filename_full_path) = @_; 2140 my $size = 0; 2141 if (&util::isHDFS($filename_full_path)) 2142 { 2143 my $file_stats = file_stats($filename_full_path); 2144 $size = $file_stats->{'filesize'}; 2145 } 2146 else 2147 { 2148 $size = -s $filename_full_path; 2149 } 2150 return $size; 2151 } 2152 # /** file_size() **/ 2153 2154 sub file_stats 2155 { 2156 my ($filename_full_path) = @_; 2157 my $stats = {}; 2158 if (&util::isHDFS($filename_full_path)) 2159 { 2160 # - LS is the only way to get these details from HDFS (-stat doesn't 2161 # provide enough information) 2162 my $hdfs_command = &util::generateHDFSCommand('ls', $filename_full_path); 2163 my $result = `$hdfs_command 2>&1`; 2164 # - parse the results 2165 if ($result =~ /([ds\-][rwx\-]+)\s+(\d+)\s+([^\s]+)\s+([^\s]+)\s+(\d+)\s+(\d\d\d\d-\d\d-\d\d)\s+(\d\d:\d\d)\s+([^\s]+)$/) 2166 { 2167 $stats->{'filename'} = $8; 2168 $stats->{'replicas'} = $2; 2169 $stats->{'filesize'} = $5; 2170 $stats->{'modification_date'} = $6; 2171 $stats->{'modification_time'} = $7; 2172 $stats->{'permissions'} = $1; 2173 $stats->{'userid'} = $3; 2174 $stats->{'groupid'} = $4; 2175 } 2176 else 2177 { 2178 die("Error! Failed to parse HDFS ls result: " . $result . "\n"); 2179 } 2180 } 2181 return $stats; 2182 } 2183 # /** file_stats() **/ 2184 2185 # /** 2186 # */ 2187 sub generateHDFSCommand 2188 { 2189 my $flags = shift(@_); 2190 return 'hadoop fs -' . $flags . ' "' . join('" "', @_) . '"'; 2191 } 2192 # /** generateHDFSCommand() **/ 2193 2194 # /** @function isHDFS() 2195 # * Determine if the given path exists within a HDFS system by checking for 2196 # * the expected protocol prefix. 2197 # * @param $full_path the path to check 2198 # * @return 1 if within HDFS, 0 otherwise 2199 # */ 2200 sub isHDFS 2201 { 2202 my ($full_path) = @_; 2203 return (lc(substr($full_path, 0, 7)) eq 'hdfs://'); 2204 } 2205 # /** isHDFS() **/ 1645 2206 1646 2207 1;
Note:
See TracChangeset
for help on using the changeset viewer.