source: main/trunk/greenstone2/perllib/util.pm@ 22119

Last change on this file since 22119 was 22119, checked in by max, 14 years ago

Don't check for NTFS anymore because link should take care of saying if hardlinking is supported or not.

  • Property svn:keywords set to Author Date Id Revision
File size: 33.8 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use File::Copy;
29use File::Basename;
30
31use strict;
32
33
34# removes files (but not directories)
35sub rm {
36 my (@files) = @_;
37
38 my @filefiles = ();
39
40 # make sure the files we want to delete exist
41 # and are regular files
42 foreach my $file (@files) {
43 if (!-e $file) {
44 print STDERR "util::rm $file does not exist\n";
45 } elsif ((!-f $file) && (!-l $file)) {
46 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
47 } else {
48 push (@filefiles, $file);
49 }
50 }
51
52 # remove the files
53 my $numremoved = unlink @filefiles;
54
55 # check to make sure all of them were removed
56 if ($numremoved != scalar(@filefiles)) {
57 print STDERR "util::rm Not all files were removed\n";
58 }
59}
60
61
62
63# recursive removal
64sub filtered_rm_r {
65 my ($files,$file_accept_re,$file_reject_re) = @_;
66
67 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
68
69 # recursively remove the files
70 foreach my $file (@files_array) {
71 $file =~ s/[\/\\]+$//; # remove trailing slashes
72
73 if (!-e $file) {
74 print STDERR "util::filtered_rm_r $file does not exist\n";
75
76 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
77 # get the contents of this directory
78 if (!opendir (INDIR, $file)) {
79 print STDERR "util::filtered_rm_r could not open directory $file\n";
80 } else {
81 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
82 closedir (INDIR);
83
84 # remove all the files in this directory
85 map {$_="$file/$_";} @filedir;
86 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
87
88 if (!defined $file_accept_re && !defined $file_reject_re) {
89 # remove this directory
90 if (!rmdir $file) {
91 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
92 }
93 }
94 }
95 } else {
96 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
97
98 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
99 # remove this file
100 &rm ($file);
101 }
102 }
103 }
104}
105
106
107# recursive removal
108sub rm_r {
109 my (@files) = @_;
110
111 # use the more general (but reterospectively written function
112 # filtered_rm_r function()
113
114 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
115}
116
117
118
119
120# moves a file or a group of files
121sub mv {
122 my $dest = pop (@_);
123 my (@srcfiles) = @_;
124
125 # remove trailing slashes from source and destination files
126 $dest =~ s/[\\\/]+$//;
127 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
128
129 # a few sanity checks
130 if (scalar (@srcfiles) == 0) {
131 print STDERR "util::mv no destination directory given\n";
132 return;
133 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
134 print STDERR "util::mv if multiple source files are given the ".
135 "destination must be a directory\n";
136 return;
137 }
138
139 # move the files
140 foreach my $file (@srcfiles) {
141 my $tempdest = $dest;
142 if (-d $tempdest) {
143 my ($filename) = $file =~ /([^\\\/]+)$/;
144 $tempdest .= "/$filename";
145 }
146 if (!-e $file) {
147 print STDERR "util::mv $file does not exist\n";
148 } else {
149 rename ($file, $tempdest);
150 }
151 }
152}
153
154
155# copies a file or a group of files
156sub cp {
157 my $dest = pop (@_);
158 my (@srcfiles) = @_;
159
160 # remove trailing slashes from source and destination files
161 $dest =~ s/[\\\/]+$//;
162 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
163
164 # a few sanity checks
165 if (scalar (@srcfiles) == 0) {
166 print STDERR "util::cp no destination directory given\n";
167 return;
168 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
169 print STDERR "util::cp if multiple source files are given the ".
170 "destination must be a directory\n";
171 return;
172 }
173
174 # copy the files
175 foreach my $file (@srcfiles) {
176 my $tempdest = $dest;
177 if (-d $tempdest) {
178 my ($filename) = $file =~ /([^\\\/]+)$/;
179 $tempdest .= "/$filename";
180 }
181 if (!-e $file) {
182 print STDERR "util::cp $file does not exist\n";
183 } elsif (!-f $file) {
184 print STDERR "util::cp $file is not a plain file\n";
185 } else {
186 &File::Copy::copy ($file, $tempdest);
187 }
188 }
189}
190
191
192
193# recursively copies a file or group of files
194# syntax: cp_r (sourcefiles, destination directory)
195# destination must be a directory - to copy one file to
196# another use cp instead
197sub cp_r {
198 my $dest = pop (@_);
199 my (@srcfiles) = @_;
200
201 # a few sanity checks
202 if (scalar (@srcfiles) == 0) {
203 print STDERR "util::cp_r no destination directory given\n";
204 return;
205 } elsif (-f $dest) {
206 print STDERR "util::cp_r destination must be a directory\n";
207 return;
208 }
209
210 # create destination directory if it doesn't exist already
211 if (! -d $dest) {
212 my $store_umask = umask(0002);
213 mkdir ($dest, 0777);
214 umask($store_umask);
215 }
216
217 # copy the files
218 foreach my $file (@srcfiles) {
219
220 if (!-e $file) {
221 print STDERR "util::cp_r $file does not exist\n";
222
223 } elsif (-d $file) {
224 # make the new directory
225 my ($filename) = $file =~ /([^\\\/]*)$/;
226 $dest = &util::filename_cat ($dest, $filename);
227 my $store_umask = umask(0002);
228 mkdir ($dest, 0777);
229 umask($store_umask);
230
231 # get the contents of this directory
232 if (!opendir (INDIR, $file)) {
233 print STDERR "util::cp_r could not open directory $file\n";
234 } else {
235 my @filedir = readdir (INDIR);
236 closedir (INDIR);
237 foreach my $f (@filedir) {
238 next if $f =~ /^\.\.?$/;
239 # copy all the files in this directory
240 my $ff = &util::filename_cat ($file, $f);
241 &cp_r ($ff, $dest);
242 }
243 }
244
245 } else {
246 &cp($file, $dest);
247 }
248 }
249}
250# recursively copies a file or group of files
251# syntax: cp_r (sourcefiles, destination directory)
252# destination must be a directory - to copy one file to
253# another use cp instead
254sub cp_r_nosvn {
255 my $dest = pop (@_);
256 my (@srcfiles) = @_;
257
258 # a few sanity checks
259 if (scalar (@srcfiles) == 0) {
260 print STDERR "util::cp_r no destination directory given\n";
261 return;
262 } elsif (-f $dest) {
263 print STDERR "util::cp_r destination must be a directory\n";
264 return;
265 }
266
267 # create destination directory if it doesn't exist already
268 if (! -d $dest) {
269 my $store_umask = umask(0002);
270 mkdir ($dest, 0777);
271 umask($store_umask);
272 }
273
274 # copy the files
275 foreach my $file (@srcfiles) {
276
277 if (!-e $file) {
278 print STDERR "util::cp_r $file does not exist\n";
279
280 } elsif (-d $file) {
281 # make the new directory
282 my ($filename) = $file =~ /([^\\\/]*)$/;
283 $dest = &util::filename_cat ($dest, $filename);
284 my $store_umask = umask(0002);
285 mkdir ($dest, 0777);
286 umask($store_umask);
287
288 # get the contents of this directory
289 if (!opendir (INDIR, $file)) {
290 print STDERR "util::cp_r could not open directory $file\n";
291 } else {
292 my @filedir = readdir (INDIR);
293 closedir (INDIR);
294 foreach my $f (@filedir) {
295 next if $f =~ /^\.\.?$/;
296 next if $f =~ /^\.svn$/;
297 # copy all the files in this directory
298 my $ff = &util::filename_cat ($file, $f);
299 &cp_r ($ff, $dest);
300 }
301 }
302
303 } else {
304 &cp($file, $dest);
305 }
306 }
307}
308
309# copies a directory and its contents, excluding subdirectories, into a new directory
310sub cp_r_toplevel {
311 my $dest = pop (@_);
312 my (@srcfiles) = @_;
313
314 # a few sanity checks
315 if (scalar (@srcfiles) == 0) {
316 print STDERR "util::cp_r no destination directory given\n";
317 return;
318 } elsif (-f $dest) {
319 print STDERR "util::cp_r destination must be a directory\n";
320 return;
321 }
322
323 # create destination directory if it doesn't exist already
324 if (! -d $dest) {
325 my $store_umask = umask(0002);
326 mkdir ($dest, 0777);
327 umask($store_umask);
328 }
329
330 # copy the files
331 foreach my $file (@srcfiles) {
332
333 if (!-e $file) {
334 print STDERR "util::cp_r $file does not exist\n";
335
336 } elsif (-d $file) {
337 # make the new directory
338 my ($filename) = $file =~ /([^\\\/]*)$/;
339 $dest = &util::filename_cat ($dest, $filename);
340 my $store_umask = umask(0002);
341 mkdir ($dest, 0777);
342 umask($store_umask);
343
344 # get the contents of this directory
345 if (!opendir (INDIR, $file)) {
346 print STDERR "util::cp_r could not open directory $file\n";
347 } else {
348 my @filedir = readdir (INDIR);
349 closedir (INDIR);
350 foreach my $f (@filedir) {
351 next if $f =~ /^\.\.?$/;
352
353 # copy all the files in this directory, but not directories
354 my $ff = &util::filename_cat ($file, $f);
355 if (-f $ff) {
356 &cp($ff, $dest);
357 #&cp_r ($ff, $dest);
358 }
359 }
360 }
361
362 } else {
363 &cp($file, $dest);
364 }
365 }
366}
367
368sub mk_dir {
369 my ($dir) = @_;
370
371 my $store_umask = umask(0002);
372 my $mkdir_ok = mkdir ($dir, 0777);
373 umask($store_umask);
374
375 if (!$mkdir_ok)
376 {
377 print STDERR "util::mk_dir could not create directory $dir\n";
378 return;
379 }
380}
381
382# in case anyone cares - I did some testing (using perls Benchmark module)
383# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
384# slightly faster (surprisingly) - Stefan.
385sub mk_all_dir {
386 my ($dir) = @_;
387
388 # use / for the directory separator, remove duplicate and
389 # trailing slashes
390 $dir=~s/[\\\/]+/\//g;
391 $dir=~s/[\\\/]+$//;
392
393 # make sure the cache directory exists
394 my $dirsofar = "";
395 my $first = 1;
396 foreach my $dirname (split ("/", $dir)) {
397 $dirsofar .= "/" unless $first;
398 $first = 0;
399
400 $dirsofar .= $dirname;
401
402 next if $dirname =~ /^(|[a-z]:)$/i;
403 if (!-e $dirsofar)
404 {
405 my $store_umask = umask(0002);
406 my $mkdir_ok = mkdir ($dirsofar, 0777);
407 umask($store_umask);
408 if (!$mkdir_ok)
409 {
410 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
411 return;
412 }
413 }
414 }
415}
416
417# make hard link to file if supported by OS, otherwise copy the file
418sub hard_link {
419 my ($src, $dest, $verbosity) = @_;
420
421 # remove trailing slashes from source and destination files
422 $src =~ s/[\\\/]+$//;
423 $dest =~ s/[\\\/]+$//;
424
425 # a few sanity checks
426 if (-e $dest) {
427 # destination file already exists
428 return;
429 }
430 elsif (!-e $src) {
431 print STDERR "util::hard_link source file $src does not exist\n";
432 return 1;
433 }
434 elsif (-d $src) {
435 print STDERR "util::hard_link source $src is a directory\n";
436 return 1;
437 }
438
439 my $dest_dir = &File::Basename::dirname($dest);
440 mk_all_dir($dest_dir) if (!-e $dest_dir);
441
442
443 if (!link($src, $dest)) {
444 if ((!defined $verbosity) || ($verbosity>2)) {
445 print STDERR "util::hard_link: unable to create hard link. ";
446 print STDERR " Copying file: $src -> $dest\n";
447 }
448 &File::Copy::copy ($src, $dest);
449 }
450 return 0;
451}
452
453# make soft link to file if supported by OS, otherwise copy file
454sub soft_link {
455 my ($src, $dest, $ensure_paths_absolute) = @_;
456
457 # remove trailing slashes from source and destination files
458 $src =~ s/[\\\/]+$//;
459 $dest =~ s/[\\\/]+$//;
460
461 # Ensure file paths are absolute IF requested to do so
462 # Soft_linking didn't work for relative paths
463 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
464 # We need to ensure that the src file is the absolute path
465 # See http://perldoc.perl.org/File/Spec.html
466 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
467 $src = File::Spec->rel2abs($src); # make absolute
468 }
469 # Might as well ensure that the destination file's absolute path is used
470 if(!File::Spec->file_name_is_absolute( $dest )) {
471 $dest = File::Spec->rel2abs($dest); # make absolute
472 }
473 }
474
475 # a few sanity checks
476 if (!-e $src) {
477 print STDERR "util::soft_link source file $src does not exist\n";
478 return 0;
479 }
480
481 my $dest_dir = &File::Basename::dirname($dest);
482 mk_all_dir($dest_dir) if (!-e $dest_dir);
483
484 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
485 # symlink not supported on windows
486 &File::Copy::copy ($src, $dest);
487
488 } elsif (!eval {symlink($src, $dest)}) {
489 print STDERR "util::soft_link: unable to create soft link.\n";
490 return 0;
491 }
492
493 return 1;
494}
495
496
497
498
499# updates a copy of a directory in some other part of the filesystem
500# verbosity settings are: 0=low, 1=normal, 2=high
501# both $fromdir and $todir should be absolute paths
502sub cachedir {
503 my ($fromdir, $todir, $verbosity) = @_;
504 $verbosity = 1 unless defined $verbosity;
505
506 # use / for the directory separator, remove duplicate and
507 # trailing slashes
508 $fromdir=~s/[\\\/]+/\//g;
509 $fromdir=~s/[\\\/]+$//;
510 $todir=~s/[\\\/]+/\//g;
511 $todir=~s/[\\\/]+$//;
512
513 &mk_all_dir ($todir);
514
515 # get the directories in ascending order
516 if (!opendir (FROMDIR, $fromdir)) {
517 print STDERR "util::cachedir could not read directory $fromdir\n";
518 return;
519 }
520 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
521 closedir (FROMDIR);
522
523 if (!opendir (TODIR, $todir)) {
524 print STDERR "util::cacedir could not read directory $todir\n";
525 return;
526 }
527 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
528 closedir (TODIR);
529
530 my $fromi = 0;
531 my $toi = 0;
532
533 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
534# print "fromi: $fromi toi: $toi\n";
535
536 # see if we should delete a file/directory
537 # this should happen if the file/directory
538 # is not in the from list or if its a different
539 # size, or has an older timestamp
540 if ($toi < scalar(@todir)) {
541 if (($fromi >= scalar(@fromdir)) ||
542 ($todir[$toi] lt $fromdir[$fromi] ||
543 ($todir[$toi] eq $fromdir[$fromi] &&
544 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
545 $verbosity)))) {
546
547 # the files are different
548 &rm_r("$todir/$todir[$toi]");
549 splice(@todir, $toi, 1); # $toi stays the same
550
551 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
552 # the files are the same
553 # if it is a directory, check its contents
554 if (-d "$todir/$todir[$toi]") {
555 &cachedir ("$fromdir/$fromdir[$fromi]",
556 "$todir/$todir[$toi]", $verbosity);
557 }
558
559 $toi++;
560 $fromi++;
561 next;
562 }
563 }
564
565 # see if we should insert a file/directory
566 # we should insert a file/directory if there
567 # is no tofiles left or if the tofile does not exist
568 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
569 $todir[$toi] gt $fromdir[$fromi])) {
570 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
571 splice (@todir, $toi, 0, $fromdir[$fromi]);
572
573 $toi++;
574 $fromi++;
575 }
576 }
577}
578
579# this function returns -1 if either file is not found
580# assumes that $file1 and $file2 are absolute file names or
581# in the current directory
582# $file2 is allowed to be newer than $file1
583sub differentfiles {
584 my ($file1, $file2, $verbosity) = @_;
585 $verbosity = 1 unless defined $verbosity;
586
587 $file1 =~ s/\/+$//;
588 $file2 =~ s/\/+$//;
589
590 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
591 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
592
593 return -1 unless (-e $file1 && -e $file2);
594 if ($file1name ne $file2name) {
595 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
596 return 1;
597 }
598
599 my @file1stat = stat ($file1);
600 my @file2stat = stat ($file2);
601
602 if (-d $file1) {
603 if (! -d $file2) {
604 print STDERR "one file is a directory\n" if ($verbosity >= 2);
605 return 1;
606 }
607 return 0;
608 }
609
610 # both must be regular files
611 unless (-f $file1 && -f $file2) {
612 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
613 return 1;
614 }
615
616 # the size of the files must be the same
617 if ($file1stat[7] != $file2stat[7]) {
618 print STDERR "different sized files\n" if ($verbosity >= 2);
619 return 1;
620 }
621
622 # the second file cannot be older than the first
623 if ($file1stat[9] > $file2stat[9]) {
624 print STDERR "file is older\n" if ($verbosity >= 2);
625 return 1;
626 }
627
628 return 0;
629}
630
631
632sub get_tmp_filename
633{
634 my $file_ext = shift(@_) || undef;
635
636 my $opt_dot_file_ext = (defined $file_ext) ? ".$file_ext" : "";
637
638 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
639 &mk_all_dir ($tmpdir) unless -e $tmpdir;
640
641 my $count = 1000;
642 my $rand = int(rand $count);
643 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
644
645 while (-e $full_tmp_filename) {
646 $rand = int(rand $count);
647 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
648 $count++;
649 }
650
651 return $full_tmp_filename;
652}
653
654sub get_toplevel_tmp_dir
655{
656 return filename_cat($ENV{'GSDLHOME'}, "tmp");
657}
658
659
660sub filename_to_regex {
661 my $filename = shift (@_);
662
663 # need to put single backslash back to double so that regex works
664 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
665 $filename =~ s/\\/\\\\/g;
666 }
667 return $filename;
668}
669
670sub filename_cat {
671 my $first_file = shift(@_);
672 my (@filenames) = @_;
673
674# Useful for debugging
675# -- might make sense to call caller(0) rather than (1)??
676# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
677# print STDERR "Calling method; $cfilename:$cline $cpackage->$csubr\n";
678
679 # If first_file is not null or empty, then add it back into the list
680 if (defined $first_file && $first_file =~ /\S/) {
681 unshift(@filenames, $first_file);
682 }
683
684 my $filename = join("/", @filenames);
685
686 # remove duplicate slashes and remove the last slash
687 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
688 $filename =~ s/[\\\/]+/\\/g;
689 } else {
690 $filename =~ s/[\/]+/\//g;
691 # DB: want a filename abc\de.html to remain like this
692 }
693 $filename =~ s/[\\\/]$//;
694
695 return $filename;
696}
697
698
699sub pathname_cat {
700 my $first_path = shift(@_);
701 my (@pathnames) = @_;
702
703 # If first_path is not null or empty, then add it back into the list
704 if (defined $first_path && $first_path =~ /\S/) {
705 unshift(@pathnames, $first_path);
706 }
707
708 my $join_char;
709 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
710 $join_char = ";";
711 } else {
712 $join_char = ":";
713 }
714
715 my $pathname = join($join_char, @pathnames);
716
717 # remove duplicate slashes
718 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
719 $pathname =~ s/[\\\/]+/\\/g;
720 } else {
721 $pathname =~ s/[\/]+/\//g;
722 # DB: want a pathname abc\de.html to remain like this
723 }
724
725 return $pathname;
726}
727
728
729sub tidy_up_oid {
730 my ($OID) = @_;
731 if ($OID =~ /\./) {
732 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
733 $OID =~ s/\.//g; #remove any periods
734 }
735 if ($OID =~ /^\s.*\s$/) {
736 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
737 # remove starting and trailing whitespace
738 $OID =~ s/^\s+//;
739 $OID =~ s/\s+$//;
740 }
741 if ($OID =~ /^[\d]*$/) {
742 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
743 $OID = "D" . $OID;
744 }
745
746 return $OID;
747}
748sub envvar_prepend {
749 my ($var,$val) = @_;
750
751 # do not prepend any value/path that's already in the environment variable
752 if ($ENV{'GSDLOS'} =~ /^windows$/i)
753 {
754 my $escaped_val = $val;
755 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
756 if($ENV{$var} !~ m/$escaped_val/) {
757 $ENV{$var} = "$val;".$ENV{$var};
758 }
759 }
760 else {
761 if($ENV{$var} !~ m/$val/) {
762 $ENV{$var} = "$val:".$ENV{$var};
763 }
764 }
765}
766
767sub envvar_append {
768 my ($var,$val) = @_;
769
770 # do not append any value/path that's already in the environment variable
771 if ($ENV{'GSDLOS'} =~ /^windows$/i)
772 {
773 my $escaped_val = $val;
774 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
775 if($ENV{$var} !~ m/$escaped_val/) {
776 $ENV{$var} .= ";$val";
777 }
778 }
779 else {
780 if($ENV{$var} !~ m/$val/) {
781 $ENV{$var} .= ":$val";
782 }
783 }
784}
785
786
787# splits a filename into a prefix and a tail extension using the tail_re, or
788# if that fails, splits on the file_extension . (dot)
789sub get_prefix_and_tail_by_regex {
790
791 my ($filename,$tail_re) = @_;
792
793 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
794 if ((!defined $file_prefix) || (!defined $file_ext)) {
795 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
796 }
797
798 return ($file_prefix,$file_ext);
799}
800
801# get full path and file only path from a base_dir (which may be empty) and
802# file (which may contain directories)
803sub get_full_filenames {
804 my ($base_dir, $file) = @_;
805
806 my $filename_full_path = $file;
807 # add on directory if present
808 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
809
810 my $filename_no_path = $file;
811
812 # remove directory if present
813 $filename_no_path =~ s/^.*[\/\\]//;
814 return ($filename_full_path, $filename_no_path);
815}
816
817# returns the path of a file without the filename -- ie. the directory the file is in
818sub filename_head {
819 my $filename = shift(@_);
820
821 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
822 $filename =~ s/[^\\\\]*$//;
823 }
824 else {
825 $filename =~ s/[^\\\/]*$//;
826 }
827
828 return $filename;
829}
830
831
832# returns 1 if filename1 and filename2 point to the same
833# file or directory
834sub filenames_equal {
835 my ($filename1, $filename2) = @_;
836
837 # use filename_cat to clean up trailing slashes and
838 # multiple slashes
839 $filename1 = filename_cat ($filename1);
840 $filename2 = filename_cat ($filename2);
841
842 # filenames not case sensitive on windows
843 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
844 $filename1 =~ tr/[A-Z]/[a-z]/;
845 $filename2 =~ tr/[A-Z]/[a-z]/;
846 }
847 return 1 if $filename1 eq $filename2;
848 return 0;
849}
850
851sub filename_within_collection
852{
853 my ($filename) = @_;
854
855 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
856
857 if (defined $collect_dir) {
858 my $dirsep = &util::get_dirsep();
859 if ($collect_dir !~ m/$dirsep$/) {
860 $collect_dir .= $dirsep;
861 }
862
863 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
864
865 # if from within GSDLCOLLECTDIR, then remove directory prefix
866 # so source_filename is realative to it. This is done to aid
867 # portability, i.e. the collection can be moved to somewhere
868 # else on the file system and the archives directory will still
869 # work. This is needed, for example in the applet version of
870 # GLI where GSDLHOME/collect on the server will be different to
871 # the collect directory of the remove user. Of course,
872 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
873 # it back into a full pathname.
874
875 if ($filename =~ /^$collect_dir(.*)$/) {
876 $filename = $1;
877 }
878 }
879
880 return $filename;
881}
882
883sub filename_is_absolute
884{
885 my ($filename) = @_;
886
887 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
888 return ($filename =~ m/^(\w:)?\\/);
889 }
890 else {
891 return ($filename =~ m/^\//);
892 }
893}
894
895
896## @method make_absolute()
897#
898# Ensure the given file path is absolute in respect to the given base path.
899#
900# @param $base_dir A string denoting the base path the given dir must be
901# absolute to.
902# @param $dir The directory to be made absolute as a string. Note that the
903# dir may already be absolute, in which case it will remain
904# unchanged.
905# @return The now absolute form of the directory as a string.
906#
907# @author John Thompson, DL Consulting Ltd.
908# @copy 2006 DL Consulting Ltd.
909#
910#used in buildcol.pl, doesn't work for all cases --kjdon
911sub make_absolute {
912
913 my ($base_dir, $dir) = @_;
914### print STDERR "dir = $dir\n";
915 $dir =~ s/[\\\/]+/\//g;
916 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
917 $dir =~ s|^/tmp_mnt||;
918 1 while($dir =~ s|/[^/]*/\.\./|/|g);
919 $dir =~ s|/[.][.]?/|/|g;
920 $dir =~ tr|/|/|s;
921### print STDERR "dir = $dir\n";
922
923 return $dir;
924}
925## make_absolute() ##
926
927sub get_dirsep {
928
929 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
930 return "\\";
931 } else {
932 return "\/";
933 }
934}
935
936sub get_os_dirsep {
937
938 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
939 return "\\\\";
940 } else {
941 return "\\\/";
942 }
943}
944
945sub get_re_dirsep {
946
947 return "\\\\|\\\/";
948}
949
950
951sub get_dirsep_tail {
952 my ($filename) = @_;
953
954 # returns last part of directory or filename
955 # On unix e.g. a/b.d => b.d
956 # a/b/c => c
957
958 my $dirsep = get_re_dirsep();
959 my @dirs = split (/$dirsep/, $filename);
960 my $tail = pop @dirs;
961
962 # - caused problems under windows
963 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
964
965 return $tail;
966}
967
968
969# if this is running on windows we want binaries to end in
970# .exe, otherwise they don't have to end in any extension
971sub get_os_exe {
972 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
973 return "";
974}
975
976
977# test to see whether this is a big or little endian machine
978sub is_little_endian
979{
980 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
981 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
982 # Otherwise, it's little endian
983
984 #return 0 if $^O =~ /^darwin$/i;
985 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
986
987 # Going back to stating exactly whether the machine is little endian
988 # or big endian, without any special case for Macs. Since for rata it comes
989 # back with little endian and for shuttle with bigendian.
990 return (ord(substr(pack("s",1), 0, 1)) == 1);
991}
992
993
994# will return the collection name if successful, "" otherwise
995sub use_collection {
996 my ($collection, $collectdir) = @_;
997
998 if (!defined $collectdir || $collectdir eq "") {
999 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1000 }
1001
1002 # get and check the collection
1003 if (!defined($collection) || $collection eq "") {
1004 if (defined $ENV{'GSDLCOLLECTION'}) {
1005 $collection = $ENV{'GSDLCOLLECTION'};
1006 } else {
1007 print STDOUT "No collection specified\n";
1008 return "";
1009 }
1010 }
1011
1012 if ($collection eq "modelcol") {
1013 print STDOUT "You can't use modelcol.\n";
1014 return "";
1015 }
1016
1017 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1018 # are defined
1019 $ENV{'GSDLCOLLECTION'} = $collection;
1020 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1021
1022 # make sure this collection exists
1023 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1024 print STDOUT "Invalid collection ($collection).\n";
1025 return "";
1026 }
1027
1028 # everything is ready to go
1029 return $collection;
1030}
1031
1032sub get_current_collection_name {
1033 return $ENV{'GSDLCOLLECTION'};
1034}
1035
1036
1037# will return the collection name if successful, "" otherwise.
1038# Like use_collection (above) but for greenstone 3 (taking account of site level)
1039
1040sub use_site_collection {
1041 my ($site, $collection, $collectdir) = @_;
1042
1043 if (!defined $collectdir || $collectdir eq "") {
1044 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1045 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1046 }
1047
1048 # collectdir explicitly set by this point (using $site variable if required).
1049 # Can call "old" gsdl2 use_collection now.
1050
1051 return use_collection($collection,$collectdir);
1052}
1053
1054
1055
1056sub locate_config_file
1057{
1058 my ($file) = @_;
1059
1060 my $locations = locate_config_files($file);
1061
1062 return shift @$locations; # returns undef if 'locations' is empty
1063}
1064
1065
1066sub locate_config_files
1067{
1068 my ($file) = @_;
1069
1070 my @locations = ();
1071
1072 if (-e $file) {
1073 # Clearly specified (most likely full filename)
1074 # No need to hunt in 'etc' directories, return value unchanged
1075 push(@locations,$file);
1076 }
1077 else {
1078 # Check for collection specific one before looking in global GSDL 'etc'
1079 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1080 my $test_collect_etc_filename
1081 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1082
1083 if (-e $test_collect_etc_filename) {
1084 push(@locations,$test_collect_etc_filename);
1085 }
1086 }
1087 my $test_main_etc_filename
1088 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1089 if (-e $test_main_etc_filename) {
1090 push(@locations,$test_main_etc_filename);
1091 }
1092 }
1093
1094 return \@locations;
1095}
1096
1097
1098sub hyperlink_text
1099{
1100 my ($text) = @_;
1101
1102 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1103 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1104
1105 return $text;
1106}
1107
1108
1109# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1110# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1111sub is_dir_empty
1112{
1113 my ($path) = @_;
1114 opendir DIR, $path;
1115 while(my $entry = readdir DIR) {
1116 next if($entry =~ /^\.\.?$/);
1117 closedir DIR;
1118 return 0;
1119 }
1120 closedir DIR;
1121 return 1;
1122}
1123
1124# Returns the given filename converted using either URL encoding or base64
1125# encoding, as specified by $rename_method. If the given filename has no suffix
1126# (if it is just the tailname), then $no_suffix should be some defined value.
1127# rename_method can be url, none, base64
1128sub rename_file {
1129 my ($filename, $rename_method, $no_suffix) = @_;
1130
1131 if(!$filename) { # undefined or empty string
1132 return $filename;
1133 }
1134
1135 if (!$rename_method) {
1136 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1137 # Debugging information
1138 my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1139 print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1140 $rename_method = "url";
1141 } elsif($rename_method eq "none") {
1142 return $filename; # would have already been renamed
1143 }
1144
1145 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1146 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1147 ###$filename =~ s/ /_/g;
1148
1149 my ($tailname,$dirname,$suffix);
1150 if($no_suffix) { # given a tailname, no suffix
1151 ($tailname,$dirname) = File::Basename::fileparse($filename);
1152 }
1153 else {
1154 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1155 }
1156 $suffix = "" if !$suffix;
1157
1158 if ($rename_method eq "url") {
1159 $tailname = &unicode::url_encode($tailname);
1160 }
1161 elsif ($rename_method eq "base64") {
1162 $tailname = &unicode::base64_encode($tailname);
1163 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1164 }
1165
1166 $filename = "$tailname$suffix";
1167 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1168
1169 return $filename;
1170}
1171
1172
1173# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1174sub rename_ldb_or_bdb_file {
1175 my ($filename_no_ext) = @_;
1176
1177 my $new_filename = "$filename_no_ext.gdb";
1178 return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1179 # try ldb
1180 my $old_filename = "$filename_no_ext.ldb";
1181
1182 if (-f $old_filename) {
1183 print STDERR "Renaming $old_filename to $new_filename\n";
1184 rename ($old_filename, $new_filename)
1185 || print STDERR "Rename failed: $!\n";
1186 return;
1187 }
1188 # try bdb
1189 $old_filename = "$filename_no_ext.bdb";
1190 if (-f $old_filename) {
1191 print STDERR "Renaming $old_filename to $new_filename\n";
1192 rename ($old_filename, $new_filename)
1193 || print STDERR "Rename failed: $!\n";
1194 return;
1195 }
1196}
1197
1198
1199# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1200# By default, /greenstone3 for GS3 or /greenstone for GS2.
1201sub get_greenstone_url_prefix() {
1202 # if already set on a previous occasion, just return that
1203 # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1204 return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1205
1206 my ($configfile, $urlprefix, $defaultUrlprefix);
1207 my @propertynames = ();
1208
1209 if($ENV{'GSDL3SRCHOME'}) {
1210 $defaultUrlprefix = "/greenstone3";
1211 $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1212 push(@propertynames, qw/path\s*\=/);
1213 } else {
1214 $defaultUrlprefix = "/greenstone";
1215 $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1216 push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1217 }
1218
1219 $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1220
1221 if(!$urlprefix) { # no values found for URL prefix, use default values
1222 $urlprefix = $defaultUrlprefix;
1223 } else {
1224 #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1225 $urlprefix =~ s/^\///; # remove the starting slash
1226 my @dirs = split(/(\\|\/)/, $urlprefix);
1227 $urlprefix = shift(@dirs);
1228
1229 if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1230 $urlprefix = "/$urlprefix";
1231 }
1232 }
1233
1234 # set for the future
1235 $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1236# print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1237 return $urlprefix;
1238}
1239
1240
1241# Given a config file (xml or java properties file) and a list/array of regular expressions
1242# that represent property names to match on, this function will return the value for the 1st
1243# matching property name. If the return value is undefined, no matching property was found.
1244sub extract_propvalue_from_file() {
1245 my ($configfile, $propertynames) = @_;
1246
1247 my $value;
1248 unless(open(FIN, "<$configfile")) {
1249 print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1250 return $value; # not initialised
1251 }
1252
1253 # Read the entire file at once, as one single line, then close it
1254 my $filecontents;
1255 {
1256 local $/ = undef;
1257 $filecontents = <FIN>;
1258 }
1259 close(FIN);
1260
1261 foreach my $regex (@$propertynames) {
1262 ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1263 if($value) {
1264 $value =~ s/^\"//; # remove any startquotes
1265 $value =~ s/\".*$//; # remove the 1st endquotes (if any) followed by any xml
1266 last; # found value for a matching property, break from loop
1267 }
1268 }
1269
1270 return $value;
1271}
1272
1273
12741;
Note: See TracBrowser for help on using the repository browser.