source: gsdl/trunk/perllib/util.pm@ 19762

Last change on this file since 19762 was 19762, checked in by ak19, 15 years ago

No longer convert spaces to underscores in the rename_file subroutine, since underscores mess up incremental build (file renaming forces incremental building to rebuild everything again since incr building thinks the file with the original file has been deleted and new files have been added).

  • Property svn:keywords set to Author Date Id Revision
File size: 28.9 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use File::Copy;
29use File::Basename;
30
31use strict;
32
33
34# removes files (but not directories)
35sub rm {
36 my (@files) = @_;
37
38 my @filefiles = ();
39
40 # make sure the files we want to delete exist
41 # and are regular files
42 foreach my $file (@files) {
43 if (!-e $file) {
44 print STDERR "util::rm $file does not exist\n";
45 } elsif ((!-f $file) && (!-l $file)) {
46 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
47 } else {
48 push (@filefiles, $file);
49 }
50 }
51
52 # remove the files
53 my $numremoved = unlink @filefiles;
54
55 # check to make sure all of them were removed
56 if ($numremoved != scalar(@filefiles)) {
57 print STDERR "util::rm Not all files were removed\n";
58 }
59}
60
61
62
63# recursive removal
64sub filtered_rm_r {
65 my ($files,$file_accept_re,$file_reject_re) = @_;
66
67 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
68
69 # recursively remove the files
70 foreach my $file (@files_array) {
71 $file =~ s/[\/\\]+$//; # remove trailing slashes
72
73 if (!-e $file) {
74 print STDERR "util::filtered_rm_r $file does not exist\n";
75
76 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
77 # get the contents of this directory
78 if (!opendir (INDIR, $file)) {
79 print STDERR "util::filtered_rm_r could not open directory $file\n";
80 } else {
81 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
82 closedir (INDIR);
83
84 # remove all the files in this directory
85 map {$_="$file/$_";} @filedir;
86 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
87
88 if (!defined $file_accept_re && !defined $file_reject_re) {
89 # remove this directory
90 if (!rmdir $file) {
91 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
92 }
93 }
94 }
95 } else {
96 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
97
98 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
99 # remove this file
100 &rm ($file);
101 }
102 }
103 }
104}
105
106
107# recursive removal
108sub rm_r {
109 my (@files) = @_;
110
111 # use the more general (but reterospectively written function
112 # filtered_rm_r function()
113
114 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
115}
116
117
118
119
120# moves a file or a group of files
121sub mv {
122 my $dest = pop (@_);
123 my (@srcfiles) = @_;
124
125 # remove trailing slashes from source and destination files
126 $dest =~ s/[\\\/]+$//;
127 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
128
129 # a few sanity checks
130 if (scalar (@srcfiles) == 0) {
131 print STDERR "util::mv no destination directory given\n";
132 return;
133 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
134 print STDERR "util::mv if multiple source files are given the ".
135 "destination must be a directory\n";
136 return;
137 }
138
139 # move the files
140 foreach my $file (@srcfiles) {
141 my $tempdest = $dest;
142 if (-d $tempdest) {
143 my ($filename) = $file =~ /([^\\\/]+)$/;
144 $tempdest .= "/$filename";
145 }
146 if (!-e $file) {
147 print STDERR "util::mv $file does not exist\n";
148 } else {
149 rename ($file, $tempdest);
150 }
151 }
152}
153
154
155# copies a file or a group of files
156sub cp {
157 my $dest = pop (@_);
158 my (@srcfiles) = @_;
159
160 # remove trailing slashes from source and destination files
161 $dest =~ s/[\\\/]+$//;
162 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
163
164 # a few sanity checks
165 if (scalar (@srcfiles) == 0) {
166 print STDERR "util::cp no destination directory given\n";
167 return;
168 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
169 print STDERR "util::cp if multiple source files are given the ".
170 "destination must be a directory\n";
171 return;
172 }
173
174 # copy the files
175 foreach my $file (@srcfiles) {
176 my $tempdest = $dest;
177 if (-d $tempdest) {
178 my ($filename) = $file =~ /([^\\\/]+)$/;
179 $tempdest .= "/$filename";
180 }
181 if (!-e $file) {
182 print STDERR "util::cp $file does not exist\n";
183 } elsif (!-f $file) {
184 print STDERR "util::cp $file is not a plain file\n";
185 } else {
186 &File::Copy::copy ($file, $tempdest);
187 }
188 }
189}
190
191
192
193# recursively copies a file or group of files
194# syntax: cp_r (sourcefiles, destination directory)
195# destination must be a directory - to copy one file to
196# another use cp instead
197sub cp_r {
198 my $dest = pop (@_);
199 my (@srcfiles) = @_;
200
201 # a few sanity checks
202 if (scalar (@srcfiles) == 0) {
203 print STDERR "util::cp_r no destination directory given\n";
204 return;
205 } elsif (-f $dest) {
206 print STDERR "util::cp_r destination must be a directory\n";
207 return;
208 }
209
210 # create destination directory if it doesn't exist already
211 if (! -d $dest) {
212 my $store_umask = umask(0002);
213 mkdir ($dest, 0777);
214 umask($store_umask);
215 }
216
217 # copy the files
218 foreach my $file (@srcfiles) {
219
220 if (!-e $file) {
221 print STDERR "util::cp_r $file does not exist\n";
222
223 } elsif (-d $file) {
224 # make the new directory
225 my ($filename) = $file =~ /([^\\\/]*)$/;
226 $dest = &util::filename_cat ($dest, $filename);
227 my $store_umask = umask(0002);
228 mkdir ($dest, 0777);
229 umask($store_umask);
230
231 # get the contents of this directory
232 if (!opendir (INDIR, $file)) {
233 print STDERR "util::cp_r could not open directory $file\n";
234 } else {
235 my @filedir = readdir (INDIR);
236 closedir (INDIR);
237 foreach my $f (@filedir) {
238 next if $f =~ /^\.\.?$/;
239 # copy all the files in this directory
240 my $ff = &util::filename_cat ($file, $f);
241 &cp_r ($ff, $dest);
242 }
243 }
244
245 } else {
246 &cp($file, $dest);
247 }
248 }
249}
250
251# copies a directory and its contents, excluding subdirectories, into a new directory
252sub cp_r_toplevel {
253 my $dest = pop (@_);
254 my (@srcfiles) = @_;
255
256 # a few sanity checks
257 if (scalar (@srcfiles) == 0) {
258 print STDERR "util::cp_r no destination directory given\n";
259 return;
260 } elsif (-f $dest) {
261 print STDERR "util::cp_r destination must be a directory\n";
262 return;
263 }
264
265 # create destination directory if it doesn't exist already
266 if (! -d $dest) {
267 my $store_umask = umask(0002);
268 mkdir ($dest, 0777);
269 umask($store_umask);
270 }
271
272 # copy the files
273 foreach my $file (@srcfiles) {
274
275 if (!-e $file) {
276 print STDERR "util::cp_r $file does not exist\n";
277
278 } elsif (-d $file) {
279 # make the new directory
280 my ($filename) = $file =~ /([^\\\/]*)$/;
281 $dest = &util::filename_cat ($dest, $filename);
282 my $store_umask = umask(0002);
283 mkdir ($dest, 0777);
284 umask($store_umask);
285
286 # get the contents of this directory
287 if (!opendir (INDIR, $file)) {
288 print STDERR "util::cp_r could not open directory $file\n";
289 } else {
290 my @filedir = readdir (INDIR);
291 closedir (INDIR);
292 foreach my $f (@filedir) {
293 next if $f =~ /^\.\.?$/;
294
295 # copy all the files in this directory, but not directories
296 my $ff = &util::filename_cat ($file, $f);
297 if (-f $ff) {
298 &cp($ff, $dest);
299 #&cp_r ($ff, $dest);
300 }
301 }
302 }
303
304 } else {
305 &cp($file, $dest);
306 }
307 }
308}
309
310sub mk_dir {
311 my ($dir) = @_;
312
313 my $store_umask = umask(0002);
314 my $mkdir_ok = mkdir ($dir, 0777);
315 umask($store_umask);
316
317 if (!$mkdir_ok)
318 {
319 print STDERR "util::mk_dir could not create directory $dir\n";
320 return;
321 }
322}
323
324# in case anyone cares - I did some testing (using perls Benchmark module)
325# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
326# slightly faster (surprisingly) - Stefan.
327sub mk_all_dir {
328 my ($dir) = @_;
329
330 # use / for the directory separator, remove duplicate and
331 # trailing slashes
332 $dir=~s/[\\\/]+/\//g;
333 $dir=~s/[\\\/]+$//;
334
335 # make sure the cache directory exists
336 my $dirsofar = "";
337 my $first = 1;
338 foreach my $dirname (split ("/", $dir)) {
339 $dirsofar .= "/" unless $first;
340 $first = 0;
341
342 $dirsofar .= $dirname;
343
344 next if $dirname =~ /^(|[a-z]:)$/i;
345 if (!-e $dirsofar)
346 {
347 my $store_umask = umask(0002);
348 my $mkdir_ok = mkdir ($dirsofar, 0777);
349 umask($store_umask);
350 if (!$mkdir_ok)
351 {
352 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
353 return;
354 }
355 }
356 }
357}
358
359# make hard link to file if supported by OS, otherwise copy the file
360sub hard_link {
361 my ($src, $dest, $verbosity) = @_;
362
363 # remove trailing slashes from source and destination files
364 $src =~ s/[\\\/]+$//;
365 $dest =~ s/[\\\/]+$//;
366
367 # a few sanity checks
368 if (-e $dest) {
369 # destination file already exists
370 return;
371 }
372 elsif (!-e $src) {
373 print STDERR "util::hard_link source file $src does not exist\n";
374 return 1;
375 }
376 elsif (-d $src) {
377 print STDERR "util::hard_link source $src is a directory\n";
378 return 1;
379 }
380
381 my $dest_dir = &File::Basename::dirname($dest);
382 mk_all_dir($dest_dir) if (!-e $dest_dir);
383
384 # link not supported on windows 9x
385 if (($ENV{'GSDLOS'} =~ /^windows$/i) && (Win32::FsType() !~ /^ntfs$/i)) {
386 &File::Copy::copy ($src, $dest);
387
388 } elsif (!link($src, $dest)) {
389 if ((!defined $verbosity) || ($verbosity>2)) {
390 print STDERR "util::hard_link: unable to create hard link. ";
391 print STDERR " Copying file: $src -> $dest\n";
392 }
393 &File::Copy::copy ($src, $dest);
394 }
395 return 0;
396}
397
398# make soft link to file if supported by OS, otherwise copy file
399sub soft_link {
400 my ($src, $dest, $ensure_paths_absolute) = @_;
401
402 # remove trailing slashes from source and destination files
403 $src =~ s/[\\\/]+$//;
404 $dest =~ s/[\\\/]+$//;
405
406 # Ensure file paths are absolute IF requested to do so
407 # Soft_linking didn't work for relative paths
408 if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
409 # We need to ensure that the src file is the absolute path
410 # See http://perldoc.perl.org/File/Spec.html
411 if(!File::Spec->file_name_is_absolute( $src )) { # it's relative
412 $src = File::Spec->rel2abs($src); # make absolute
413 }
414 # Might as well ensure that the destination file's absolute path is used
415 if(!File::Spec->file_name_is_absolute( $dest )) {
416 $dest = File::Spec->rel2abs($dest); # make absolute
417 }
418 }
419
420 # a few sanity checks
421 if (!-e $src) {
422 print STDERR "util::soft_link source file $src does not exist\n";
423 return 0;
424 }
425
426 my $dest_dir = &File::Basename::dirname($dest);
427 mk_all_dir($dest_dir) if (!-e $dest_dir);
428
429 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
430 # symlink not supported on windows
431 &File::Copy::copy ($src, $dest);
432
433 } elsif (!eval {symlink($src, $dest)}) {
434 print STDERR "util::soft_link: unable to create soft link.\n";
435 return 0;
436 }
437
438 return 1;
439}
440
441
442
443
444# updates a copy of a directory in some other part of the filesystem
445# verbosity settings are: 0=low, 1=normal, 2=high
446# both $fromdir and $todir should be absolute paths
447sub cachedir {
448 my ($fromdir, $todir, $verbosity) = @_;
449 $verbosity = 1 unless defined $verbosity;
450
451 # use / for the directory separator, remove duplicate and
452 # trailing slashes
453 $fromdir=~s/[\\\/]+/\//g;
454 $fromdir=~s/[\\\/]+$//;
455 $todir=~s/[\\\/]+/\//g;
456 $todir=~s/[\\\/]+$//;
457
458 &mk_all_dir ($todir);
459
460 # get the directories in ascending order
461 if (!opendir (FROMDIR, $fromdir)) {
462 print STDERR "util::cachedir could not read directory $fromdir\n";
463 return;
464 }
465 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
466 closedir (FROMDIR);
467
468 if (!opendir (TODIR, $todir)) {
469 print STDERR "util::cacedir could not read directory $todir\n";
470 return;
471 }
472 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
473 closedir (TODIR);
474
475 my $fromi = 0;
476 my $toi = 0;
477
478 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
479# print "fromi: $fromi toi: $toi\n";
480
481 # see if we should delete a file/directory
482 # this should happen if the file/directory
483 # is not in the from list or if its a different
484 # size, or has an older timestamp
485 if ($toi < scalar(@todir)) {
486 if (($fromi >= scalar(@fromdir)) ||
487 ($todir[$toi] lt $fromdir[$fromi] ||
488 ($todir[$toi] eq $fromdir[$fromi] &&
489 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
490 $verbosity)))) {
491
492 # the files are different
493 &rm_r("$todir/$todir[$toi]");
494 splice(@todir, $toi, 1); # $toi stays the same
495
496 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
497 # the files are the same
498 # if it is a directory, check its contents
499 if (-d "$todir/$todir[$toi]") {
500 &cachedir ("$fromdir/$fromdir[$fromi]",
501 "$todir/$todir[$toi]", $verbosity);
502 }
503
504 $toi++;
505 $fromi++;
506 next;
507 }
508 }
509
510 # see if we should insert a file/directory
511 # we should insert a file/directory if there
512 # is no tofiles left or if the tofile does not exist
513 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
514 $todir[$toi] gt $fromdir[$fromi])) {
515 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
516 splice (@todir, $toi, 0, $fromdir[$fromi]);
517
518 $toi++;
519 $fromi++;
520 }
521 }
522}
523
524# this function returns -1 if either file is not found
525# assumes that $file1 and $file2 are absolute file names or
526# in the current directory
527# $file2 is allowed to be newer than $file1
528sub differentfiles {
529 my ($file1, $file2, $verbosity) = @_;
530 $verbosity = 1 unless defined $verbosity;
531
532 $file1 =~ s/\/+$//;
533 $file2 =~ s/\/+$//;
534
535 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
536 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
537
538 return -1 unless (-e $file1 && -e $file2);
539 if ($file1name ne $file2name) {
540 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
541 return 1;
542 }
543
544 my @file1stat = stat ($file1);
545 my @file2stat = stat ($file2);
546
547 if (-d $file1) {
548 if (! -d $file2) {
549 print STDERR "one file is a directory\n" if ($verbosity >= 2);
550 return 1;
551 }
552 return 0;
553 }
554
555 # both must be regular files
556 unless (-f $file1 && -f $file2) {
557 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
558 return 1;
559 }
560
561 # the size of the files must be the same
562 if ($file1stat[7] != $file2stat[7]) {
563 print STDERR "different sized files\n" if ($verbosity >= 2);
564 return 1;
565 }
566
567 # the second file cannot be older than the first
568 if ($file1stat[9] > $file2stat[9]) {
569 print STDERR "file is older\n" if ($verbosity >= 2);
570 return 1;
571 }
572
573 return 0;
574}
575
576
577sub get_tmp_filename
578{
579 my $file_ext = shift(@_) || undef;
580
581 my $opt_dot_file_ext = (defined $file_ext) ? ".$file_ext" : "";
582
583 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
584 &mk_all_dir ($tmpdir) unless -e $tmpdir;
585
586 my $count = 1000;
587 my $rand = int(rand $count);
588 my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
589
590 while (-e $full_tmp_filename) {
591 $rand = int(rand $count);
592 $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
593 $count++;
594 }
595
596 return $full_tmp_filename;
597}
598
599sub filename_to_regex {
600 my $filename = shift (@_);
601
602 # need to put single backslash back to double so that regex works
603 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
604 $filename =~ s/\\/\\\\/g;
605 }
606 return $filename;
607}
608
609sub filename_cat {
610 my $first_file = shift(@_);
611 my (@filenames) = @_;
612
613# Useful for debugging
614# -- might make sense to call caller(0) rather than (1)??
615# my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
616# print STDERR "Calling method; $cfilename:$cline $cpackage->$csubr\n";
617
618 # If first_file is not null or empty, then add it back into the list
619 if (defined $first_file && $first_file =~ /\S/) {
620 unshift(@filenames, $first_file);
621 }
622
623 my $filename = join("/", @filenames);
624
625 # remove duplicate slashes and remove the last slash
626 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
627 $filename =~ s/[\\\/]+/\\/g;
628 } else {
629 $filename =~ s/[\/]+/\//g;
630 # DB: want a filename abc\de.html to remain like this
631 }
632 $filename =~ s/[\\\/]$//;
633
634 return $filename;
635}
636
637sub tidy_up_oid {
638 my ($OID) = @_;
639 if ($OID =~ /\./) {
640 print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
641 $OID =~ s/\.//g; #remove any periods
642 }
643 if ($OID =~ /^\s.*\s$/) {
644 print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
645 # remove starting and trailing whitespace
646 $OID =~ s/^\s+//;
647 $OID =~ s/\s+$//;
648 }
649 if ($OID =~ /^[\d]*$/) {
650 print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
651 $OID = "D" . $OID;
652 }
653
654 return $OID;
655}
656sub envvar_prepend {
657 my ($var,$val) = @_;
658
659 # do not prepend any value/path that's already in the environment variable
660 if ($ENV{'GSDLOS'} =~ /^windows$/i)
661 {
662 my $escaped_val = $val;
663 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
664 if($ENV{$var} !~ m/$escaped_val/) {
665 $ENV{$var} = "$val;".$ENV{$var};
666 }
667 }
668 else {
669 if($ENV{$var} !~ m/$val/) {
670 $ENV{$var} = "$val:".$ENV{$var};
671 }
672 }
673}
674
675sub envvar_append {
676 my ($var,$val) = @_;
677
678 # do not append any value/path that's already in the environment variable
679 if ($ENV{'GSDLOS'} =~ /^windows$/i)
680 {
681 my $escaped_val = $val;
682 $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
683 if($ENV{$var} !~ m/$escaped_val/) {
684 $ENV{$var} .= ";$val";
685 }
686 }
687 else {
688 if($ENV{$var} !~ m/$val/) {
689 $ENV{$var} .= ":$val";
690 }
691 }
692}
693
694
695# splits a filename into a prefix and a tail extension using the tail_re, or
696# if that fails, splits on the file_extension . (dot)
697sub get_prefix_and_tail_by_regex {
698
699 my ($filename,$tail_re) = @_;
700
701 my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
702 if ((!defined $file_prefix) || (!defined $file_ext)) {
703 ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
704 }
705
706 return ($file_prefix,$file_ext);
707}
708
709# get full path and file only path from a base_dir (which may be empty) and
710# file (which may contain directories)
711sub get_full_filenames {
712 my ($base_dir, $file) = @_;
713
714 my $filename_full_path = $file;
715 # add on directory if present
716 $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
717
718 my $filename_no_path = $file;
719
720 # remove directory if present
721 $filename_no_path =~ s/^.*[\/\\]//;
722 return ($filename_full_path, $filename_no_path);
723}
724
725# returns the path of a file without the filename -- ie. the directory the file is in
726sub filename_head {
727 my $filename = shift(@_);
728
729 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
730 $filename =~ s/[^\\\\]*$//;
731 }
732 else {
733 $filename =~ s/[^\\\/]*$//;
734 }
735
736 return $filename;
737}
738
739
740# returns 1 if filename1 and filename2 point to the same
741# file or directory
742sub filenames_equal {
743 my ($filename1, $filename2) = @_;
744
745 # use filename_cat to clean up trailing slashes and
746 # multiple slashes
747 $filename1 = filename_cat ($filename1);
748 $filename2 = filename_cat ($filename2);
749
750 # filenames not case sensitive on windows
751 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
752 $filename1 =~ tr/[A-Z]/[a-z]/;
753 $filename2 =~ tr/[A-Z]/[a-z]/;
754 }
755 return 1 if $filename1 eq $filename2;
756 return 0;
757}
758
759sub filename_within_collection
760{
761 my ($filename) = @_;
762
763 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
764
765 if (defined $collect_dir) {
766 my $dirsep = &util::get_dirsep();
767 if ($collect_dir !~ m/$dirsep$/) {
768 $collect_dir .= $dirsep;
769 }
770
771 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
772
773 # if from within GSDLCOLLECTDIR, then remove directory prefix
774 # so source_filename is realative to it. This is done to aid
775 # portability, i.e. the collection can be moved to somewhere
776 # else on the file system and the archives directory will still
777 # work. This is needed, for example in the applet version of
778 # GLI where GSDLHOME/collect on the server will be different to
779 # the collect directory of the remove user. Of course,
780 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
781 # it back into a full pathname.
782
783 if ($filename =~ /^$collect_dir(.*)$/) {
784 $filename = $1;
785 }
786 }
787
788 return $filename;
789}
790
791sub filename_is_absolute
792{
793 my ($filename) = @_;
794
795 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
796 return ($filename =~ m/^(\w:)?\\/);
797 }
798 else {
799 return ($filename =~ m/^\//);
800 }
801}
802
803
804## @method make_absolute()
805#
806# Ensure the given file path is absolute in respect to the given base path.
807#
808# @param $base_dir A string denoting the base path the given dir must be
809# absolute to.
810# @param $dir The directory to be made absolute as a string. Note that the
811# dir may already be absolute, in which case it will remain
812# unchanged.
813# @return The now absolute form of the directory as a string.
814#
815# @author John Thompson, DL Consulting Ltd.
816# @copy 2006 DL Consulting Ltd.
817#
818#used in buildcol.pl, doesn't work for all cases --kjdon
819sub make_absolute {
820
821 my ($base_dir, $dir) = @_;
822### print STDERR "dir = $dir\n";
823 $dir =~ s/[\\\/]+/\//g;
824 $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
825 $dir =~ s|^/tmp_mnt||;
826 1 while($dir =~ s|/[^/]*/\.\./|/|g);
827 $dir =~ s|/[.][.]?/|/|g;
828 $dir =~ tr|/|/|s;
829### print STDERR "dir = $dir\n";
830
831 return $dir;
832}
833## make_absolute() ##
834
835sub get_dirsep {
836
837 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
838 return "\\";
839 } else {
840 return "\/";
841 }
842}
843
844sub get_os_dirsep {
845
846 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
847 return "\\\\";
848 } else {
849 return "\\\/";
850 }
851}
852
853sub get_re_dirsep {
854
855 return "\\\\|\\\/";
856}
857
858
859sub get_dirsep_tail {
860 my ($filename) = @_;
861
862 # returns last part of directory or filename
863 # On unix e.g. a/b.d => b.d
864 # a/b/c => c
865
866 my $dirsep = get_re_dirsep();
867 my @dirs = split (/$dirsep/, $filename);
868 my $tail = pop @dirs;
869
870 # - caused problems under windows
871 #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
872
873 return $tail;
874}
875
876
877# if this is running on windows we want binaries to end in
878# .exe, otherwise they don't have to end in any extension
879sub get_os_exe {
880 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
881 return "";
882}
883
884
885# test to see whether this is a big or little endian machine
886sub is_little_endian
887{
888 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
889 # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
890 # Otherwise, it's little endian
891
892 #return 0 if $^O =~ /^darwin$/i;
893 #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
894
895 # Going back to stating exactly whether the machine is little endian
896 # or big endian, without any special case for Macs. Since for rata it comes
897 # back with little endian and for shuttle with bigendian.
898 return (ord(substr(pack("s",1), 0, 1)) == 1);
899}
900
901
902# will return the collection name if successful, "" otherwise
903sub use_collection {
904 my ($collection, $collectdir) = @_;
905
906 if (!defined $collectdir || $collectdir eq "") {
907 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
908 }
909
910 # get and check the collection
911 if (!defined($collection) || $collection eq "") {
912 if (defined $ENV{'GSDLCOLLECTION'}) {
913 $collection = $ENV{'GSDLCOLLECTION'};
914 } else {
915 print STDOUT "No collection specified\n";
916 return "";
917 }
918 }
919
920 if ($collection eq "modelcol") {
921 print STDOUT "You can't use modelcol.\n";
922 return "";
923 }
924
925 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
926 # are defined
927 $ENV{'GSDLCOLLECTION'} = $collection;
928 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
929
930 # make sure this collection exists
931 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
932 print STDOUT "Invalid collection ($collection).\n";
933 return "";
934 }
935
936 # everything is ready to go
937 return $collection;
938}
939
940
941
942
943# will return the collection name if successful, "" otherwise.
944# Like use_collection (above) but for greenstone 3 (taking account of site level)
945
946sub use_site_collection {
947 my ($site, $collection, $collectdir) = @_;
948
949 if (!defined $collectdir || $collectdir eq "") {
950 die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
951 $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
952 }
953
954 # collectdir explicitly set by this point (using $site variable if required).
955 # Can call "old" gsdl2 use_collection now.
956
957 return use_collection($collection,$collectdir);
958}
959
960
961
962sub locate_config_file
963{
964 my ($file) = @_;
965
966 my $locations = locate_config_files($file);
967
968 return shift @$locations; # returns undef if 'locations' is empty
969}
970
971
972sub locate_config_files
973{
974 my ($file) = @_;
975
976 my @locations = ();
977
978 if (-e $file) {
979 # Clearly specified (most likely full filename)
980 # No need to hunt in 'etc' directories, return value unchanged
981 push(@locations,$file);
982 }
983 else {
984 # Check for collection specific one before looking in global GSDL 'etc'
985 if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
986 my $test_collect_etc_filename
987 = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
988
989 if (-e $test_collect_etc_filename) {
990 push(@locations,$test_collect_etc_filename);
991 }
992 }
993 my $test_main_etc_filename
994 = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
995 if (-e $test_main_etc_filename) {
996 push(@locations,$test_main_etc_filename);
997 }
998 }
999
1000 return \@locations;
1001}
1002
1003
1004sub hyperlink_text
1005{
1006 my ($text) = @_;
1007
1008 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1009 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1010
1011 return $text;
1012}
1013
1014
1015# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1016# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1017sub is_dir_empty
1018{
1019 my ($path) = @_;
1020 opendir DIR, $path;
1021 while(my $entry = readdir DIR) {
1022 next if($entry =~ /^\.\.?$/);
1023 closedir DIR;
1024 return 0;
1025 }
1026 closedir DIR;
1027 return 1;
1028}
1029
1030# Returns the given filename converted using either URL encoding or base64
1031# encoding, as specified by $rename_method. If the given filename has no suffix
1032# (if it is just the tailname), then $no_suffix should be some defined value.
1033sub rename_file {
1034 my ($filename, $rename_method, $no_suffix) = @_;
1035
1036 if(!$filename) { # undefined or empty string
1037 return $filename;
1038 }
1039
1040 # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1041 ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1042 ###$filename =~ s/ /_/g;
1043
1044 my ($tailname,$dirname,$suffix);
1045 if($no_suffix) { # given a tailname, no suffix
1046 ($tailname,$dirname) = File::Basename::fileparse($filename);
1047 }
1048 else {
1049 ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1050 }
1051 $suffix = "" if !$suffix;
1052
1053 if (!$rename_method) {
1054 print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1055 # Debugging information
1056 my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1057 print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
1058 } elsif($rename_method eq "none") {
1059 return $filename; # would have already been renamed
1060 }
1061
1062 if (!$rename_method || $rename_method eq "url") {
1063 $tailname = &unicode::url_encode($tailname);
1064 }
1065 elsif ($rename_method eq "base64") {
1066 $tailname = &unicode::base64_encode($tailname);
1067 $tailname =~ s/\s*//sg; # for some reason it adds spaces not just at end but also in middle
1068 }
1069
1070 $filename = "$tailname$suffix";
1071 $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1072
1073 return $filename;
1074}
1075
1076# makes sure that the file has a gdb extension
1077sub rename_gdbm_file {
1078 my ($filename_no_ext) = @_;
1079
1080 my $new_filename = "$filename_no_ext.gdb";
1081 return if (-f $new_filename); # if gdb file exists, don't need to do anything
1082 # try ldb
1083 my $old_filename = "$filename_no_ext.ldb";
1084
1085 if (-f $old_filename) {
1086 print STDERR "Renaming $old_filename to $new_filename\n";
1087 rename ($old_filename, $new_filename)
1088 || print STDERR "Rename failed: $!\n";
1089 return;
1090 }
1091 # try bdb
1092 $old_filename = "$filename_no_ext.bdb";
1093 if (-f $old_filename) {
1094 print STDERR "Renaming $old_filename to $new_filename\n";
1095 rename ($old_filename, $new_filename)
1096 || print STDERR "Rename failed: $!\n";
1097 return;
1098 }
1099}
1100
1101
1102
11031;
Note: See TracBrowser for help on using the repository browser.