source: gsdl/trunk/perllib/util.pm@ 14175

Last change on this file since 14175 was 14175, checked in by qq6, 17 years ago

Determine the name of the operating system, make the file extension .bdb for MAC, otherwise it's .ldb extension

  • Property svn:keywords set to Author Date Id Revision
File size: 19.0 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use File::Copy;
29use File::Basename;
30
31
32# removes files (but not directories)
33sub rm {
34 my (@files) = @_;
35 my @filefiles = ();
36
37 # make sure the files we want to delete exist
38 # and are regular files
39 foreach my $file (@files) {
40 if (!-e $file) {
41 print STDERR "util::rm $file does not exist\n";
42 } elsif ((!-f $file) && (!-l $file)) {
43 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
44 } else {
45 push (@filefiles, $file);
46 }
47 }
48
49 # remove the files
50 my $numremoved = unlink @filefiles;
51
52 # check to make sure all of them were removed
53 if ($numremoved != scalar(@filefiles)) {
54 print STDERR "util::rm Not all files were removed\n";
55 }
56}
57
58
59
60# recursive removal
61sub filtered_rm_r {
62 my ($files,$file_accept_re,$file_reject_re) = @_;
63
64 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
65
66 # recursively remove the files
67 foreach my $file (@files_array) {
68 $file =~ s/[\/\\]+$//; # remove trailing slashes
69
70 if (!-e $file) {
71 print STDERR "util::filtered_rm_r $file does not exist\n";
72
73 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
74 # get the contents of this directory
75 if (!opendir (INDIR, $file)) {
76 print STDERR "util::filtered_rm_r could not open directory $file\n";
77 } else {
78 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
79 closedir (INDIR);
80
81 # remove all the files in this directory
82 map {$_="$file/$_";} @filedir;
83 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
84
85 if (!defined $file_accept_re && !defined $file_reject_re) {
86 # remove this directory
87 if (!rmdir $file) {
88 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
89 }
90 }
91 }
92 } else {
93 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
94
95 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
96 # remove this file
97 &rm ($file);
98 }
99 }
100 }
101}
102
103
104# recursive removal
105sub rm_r {
106 my (@files) = @_;
107
108 # use the more general (but reterospectively written function
109 # filtered_rm_r function()
110
111 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
112}
113
114
115
116
117# moves a file or a group of files
118sub mv {
119 my $dest = pop (@_);
120 my (@srcfiles) = @_;
121
122 # remove trailing slashes from source and destination files
123 $dest =~ s/[\\\/]+$//;
124 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
125
126 # a few sanity checks
127 if (scalar (@srcfiles) == 0) {
128 print STDERR "util::mv no destination directory given\n";
129 return;
130 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
131 print STDERR "util::mv if multiple source files are given the ".
132 "destination must be a directory\n";
133 return;
134 }
135
136 # move the files
137 foreach my $file (@srcfiles) {
138 my $tempdest = $dest;
139 if (-d $tempdest) {
140 my ($filename) = $file =~ /([^\\\/]+)$/;
141 $tempdest .= "/$filename";
142 }
143 if (!-e $file) {
144 print STDERR "util::mv $file does not exist\n";
145 } else {
146 rename ($file, $tempdest);
147 }
148 }
149}
150
151
152# copies a file or a group of files
153sub cp {
154 my $dest = pop (@_);
155 my (@srcfiles) = @_;
156
157 # remove trailing slashes from source and destination files
158 $dest =~ s/[\\\/]+$//;
159 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
160
161 # a few sanity checks
162 if (scalar (@srcfiles) == 0) {
163 print STDERR "util::cp no destination directory given\n";
164 return;
165 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
166 print STDERR "util::cp if multiple source files are given the ".
167 "destination must be a directory\n";
168 return;
169 }
170
171 # copy the files
172 foreach my $file (@srcfiles) {
173 my $tempdest = $dest;
174 if (-d $tempdest) {
175 my ($filename) = $file =~ /([^\\\/]+)$/;
176 $tempdest .= "/$filename";
177 }
178 if (!-e $file) {
179 print STDERR "util::cp $file does not exist\n";
180 } elsif (!-f $file) {
181 print STDERR "util::cp $file is not a plain file\n";
182 } else {
183 &File::Copy::copy ($file, $tempdest);
184 }
185 }
186}
187
188
189
190# recursively copies a file or group of files
191# syntax: cp_r (sourcefiles, destination directory)
192# destination must be a directory - to copy one file to
193# another use cp instead
194sub cp_r {
195 my $dest = pop (@_);
196 my (@srcfiles) = @_;
197
198 # a few sanity checks
199 if (scalar (@srcfiles) == 0) {
200 print STDERR "util::cp_r no destination directory given\n";
201 return;
202 } elsif (-f $dest) {
203 print STDERR "util::cp_r destination must be a directory\n";
204 return;
205 }
206
207 # create destination directory if it doesn't exist already
208 if (! -d $dest) {
209 my $store_umask = umask(0002);
210 mkdir ($dest, 0777);
211 umask($store_umask);
212 }
213
214 # copy the files
215 foreach my $file (@srcfiles) {
216
217 if (!-e $file) {
218 print STDERR "util::cp_r $file does not exist\n";
219
220 } elsif (-d $file) {
221 # make the new directory
222 my ($filename) = $file =~ /([^\\\/]*)$/;
223 $dest = &util::filename_cat ($dest, $filename);
224 my $store_umask = umask(0002);
225 mkdir ($dest, 0777);
226 umask($store_umask);
227
228 # get the contents of this directory
229 if (!opendir (INDIR, $file)) {
230 print STDERR "util::cp_r could not open directory $file\n";
231 } else {
232 my @filedir = readdir (INDIR);
233 closedir (INDIR);
234 foreach my $f (@filedir) {
235 next if $f =~ /^\.\.?$/;
236 # copy all the files in this directory
237 my $ff = &util::filename_cat ($file, $f);
238 &cp_r ($ff, $dest);
239 }
240 }
241
242 } else {
243 &cp($file, $dest);
244 }
245 }
246}
247
248# copies a directory and its contents, excluding subdirectories, into a new directory
249sub cp_r_toplevel {
250 my $dest = pop (@_);
251 my (@srcfiles) = @_;
252
253 # a few sanity checks
254 if (scalar (@srcfiles) == 0) {
255 print STDERR "util::cp_r no destination directory given\n";
256 return;
257 } elsif (-f $dest) {
258 print STDERR "util::cp_r destination must be a directory\n";
259 return;
260 }
261
262 # create destination directory if it doesn't exist already
263 if (! -d $dest) {
264 my $store_umask = umask(0002);
265 mkdir ($dest, 0777);
266 umask($store_umask);
267 }
268
269 # copy the files
270 foreach my $file (@srcfiles) {
271
272 if (!-e $file) {
273 print STDERR "util::cp_r $file does not exist\n";
274
275 } elsif (-d $file) {
276 # make the new directory
277 my ($filename) = $file =~ /([^\\\/]*)$/;
278 $dest = &util::filename_cat ($dest, $filename);
279 my $store_umask = umask(0002);
280 mkdir ($dest, 0777);
281 umask($store_umask);
282
283 # get the contents of this directory
284 if (!opendir (INDIR, $file)) {
285 print STDERR "util::cp_r could not open directory $file\n";
286 } else {
287 my @filedir = readdir (INDIR);
288 closedir (INDIR);
289 foreach my $f (@filedir) {
290 next if $f =~ /^\.\.?$/;
291
292 # copy all the files in this directory, but not directories
293 my $ff = &util::filename_cat ($file, $f);
294 if (-f $ff) {
295 &cp($ff, $dest);
296 #&cp_r ($ff, $dest);
297 }
298 }
299 }
300
301 } else {
302 &cp($file, $dest);
303 }
304 }
305}
306
307sub mk_dir {
308 my ($dir) = @_;
309
310 my $store_umask = umask(0002);
311 my $mkdir_ok = mkdir ($dir, 0777);
312 umask($store_umask);
313
314 if (!$mkdir_ok)
315 {
316 print STDERR "util::mk_dir could not create directory $dir\n";
317 return;
318 }
319}
320
321# in case anyone cares - I did some testing (using perls Benchmark module)
322# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
323# slightly faster (surprisingly) - Stefan.
324sub mk_all_dir {
325 my ($dir) = @_;
326
327 # use / for the directory separator, remove duplicate and
328 # trailing slashes
329 $dir=~s/[\\\/]+/\//g;
330 $dir=~s/[\\\/]+$//;
331
332 # make sure the cache directory exists
333 my $dirsofar = "";
334 my $first = 1;
335 foreach my $dirname (split ("/", $dir)) {
336 $dirsofar .= "/" unless $first;
337 $first = 0;
338
339 $dirsofar .= $dirname;
340
341 next if $dirname =~ /^(|[a-z]:)$/i;
342 if (!-e $dirsofar)
343 {
344 my $store_umask = umask(0002);
345 my $mkdir_ok = mkdir ($dirsofar, 0777);
346 umask($store_umask);
347 if (!$mkdir_ok)
348 {
349 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
350 return;
351 }
352 }
353 }
354}
355
356# make hard link to file if supported by OS, otherwise copy the file
357sub hard_link {
358 my ($src, $dest) = @_;
359
360 # remove trailing slashes from source and destination files
361 $src =~ s/[\\\/]+$//;
362 $dest =~ s/[\\\/]+$//;
363
364 # a few sanity checks
365 if (-e $dest) {
366 # destination file already exists
367 return;
368 }
369 elsif (!-e $src) {
370 print STDERR "util::hard_link source file $src does not exist\n";
371 return 1;
372 }
373 elsif (-d $src) {
374 print STDERR "util::hard_link source $src is a directory\n";
375 return 1;
376 }
377
378 my $dest_dir = &File::Basename::dirname($dest);
379 mk_all_dir($dest_dir) if (!-e $dest_dir);
380
381 # link not supported on windows 9x
382 if (($ENV{'GSDLOS'} =~ /^windows$/i) && (Win32::FsType() !~ /^ntfs$/i)) {
383 &File::Copy::copy ($src, $dest);
384
385 } elsif (!link($src, $dest)) {
386 print STDERR "util::hard_link: unable to create hard link. ";
387 print STDERR " Attempting to copy file: $src -> $dest\n";
388 &File::Copy::copy ($src, $dest);
389 }
390 return 0;
391}
392
393# make soft link to file if supported by OS, otherwise copy file
394sub soft_link {
395 my ($src, $dest) = @_;
396
397 # remove trailing slashes from source and destination files
398 $src =~ s/[\\\/]+$//;
399 $dest =~ s/[\\\/]+$//;
400
401 # a few sanity checks
402 if (!-e $src) {
403 print STDERR "util::soft_link source file $src does not exist\n";
404 return 0;
405 }
406
407 my $dest_dir = &File::Basename::dirname($dest);
408 mk_all_dir($dest_dir) if (!-e $dest_dir);
409
410 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
411 # symlink not supported on windows
412 &File::Copy::copy ($src, $dest);
413
414 } elsif (!eval {symlink($src, $dest)}) {
415 print STDERR "util::soft_link: unable to create soft link.\n";
416 return 0;
417 }
418
419 return 1;
420}
421
422
423
424
425# updates a copy of a directory in some other part of the filesystem
426# verbosity settings are: 0=low, 1=normal, 2=high
427# both $fromdir and $todir should be absolute paths
428sub cachedir {
429 my ($fromdir, $todir, $verbosity) = @_;
430 $verbosity = 1 unless defined $verbosity;
431
432 # use / for the directory separator, remove duplicate and
433 # trailing slashes
434 $fromdir=~s/[\\\/]+/\//g;
435 $fromdir=~s/[\\\/]+$//;
436 $todir=~s/[\\\/]+/\//g;
437 $todir=~s/[\\\/]+$//;
438
439 &mk_all_dir ($todir);
440
441 # get the directories in ascending order
442 if (!opendir (FROMDIR, $fromdir)) {
443 print STDERR "util::cachedir could not read directory $fromdir\n";
444 return;
445 }
446 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
447 closedir (FROMDIR);
448
449 if (!opendir (TODIR, $todir)) {
450 print STDERR "util::cacedir could not read directory $todir\n";
451 return;
452 }
453 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
454 closedir (TODIR);
455
456 my $fromi = 0;
457 my $toi = 0;
458
459 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
460# print "fromi: $fromi toi: $toi\n";
461
462 # see if we should delete a file/directory
463 # this should happen if the file/directory
464 # is not in the from list or if its a different
465 # size, or has an older timestamp
466 if ($toi < scalar(@todir)) {
467 if (($fromi >= scalar(@fromdir)) ||
468 ($todir[$toi] lt $fromdir[$fromi] ||
469 ($todir[$toi] eq $fromdir[$fromi] &&
470 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
471 $verbosity)))) {
472
473 # the files are different
474 &rm_r("$todir/$todir[$toi]");
475 splice(@todir, $toi, 1); # $toi stays the same
476
477 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
478 # the files are the same
479 # if it is a directory, check its contents
480 if (-d "$todir/$todir[$toi]") {
481 &cachedir ("$fromdir/$fromdir[$fromi]",
482 "$todir/$todir[$toi]", $verbosity);
483 }
484
485 $toi++;
486 $fromi++;
487 next;
488 }
489 }
490
491 # see if we should insert a file/directory
492 # we should insert a file/directory if there
493 # is no tofiles left or if the tofile does not exist
494 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
495 $todir[$toi] gt $fromdir[$fromi])) {
496 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
497 splice (@todir, $toi, 0, $fromdir[$fromi]);
498
499 $toi++;
500 $fromi++;
501 }
502 }
503}
504
505# this function returns -1 if either file is not found
506# assumes that $file1 and $file2 are absolute file names or
507# in the current directory
508# $file2 is allowed to be newer than $file1
509sub differentfiles {
510 my ($file1, $file2, $verbosity) = @_;
511 $verbosity = 1 unless defined $verbosity;
512
513 $file1 =~ s/\/+$//;
514 $file2 =~ s/\/+$//;
515
516 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
517 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
518
519 return -1 unless (-e $file1 && -e $file2);
520 if ($file1name ne $file2name) {
521 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
522 return 1;
523 }
524
525 my @file1stat = stat ($file1);
526 my @file2stat = stat ($file2);
527
528 if (-d $file1) {
529 if (! -d $file2) {
530 print STDERR "one file is a directory\n" if ($verbosity >= 2);
531 return 1;
532 }
533 return 0;
534 }
535
536 # both must be regular files
537 unless (-f $file1 && -f $file2) {
538 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
539 return 1;
540 }
541
542 # the size of the files must be the same
543 if ($file1stat[7] != $file2stat[7]) {
544 print STDERR "different sized files\n" if ($verbosity >= 2);
545 return 1;
546 }
547
548 # the second file cannot be older than the first
549 if ($file1stat[9] > $file2stat[9]) {
550 print STDERR "file is older\n" if ($verbosity >= 2);
551 return 1;
552 }
553
554 return 0;
555}
556
557
558sub get_tmp_filename {
559 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
560 &mk_all_dir ($tmpdir) unless -e $tmpdir;
561
562 my $count = 1000;
563 my $rand = int(rand $count);
564 while (-e &filename_cat($tmpdir, "F$rand")) {
565 $rand = int(rand $count);
566 $count++;
567 }
568
569 return filename_cat($tmpdir, "F$rand");
570}
571
572
573sub filename_cat {
574 my $first_file = shift(@_);
575 my (@filenames) = @_;
576
577 # Check for empty first filename
578 if ($first_file =~ /\S/) {
579 unshift(@filenames, $first_file);
580 }
581
582 my $filename = join("/", @filenames);
583
584 # remove duplicate slashes and remove the last slash
585 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
586 $filename =~ s/[\\\/]+/\\/g;
587 } else {
588 $filename =~ s/[\/]+/\//g;
589 # DB: want a filename abc\de.html to remain like this
590 }
591 $filename =~ s/[\\\/]$//;
592
593 return $filename;
594}
595
596
597sub envvar_prepend {
598 my ($var,$val) = @_;
599
600 my $current_val = $ENV{$var};
601
602 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
603 $ENV{$var} .= "$val;$current_val";
604 }
605 else {
606 $ENV{$var} .= "$val:$current_val";
607 }
608}
609
610sub envvar_append {
611 my ($var,$val) = @_;
612
613 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
614 $ENV{$var} .= ";$val";
615 }
616 else {
617 $ENV{$var} .= ":$val";
618 }
619}
620
621
622# returns the path of a file without the filename -- ie. the directory the file is in
623sub filename_head {
624 my $filename = shift(@_);
625
626 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
627 $filename =~ s/[^\\\\]*$//;
628 }
629 else {
630 $filename =~ s/[^\\\/]*$//;
631 }
632
633 return $filename;
634}
635
636
637# returns 1 if filename1 and filename2 point to the same
638# file or directory
639sub filenames_equal {
640 my ($filename1, $filename2) = @_;
641
642 # use filename_cat to clean up trailing slashes and
643 # multiple slashes
644 $filename1 = filename_cat ($filename1);
645 $filename2 = filename_cat ($filename2);
646
647 # filenames not case sensitive on windows
648 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
649 $filename1 =~ tr/[A-Z]/[a-z]/;
650 $filename2 =~ tr/[A-Z]/[a-z]/;
651 }
652 return 1 if $filename1 eq $filename2;
653 return 0;
654}
655
656sub filename_within_collection
657{
658 my ($filename) = @_;
659
660 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
661
662 if (defined $collect_dir) {
663 my $dirsep = &util::get_dirsep();
664 if ($collect_dir !~ m/$dirsep$/) {
665 $collect_dir .= $dirsep;
666 }
667
668 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
669
670 if ($filename =~ /^$collect_dir(.*)$/) {
671 $filename = $1;
672 }
673 }
674
675 return $filename;
676}
677
678
679sub get_dirsep {
680
681 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
682 return "\\";
683 } else {
684 return "\/";
685 }
686}
687
688sub get_os_dirsep {
689
690 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
691 return "\\\\";
692 } else {
693 return "\\\/";
694 }
695}
696
697sub get_re_dirsep {
698
699 return "\\\\|\\\/";
700}
701
702
703# if this is running on windows we want binaries to end in
704# .exe, otherwise they don't have to end in any extension
705sub get_os_exe {
706 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
707 return "";
708}
709
710
711# test to see whether this is a big or little endian machine
712sub is_little_endian {
713 # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
714 # What we do here is, if it is a Macintosh machine (i.e. the Darwin operating system), regardless it is running on the IBM power-pc cpu or it is the x86 Intel-based chip with a power-pc emulator running on top of it, it requires the big-endian data format in the gdbm database file, we make the file extension .bdb; otherwise it's .ldb extension.
715
716 #return 0 if $^O =~ /^darwin$/i;
717 return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
718 return (ord(substr(pack("s",1), 0, 1)) == 1);
719}
720
721
722# will return the collection name if successful, "" otherwise
723sub use_collection {
724 my ($collection, $collectdir) = @_;
725
726 if (!defined $collectdir || $collectdir eq "") {
727 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
728 }
729
730 # get and check the collection
731 if (!defined($collection) || $collection eq "") {
732 if (defined $ENV{'GSDLCOLLECTION'}) {
733 $collection = $ENV{'GSDLCOLLECTION'};
734 } else {
735 print STDOUT "No collection specified\n";
736 return "";
737 }
738 }
739
740 if ($collection eq "modelcol") {
741 print STDOUT "You can't use modelcol.\n";
742 return "";
743 }
744
745 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
746 # are defined
747 $ENV{'GSDLCOLLECTION'} = $collection unless defined $ENV{'GSDLCOLLECTION'};
748 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
749
750 # make sure this collection exists
751 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
752 print STDOUT "Invalid collection ($collection).\n";
753 return "";
754 }
755
756 # everything is ready to go
757 return $collection;
758}
759
760sub hyperlink_text
761{
762 my ($text) = @_;
763
764 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
765 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
766
767 return $text;
768}
769
770
7711;
Note: See TracBrowser for help on using the repository browser.