source: trunk/gsdl/perllib/util.pm@ 10281

Last change on this file since 10281 was 10281, checked in by chi, 19 years ago

Add filename_within_collection().

  • Property svn:keywords set to Author Date Id Revision
File size: 17.0 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use File::Copy;
29use File::Basename;
30
31
32# removes files (but not directories)
33sub rm {
34 my (@files) = @_;
35 my @filefiles = ();
36
37 # make sure the files we want to delete exist
38 # and are regular files
39 foreach my $file (@files) {
40 if (!-e $file) {
41 print STDERR "util::rm $file does not exist\n";
42 } elsif ((!-f $file) && (!-l $file)) {
43 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
44 } else {
45 push (@filefiles, $file);
46 }
47 }
48
49 # remove the files
50 my $numremoved = unlink @filefiles;
51
52 # check to make sure all of them were removed
53 if ($numremoved != scalar(@filefiles)) {
54 print STDERR "util::rm Not all files were removed\n";
55 }
56}
57
58
59
60# recursive removal
61sub filtered_rm_r {
62 my ($files,$file_accept_re,$file_reject_re) = @_;
63
64 my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
65
66 # recursively remove the files
67 foreach my $file (@files_array) {
68 $file =~ s/[\/\\]+$//; # remove trailing slashes
69
70 if (!-e $file) {
71 print STDERR "util::filtered_rm_r $file does not exist\n";
72
73 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
74 # get the contents of this directory
75 if (!opendir (INDIR, $file)) {
76 print STDERR "util::filtered_rm_r could not open directory $file\n";
77 } else {
78 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
79 closedir (INDIR);
80
81 # remove all the files in this directory
82 map {$_="$file/$_";} @filedir;
83 &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
84
85 if (!defined $file_accept_re && !defined $file_reject_re) {
86 # remove this directory
87 if (!rmdir $file) {
88 print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
89 }
90 }
91 }
92 } else {
93 next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
94
95 if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
96 # remove this file
97 &rm ($file);
98 }
99 }
100 }
101}
102
103
104# recursive removal
105sub rm_r {
106 my (@files) = @_;
107
108 # use the more general (but reterospectively written function
109 # filtered_rm_r function()
110
111 filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
112}
113
114
115
116
117# moves a file or a group of files
118sub mv {
119 my $dest = pop (@_);
120 my (@srcfiles) = @_;
121
122 # remove trailing slashes from source and destination files
123 $dest =~ s/[\\\/]+$//;
124 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
125
126 # a few sanity checks
127 if (scalar (@srcfiles) == 0) {
128 print STDERR "util::mv no destination directory given\n";
129 return;
130 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
131 print STDERR "util::mv if multiple source files are given the ".
132 "destination must be a directory\n";
133 return;
134 }
135
136 # move the files
137 foreach my $file (@srcfiles) {
138 my $tempdest = $dest;
139 if (-d $tempdest) {
140 my ($filename) = $file =~ /([^\\\/]+)$/;
141 $tempdest .= "/$filename";
142 }
143 if (!-e $file) {
144 print STDERR "util::mv $file does not exist\n";
145 } else {
146 rename ($file, $tempdest);
147 }
148 }
149}
150
151
152# copies a file or a group of files
153sub cp {
154 my $dest = pop (@_);
155 my (@srcfiles) = @_;
156
157 # remove trailing slashes from source and destination files
158 $dest =~ s/[\\\/]+$//;
159 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
160
161 # a few sanity checks
162 if (scalar (@srcfiles) == 0) {
163 print STDERR "util::cp no destination directory given\n";
164 return;
165 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
166 print STDERR "util::cp if multiple source files are given the ".
167 "destination must be a directory\n";
168 return;
169 }
170
171 # copy the files
172 foreach my $file (@srcfiles) {
173 my $tempdest = $dest;
174 if (-d $tempdest) {
175 my ($filename) = $file =~ /([^\\\/]+)$/;
176 $tempdest .= "/$filename";
177 }
178 if (!-e $file) {
179 print STDERR "util::cp $file does not exist\n";
180 } elsif (!-f $file) {
181 print STDERR "util::cp $file is not a plain file\n";
182 } else {
183 &File::Copy::copy ($file, $tempdest);
184 }
185 }
186}
187
188
189
190# recursively copies a file or group of files
191# syntax: cp_r (sourcefiles, destination directory)
192# destination must be a directory - to copy one file to
193# another use cp instead
194sub cp_r {
195 my $dest = pop (@_);
196 my (@srcfiles) = @_;
197
198 # a few sanity checks
199 if (scalar (@srcfiles) == 0) {
200 print STDERR "util::cp_r no destination directory given\n";
201 return;
202 } elsif (-f $dest) {
203 print STDERR "util::cp_r destination must be a directory\n";
204 return;
205 }
206
207 # create destination directory if it doesn't exist already
208 if (! -d $dest) {
209 my $store_umask = umask(0002);
210 mkdir ($dest, 0777);
211 umask($store_umask);
212 }
213
214 # copy the files
215 foreach my $file (@srcfiles) {
216
217 if (!-e $file) {
218 print STDERR "util::cp_r $file does not exist\n";
219
220 } elsif (-d $file) {
221 # make the new directory
222 my ($filename) = $file =~ /([^\\\/]*)$/;
223 $dest = &util::filename_cat ($dest, $filename);
224 my $store_umask = umask(0002);
225 mkdir ($dest, 0777);
226 umask($store_umask);
227
228 # get the contents of this directory
229 if (!opendir (INDIR, $file)) {
230 print STDERR "util::cp_r could not open directory $file\n";
231 } else {
232 my @filedir = readdir (INDIR);
233 closedir (INDIR);
234 foreach my $f (@filedir) {
235 next if $f =~ /^\.\.?$/;
236 # copy all the files in this directory
237 my $ff = &util::filename_cat ($file, $f);
238 &cp_r ($ff, $dest);
239 }
240 }
241
242 } else {
243 &cp($file, $dest);
244 }
245 }
246}
247
248
249sub mk_dir {
250 my ($dir) = @_;
251
252 my $store_umask = umask(0002);
253 my $mkdir_ok = mkdir ($dir, 0777);
254 umask($store_umask);
255
256 if (!$mkdir_ok)
257 {
258 print STDERR "util::mk_dir could not create directory $dir\n";
259 return;
260 }
261}
262
263# in case anyone cares - I did some testing (using perls Benchmark module)
264# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
265# slightly faster (surprisingly) - Stefan.
266sub mk_all_dir {
267 my ($dir) = @_;
268
269 # use / for the directory separator, remove duplicate and
270 # trailing slashes
271 $dir=~s/[\\\/]+/\//g;
272 $dir=~s/[\\\/]+$//;
273
274 # make sure the cache directory exists
275 my $dirsofar = "";
276 my $first = 1;
277 foreach my $dirname (split ("/", $dir)) {
278 $dirsofar .= "/" unless $first;
279 $first = 0;
280
281 $dirsofar .= $dirname;
282
283 next if $dirname =~ /^(|[a-z]:)$/i;
284 if (!-e $dirsofar)
285 {
286 my $store_umask = umask(0002);
287 my $mkdir_ok = mkdir ($dirsofar, 0777);
288 umask($store_umask);
289 if (!$mkdir_ok)
290 {
291 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
292 return;
293 }
294 }
295 }
296}
297
298# make hard link to file if supported by OS, otherwise copy the file
299sub hard_link {
300 my ($src, $dest) = @_;
301
302 # remove trailing slashes from source and destination files
303 $src =~ s/[\\\/]+$//;
304 $dest =~ s/[\\\/]+$//;
305
306 # a few sanity checks
307 if (-e $dest) {
308 # destination file already exists
309 return;
310 }
311 elsif (!-e $src) {
312 print STDERR "util::hard_link source file $src does not exist\n";
313 return 1;
314 }
315 elsif (-d $src) {
316 print STDERR "util::hard_link source $src is a directory\n";
317 return 1;
318 }
319
320 my $dest_dir = &File::Basename::dirname($dest);
321 mk_all_dir($dest_dir) if (!-e $dest_dir);
322
323 # link not supported on windows 9x
324 if (($ENV{'GSDLOS'} =~ /^windows$/i) && (Win32::FsType() !~ /^ntfs$/i)) {
325 &File::Copy::copy ($src, $dest);
326
327 } elsif (!link($src, $dest)) {
328 print STDERR "util::hard_link: unable to create hard link. ";
329 print STDERR " Attempting to copy file: $src -> $dest\n";
330 &File::Copy::copy ($src, $dest);
331 }
332 return 0;
333}
334
335# make soft link to file if supported by OS, otherwise copy file
336sub soft_link {
337 my ($src, $dest) = @_;
338
339 # remove trailing slashes from source and destination files
340 $src =~ s/[\\\/]+$//;
341 $dest =~ s/[\\\/]+$//;
342
343 # a few sanity checks
344 if (!-e $src) {
345 print STDERR "util::soft_link source file $src does not exist\n";
346 return 0;
347 }
348
349 my $dest_dir = &File::Basename::dirname($dest);
350 mk_all_dir($dest_dir) if (!-e $dest_dir);
351
352 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
353 # symlink not supported on windows
354 &File::Copy::copy ($src, $dest);
355
356 } elsif (!eval {symlink($src, $dest)}) {
357 print STDERR "util::soft_link: unable to create soft link.\n";
358 return 0;
359 }
360
361 return 1;
362}
363
364
365
366
367# updates a copy of a directory in some other part of the filesystem
368# verbosity settings are: 0=low, 1=normal, 2=high
369# both $fromdir and $todir should be absolute paths
370sub cachedir {
371 my ($fromdir, $todir, $verbosity) = @_;
372 $verbosity = 1 unless defined $verbosity;
373
374 # use / for the directory separator, remove duplicate and
375 # trailing slashes
376 $fromdir=~s/[\\\/]+/\//g;
377 $fromdir=~s/[\\\/]+$//;
378 $todir=~s/[\\\/]+/\//g;
379 $todir=~s/[\\\/]+$//;
380
381 &mk_all_dir ($todir);
382
383 # get the directories in ascending order
384 if (!opendir (FROMDIR, $fromdir)) {
385 print STDERR "util::cachedir could not read directory $fromdir\n";
386 return;
387 }
388 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
389 closedir (FROMDIR);
390
391 if (!opendir (TODIR, $todir)) {
392 print STDERR "util::cacedir could not read directory $todir\n";
393 return;
394 }
395 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
396 closedir (TODIR);
397
398 my $fromi = 0;
399 my $toi = 0;
400
401 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
402# print "fromi: $fromi toi: $toi\n";
403
404 # see if we should delete a file/directory
405 # this should happen if the file/directory
406 # is not in the from list or if its a different
407 # size, or has an older timestamp
408 if ($toi < scalar(@todir)) {
409 if (($fromi >= scalar(@fromdir)) ||
410 ($todir[$toi] lt $fromdir[$fromi] ||
411 ($todir[$toi] eq $fromdir[$fromi] &&
412 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
413 $verbosity)))) {
414
415 # the files are different
416 &rm_r("$todir/$todir[$toi]");
417 splice(@todir, $toi, 1); # $toi stays the same
418
419 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
420 # the files are the same
421 # if it is a directory, check its contents
422 if (-d "$todir/$todir[$toi]") {
423 &cachedir ("$fromdir/$fromdir[$fromi]",
424 "$todir/$todir[$toi]", $verbosity);
425 }
426
427 $toi++;
428 $fromi++;
429 next;
430 }
431 }
432
433 # see if we should insert a file/directory
434 # we should insert a file/directory if there
435 # is no tofiles left or if the tofile does not exist
436 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
437 $todir[$toi] gt $fromdir[$fromi])) {
438 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
439 splice (@todir, $toi, 0, $fromdir[$fromi]);
440
441 $toi++;
442 $fromi++;
443 }
444 }
445}
446
447# this function returns -1 if either file is not found
448# assumes that $file1 and $file2 are absolute file names or
449# in the current directory
450# $file2 is allowed to be newer than $file1
451sub differentfiles {
452 my ($file1, $file2, $verbosity) = @_;
453 $verbosity = 1 unless defined $verbosity;
454
455 $file1 =~ s/\/+$//;
456 $file2 =~ s/\/+$//;
457
458 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
459 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
460
461 return -1 unless (-e $file1 && -e $file2);
462 if ($file1name ne $file2name) {
463 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
464 return 1;
465 }
466
467 my @file1stat = stat ($file1);
468 my @file2stat = stat ($file2);
469
470 if (-d $file1) {
471 if (! -d $file2) {
472 print STDERR "one file is a directory\n" if ($verbosity >= 2);
473 return 1;
474 }
475 return 0;
476 }
477
478 # both must be regular files
479 unless (-f $file1 && -f $file2) {
480 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
481 return 1;
482 }
483
484 # the size of the files must be the same
485 if ($file1stat[7] != $file2stat[7]) {
486 print STDERR "different sized files\n" if ($verbosity >= 2);
487 return 1;
488 }
489
490 # the second file cannot be older than the first
491 if ($file1stat[9] > $file2stat[9]) {
492 print STDERR "file is older\n" if ($verbosity >= 2);
493 return 1;
494 }
495
496 return 0;
497}
498
499
500sub get_tmp_filename {
501 my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
502 &mk_all_dir ($tmpdir) unless -e $tmpdir;
503
504 my $count = 1000;
505 my $rand = int(rand $count);
506 while (-e &filename_cat($tmpdir, "F$rand")) {
507 $rand = int(rand $count);
508 $count++;
509 }
510
511 return filename_cat($tmpdir, "F$rand");
512}
513
514
515sub filename_cat {
516 my $first_file = shift(@_);
517 my (@filenames) = @_;
518
519 # Check for empty first filename
520 if ($first_file =~ /\S/) {
521 unshift(@filenames, $first_file);
522 }
523
524 my $filename = join("/", @filenames);
525
526 # remove duplicate slashes and remove the last slash
527 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
528 $filename =~ s/[\\\/]+/\\/g;
529 } else {
530 $filename =~ s/[\/]+/\//g;
531 # DB: want a filename abc\de.html to remain like this
532 }
533 $filename =~ s/[\\\/]$//;
534
535 return $filename;
536}
537
538
539sub envvar_prepend {
540 my ($var,$val) = @_;
541
542 my $current_val = $ENV{$var};
543
544 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
545 $ENV{$var} .= "$val;$current_val";
546 }
547 else {
548 $ENV{$var} .= "$val:$current_val";
549 }
550}
551
552sub envvar_append {
553 my ($var,$val) = @_;
554
555 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
556 $ENV{$var} .= ";$val";
557 }
558 else {
559 $ENV{$var} .= ":$val";
560 }
561}
562
563
564# returns the path of a file without the filename -- ie. the directory the file is in
565sub filename_head {
566 my $filename = shift(@_);
567
568 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
569 $filename =~ s/[^\\\\]*$//;
570 }
571 else {
572 $filename =~ s/[^\\\/]*$//;
573 }
574
575 return $filename;
576}
577
578
579# returns 1 if filename1 and filename2 point to the same
580# file or directory
581sub filenames_equal {
582 my ($filename1, $filename2) = @_;
583
584 # use filename_cat to clean up trailing slashes and
585 # multiple slashes
586 $filename1 = filename_cat ($filename1);
587 $filename2 = filename_cat ($filename2);
588
589 # filenames not case sensitive on windows
590 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
591 $filename1 =~ tr/[A-Z]/[a-z]/;
592 $filename2 =~ tr/[A-Z]/[a-z]/;
593 }
594 return 1 if $filename1 eq $filename2;
595 return 0;
596}
597
598sub filename_within_collection
599{
600 my ($filename) = @_;
601
602 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
603
604 if (defined $collect_dir) {
605 my $dirsep = &util::get_dirsep();
606 if ($collect_dir !~ m/$dirsep$/) {
607 $collect_dir .= $dirsep;
608 }
609
610 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
611
612 if ($filename =~ /^$collect_dir(.*)$/) {
613 $filename = $1;
614 }
615 }
616
617 return $filename;
618}
619
620
621sub get_dirsep {
622
623 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
624 return "\\";
625 } else {
626 return "\/";
627 }
628}
629
630sub get_os_dirsep {
631
632 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
633 return "\\\\";
634 } else {
635 return "\\\/";
636 }
637}
638
639sub get_re_dirsep {
640
641 return "\\\\|\\\/";
642}
643
644
645# if this is running on windows we want binaries to end in
646# .exe, otherwise they don't have to end in any extension
647sub get_os_exe {
648 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
649 return "";
650}
651
652
653# test to see whether this is a big or little endian machine
654sub is_little_endian {
655 return (ord(substr(pack("s",1), 0, 1)) == 1);
656}
657
658
659# will return the collection name if successful, "" otherwise
660sub use_collection {
661 my ($collection, $collectdir) = @_;
662
663 if (!defined $collectdir || $collectdir eq "") {
664 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
665 }
666
667 # get and check the collection
668 if (!defined($collection) || $collection eq "") {
669 if (defined $ENV{'GSDLCOLLECTION'}) {
670 $collection = $ENV{'GSDLCOLLECTION'};
671 } else {
672 print STDOUT "No collection specified\n";
673 return "";
674 }
675 }
676
677 if ($collection eq "modelcol") {
678 print STDOUT "You can't use modelcol.\n";
679 return "";
680 }
681
682 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
683 # are defined
684 $ENV{'GSDLCOLLECTION'} = $collection unless defined $ENV{'GSDLCOLLECTION'};
685 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
686
687 # make sure this collection exists
688 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
689 print STDOUT "Invalid collection ($collection).\n";
690 return "";
691 }
692
693 # everything is ready to go
694 return $collection;
695}
696
697sub hyperlink_text
698{
699 my ($text) = @_;
700
701 $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
702 $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
703
704 return $text;
705}
706
707
7081;
Note: See TracBrowser for help on using the repository browser.