source: trunk/gsdl/perllib/util.pm@ 2493

Last change on this file since 2493 was 2359, checked in by sjboddie, 23 years ago

Altered the help text a little for mkcol.pl, import.pl, buildcol.pl, and
build so that they now suggest using the "more" pager if the help text
scrolls off the screen (brought about by usability studies under DOS).
Note that this means some debug info that was once printed to STDERR is
now being printed to STDOUT.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.7 KB
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use File::Copy;
29use File::Basename;
30
31
32# removes files (but not directories)
33sub rm {
34 my (@files) = @_;
35 my @filefiles = ();
36
37 # make sure the files we want to delete exist
38 # and are regular files
39 foreach $file (@files) {
40 if (!-e $file) {
41 print STDERR "util::rm $file does not exist\n";
42 } elsif ((!-f $file) && (!-l $file)) {
43 print STDERR "util::rm $file is not a regular (or symbolic) file\n";
44 } else {
45 push (@filefiles, $file);
46 }
47 }
48
49 # remove the files
50 my $numremoved = unlink @filefiles;
51
52 # check to make sure all of them were removed
53 if ($numremoved != scalar(@filefiles)) {
54 print STDERR "util::rm Not all files were removed\n";
55 }
56}
57
58
59# recursive removal
60sub rm_r {
61 my (@files) = @_;
62
63 # recursively remove the files
64 foreach $file (@files) {
65 $file =~ s/[\/\\]+$//; # remove trailing slashes
66
67 if (!-e $file) {
68 print STDERR "util::rm_r $file does not exist\n";
69
70 } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
71 # get the contents of this directory
72 if (!opendir (INDIR, $file)) {
73 print STDERR "util::rm_r could not open directory $file\n";
74 } else {
75 my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
76 closedir (INDIR);
77
78 # remove all the files in this directory
79 &rm_r (map {$_="$file/$_";} @filedir);
80
81 # remove this directory
82 if (!rmdir $file) {
83 print STDERR "util::rm_r couldn't remove directory $file\n";
84 }
85 }
86
87 } else {
88 # remove this file
89 &rm ($file);
90 }
91 }
92}
93
94# moves a file or a group of files
95sub mv {
96 my $dest = pop (@_);
97 my (@srcfiles) = @_;
98
99 # remove trailing slashes from source and destination files
100 $dest =~ s/[\\\/]+$//;
101 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
102
103 # a few sanity checks
104 if (scalar (@srcfiles) == 0) {
105 print STDERR "util::mv no destination directory given\n";
106 return;
107 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
108 print STDERR "util::mv if multiple source files are given the ".
109 "destination must be a directory\n";
110 return;
111 }
112
113 # move the files
114 foreach $file (@srcfiles) {
115 my $tempdest = $dest;
116 if (-d $tempdest) {
117 my ($filename) = $file =~ /([^\\\/]+)$/;
118 $tempdest .= "/$filename";
119 }
120 if (!-e $file) {
121 print STDERR "util::mv $file does not exist\n";
122 } else {
123 rename ($file, $tempdest);
124 }
125 }
126}
127
128
129# copies a file or a group of files
130sub cp {
131 my $dest = pop (@_);
132 my (@srcfiles) = @_;
133
134 # remove trailing slashes from source and destination files
135 $dest =~ s/[\\\/]+$//;
136 map {$_ =~ s/[\\\/]+$//;} @srcfiles;
137
138 # a few sanity checks
139 if (scalar (@srcfiles) == 0) {
140 print STDERR "util::cp no destination directory given\n";
141 return;
142 } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
143 print STDERR "util::cp if multiple source files are given the ".
144 "destination must be a directory\n";
145 return;
146 }
147
148 # copy the files
149 foreach $file (@srcfiles) {
150 my $tempdest = $dest;
151 if (-d $tempdest) {
152 my ($filename) = $file =~ /([^\\\/]+)$/;
153 $tempdest .= "/$filename";
154 }
155 if (!-e $file) {
156 print STDERR "util::cp $file does not exist\n";
157 } elsif (!-f $file) {
158 print STDERR "util::cp $file is not a plain file\n";
159 } else {
160 &File::Copy::copy ($file, $tempdest);
161 }
162 }
163}
164
165
166
167# recursively copies a file or group of files
168# syntax: cp_r (sourcefiles, destination directory)
169# destination must be a directory - to copy one file to
170# another use cp instead
171sub cp_r {
172 my $dest = pop (@_);
173 my (@srcfiles) = @_;
174
175 # a few sanity checks
176 if (scalar (@srcfiles) == 0) {
177 print STDERR "util::cp_r no destination directory given\n";
178 return;
179 } elsif (-f $dest) {
180 print STDERR "util::cp_r destination must be a directory\n";
181 return;
182 }
183
184 # create destination directory if it doesn't exist already
185 if (! -d $dest) {
186 my $store_umask = umask(0002);
187 mkdir ($dest, 0777);
188 umask($store_umask);
189 }
190
191 # copy the files
192 foreach $file (@srcfiles) {
193
194 if (!-e $file) {
195 print STDERR "util::cp_r $file does not exist\n";
196
197 } elsif (-d $file) {
198 # make the new directory
199 my ($filename) = $file =~ /([^\\\/]*)$/;
200 $dest = &util::filename_cat ($dest, $filename);
201 my $store_umask = umask(0002);
202 mkdir ($dest, 0777);
203 umask($store_umask);
204
205 # get the contents of this directory
206 if (!opendir (INDIR, $file)) {
207 print STDERR "util::cp_r could not open directory $file\n";
208 } else {
209 my @filedir = readdir (INDIR);
210 closedir (INDIR);
211 foreach $f (@filedir) {
212 next if $f =~ /^\.\.?$/;
213 # copy all the files in this directory
214 my $ff = &util::filename_cat ($file, $f);
215 &cp_r ($ff, $dest);
216 }
217 }
218
219 } else {
220 &cp($file, $dest);
221 }
222 }
223}
224
225
226sub mk_dir {
227 my ($dir) = @_;
228
229 my $store_umask = umask(0002);
230 my $mkdir_ok = mkdir ($dir, 0777);
231 umask($store_umask);
232
233 if (!$mkdir_ok)
234 {
235 print STDERR "util::mk_dir could not create directory $dir\n";
236 return;
237 }
238}
239
240# in case anyone cares - I did some testing (using perls Benchmark module)
241# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
242# slightly faster (surprisingly) - Stefan.
243sub mk_all_dir {
244 my ($dir) = @_;
245
246 # use / for the directory separator, remove duplicate and
247 # trailing slashes
248 $dir=~s/[\\\/]+/\//g;
249 $dir=~s/[\\\/]+$//;
250
251 # make sure the cache directory exists
252 my $dirsofar = "";
253 my $first = 1;
254 foreach $dirname (split ("/", $dir)) {
255 $dirsofar .= "/" unless $first;
256 $first = 0;
257
258 $dirsofar .= $dirname;
259
260 next if $dirname =~ /^(|[a-z]:)$/i;
261 if (!-e $dirsofar)
262 {
263 my $store_umask = umask(0002);
264 my $mkdir_ok = mkdir ($dirsofar, 0777);
265 umask($store_umask);
266 if (!$mkdir_ok)
267 {
268 print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
269 return;
270 }
271 }
272 }
273}
274
275# make hard link to file if supported by OS, otherwise copy the file
276sub hard_link {
277 my ($src, $dest) = @_;
278
279 # remove trailing slashes from source and destination files
280 $src =~ s/[\\\/]+$//;
281 $dest =~ s/[\\\/]+$//;
282
283 # a few sanity checks
284 if (-e $dest) {
285 # destination file already exists
286 return;
287 }
288 elsif (!-e $src) {
289 print STDERR "util::hard_link source file $src does not exist\n";
290 return;
291 }
292 elsif (-d $src) {
293 print STDERR "util::hard_link source $src is a directory\n";
294 return;
295 }
296
297 my $dest_dir = &File::Basename::dirname($dest);
298 mk_all_dir($dest_dir) if (!-e $dest_dir);
299
300 # link not supported on windows
301 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
302 &File::Copy::copy ($src, $dest);
303
304 } elsif (!link($src, $dest)) {
305 print STDERR "util::hard_link: unable to create hard link. ";
306 print STDERR " Attempting to copy file: $src -> $dest\n";
307 &File::Copy::copy ($src, $dest);
308 }
309}
310
311# make soft link to file if supported by OS, otherwise copy file
312sub soft_link {
313 my ($src, $dest) = @_;
314
315 # remove trailing slashes from source and destination files
316 $src =~ s/[\\\/]+$//;
317 $dest =~ s/[\\\/]+$//;
318
319 # a few sanity checks
320 if (!-e $src) {
321 print STDERR "util::soft_link source file $src does not exist\n";
322 return 0;
323 }
324
325 my $dest_dir = &File::Basename::dirname($dest);
326 mk_all_dir($dest_dir) if (!-e $dest_dir);
327
328 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
329 # symlink not supported on windows
330 &File::Copy::copy ($src, $dest);
331
332 } elsif (!eval {symlink($src, $dest)}) {
333 print STDERR "util::soft_link: unable to create soft link.";
334 return 0;
335 }
336
337 return 1;
338}
339
340
341
342
343# updates a copy of a directory in some other part of the filesystem
344# verbosity settings are: 0=low, 1=normal, 2=high
345# both $fromdir and $todir should be absolute paths
346sub cachedir {
347 my ($fromdir, $todir, $verbosity) = @_;
348 $verbosity = 1 unless defined $verbosity;
349
350 # use / for the directory separator, remove duplicate and
351 # trailing slashes
352 $fromdir=~s/[\\\/]+/\//g;
353 $fromdir=~s/[\\\/]+$//;
354 $todir=~s/[\\\/]+/\//g;
355 $todir=~s/[\\\/]+$//;
356
357 &mk_all_dir ($todir);
358
359 # get the directories in ascending order
360 if (!opendir (FROMDIR, $fromdir)) {
361 print STDERR "util::cachedir could not read directory $fromdir\n";
362 return;
363 }
364 my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
365 closedir (FROMDIR);
366
367 if (!opendir (TODIR, $todir)) {
368 print STDERR "util::cacedir could not read directory $todir\n";
369 return;
370 }
371 my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
372 closedir (TODIR);
373
374 my $fromi = 0;
375 my $toi = 0;
376
377 while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
378# print "fromi: $fromi toi: $toi\n";
379
380 # see if we should delete a file/directory
381 # this should happen if the file/directory
382 # is not in the from list or if its a different
383 # size, or has an older timestamp
384 if ($toi < scalar(@todir)) {
385 if (($fromi >= scalar(@fromdir)) ||
386 ($todir[$toi] lt $fromdir[$fromi] ||
387 ($todir[$toi] eq $fromdir[$fromi] &&
388 &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
389 $verbosity)))) {
390
391 # the files are different
392 &rm_r("$todir/$todir[$toi]");
393 splice(@todir, $toi, 1); # $toi stays the same
394
395 } elsif ($todir[$toi] eq $fromdir[$fromi]) {
396 # the files are the same
397 # if it is a directory, check its contents
398 if (-d "$todir/$todir[$toi]") {
399 &cachedir ("$fromdir/$fromdir[$fromi]",
400 "$todir/$todir[$toi]", $verbosity);
401 }
402
403 $toi++;
404 $fromi++;
405 next;
406 }
407 }
408
409 # see if we should insert a file/directory
410 # we should insert a file/directory if there
411 # is no tofiles left or if the tofile does not exist
412 if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
413 $todir[$toi] gt $fromdir[$fromi])) {
414 &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
415 splice (@todir, $toi, 0, $fromdir[$fromi]);
416
417 $toi++;
418 $fromi++;
419 }
420 }
421}
422
423# this function returns -1 if either file is not found
424# assumes that $file1 and $file2 are absolute file names or
425# in the current directory
426# $file2 is allowed to be newer than $file1
427sub differentfiles {
428 my ($file1, $file2, $verbosity) = @_;
429 $verbosity = 1 unless defined $verbosity;
430
431 $file1 =~ s/\/+$//;
432 $file2 =~ s/\/+$//;
433
434 my ($file1name) = $file1 =~ /\/([^\/]*)$/;
435 my ($file2name) = $file2 =~ /\/([^\/]*)$/;
436
437 return -1 unless (-e $file1 && -e $file2);
438 if ($file1name ne $file2name) {
439 print STDERR "filenames are not the same\n" if ($verbosity >= 2);
440 return 1;
441 }
442
443 @file1stat = stat ($file1);
444 @file2stat = stat ($file2);
445
446 if (-d $file1) {
447 if (! -d $file2) {
448 print STDERR "one file is a directory\n" if ($verbosity >= 2);
449 return 1;
450 }
451 return 0;
452 }
453
454 # both must be regular files
455 unless (-f $file1 && -f $file2) {
456 print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
457 return 1;
458 }
459
460 # the size of the files must be the same
461 if ($file1stat[7] != $file2stat[7]) {
462 print STDERR "different sized files\n" if ($verbosity >= 2);
463 return 1;
464 }
465
466 # the second file cannot be older than the first
467 if ($file1stat[9] > $file2stat[9]) {
468 print STDERR "file is older\n" if ($verbosity >= 2);
469 return 1;
470 }
471
472 return 0;
473}
474
475
476sub get_tmp_filename {
477 my $tmpdir = "$ENV{'GSDLHOME'}/tmp";
478 &mk_all_dir ($tmpdir) unless -e $tmpdir;
479
480 my $count = 1000;
481 my $rand = int(rand $count);
482 while (-e "$tmpdir/F$rand") {
483 $rand = int(rand $count);
484 $count++;
485 }
486
487 return "$tmpdir/F$rand";
488}
489
490
491sub filename_cat {
492 my (@filenames) = @_;
493 my $filename = join("/", @filenames);
494
495 # remove duplicate slashes and remove the last slash
496 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
497 $filename =~ s/[\\\/]+/\\/g;
498 } else {
499 $filename =~ s/[\/]+/\//g;
500 # DB: want a filename abc\de.html to remain like this
501 }
502 $filename =~ s/[\\\/]$//;
503
504 return $filename;
505}
506
507# returns 1 if filename1 and filename2 point to the same
508# file or directory
509sub filenames_equal {
510 my ($filename1, $filename2) = @_;
511
512 # use filename_cat to clean up trailing slashes and
513 # multiple slashes
514 $filename1 = filename_cat ($filename1);
515 $filename1 = filename_cat ($filename1);
516
517 # filenames not case sensitive on windows
518 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
519 $filename1 =~ tr/[A-Z]/[a-z]/;
520 $filename2 =~ tr/[A-Z]/[a-z]/;
521 }
522 return 1 if $filename1 eq $filename2;
523 return 0;
524}
525
526sub get_os_dirsep {
527
528 if ($ENV{'GSDLOS'} =~ /^windows$/i) {
529 return "\\\\";
530 } else {
531 return "\\\/";
532 }
533}
534
535sub get_re_dirsep {
536
537 return "\\\\|\\\/";
538}
539
540
541# if this is running on windows we want binaries to end in
542# .exe, otherwise they don't have to end in any extension
543sub get_os_exe {
544 return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
545 return "";
546}
547
548
549# test to see whether this is a big or little endian machine
550sub is_little_endian {
551 return (ord(substr(pack("s",1), 0, 1)) == 1);
552}
553
554
555# will return the collection name if successful, "" otherwise
556sub use_collection {
557 my ($collection, $collectdir) = @_;
558
559 if (!defined $collectdir || $collectdir eq "") {
560 $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
561 }
562
563 # get and check the collection
564 if (!defined($collection) || $collection eq "") {
565 if (defined $ENV{'GSDLCOLLECTION'}) {
566 $collection = $ENV{'GSDLCOLLECTION'};
567 } else {
568 print STDOUT "No collection specified\n";
569 return "";
570 }
571 }
572
573 if ($collection eq "modelcol") {
574 print STDOUT "You can't use modelcol.\n";
575 return "";
576 }
577
578 # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
579 # are defined
580 $ENV{'GSDLCOLLECTION'} = $collection unless defined $ENV{'GSDLCOLLECTION'};
581 $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
582
583 # make sure this collection exists
584 if (!-e $ENV{'GSDLCOLLECTDIR'}) {
585 print STDOUT "Invalid collection ($collection).\n";
586 return "";
587 }
588
589 # everything is ready to go
590 return $collection;
591}
592
5931;
Note: See TracBrowser for help on using the repository browser.