########################################################################### # # util.pm -- various useful utilities # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### package util; use File::Copy; use File::Basename; use strict; # removes files (but not directories) sub rm { my (@files) = @_; my @filefiles = (); # make sure the files we want to delete exist # and are regular files foreach my $file (@files) { if (!-e $file) { print STDERR "util::rm $file does not exist\n"; } elsif ((!-f $file) && (!-l $file)) { print STDERR "util::rm $file is not a regular (or symbolic) file\n"; } else { push (@filefiles, $file); } } # remove the files my $numremoved = unlink @filefiles; # check to make sure all of them were removed if ($numremoved != scalar(@filefiles)) { print STDERR "util::rm Not all files were removed\n"; } } # recursive removal sub filtered_rm_r { my ($files,$file_accept_re,$file_reject_re) = @_; my @files_array = (ref $files eq "ARRAY") ? @$files : ($files); # recursively remove the files foreach my $file (@files_array) { $file =~ s/[\/\\]+$//; # remove trailing slashes if (!-e $file) { print STDERR "util::filtered_rm_r $file does not exist\n"; } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link # get the contents of this directory if (!opendir (INDIR, $file)) { print STDERR "util::filtered_rm_r could not open directory $file\n"; } else { my @filedir = grep (!/^\.\.?$/, readdir (INDIR)); closedir (INDIR); # remove all the files in this directory map {$_="$file/$_";} @filedir; &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re); if (!defined $file_accept_re && !defined $file_reject_re) { # remove this directory if (!rmdir $file) { print STDERR "util::filtered_rm_r couldn't remove directory $file\n"; } } } } else { next if (defined $file_reject_re && ($file =~ m/$file_reject_re/)); if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) { # remove this file &rm ($file); } } } } # recursive removal sub rm_r { my (@files) = @_; # use the more general (but reterospectively written function # filtered_rm_r function() filtered_rm_r(\@files,undef,undef); # no accept or reject expressions } # moves a file or a group of files sub mv { my $dest = pop (@_); my (@srcfiles) = @_; # remove trailing slashes from source and destination files $dest =~ s/[\\\/]+$//; map {$_ =~ s/[\\\/]+$//;} @srcfiles; # a few sanity checks if (scalar (@srcfiles) == 0) { print STDERR "util::mv no destination directory given\n"; return; } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) { print STDERR "util::mv if multiple source files are given the ". "destination must be a directory\n"; return; } # move the files foreach my $file (@srcfiles) { my $tempdest = $dest; if (-d $tempdest) { my ($filename) = $file =~ /([^\\\/]+)$/; $tempdest .= "/$filename"; } if (!-e $file) { print STDERR "util::mv $file does not exist\n"; } else { rename ($file, $tempdest); } } } # copies a file or a group of files sub cp { my $dest = pop (@_); my (@srcfiles) = @_; # remove trailing slashes from source and destination files $dest =~ s/[\\\/]+$//; map {$_ =~ s/[\\\/]+$//;} @srcfiles; # a few sanity checks if (scalar (@srcfiles) == 0) { print STDERR "util::cp no destination directory given\n"; return; } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) { print STDERR "util::cp if multiple source files are given the ". "destination must be a directory\n"; return; } # copy the files foreach my $file (@srcfiles) { my $tempdest = $dest; if (-d $tempdest) { my ($filename) = $file =~ /([^\\\/]+)$/; $tempdest .= "/$filename"; } if (!-e $file) { print STDERR "util::cp $file does not exist\n"; } elsif (!-f $file) { print STDERR "util::cp $file is not a plain file\n"; } else { &File::Copy::copy ($file, $tempdest); } } } # recursively copies a file or group of files # syntax: cp_r (sourcefiles, destination directory) # destination must be a directory - to copy one file to # another use cp instead sub cp_r { my $dest = pop (@_); my (@srcfiles) = @_; # a few sanity checks if (scalar (@srcfiles) == 0) { print STDERR "util::cp_r no destination directory given\n"; return; } elsif (-f $dest) { print STDERR "util::cp_r destination must be a directory\n"; return; } # create destination directory if it doesn't exist already if (! -d $dest) { my $store_umask = umask(0002); mkdir ($dest, 0777); umask($store_umask); } # copy the files foreach my $file (@srcfiles) { if (!-e $file) { print STDERR "util::cp_r $file does not exist\n"; } elsif (-d $file) { # make the new directory my ($filename) = $file =~ /([^\\\/]*)$/; $dest = &util::filename_cat ($dest, $filename); my $store_umask = umask(0002); mkdir ($dest, 0777); umask($store_umask); # get the contents of this directory if (!opendir (INDIR, $file)) { print STDERR "util::cp_r could not open directory $file\n"; } else { my @filedir = readdir (INDIR); closedir (INDIR); foreach my $f (@filedir) { next if $f =~ /^\.\.?$/; # copy all the files in this directory my $ff = &util::filename_cat ($file, $f); &cp_r ($ff, $dest); } } } else { &cp($file, $dest); } } } # copies a directory and its contents, excluding subdirectories, into a new directory sub cp_r_toplevel { my $dest = pop (@_); my (@srcfiles) = @_; # a few sanity checks if (scalar (@srcfiles) == 0) { print STDERR "util::cp_r no destination directory given\n"; return; } elsif (-f $dest) { print STDERR "util::cp_r destination must be a directory\n"; return; } # create destination directory if it doesn't exist already if (! -d $dest) { my $store_umask = umask(0002); mkdir ($dest, 0777); umask($store_umask); } # copy the files foreach my $file (@srcfiles) { if (!-e $file) { print STDERR "util::cp_r $file does not exist\n"; } elsif (-d $file) { # make the new directory my ($filename) = $file =~ /([^\\\/]*)$/; $dest = &util::filename_cat ($dest, $filename); my $store_umask = umask(0002); mkdir ($dest, 0777); umask($store_umask); # get the contents of this directory if (!opendir (INDIR, $file)) { print STDERR "util::cp_r could not open directory $file\n"; } else { my @filedir = readdir (INDIR); closedir (INDIR); foreach my $f (@filedir) { next if $f =~ /^\.\.?$/; # copy all the files in this directory, but not directories my $ff = &util::filename_cat ($file, $f); if (-f $ff) { &cp($ff, $dest); #&cp_r ($ff, $dest); } } } } else { &cp($file, $dest); } } } sub mk_dir { my ($dir) = @_; my $store_umask = umask(0002); my $mkdir_ok = mkdir ($dir, 0777); umask($store_umask); if (!$mkdir_ok) { print STDERR "util::mk_dir could not create directory $dir\n"; return; } } # in case anyone cares - I did some testing (using perls Benchmark module) # on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently # slightly faster (surprisingly) - Stefan. sub mk_all_dir { my ($dir) = @_; # use / for the directory separator, remove duplicate and # trailing slashes $dir=~s/[\\\/]+/\//g; $dir=~s/[\\\/]+$//; # make sure the cache directory exists my $dirsofar = ""; my $first = 1; foreach my $dirname (split ("/", $dir)) { $dirsofar .= "/" unless $first; $first = 0; $dirsofar .= $dirname; next if $dirname =~ /^(|[a-z]:)$/i; if (!-e $dirsofar) { my $store_umask = umask(0002); my $mkdir_ok = mkdir ($dirsofar, 0777); umask($store_umask); if (!$mkdir_ok) { print STDERR "util::mk_all_dir could not create directory $dirsofar\n"; return; } } } } # make hard link to file if supported by OS, otherwise copy the file sub hard_link { my ($src, $dest) = @_; # remove trailing slashes from source and destination files $src =~ s/[\\\/]+$//; $dest =~ s/[\\\/]+$//; # a few sanity checks if (-e $dest) { # destination file already exists return; } elsif (!-e $src) { print STDERR "util::hard_link source file $src does not exist\n"; return 1; } elsif (-d $src) { print STDERR "util::hard_link source $src is a directory\n"; return 1; } my $dest_dir = &File::Basename::dirname($dest); mk_all_dir($dest_dir) if (!-e $dest_dir); # link not supported on windows 9x if (($ENV{'GSDLOS'} =~ /^windows$/i) && (Win32::FsType() !~ /^ntfs$/i)) { &File::Copy::copy ($src, $dest); } elsif (!link($src, $dest)) { print STDERR "util::hard_link: unable to create hard link. "; print STDERR " Attempting to copy file: $src -> $dest\n"; &File::Copy::copy ($src, $dest); } return 0; } # make soft link to file if supported by OS, otherwise copy file sub soft_link { my ($src, $dest) = @_; # remove trailing slashes from source and destination files $src =~ s/[\\\/]+$//; $dest =~ s/[\\\/]+$//; # a few sanity checks if (!-e $src) { print STDERR "util::soft_link source file $src does not exist\n"; return 0; } my $dest_dir = &File::Basename::dirname($dest); mk_all_dir($dest_dir) if (!-e $dest_dir); if ($ENV{'GSDLOS'} =~ /^windows$/i) { # symlink not supported on windows &File::Copy::copy ($src, $dest); } elsif (!eval {symlink($src, $dest)}) { print STDERR "util::soft_link: unable to create soft link.\n"; return 0; } return 1; } # updates a copy of a directory in some other part of the filesystem # verbosity settings are: 0=low, 1=normal, 2=high # both $fromdir and $todir should be absolute paths sub cachedir { my ($fromdir, $todir, $verbosity) = @_; $verbosity = 1 unless defined $verbosity; # use / for the directory separator, remove duplicate and # trailing slashes $fromdir=~s/[\\\/]+/\//g; $fromdir=~s/[\\\/]+$//; $todir=~s/[\\\/]+/\//g; $todir=~s/[\\\/]+$//; &mk_all_dir ($todir); # get the directories in ascending order if (!opendir (FROMDIR, $fromdir)) { print STDERR "util::cachedir could not read directory $fromdir\n"; return; } my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR))); closedir (FROMDIR); if (!opendir (TODIR, $todir)) { print STDERR "util::cacedir could not read directory $todir\n"; return; } my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR))); closedir (TODIR); my $fromi = 0; my $toi = 0; while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) { # print "fromi: $fromi toi: $toi\n"; # see if we should delete a file/directory # this should happen if the file/directory # is not in the from list or if its a different # size, or has an older timestamp if ($toi < scalar(@todir)) { if (($fromi >= scalar(@fromdir)) || ($todir[$toi] lt $fromdir[$fromi] || ($todir[$toi] eq $fromdir[$fromi] && &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]", $verbosity)))) { # the files are different &rm_r("$todir/$todir[$toi]"); splice(@todir, $toi, 1); # $toi stays the same } elsif ($todir[$toi] eq $fromdir[$fromi]) { # the files are the same # if it is a directory, check its contents if (-d "$todir/$todir[$toi]") { &cachedir ("$fromdir/$fromdir[$fromi]", "$todir/$todir[$toi]", $verbosity); } $toi++; $fromi++; next; } } # see if we should insert a file/directory # we should insert a file/directory if there # is no tofiles left or if the tofile does not exist if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) || $todir[$toi] gt $fromdir[$fromi])) { &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]"); splice (@todir, $toi, 0, $fromdir[$fromi]); $toi++; $fromi++; } } } # this function returns -1 if either file is not found # assumes that $file1 and $file2 are absolute file names or # in the current directory # $file2 is allowed to be newer than $file1 sub differentfiles { my ($file1, $file2, $verbosity) = @_; $verbosity = 1 unless defined $verbosity; $file1 =~ s/\/+$//; $file2 =~ s/\/+$//; my ($file1name) = $file1 =~ /\/([^\/]*)$/; my ($file2name) = $file2 =~ /\/([^\/]*)$/; return -1 unless (-e $file1 && -e $file2); if ($file1name ne $file2name) { print STDERR "filenames are not the same\n" if ($verbosity >= 2); return 1; } my @file1stat = stat ($file1); my @file2stat = stat ($file2); if (-d $file1) { if (! -d $file2) { print STDERR "one file is a directory\n" if ($verbosity >= 2); return 1; } return 0; } # both must be regular files unless (-f $file1 && -f $file2) { print STDERR "one file is not a regular file\n" if ($verbosity >= 2); return 1; } # the size of the files must be the same if ($file1stat[7] != $file2stat[7]) { print STDERR "different sized files\n" if ($verbosity >= 2); return 1; } # the second file cannot be older than the first if ($file1stat[9] > $file2stat[9]) { print STDERR "file is older\n" if ($verbosity >= 2); return 1; } return 0; } sub get_tmp_filename { my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp"); &mk_all_dir ($tmpdir) unless -e $tmpdir; my $count = 1000; my $rand = int(rand $count); while (-e &filename_cat($tmpdir, "F$rand")) { $rand = int(rand $count); $count++; } return filename_cat($tmpdir, "F$rand"); } sub filename_cat { my $first_file = shift(@_); my (@filenames) = @_; # Check for empty first filename if ($first_file =~ /\S/) { unshift(@filenames, $first_file); } my $filename = join("/", @filenames); # remove duplicate slashes and remove the last slash if ($ENV{'GSDLOS'} =~ /^windows$/i) { $filename =~ s/[\\\/]+/\\/g; } else { $filename =~ s/[\/]+/\//g; # DB: want a filename abc\de.html to remain like this } $filename =~ s/[\\\/]$//; return $filename; } sub envvar_prepend { my ($var,$val) = @_; my $current_val = $ENV{$var}; if ($ENV{'GSDLOS'} =~ /^windows$/i) { $ENV{$var} .= "$val;$current_val"; } else { $ENV{$var} .= "$val:$current_val"; } } sub envvar_append { my ($var,$val) = @_; if ($ENV{'GSDLOS'} =~ /^windows$/i) { $ENV{$var} .= ";$val"; } else { $ENV{$var} .= ":$val"; } } # returns the path of a file without the filename -- ie. the directory the file is in sub filename_head { my $filename = shift(@_); if ($ENV{'GSDLOS'} =~ /^windows$/i) { $filename =~ s/[^\\\\]*$//; } else { $filename =~ s/[^\\\/]*$//; } return $filename; } # returns 1 if filename1 and filename2 point to the same # file or directory sub filenames_equal { my ($filename1, $filename2) = @_; # use filename_cat to clean up trailing slashes and # multiple slashes $filename1 = filename_cat ($filename1); $filename2 = filename_cat ($filename2); # filenames not case sensitive on windows if ($ENV{'GSDLOS'} =~ /^windows$/i) { $filename1 =~ tr/[A-Z]/[a-z]/; $filename2 =~ tr/[A-Z]/[a-z]/; } return 1 if $filename1 eq $filename2; return 0; } sub filename_within_collection { my ($filename) = @_; my $collect_dir = $ENV{'GSDLCOLLECTDIR'}; if (defined $collect_dir) { my $dirsep = &util::get_dirsep(); if ($collect_dir !~ m/$dirsep$/) { $collect_dir .= $dirsep; } $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator if ($filename =~ /^$collect_dir(.*)$/) { $filename = $1; } } return $filename; } sub get_dirsep { if ($ENV{'GSDLOS'} =~ /^windows$/i) { return "\\"; } else { return "\/"; } } sub get_os_dirsep { if ($ENV{'GSDLOS'} =~ /^windows$/i) { return "\\\\"; } else { return "\\\/"; } } sub get_re_dirsep { return "\\\\|\\\/"; } sub get_dirsep_tail { my ($filename) = @_; # returns last part of directory or filename # On unix e.g. a/b.d => b.d # a/b/c => c my $dirsep = get_re_dirsep(); my @dirs = split (/$dirsep/, $filename); my $tail = pop @dirs; # - caused problems under windows #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/); return $tail; } # if this is running on windows we want binaries to end in # .exe, otherwise they don't have to end in any extension sub get_os_exe { return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i; return ""; } # test to see whether this is a big or little endian machine sub is_little_endian { # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module; # What we do here is, if it is a Macintosh machine (i.e. the Darwin operating system), regardless it is running on the IBM power-pc cpu or it is the x86 Intel-based chip with a power-pc emulator running on top of it, it requires the big-endian data format in the gdbm database file, we make the file extension .bdb; otherwise it's .ldb extension. #return 0 if $^O =~ /^darwin$/i; return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i; return (ord(substr(pack("s",1), 0, 1)) == 1); } # will return the collection name if successful, "" otherwise sub use_collection { my ($collection, $collectdir) = @_; if (!defined $collectdir || $collectdir eq "") { $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect"); } # get and check the collection if (!defined($collection) || $collection eq "") { if (defined $ENV{'GSDLCOLLECTION'}) { $collection = $ENV{'GSDLCOLLECTION'}; } else { print STDOUT "No collection specified\n"; return ""; } } if ($collection eq "modelcol") { print STDOUT "You can't use modelcol.\n"; return ""; } # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR # are defined $ENV{'GSDLCOLLECTION'} = $collection unless defined $ENV{'GSDLCOLLECTION'}; $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection); # make sure this collection exists if (!-e $ENV{'GSDLCOLLECTDIR'}) { print STDOUT "Invalid collection ($collection).\n"; return ""; } # everything is ready to go return $collection; } # will return the collection name if successful, "" otherwise. # Like use_collection (above) but for greenstone 3 (taking account of site level) sub use_site_collection { my ($site, $collection, $collectdir) = @_; if (!defined $collectdir || $collectdir eq "") { die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'}; $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect"); } # collectdir explicitly set by this point (using $site variable if required). # Can call "old" gsdl2 use_collection now. return use_collection($collection,$collectdir); } sub locate_config_file { my ($file) = @_; my $locations = locate_config_files($file); return shift @$locations; # returns undef if 'locations' is empty } sub locate_config_files { my ($file) = @_; my @locations = (); if (-e $file) { # Clearly specified (most likely full filename) # No need to hunt in 'etc' directories, return value unchanged push(@locations,$file); } else { # Check for collection specific one before looking in global GSDL 'etc' my $test_collect_etc_filename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file); if (-e $test_collect_etc_filename) { push(@locations,$test_collect_etc_filename); } my $test_main_etc_filename = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file); if (-e $test_main_etc_filename) { push(@locations,$test_main_etc_filename); } } return \@locations; } sub hyperlink_text { my ($text) = @_; $text =~ s/(http:\/\/[^\s]+)/$1<\/a>/mg; $text =~ s/(^|\s+)(www\.(\w|\.)+)/$2<\/a>/mg; return $text; } 1;