###########################################################################
#
# util.pm -- various useful utilities
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
package util;
use File::Copy;
use File::Basename;
# removes files (but not directories)
sub rm {
my (@files) = @_;
my @filefiles = ();
# make sure the files we want to delete exist
# and are regular files
foreach my $file (@files) {
if (!-e $file) {
print STDERR "util::rm $file does not exist\n";
} elsif ((!-f $file) && (!-l $file)) {
print STDERR "util::rm $file is not a regular (or symbolic) file\n";
} else {
push (@filefiles, $file);
}
}
# remove the files
my $numremoved = unlink @filefiles;
# check to make sure all of them were removed
if ($numremoved != scalar(@filefiles)) {
print STDERR "util::rm Not all files were removed\n";
}
}
# recursive removal
sub filtered_rm_r {
my ($files,$file_accept_re,$file_reject_re) = @_;
my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
# recursively remove the files
foreach my $file (@files_array) {
$file =~ s/[\/\\]+$//; # remove trailing slashes
if (!-e $file) {
print STDERR "util::filtered_rm_r $file does not exist\n";
} elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
# get the contents of this directory
if (!opendir (INDIR, $file)) {
print STDERR "util::filtered_rm_r could not open directory $file\n";
} else {
my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
closedir (INDIR);
# remove all the files in this directory
map {$_="$file/$_";} @filedir;
&filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
if (!defined $file_accept_re && !defined $file_reject_re) {
# remove this directory
if (!rmdir $file) {
print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
}
}
}
} else {
next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
# remove this file
&rm ($file);
}
}
}
}
# recursive removal
sub rm_r {
my (@files) = @_;
# use the more general (but reterospectively written function
# filtered_rm_r function()
filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
}
# moves a file or a group of files
sub mv {
my $dest = pop (@_);
my (@srcfiles) = @_;
# remove trailing slashes from source and destination files
$dest =~ s/[\\\/]+$//;
map {$_ =~ s/[\\\/]+$//;} @srcfiles;
# a few sanity checks
if (scalar (@srcfiles) == 0) {
print STDERR "util::mv no destination directory given\n";
return;
} elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
print STDERR "util::mv if multiple source files are given the ".
"destination must be a directory\n";
return;
}
# move the files
foreach my $file (@srcfiles) {
my $tempdest = $dest;
if (-d $tempdest) {
my ($filename) = $file =~ /([^\\\/]+)$/;
$tempdest .= "/$filename";
}
if (!-e $file) {
print STDERR "util::mv $file does not exist\n";
} else {
rename ($file, $tempdest);
}
}
}
# copies a file or a group of files
sub cp {
my $dest = pop (@_);
my (@srcfiles) = @_;
# remove trailing slashes from source and destination files
$dest =~ s/[\\\/]+$//;
map {$_ =~ s/[\\\/]+$//;} @srcfiles;
# a few sanity checks
if (scalar (@srcfiles) == 0) {
print STDERR "util::cp no destination directory given\n";
return;
} elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
print STDERR "util::cp if multiple source files are given the ".
"destination must be a directory\n";
return;
}
# copy the files
foreach my $file (@srcfiles) {
my $tempdest = $dest;
if (-d $tempdest) {
my ($filename) = $file =~ /([^\\\/]+)$/;
$tempdest .= "/$filename";
}
if (!-e $file) {
print STDERR "util::cp $file does not exist\n";
} elsif (!-f $file) {
print STDERR "util::cp $file is not a plain file\n";
} else {
&File::Copy::copy ($file, $tempdest);
}
}
}
# recursively copies a file or group of files
# syntax: cp_r (sourcefiles, destination directory)
# destination must be a directory - to copy one file to
# another use cp instead
sub cp_r {
my $dest = pop (@_);
my (@srcfiles) = @_;
# a few sanity checks
if (scalar (@srcfiles) == 0) {
print STDERR "util::cp_r no destination directory given\n";
return;
} elsif (-f $dest) {
print STDERR "util::cp_r destination must be a directory\n";
return;
}
# create destination directory if it doesn't exist already
if (! -d $dest) {
my $store_umask = umask(0002);
mkdir ($dest, 0777);
umask($store_umask);
}
# copy the files
foreach my $file (@srcfiles) {
if (!-e $file) {
print STDERR "util::cp_r $file does not exist\n";
} elsif (-d $file) {
# make the new directory
my ($filename) = $file =~ /([^\\\/]*)$/;
$dest = &util::filename_cat ($dest, $filename);
my $store_umask = umask(0002);
mkdir ($dest, 0777);
umask($store_umask);
# get the contents of this directory
if (!opendir (INDIR, $file)) {
print STDERR "util::cp_r could not open directory $file\n";
} else {
my @filedir = readdir (INDIR);
closedir (INDIR);
foreach my $f (@filedir) {
next if $f =~ /^\.\.?$/;
# copy all the files in this directory
my $ff = &util::filename_cat ($file, $f);
&cp_r ($ff, $dest);
}
}
} else {
&cp($file, $dest);
}
}
}
# copies a directory and its contents, excluding subdirectories, into a new directory
sub cp_r_toplevel {
my $dest = pop (@_);
my (@srcfiles) = @_;
# a few sanity checks
if (scalar (@srcfiles) == 0) {
print STDERR "util::cp_r no destination directory given\n";
return;
} elsif (-f $dest) {
print STDERR "util::cp_r destination must be a directory\n";
return;
}
# create destination directory if it doesn't exist already
if (! -d $dest) {
my $store_umask = umask(0002);
mkdir ($dest, 0777);
umask($store_umask);
}
# copy the files
foreach my $file (@srcfiles) {
if (!-e $file) {
print STDERR "util::cp_r $file does not exist\n";
} elsif (-d $file) {
# make the new directory
my ($filename) = $file =~ /([^\\\/]*)$/;
$dest = &util::filename_cat ($dest, $filename);
my $store_umask = umask(0002);
mkdir ($dest, 0777);
umask($store_umask);
# get the contents of this directory
if (!opendir (INDIR, $file)) {
print STDERR "util::cp_r could not open directory $file\n";
} else {
my @filedir = readdir (INDIR);
closedir (INDIR);
foreach my $f (@filedir) {
next if $f =~ /^\.\.?$/;
# copy all the files in this directory, but not directories
my $ff = &util::filename_cat ($file, $f);
if (-f $ff) {
&cp($ff, $dest);
#&cp_r ($ff, $dest);
}
}
}
} else {
&cp($file, $dest);
}
}
}
sub mk_dir {
my ($dir) = @_;
my $store_umask = umask(0002);
my $mkdir_ok = mkdir ($dir, 0777);
umask($store_umask);
if (!$mkdir_ok)
{
print STDERR "util::mk_dir could not create directory $dir\n";
return;
}
}
# in case anyone cares - I did some testing (using perls Benchmark module)
# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
# slightly faster (surprisingly) - Stefan.
sub mk_all_dir {
my ($dir) = @_;
# use / for the directory separator, remove duplicate and
# trailing slashes
$dir=~s/[\\\/]+/\//g;
$dir=~s/[\\\/]+$//;
# make sure the cache directory exists
my $dirsofar = "";
my $first = 1;
foreach my $dirname (split ("/", $dir)) {
$dirsofar .= "/" unless $first;
$first = 0;
$dirsofar .= $dirname;
next if $dirname =~ /^(|[a-z]:)$/i;
if (!-e $dirsofar)
{
my $store_umask = umask(0002);
my $mkdir_ok = mkdir ($dirsofar, 0777);
umask($store_umask);
if (!$mkdir_ok)
{
print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
return;
}
}
}
}
# make hard link to file if supported by OS, otherwise copy the file
sub hard_link {
my ($src, $dest) = @_;
# remove trailing slashes from source and destination files
$src =~ s/[\\\/]+$//;
$dest =~ s/[\\\/]+$//;
# a few sanity checks
if (-e $dest) {
# destination file already exists
return;
}
elsif (!-e $src) {
print STDERR "util::hard_link source file $src does not exist\n";
return 1;
}
elsif (-d $src) {
print STDERR "util::hard_link source $src is a directory\n";
return 1;
}
my $dest_dir = &File::Basename::dirname($dest);
mk_all_dir($dest_dir) if (!-e $dest_dir);
# link not supported on windows 9x
if (($ENV{'GSDLOS'} =~ /^windows$/i) && (Win32::FsType() !~ /^ntfs$/i)) {
&File::Copy::copy ($src, $dest);
} elsif (!link($src, $dest)) {
print STDERR "util::hard_link: unable to create hard link. ";
print STDERR " Attempting to copy file: $src -> $dest\n";
&File::Copy::copy ($src, $dest);
}
return 0;
}
# make soft link to file if supported by OS, otherwise copy file
sub soft_link {
my ($src, $dest) = @_;
# remove trailing slashes from source and destination files
$src =~ s/[\\\/]+$//;
$dest =~ s/[\\\/]+$//;
# a few sanity checks
if (!-e $src) {
print STDERR "util::soft_link source file $src does not exist\n";
return 0;
}
my $dest_dir = &File::Basename::dirname($dest);
mk_all_dir($dest_dir) if (!-e $dest_dir);
if ($ENV{'GSDLOS'} =~ /^windows$/i) {
# symlink not supported on windows
&File::Copy::copy ($src, $dest);
} elsif (!eval {symlink($src, $dest)}) {
print STDERR "util::soft_link: unable to create soft link.\n";
return 0;
}
return 1;
}
# updates a copy of a directory in some other part of the filesystem
# verbosity settings are: 0=low, 1=normal, 2=high
# both $fromdir and $todir should be absolute paths
sub cachedir {
my ($fromdir, $todir, $verbosity) = @_;
$verbosity = 1 unless defined $verbosity;
# use / for the directory separator, remove duplicate and
# trailing slashes
$fromdir=~s/[\\\/]+/\//g;
$fromdir=~s/[\\\/]+$//;
$todir=~s/[\\\/]+/\//g;
$todir=~s/[\\\/]+$//;
&mk_all_dir ($todir);
# get the directories in ascending order
if (!opendir (FROMDIR, $fromdir)) {
print STDERR "util::cachedir could not read directory $fromdir\n";
return;
}
my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
closedir (FROMDIR);
if (!opendir (TODIR, $todir)) {
print STDERR "util::cacedir could not read directory $todir\n";
return;
}
my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
closedir (TODIR);
my $fromi = 0;
my $toi = 0;
while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
# print "fromi: $fromi toi: $toi\n";
# see if we should delete a file/directory
# this should happen if the file/directory
# is not in the from list or if its a different
# size, or has an older timestamp
if ($toi < scalar(@todir)) {
if (($fromi >= scalar(@fromdir)) ||
($todir[$toi] lt $fromdir[$fromi] ||
($todir[$toi] eq $fromdir[$fromi] &&
&differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
$verbosity)))) {
# the files are different
&rm_r("$todir/$todir[$toi]");
splice(@todir, $toi, 1); # $toi stays the same
} elsif ($todir[$toi] eq $fromdir[$fromi]) {
# the files are the same
# if it is a directory, check its contents
if (-d "$todir/$todir[$toi]") {
&cachedir ("$fromdir/$fromdir[$fromi]",
"$todir/$todir[$toi]", $verbosity);
}
$toi++;
$fromi++;
next;
}
}
# see if we should insert a file/directory
# we should insert a file/directory if there
# is no tofiles left or if the tofile does not exist
if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
$todir[$toi] gt $fromdir[$fromi])) {
&cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
splice (@todir, $toi, 0, $fromdir[$fromi]);
$toi++;
$fromi++;
}
}
}
# this function returns -1 if either file is not found
# assumes that $file1 and $file2 are absolute file names or
# in the current directory
# $file2 is allowed to be newer than $file1
sub differentfiles {
my ($file1, $file2, $verbosity) = @_;
$verbosity = 1 unless defined $verbosity;
$file1 =~ s/\/+$//;
$file2 =~ s/\/+$//;
my ($file1name) = $file1 =~ /\/([^\/]*)$/;
my ($file2name) = $file2 =~ /\/([^\/]*)$/;
return -1 unless (-e $file1 && -e $file2);
if ($file1name ne $file2name) {
print STDERR "filenames are not the same\n" if ($verbosity >= 2);
return 1;
}
my @file1stat = stat ($file1);
my @file2stat = stat ($file2);
if (-d $file1) {
if (! -d $file2) {
print STDERR "one file is a directory\n" if ($verbosity >= 2);
return 1;
}
return 0;
}
# both must be regular files
unless (-f $file1 && -f $file2) {
print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
return 1;
}
# the size of the files must be the same
if ($file1stat[7] != $file2stat[7]) {
print STDERR "different sized files\n" if ($verbosity >= 2);
return 1;
}
# the second file cannot be older than the first
if ($file1stat[9] > $file2stat[9]) {
print STDERR "file is older\n" if ($verbosity >= 2);
return 1;
}
return 0;
}
sub get_tmp_filename {
my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
&mk_all_dir ($tmpdir) unless -e $tmpdir;
my $count = 1000;
my $rand = int(rand $count);
while (-e &filename_cat($tmpdir, "F$rand")) {
$rand = int(rand $count);
$count++;
}
return filename_cat($tmpdir, "F$rand");
}
sub filename_cat {
my $first_file = shift(@_);
my (@filenames) = @_;
# Check for empty first filename
if ($first_file =~ /\S/) {
unshift(@filenames, $first_file);
}
my $filename = join("/", @filenames);
# remove duplicate slashes and remove the last slash
if ($ENV{'GSDLOS'} =~ /^windows$/i) {
$filename =~ s/[\\\/]+/\\/g;
} else {
$filename =~ s/[\/]+/\//g;
# DB: want a filename abc\de.html to remain like this
}
$filename =~ s/[\\\/]$//;
return $filename;
}
sub envvar_prepend {
my ($var,$val) = @_;
my $current_val = $ENV{$var};
if ($ENV{'GSDLOS'} =~ /^windows$/i) {
$ENV{$var} .= "$val;$current_val";
}
else {
$ENV{$var} .= "$val:$current_val";
}
}
sub envvar_append {
my ($var,$val) = @_;
if ($ENV{'GSDLOS'} =~ /^windows$/i) {
$ENV{$var} .= ";$val";
}
else {
$ENV{$var} .= ":$val";
}
}
# returns the path of a file without the filename -- ie. the directory the file is in
sub filename_head {
my $filename = shift(@_);
if ($ENV{'GSDLOS'} =~ /^windows$/i) {
$filename =~ s/[^\\\\]*$//;
}
else {
$filename =~ s/[^\\\/]*$//;
}
return $filename;
}
# returns 1 if filename1 and filename2 point to the same
# file or directory
sub filenames_equal {
my ($filename1, $filename2) = @_;
# use filename_cat to clean up trailing slashes and
# multiple slashes
$filename1 = filename_cat ($filename1);
$filename2 = filename_cat ($filename2);
# filenames not case sensitive on windows
if ($ENV{'GSDLOS'} =~ /^windows$/i) {
$filename1 =~ tr/[A-Z]/[a-z]/;
$filename2 =~ tr/[A-Z]/[a-z]/;
}
return 1 if $filename1 eq $filename2;
return 0;
}
sub filename_within_collection
{
my ($filename) = @_;
my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
if (defined $collect_dir) {
my $dirsep = &util::get_dirsep();
if ($collect_dir !~ m/$dirsep$/) {
$collect_dir .= $dirsep;
}
$collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
if ($filename =~ /^$collect_dir(.*)$/) {
$filename = $1;
}
}
return $filename;
}
sub get_dirsep {
if ($ENV{'GSDLOS'} =~ /^windows$/i) {
return "\\";
} else {
return "\/";
}
}
sub get_os_dirsep {
if ($ENV{'GSDLOS'} =~ /^windows$/i) {
return "\\\\";
} else {
return "\\\/";
}
}
sub get_re_dirsep {
return "\\\\|\\\/";
}
# if this is running on windows we want binaries to end in
# .exe, otherwise they don't have to end in any extension
sub get_os_exe {
return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
return "";
}
# test to see whether this is a big or little endian machine
sub is_little_endian {
# To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
# What we do here is, if it is a Macintosh machine (i.e. the Darwin operating system), regardless it is running on the IBM power-pc cpu or it is the x86 Intel-based chip with a power-pc emulator running on top of it, it requires the big-endian data format in the gdbm database file, we make the file extension .bdb; otherwise it's .ldb extension.
#return 0 if $^O =~ /^darwin$/i;
return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
return (ord(substr(pack("s",1), 0, 1)) == 1);
}
# will return the collection name if successful, "" otherwise
sub use_collection {
my ($collection, $collectdir) = @_;
if (!defined $collectdir || $collectdir eq "") {
$collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
}
# get and check the collection
if (!defined($collection) || $collection eq "") {
if (defined $ENV{'GSDLCOLLECTION'}) {
$collection = $ENV{'GSDLCOLLECTION'};
} else {
print STDOUT "No collection specified\n";
return "";
}
}
if ($collection eq "modelcol") {
print STDOUT "You can't use modelcol.\n";
return "";
}
# make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
# are defined
$ENV{'GSDLCOLLECTION'} = $collection unless defined $ENV{'GSDLCOLLECTION'};
$ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
# make sure this collection exists
if (!-e $ENV{'GSDLCOLLECTDIR'}) {
print STDOUT "Invalid collection ($collection).\n";
return "";
}
# everything is ready to go
return $collection;
}
sub hyperlink_text
{
my ($text) = @_;
$text =~ s/(http:\/\/[^\s]+)/$1<\/a>/mg;
$text =~ s/(^|\s+)(www\.(\w|\.)+)/$2<\/a>/mg;
return $text;
}
1;