########################################################################### # # FileUtils.pm -- functions for dealing with files. Will delegate to the # appropriate filesystem driver based upon any file # protocol specified and dependent on configuration as # defined by the collection admin # # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 2013 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### package FileUtils; # Perl Modules use strict; use Symbol qw; # Greenstone Modules use util; # Configuration my $debug = 0; my $linking_disabled = 0; # Globals my $done_link_warning = 0; ## @function _callFunction($driver_name, $function_name, ...) # # Make a function call to a dynamically loaded database driver. # @param $driver_name - The name of the file protocol driver to load # @param $function_name - The function within the driver to call # @param - The parameters to be passed to the function called # sub _callFunction { my $driver_name = shift(@_); my $function_name = shift(@_); &_prettyPrint(0, $driver_name, $function_name, @_) unless (!$debug); # Need to look within fileutils directory my $package_name = 'FileUtils::' . $driver_name; # Try to load the requested infodb type if (!&_loadDriver($package_name)) { &printError('Failed to load requested file protocol driver: ' . $package_name, 1); } # Then make call to the newly created package no strict; # Better check that the function we are about to call exists my $symbol = qualify($function_name, $package_name); unless ( defined &{$symbol} ) { &printError('Function not found: ' . $package_name . '::' . $function_name, 1); } # Call the function and get result if applicable my $result = &{$symbol}(@_); &_prettyPrint(1, $result) unless (!$debug); return $result; } ## callFunction() ## @function _prettyPrint() # # Print a debugging message to STDERR constructed from the based upon # the type. # @param type - If 0, output the start of a function with a listing of its # parameters. If 1, output the result of a function. 2 is # used for function errors. Default to simply printing what- # ever else is in # sub _prettyPrint { my $type = shift(@_); if (!defined $type || $type > 2) { $type = -1; } my ($package, $filename, $line, $function) = caller(1); my $message; # Start of a function if (0 == $type) { $message = $package . '::' . $function . '('; my $argument = shift(@_); my $first = 1; while (defined $argument) { if (!$first) { $message .= ', '; } else { $first = 0; } if ($argument =~ /\D/) { $message .= '"' . $argument . '"'; } else { $message .= $argument; } $argument = shift(@_); } $message .= ')'; } # Result of a function elsif (1 == $type) { $message = $package . '::' . $function . '() => '; my $result = shift(@_); if ($result =~ /\D/) { $message .= '"' . $result . '"'; } else { $message .= $result; } } elsif (2 == $type) { my $error = shift(@_); $message = 'Error in ' . $package . '::' . $function . '()! ' . $error; } # Else we leave the message as it is else { $message = join("\n", @_); } print STDERR "[" . time() . "] " . $message . "\n"; } ## _prettyPrint() # /** @function _determineDriver() # * Given a file path determine the appropriate protocol. For now anything # * other than a full path beginning with an explicit protocol will default # * to using 'local' file functions. # * @return 'local' # */ sub _determineDriver { my $path = shift(@_); &_prettyPrint(0, $path) unless (!$debug); # Determine the appropriate driver from the protocol my $driver = 'LocalFS'; # - this is were I'll eventually have the ability to configure # what driver handles what protocol, hopefully from the collect.cfg my $colon_index = index($path, ':'); if ($colon_index > -1) { my $protocol = substr($path, 0, $colon_index); # check the perl module exists eval 'require "FileUtils/' . $protocol . '.pm"'; #eval #{ # require 'FileUtils/' . $protocol . '.pm'; #}; if ($@) { print STDERR 'Warning! FileUtils::_determineDriver() driver not found (defaulting to local filesystem):' . $protocol . "\n" . $@ . "\n"; } else { $driver = $protocol; } } &_prettyPrint(1, $driver) unless (!$debug); return $driver; } # /** _determineDriver() # /** @function _loadDriver($class, ...) # * Runtime class loading for use in FileUtils to load various protocol # * drivers, possibly configured in the collect.cfg, at runtime. # * @param $class - The class name (including any path) to load # * @param - any function aliases you want exported # */ sub _loadDriver { my $class = shift(@_); &_prettyPrint(0, $class) unless (!$debug); # Convert the Perl Module-like name into a file path (my $file = "$class.pm") =~ s|::|/|g; # - ensure we haven't already loaded this class unless( $INC{$file} ) { require $file; } # - this is the magic that actually instantiates the class (rubberstamp?) # - we pass @_ to action any function aliases exports requested eval { $class->import(@_); }; # - by now the driver file should have been loaded my $result = defined $INC{$file}; &_prettyPrint(1, $result) unless (!$debug); return $result; } # /** _loadDriver($class, ...) **/ ################################################################################ ## @function printError() # sub printError { my ($message, $fatal) = @_; my ($package, $filename, $line, $function) = caller(1); if (defined $!) { $message .= ' (' . $! . ')'; } if (defined $fatal && $fatal) { die('Fatal Error! ' . $package . '::' . $function . '() - ' . $message ."\n"); } else { print STDERR 'Error! ' . $package . '::' . $function . '() - ' . $message ."\n"; } } ## printError() ## @function printWarning # sub printWarning { my ($message) = @_; my ($package, $filename, $line, $function) = caller(1); print STDERR 'Warning! ' . $package . '::' . $function . '() - ' . $message . "\n"; } ## printWarning() ################################################################################ ######################## Legacy function name mappings ######################## ################################################################################ # Note: there are lots of functions involving files/directories/paths etc found # in utils.pm that are not represented here. My intention was to just have those # functions that need to be dynamic based on filesystem, or need some rejigging # to be filesystem aware. This is an argument, I guess, for moving some of the # other functions here so that they are nicely encapsulated - but the question # is what to do with functions like filename_within_directory_url_format() which # is more URL based than file based... dunno. ################################################################################ sub cachedir {return synchronizeDirectory(@_);} sub cp {return copyFiles(@_);} sub cp_r {print "implement cp_r()";} sub cp_r_nosvn {print "implement cp_r_nosvn()";} sub cp_r_toplevel {print "implement cp_r_toplevel()";} sub differentfiles {return &differentFiles(@_);} sub dir_exists {return &directoryExists(@_);} sub file_exists {return &fileExists(@_);} sub file_lastmodified {return &modificationTime(@_);} sub file_readdir {return readDirectory(@_);} sub file_size {return &fileSize(@_);} sub filename_cat {return filenameConcatenate(@_);} sub filename_is_absolute {return &isFilenameAbsolute();}; sub filtered_rm_r {print "implement filtered_rm_r()";} sub hard_link {print "implement hard_link()";} sub is_dir_empty {return &isDirectoryEmpty();} sub mk_all_dir {return &makeAllDirectories(@_);} sub mk_dir {return &makeDirectory(@_);} sub mv {return &moveFiles(@_);} sub mv_dir_contents {print "implement mv_dir_contents()";} sub rm {print "implement rm()";} sub rm_debug {print "implement rm_debug()";} sub rm_r {print "implement rm_r()";} sub soft_link {print "implement soft_link()";} ################################################################################ ########## Common functions ########## ################################################################################ # Note: these are the file-based functions that are not dynamic in themselves, # but that need significant changes to support multiple possible filesystems. ################################################################################ ## @function differentFiles # (previous util.pm version used -e, -d, and 'stat', none of which support # * filesystems such as hadoop) # */ sub differentFiles { my ($file1, $file2, $verbosity) = @_; if (!defined $verbosity) { $verbosity = 1 } # remove trailing slashes $file1 =~ s/\/+$//; $file2 =~ s/\/+$//; # chop off the last part of the path as the file/dir name my ($file1name) = $file1 =~ /\/([^\/]*)$/; my ($file2name) = $file2 =~ /\/([^\/]*)$/; # - cheapest first; test the two filename strings are the same if ($file1name ne $file2name) { print STDERR "filenames are not the same\n" if ($verbosity >= 2); return 1; } if (!&pathExists($file1) || !&pathExists($file2)) { print STDERR "one or other file doesn't exist\n" if ($verbosity >= 2); return -1; } if (&directoryExists($file1)) { if (!&directoryExists($file2)) { print STDERR "one file is a directory\n" if ($verbosity >= 2); return 1; } return 0; } # both must be regular files unless (&fileExists($file1) && &fileExists($file2)) { print STDERR "one file is not a regular file\n" if ($verbosity >= 2); return 1; } # the size of the files must be the same if (&fileSize($file1) != &fileSize($file2)) { print STDERR "different sized files\n" if ($verbosity >= 2); return 1; } # the second file cannot be older than the first if (&modificationTime($file1) > &modificationTime($file2)) { print STDERR "file is older\n" if ($verbosity >= 2); return 1; } return 0; } # /** differentFiles() **/ ## @function fileGetContents() # sub fileGetContents { my ($path) = @_; my $content; my $driver = &FileUtils::_determineDriver($path); my $filesize = &FileUtils::_callFunction($driver, 'fileSize', $path); my $fh; &FileUtils::_callFunction($driver, 'openFileHandle', $path, '<', \$fh); sysread($fh, $content, $filesize); &FileUtils::_callFunction($driver, 'closeFileHandle', \$fh); return $content; } ## fileGetContents() ## @function filePutContents() # sub filePutContents { my $path = shift(@_); my $str = shift(@_); my $driver = &FileUtils::_determineDriver($path); my $fh; &FileUtils::_callFunction($driver, 'openFileHandle', $path, '>', \$fh); print $fh $str; &FileUtils::_callFunction($driver, 'closeFileHandle', \$fh); return 1; } ## filePutContents(path, str) ## @function makeAllDirectories() # # in case anyone cares - I did some testing (using perls Benchmark module) # on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently # slightly faster (surprisingly) - Stefan. # sub makeAllDirectories { my ($raw_dir) = @_; # use / for the directory separator, remove duplicate and # trailing slashes $raw_dir = &sanitizePath($raw_dir); # ensure the directory doesn't already exist if (&directoryExists($raw_dir)) { return 0; } if ($raw_dir =~ /^(.+?:\/\/)?(.*)/) { my $dirsofar = ''; if (defined $1) { $dirsofar = $1; } my $dir = $2; my $first = 1; foreach my $dirname (split ("/", $dir)) { $dirsofar .= "/" unless $first; $first = 0; $dirsofar .= $dirname; next if $dirname =~ /^(|[a-z]:)$/i; if (!&directoryExists($dirsofar)) { my $mkdir_ok = &makeDirectory($dirsofar); if (!$mkdir_ok) { &FileUtils::printError('Could not create directory: ' . $dirsofar); return 0; } } } } return (&directoryExists($raw_dir)); } ## makeAllDirectories() ## @function sanitizePath() # sub sanitizePath { my ($path) = @_; # fortunately filename concatenate will perform all the double slash removal, # end slash removal we need, and in a protocol aware fashion return &filenameConcatenate($path); } ## sanitizePath() ################################################################################ ## @function canRead() # sub canRead { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); return &FileUtils::_callFunction($driver, 'canRead', $path, @_); } ## canRead() ## @function closeFileHandle() # sub closeFileHandle { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); return &FileUtils::_callFunction($driver, 'closeFileHandle', @_); } ## closeFileHandle() # /** # */ sub copyFiles { return &transferFiles(@_, 'COPY'); } # /** copyFiles() **/ # /** # */ sub directoryExists { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); return &FileUtils::_callFunction($driver, 'fileTest', $path, '-d'); } # /** directoryExists($path) **/ # /** @function file_exists($path) # * Determine if the given file path exists on the target filesystem # * @param path - the path to the file to test # * @return true if the file exists, false otherwise sub fileExists { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); my $result = &FileUtils::_callFunction($driver, 'fileTest', $path, '-f'); return $result; } # /** fileExists(path) **/ # /** @function filenameConcatenate() # */ sub filenameConcatenate { my $first_path_part = shift(@_); my $path = ''; if (defined $first_path_part) { my $driver = &FileUtils::_determineDriver($first_path_part); $path = &FileUtils::_callFunction($driver, 'filenameConcatenate', $first_path_part, @_); } return $path; } # /** filenameConcatenate() **/ # /** # */ sub fileSize { my $path = shift(@_); my $driver = &_determineDriver($path); return &_callFunction($driver, 'fileSize', $path); } # /** fileSize() **/ ## @function hardLink() # sub hardLink { my $src_file = shift(@_); my $dst_file = shift(@_); my $src_driver = &FileUtils::_determineDriver($src_file); my $dst_driver = &FileUtils::_determineDriver($dst_file); # you can only symbolic link within the same file system - always if ($src_driver eq 'LocalFS' && $src_driver eq $dst_driver && !$linking_disabled) { &FileUtils::_callFunction($src_driver, 'linkFile', 'HARD', $src_file, $dst_file); } # substitute a copy else { if (!$done_link_warning) { if ($src_driver ne 'LocalFS' || $dst_driver ne 'LocalFS') { &printWarning('Cannot symbolic hardlink between non-local file systems - copying all files instead'); } else { &printWarning('Symbolic hardlinking disabled - copying all files instead'); } $done_link_warning = 1; } &transferFiles($src_file, $dst_file, 'COPY'); } } ## hardLink() ## @function isFilenameAbolsute() # # Determine if the given path is an absolute path (as compared to relative) # sub isFilenameAbsolute { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); return &FileUtils::_callFunction($driver, 'isFilenameAbsolute', $path, '-l'); } ## isFilenameAbsolute() ## @function isHDFS() # sub isHDFS { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); my $result = &FileUtils::_callFunction($driver, 'isHDFS'); ###rint STDERR "[DEBUG] FileUtils::isHDFS(" . $path . ") => " . $result . "\n"; return $result; } ## isHDFS() ## @function isSymbolicLink() # # Determine if the given path is a symbolic link # sub isSymbolicLink { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); return &FileUtils::_callFunction($driver, 'fileTest', $path, '-l'); } ## isSymbolicLink() # /** # */ sub makeDirectory { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); # check if the directory already exists - in which case our job is done :) my $result = &FileUtils::_callFunction($driver, 'fileTest', $path, '-d'); # not yet - better try and create it then if (!$result) { $result = &FileUtils::_callFunction($driver, 'makeDirectory', $path); } return $result; } # /** makeDirectory(path) **/ # /** # */ sub modificationTime { my $path = shift(@_); my $driver = &_determineDriver($path); return &_callFunction($driver, 'modificationTime', $path); } # /** modificationTime() **/ # /** # */ sub moveFiles { return &transferFiles(@_, 'MOVE'); } # /** moveFiles() **/ # /** # */ sub openFileHandle { my $path = shift(@_); # I'll set mode to read by default, as that is less destructive to precious # files on your system... my $mode = shift(@_) || '<'; # the all important determining of the driver by protocol my $driver = &FileUtils::_determineDriver($path); # the function call will return true on success, with the reference to the # file handle hopefully populated with a lovely file descriptor return &FileUtils::_callFunction($driver, 'openFileHandle', $path, $mode, @_); } # /** openFileHandle($file_handle_ref, $path, $mode) **/ ## @function pathExists() # # Determine if a certain path exists on the file system - regardless of whether # it is a file or a directory (the equivalent of -e) # sub pathExists { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); return &FileUtils::_callFunction($driver, 'fileTest', $path, '-e'); } ## pathExists() ## @function readDirectory() # # Provide a function to return the files within a directory that is aware # of protocols other than file:// # @param $dirname the full path to the directory # sub readDirectory { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); return &FileUtils::_callFunction($driver, 'readDirectory', $path); } ## readDirectory() ## @function removeFiles() # # Removes files (but not directories) # @param files - An array of filepaths to remove # sub removeFiles { my (@files) = @_; my $num_removed = 0; # Remove the files foreach my $path (@files) { my $driver = &FileUtils::_determineDriver($path); if (&FileUtils::_callFunction($driver, 'removeFiles', $path)) { $num_removed++; } } # Check to make sure all of them were removed if ($num_removed != scalar(@files)) { &printError('Not all files were removed'); $num_removed = 0; } return $num_removed; } ## removeFile(files) ## @function removeFilesFiltered() # sub removeFilesFiltered { my $maybe_paths = shift(@_); my @paths = (ref $maybe_paths eq "ARRAY") ? @$maybe_paths : ($maybe_paths); my $num_removed = 0; foreach my $path (@paths) { my $driver = &FileUtils::_determineDriver($path); $num_removed += &FileUtils::_callFunction($driver, 'removeFilesFiltered', $path, @_); } return $num_removed; } ## removeFilesFiltered() ## @function removeFilesRecursive() # # The equivalent of "rm -rf" with all the dangers therein # sub removeFilesRecursive { my $maybe_paths = shift(@_); my @paths = (ref $maybe_paths eq "ARRAY") ? @$maybe_paths : ($maybe_paths); my $num_removed = 0; foreach my $path (@paths) { my $driver = &FileUtils::_determineDriver($path); $num_removed += &FileUtils::_callFunction($driver, 'removeFilesRecursive', $path); } return $num_removed; } ## removeFilesRecursive() ## @function softLink() # sub softLink { my $src_file = shift(@_); my $dst_file = shift(@_); my $src_driver = &FileUtils::_determineDriver($src_file); my $dst_driver = &FileUtils::_determineDriver($dst_file); # you can only symbolic link within the same file system - always if ($src_driver eq 'LocalFS' && $src_driver eq $dst_driver && !$linking_disabled) { &FileUtils::_callFunction($src_driver, 'linkFile', 'SOFT', $src_file, $dst_file, @_); } # substitute a copy elsif ($src_driver ne 'LocalFS') { if (!$done_link_warning) { &printWarning('Cannot symbolic link on non-local file systems - copying all files instead'); $done_link_warning = 1; } &transferFiles($src_file, $dst_file, 'COPY'); } else { if (!$done_link_warning) { &printWarning('Cannot symbolic link between file systems - copying all files instead'); $done_link_warning = 1; } &transferFiles($src_file, $dst_file, 'COPY'); } } ## softLink() ## @function supportsSymbolicLink() # sub supportsSymbolicLink { my $path = shift(@_); my $driver = &FileUtils::_determineDriver($path); &FileUtils::_callFunction($driver, 'supportsSymbolicLink'); } ## supportsSymbolicLink() ## @function synchronizeDirectory() # sub synchronizeDirectory { my $fromdir = shift(@_); my $driver = &FileUtils::_determineDriver($fromdir); &FileUtils::_callFunction($driver, 'synchronizeDirectory', $fromdir, @_); } ## synchronizeDirectory() ## @function transferFiles() # @param paths - one or more source paths # @param dst_path - the destination path # @param mode - copy or move # sub transferFiles { my $transfer_mode = pop(@_); my $dst_path = pop(@_); my (@src_paths) = @_; &_prettyPrint(0, @src_paths, $dst_path, $transfer_mode) unless (!$debug); my $result = 0; my $dst_driver = &_determineDriver($dst_path); if (scalar (@src_paths) == 0) { &printError('No destination directory given'); } elsif ((scalar (@src_paths) > 1) && (!&directoryExists($dst_path))) { &printError('If multiple source files are given the destination must be a directory'); } else { foreach my $src_path (@src_paths) { my $src_driver = &_determineDriver($src_path); if ($src_driver eq 'LocalFS') { # Local to local if ($dst_driver eq 'LocalFS') { $result += &_callFunction($src_driver, 'transferFile', $transfer_mode, $src_path, $dst_path); } # Local to X else { $result += &_callFunction($dst_driver, 'transferFileFromLocal', $transfer_mode, $src_path, $dst_path); } } # X to Local elsif ($dst_driver eq 'LocalFS') { $result += &_callFunction($src_driver, 'transferFileToLocal', $transfer_mode, $src_path, $dst_path); } # X to X elsif ($src_driver eq $dst_driver) { $result += &_callFunction($src_driver, 'transferFile', $transfer_mode, $src_path, $dst_path); } # X to Y... not supported else { &printError('transfer between two non-local file systems not supported'); } } $result = (scalar(@src_paths) == $result); } return $result; } ## transferFiles() 1;