root/main/trunk/greenstone2/perllib/FileUtils.pm @ 32089

Revision 32089, 33.0 KB (checked in by ak19, 2 years ago)

1. Attempted fix by Kathy and me for Diego's problem of PDFBox's handling of a PDF. When it was set to convert_to_html, it built fine, but convert_to_text produced something that was invalid XML in doc.XML and build failed. Diego reasoned correctly that building ought to succeed in both cases if it succeeded in one case. Kathy found the correct fix for escaping the ampersand character (it wasn't & to & that I'd attempted, nor did using HTML::Entities' encode work either). 2. The fix needed to read and write files, so introducing readUTF8File() and writeUTF8File() into FileUtils?.pm for reusability. Need to still contact John Thompson to ask him if and how these functions need to be modified to support parallel processing, for which FileUtils? was written.

Line 
1###########################################################################
2#
3# FileUtils.pm -- functions for dealing with files. Skeleton for more
4# advanced system using dynamic class cloading available in extensions.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2013 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package FileUtils;
29
30# Pragma
31use strict;
32use warnings;
33
34use FileHandle;
35
36# Greenstone modules
37use util;
38
39################################################################################
40# util::cachedir()             => FileUtils::synchronizeDirectory()
41# util::cp()                   => FileUtils::copyFiles()
42# util::cp_r()                 => FileUtils::copyFilesRecursive()
43# util::cp_r_nosvn()           => FileUtils::copyFilesRecursiveNoSVN()
44# util::cp_r_toplevel()        => FileUtils::copyFilesRecursiveTopLevel()
45# util::differentfiles()       => FileUtils::differentFiles()
46# util::dir_exists()           => FileUtils::directoryExists()
47# util::fd_exists()            => FileUtils::fileTest()
48# util::file_exists()          => FileUtils::fileExists()
49# util::filename_cat()         => FileUtils::filenameConcatenate()
50# util::filename_is_absolute() => FileUtils::isFilenameAbsolute()
51# util::filtered_rm_r()        => FileUtils::removeFilesFiltered()
52# util::hard_link()            => FileUtils::hardLink()
53# util::is_dir_empty()         => FileUtils::isDirectoryEmpty()
54# util::mk_all_dir()           => FileUtils::makeAllDirectories()
55# util::mk_dir()               => FileUtils::makeDirectory()
56# util::mv()                   => FileUtils::moveFiles()
57# util::mv_dir_contents()      => FileUtils::moveDirectoryContents()
58# util::rm()                   => FileUtils::removeFiles()
59# util::rm_r()                 => FileUtils::removeFilesRecursive()
60# util::soft_link()            => FileUtils::softLink()
61
62# Functions that have been added, but not by John Thompson,
63# So the implementations don't support parallel processing yet, but they print a warning and the
64# correct implementation can be put into here. So that if all calls for reading and writing UTF8
65# file content go through here, then they will do the right thing when the functions are updated.
66#
67#  => FileUtils::readUTF8File()
68#  => FileUtils::writeUTF8File()
69#
70
71# Other functions in this file (perhaps some of these may have counterparts in util.pm too):
72
73#canRead
74#isSymbolicLink
75#modificationTime
76#readDirectory
77#removeFilesDebug
78#sanitizePath
79#openFileHandle
80#closeFileHandle
81#differentFiles
82#filePutContents
83#fileSize
84#readDirectory
85
86################################################################################
87# Note: there are lots of functions involving files/directories/paths
88# etc found in utils.pm that are not represented here. My intention
89# was to just have those functions that need to be dynamic based on
90# filesystem, or need some rejigging to be filesystem aware. There is
91# an argument, I guess, for moving some of the other functions here so
92# that they are nicely encapsulated - but the question is what to do
93# with functions like filename_within_directory_url_format() which is
94# more URL based than file based.
95################################################################################
96
97
98## @function canRead()
99#
100sub canRead
101{
102  my ($filename_full_path) = @_;
103  return &fileTest($filename_full_path, '-R');
104}
105## canRead()
106
107
108## @function closeFileHandle
109#
110sub closeFileHandle
111{
112  my ($path, $fh_ref) = @_;
113  close($$fh_ref);
114}
115## closeFileHandle()
116
117
118## @function copyFiles()
119#
120# copies a file or a group of files
121#
122sub copyFiles
123{
124  my $dest = pop (@_);
125  my (@srcfiles) = @_;
126
127  # remove trailing slashes from source and destination files
128  $dest =~ s/[\\\/]+$//;
129  map {$_ =~ s/[\\\/]+$//;} @srcfiles;
130
131  # a few sanity checks
132  if (scalar (@srcfiles) == 0)
133  {
134    print STDERR "FileUtils::copyFiles() no destination directory given\n";
135    return;
136  }
137  elsif ((scalar (@srcfiles) > 1) && (!-d $dest))
138  {
139    print STDERR "FileUtils::copyFiles() if multiple source files are given the destination must be a directory\n";
140    return;
141  }
142
143  # copy the files
144  foreach my $file (@srcfiles)
145  {
146    my $tempdest = $dest;
147    if (-d $tempdest)
148    {
149      my ($filename) = $file =~ /([^\\\/]+)$/;
150      $tempdest .= "/$filename";
151    }
152    if (!-e $file)
153    {
154      print STDERR "FileUtils::copyFiles() $file does not exist\n";
155    }
156    elsif (!-f $file)
157    {
158      print STDERR "FileUtils::copyFiles() $file is not a plain file\n";
159    }
160    else
161    {
162      &File::Copy::copy ($file, $tempdest);
163    }
164  }
165}
166## copyFiles()
167
168
169## @function copyFilesRecursive()
170#
171# recursively copies a file or group of files syntax: cp_r
172# (sourcefiles, destination directory) destination must be a directory
173# to copy one file to another use cp instead
174#
175sub copyFilesRecursive
176{
177  my $dest = pop (@_);
178  my (@srcfiles) = @_;
179
180  # a few sanity checks
181  if (scalar (@srcfiles) == 0)
182  {
183    print STDERR "FileUtils::copyFilesRecursive() no destination directory given\n";
184    return;
185  }
186  elsif (-f $dest)
187  {
188    print STDERR "FileUtils::copyFilesRecursive() destination must be a directory\n";
189    return;
190  }
191
192  # create destination directory if it doesn't exist already
193  if (! -d $dest)
194  {
195    my $store_umask = umask(0002);
196    mkdir ($dest, 0777);
197    umask($store_umask);
198  }
199
200  # copy the files
201  foreach my $file (@srcfiles)
202  {
203
204    if (!-e $file)
205    {
206      print STDERR "FileUtils::copyFilesRecursive() $file does not exist\n";
207    }
208    elsif (-d $file)
209    {
210      # make the new directory
211      my ($filename) = $file =~ /([^\\\/]*)$/;
212      $dest = &filenameConcatenate($dest, $filename);
213      my $store_umask = umask(0002);
214      mkdir ($dest, 0777);
215      umask($store_umask);
216
217      # get the contents of this directory
218      if (!opendir (INDIR, $file))
219      {
220        print STDERR "FileUtils::copyFilesRecursive() could not open directory $file\n";
221      }
222      else
223      {
224        my @filedir = readdir (INDIR);
225        closedir (INDIR);
226        foreach my $f (@filedir)
227        {
228          next if $f =~ /^\.\.?$/;
229          # copy all the files in this directory
230          my $ff = &filenameConcatenate($file, $f);
231          &copyFilesRecursive($ff, $dest);
232        }
233      }
234
235    }
236    else
237    {
238      &copyFiles($file, $dest);
239    }
240  }
241}
242## copyFilesRecursive()
243
244
245## @function copyFilesRecursiveNoSVN()
246#
247# recursively copies a file or group of files, excluding SVN
248# directories, with syntax: cp_r (sourcefiles, destination directory)
249# destination must be a directory - to copy one file to another use cp
250# instead
251#
252# this should be merged with copyFilesRecursive() at some stage - jmt12
253#
254sub copyFilesRecursiveNoSVN
255{
256  my $dest = pop (@_);
257  my (@srcfiles) = @_;
258
259  # a few sanity checks
260  if (scalar (@srcfiles) == 0)
261  {
262    print STDERR "FileUtils::copyFilesRecursiveNoSVN() no destination directory given\n";
263    return;
264  }
265  elsif (-f $dest)
266  {
267    print STDERR "FileUtils::copyFilesRecursiveNoSVN() destination must be a directory\n";
268    return;
269  }
270
271  # create destination directory if it doesn't exist already
272  if (! -d $dest)
273  {
274    my $store_umask = umask(0002);
275    mkdir ($dest, 0777);
276    umask($store_umask);
277  }
278
279  # copy the files
280  foreach my $file (@srcfiles)
281  {
282    if (!-e $file)
283    {
284      print STDERR "copyFilesRecursiveNoSVN() $file does not exist\n";
285    }
286    elsif (-d $file)
287    {
288      # make the new directory
289      my ($filename) = $file =~ /([^\\\/]*)$/;
290      $dest = &filenameConcatenate($dest, $filename);
291      my $store_umask = umask(0002);
292      mkdir ($dest, 0777);
293      umask($store_umask);
294
295      # get the contents of this directory
296      if (!opendir (INDIR, $file))
297      {
298        print STDERR "copyFilesRecursiveNoSVN() could not open directory $file\n";
299      }
300      else
301      {
302        my @filedir = readdir (INDIR);
303        closedir (INDIR);
304        foreach my $f (@filedir)
305        {
306          next if $f =~ /^\.\.?$/;
307          next if $f =~ /^\.svn$/;
308          # copy all the files in this directory
309          my $ff = &filenameConcatenate($file, $f);
310          # util.pm version incorrectly called cp_r here - jmt12
311          &copyFilesRecursiveNoSVN($ff, $dest);
312        }
313      }
314    }
315    else
316    {
317      &copyFiles($file, $dest);
318    }
319  }
320}
321## copyFilesRecursiveNoSVN()
322
323
324## @function copyFilesRecursiveTopLevel()
325#
326# copies a directory and its contents, excluding subdirectories, into a new directory
327#
328# another candidate for merging in with copyFilesRecursive() - jmt12
329#
330sub copyFilesRecursiveTopLevel
331{
332  my $dest = pop (@_);
333  my (@srcfiles) = @_;
334
335  # a few sanity checks
336  if (scalar (@srcfiles) == 0)
337  {
338    print STDERR "FileUtils::copyFilesRecursiveTopLevel() no destination directory given\n";
339    return;
340  }
341  elsif (-f $dest)
342  {
343    print STDERR "FileUtils::copyFilesRecursiveTopLevel() destination must be a directory\n";
344    return;
345  }
346
347  # create destination directory if it doesn't exist already
348  if (! -d $dest)
349  {
350    my $store_umask = umask(0002);
351    mkdir ($dest, 0777);
352    umask($store_umask);
353  }
354
355  # copy the files
356  foreach my $file (@srcfiles)
357  {
358    if (!-e $file)
359    {
360      print STDERR "FileUtils::copyFilesRecursiveTopLevel() $file does not exist\n";
361    }
362    elsif (-d $file)
363    {
364      # make the new directory
365      my ($filename) = $file =~ /([^\\\/]*)$/;
366      $dest = &filenameConcatenate($dest, $filename);
367      my $store_umask = umask(0002);
368      mkdir ($dest, 0777);
369      umask($store_umask);
370
371      # get the contents of this directory
372      if (!opendir (INDIR, $file))
373      {
374        print STDERR "FileUtils::copyFilesRecursiveTopLevel() could not open directory $file\n";
375      }
376      else
377      {
378        my @filedir = readdir (INDIR);
379        closedir (INDIR);
380        foreach my $f (@filedir)
381        {
382          next if $f =~ /^\.\.?$/;
383
384          # copy all the files in this directory, but not directories
385          my $ff = &filenameConcatenate($file, $f);
386          if (-f $ff)
387          {
388            &copyFiles($ff, $dest);
389            #&cp_r ($ff, $dest);
390          }
391        }
392      }
393    }
394    else
395    {
396      &copyFiles($file, $dest);
397    }
398  }
399}
400## copyFilesRecursiveTopLevel()
401
402
403## @function differentFiles()
404#
405# this function returns -1 if either file is not found assumes that
406# $file1 and $file2 are absolute file names or in the current
407# directory $file2 is allowed to be newer than $file1
408#
409sub differentFiles
410{
411  my ($file1, $file2, $verbosity) = @_;
412  $verbosity = 1 unless defined $verbosity;
413
414  $file1 =~ s/\/+$//;
415  $file2 =~ s/\/+$//;
416
417  my ($file1name) = $file1 =~ /\/([^\/]*)$/;
418  my ($file2name) = $file2 =~ /\/([^\/]*)$/;
419
420  return -1 unless (-e $file1 && -e $file2);
421  if ($file1name ne $file2name)
422  {
423    print STDERR "filenames are not the same\n" if ($verbosity >= 2);
424    return 1;
425  }
426
427  my @file1stat = stat ($file1);
428  my @file2stat = stat ($file2);
429
430  if (-d $file1)
431  {
432    if (! -d $file2)
433    {
434      print STDERR "one file is a directory\n" if ($verbosity >= 2);
435      return 1;
436    }
437    return 0;
438  }
439
440  # both must be regular files
441  unless (-f $file1 && -f $file2)
442  {
443    print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
444    return 1;
445  }
446
447  # the size of the files must be the same
448  if ($file1stat[7] != $file2stat[7])
449  {
450    print STDERR "different sized files\n" if ($verbosity >= 2);
451    return 1;
452  }
453
454  # the second file cannot be older than the first
455  if ($file1stat[9] > $file2stat[9])
456  {
457    print STDERR "file is older\n" if ($verbosity >= 2);
458    return 1;
459  }
460
461  return 0;
462}
463## differentFiles()
464
465
466## @function directoryExists()
467#
468sub directoryExists
469{
470  my ($filename_full_path) = @_;
471  return &fileTest($filename_full_path, '-d');
472}
473## directoryExists()
474
475
476## @function fileExists()
477#
478sub fileExists
479{
480  my ($filename_full_path) = @_;
481  return &fileTest($filename_full_path, '-f');
482}
483## fileExists()
484
485## @function filenameConcatenate()
486#
487sub filenameConcatenate
488{
489  my $first_file = shift(@_);
490  my (@filenames) = @_;
491
492  #   Useful for debugging
493  #     -- might make sense to call caller(0) rather than (1)??
494  #   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
495  #   print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
496
497  # If first_file is not null or empty, then add it back into the list
498  if (defined $first_file && $first_file =~ /\S/)
499  {
500    unshift(@filenames, $first_file);
501  }
502
503  my $filename = join("/", @filenames);
504
505  # remove duplicate slashes and remove the last slash
506  if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin"))
507  {
508    $filename =~ s/[\\\/]+/\\/g;
509  }
510  else
511  {
512    $filename =~ s/[\/]+/\//g;
513    # DB: want a filename abc\de.html to remain like this
514  }
515  $filename =~ s/[\\\/]$//;
516
517  return $filename;
518}
519## filenameConcatenate()
520
521
522
523## @function javaFilenameConcatenate()
524#
525# Same as filenameConcatenate(), except because on Cygwin
526# the java we run is still Windows native, then this means
527# we want the generate filename to be in native Windows format
528sub javaFilenameConcatenate
529{
530  my (@filenames) = @_;
531
532  my $filename_cat = filenameConcatenate(@filenames);
533
534  if ($^O eq "cygwin") {
535      # java program, using a binary that is native to Windows, so need
536      # Windows directory and path separators
537
538      $filename_cat = `cygpath -wp "$filename_cat"`;
539      chomp($filename_cat);
540      $filename_cat =~ s%\\%\\\\%g;
541  }
542
543  return $filename_cat;
544}
545## javaFilenameConcatenate()
546
547
548## @function filePutContents()
549#
550# Create a file and write the given string directly to it
551# @param $path the full path of the file to write as a String
552# @param $content the String to be written to the file
553#
554sub filePutContents
555{
556  my ($path, $content) = @_;
557  if (open(FOUT, '>:utf8', $path))
558  {
559    print FOUT $content;
560    close(FOUT);
561  }
562  else
563  {
564    die('Error! Failed to open file for writing: ' . $path . "\n");
565  }
566}
567## filePutContents()
568
569## @function fileSize()
570#
571sub fileSize
572{
573  my $path = shift(@_);
574  my $file_size = -s $path;
575  return $file_size;
576}
577## fileSize()
578
579## @function fileTest()
580#
581sub fileTest
582{
583  my $filename_full_path = shift @_;
584  my $test_op = shift @_ || "-e";
585
586  # By default tests for existance of file or directory (-e)
587  # Can be made more specific by providing second parameter (e.g. -f or -d)
588
589  my $exists = 0;
590
591  if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin"))
592  {
593    require Win32;
594    my $filename_short_path = Win32::GetShortPathName($filename_full_path);
595    if (!defined $filename_short_path)
596    {
597      # Was probably still in UTF8 form (not what is needed on Windows)
598      my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
599      if (defined $unicode_filename_full_path)
600      {
601        $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
602      }
603    }
604    $filename_full_path = $filename_short_path;
605  }
606
607  if (defined $filename_full_path)
608  {
609    $exists = eval "($test_op \$filename_full_path)";
610  }
611
612  return $exists || 0;
613}
614## fileTest()
615
616## @function hardLink()
617# make hard link to file if supported by OS, otherwise copy the file
618#
619sub hardLink
620{
621  my ($src, $dest, $verbosity) = @_;
622
623  # remove trailing slashes from source and destination files
624  $src =~ s/[\\\/]+$//;
625  $dest =~ s/[\\\/]+$//;
626
627  ##    print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
628  # a few sanity checks
629  if (-e $dest)
630  {
631    # destination file already exists
632    return;
633  }
634  elsif (!-e $src)
635  {
636    print STDERR "FileUtils::hardLink() source file \"" . $src . "\" does not exist\n";
637    return 1;
638  }
639  elsif (-d $src)
640  {
641    print STDERR "FileUtils::hardLink() source \"" . $src . "\" is a directory\n";
642    return 1;
643  }
644
645  my $dest_dir = &File::Basename::dirname($dest);
646  if (!-e $dest_dir)
647  {
648    &makeAllDirectories($dest_dir);
649  }
650
651  if (!link($src, $dest))
652  {
653    if ((!defined $verbosity) || ($verbosity>2))
654    {
655      print STDERR "FileUtils::hardLink(): unable to create hard link. ";
656      print STDERR " Copying file: $src -> $dest\n";
657    }
658    &File::Copy::copy ($src, $dest);
659  }
660  return 0;
661}
662## hardLink()
663
664## @function isDirectoryEmpty()
665#
666# A method to check if a directory is empty (note that an empty
667# directory still has non-zero size!!!).  Code is from
668# http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
669#
670sub isDirectoryEmpty
671{
672  my ($path) = @_;
673  opendir DIR, $path;
674  while(my $entry = readdir DIR)
675  {
676    next if($entry =~ /^\.\.?$/);
677    closedir DIR;
678    return 0;
679  }
680  closedir DIR;
681  return 1;
682}
683## isDirectoryEmpty()
684
685## @function isFilenameAbsolute()
686#
687sub isFilenameAbsolute
688{
689  my ($filename) = @_;
690  if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin"))
691  {
692    return ($filename =~ m/^(\w:)?\\/);
693  }
694  return ($filename =~ m/^\//);
695}
696# isFilenameAbsolute()
697
698## @function isSymbolicLink
699#
700# Determine if a given path is a symbolic link (soft link)
701#
702sub isSymbolicLink
703{
704  my $path = shift(@_);
705  my $is_soft_link = -l $path;
706  return $is_soft_link;
707}
708## isSymbolicLink()
709
710## @function makeAllDirectories()
711#
712# in case anyone cares - I did some testing (using perls Benchmark module)
713# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
714# slightly faster (surprisingly) - Stefan.
715#
716sub makeAllDirectories
717{
718  my ($dir) = @_;
719
720  # use / for the directory separator, remove duplicate and
721  # trailing slashes
722  $dir=~s/[\\\/]+/\//g;
723  $dir=~s/[\\\/]+$//;
724
725  # make sure the cache directory exists
726  my $dirsofar = "";
727  my $first = 1;
728  foreach my $dirname (split ("/", $dir))
729  {
730    $dirsofar .= "/" unless $first;
731    $first = 0;
732
733    $dirsofar .= $dirname;
734
735    next if $dirname =~ /^(|[a-z]:)$/i;
736    if (!-e $dirsofar)
737    {
738      my $store_umask = umask(0002);
739      my $mkdir_ok = mkdir ($dirsofar, 0777);
740      umask($store_umask);
741      if (!$mkdir_ok)
742      {
743        print STDERR "FileUtils::makeAllDirectories() could not create directory $dirsofar\n";
744        return;
745      }
746    }
747  }
748 return 1;
749}
750## makeAllDirectories()
751
752## @function makeDirectory()
753#
754sub makeDirectory
755{
756  my ($dir) = @_;
757
758  my $store_umask = umask(0002);
759  my $mkdir_ok = mkdir ($dir, 0777);
760  umask($store_umask);
761
762  if (!$mkdir_ok)
763  {
764    print STDERR "FileUtils::makeDirectory() could not create directory $dir\n";
765    return;
766  }
767}
768## makeDirectory()
769
770## @function modificationTime()
771#
772sub modificationTime
773{
774  my $path = shift(@_);
775  my @file_status = stat($path);
776  return $file_status[9];
777}
778## modificationTime()
779
780## @function moveDirectoryContents()
781#
782# Move the contents of source directory into target directory (as
783# opposed to merely replacing target dir with the src dir) This can
784# overwrite any files with duplicate names in the target but other
785# files and folders in the target will continue to exist
786#
787sub moveDirectoryContents
788{
789  my ($src_dir, $dest_dir) = @_;
790
791  # Obtain listing of all files within src_dir
792  # Note that readdir lists relative paths, as well as . and ..
793  opendir(DIR, "$src_dir");
794  my @files= readdir(DIR);
795  close(DIR);
796
797  my @full_path_files = ();
798  foreach my $file (@files)
799  {
800    # process all except . and ..
801    unless($file eq "." || $file eq "..")
802    {
803      my $dest_subdir = &filenameConcatenate($dest_dir, $file); # $file is still a relative path
804
805      # construct absolute paths
806      $file = &filenameConcatenate($src_dir, $file); # $file is now an absolute path
807
808      # Recurse on directories which have an equivalent in target dest_dir
809      # If $file is a directory that already exists in target $dest_dir,
810      # then a simple move operation will fail (definitely on Windows).
811      if(-d $file && -d $dest_subdir)
812      {
813        #print STDERR "**** $file is a directory also existing in target, its contents to be copied to $dest_subdir\n";
814        &moveDirectoryContents($file, $dest_subdir);
815
816        # now all content is moved across, delete empty dir in source folder
817        if(&isDirectoryEmpty($file))
818        {
819          if (!rmdir $file)
820          {
821            print STDERR "ERROR. FileUtils::moveDirectoryContents() couldn't remove directory $file\n";
822          }
823        }
824        # error
825        else
826        {
827          print STDERR "ERROR. FileUtils::moveDirectoryContents(): subfolder $file still non-empty after moving contents to $dest_subdir\n";
828        }
829      }
830      # process files and any directories that don't already exist with a simple move
831      else
832      {
833        push(@full_path_files, $file);
834      }
835    }
836  }
837
838  # create target toplevel folder or subfolders if they don't exist
839  if(!&directoryExists($dest_dir))
840  {
841    &makeDirectory($dest_dir);
842  }
843
844  #print STDERR "@@@@@ Copying files |".join(",", @full_path_files)."| to: $dest_dir\n";
845
846  # if non-empty, there's something to copy across
847  if(@full_path_files)
848  {
849    &moveFiles(@full_path_files, $dest_dir);
850  }
851}
852## moveDirectoryContents()
853
854## @function moveFiles()
855#
856# moves a file or a group of files
857#
858sub moveFiles
859{
860  my $dest = pop (@_);
861  my (@srcfiles) = @_;
862
863  # remove trailing slashes from source and destination files
864  $dest =~ s/[\\\/]+$//;
865  map {$_ =~ s/[\\\/]+$//;} @srcfiles;
866
867  # a few sanity checks
868  if (scalar (@srcfiles) == 0)
869  {
870    print STDERR "FileUtils::moveFiles() no destination directory given\n";
871    return;
872  }
873  elsif ((scalar (@srcfiles) > 1) && (!-d $dest))
874  {
875    print STDERR "FileUtils::moveFiles() if multiple source files are given the destination must be a directory\n";
876    return;
877  }
878
879  # move the files
880  foreach my $file (@srcfiles)
881  {
882    my $tempdest = $dest;
883    if (-d $tempdest)
884    {
885      my ($filename) = $file =~ /([^\\\/]+)$/;
886      $tempdest .= "/$filename";
887    }
888    if (!-e $file)
889    {
890      print STDERR "FileUtils::moveFiles() $file does not exist\n";
891    }
892    else
893    {
894      if(!rename ($file, $tempdest))
895      {
896        print STDERR "**** Failed to rename $file to $tempdest\n";
897        &File::Copy::copy($file, $tempdest);
898        &removeFiles($file);
899      }
900      # rename (partially) succeeded) but srcfile still exists after rename
901      elsif(-e $file)
902      {
903        #print STDERR "*** srcfile $file still exists after rename to $tempdest\n";
904        if(!-e $tempdest)
905        {
906          print STDERR "@@@@ ERROR: $tempdest does not exist\n";
907        }
908        # Sometimes the rename operation fails (as does
909        # File::Copy::move).  This turns out to be because the files
910        # are hardlinked.  Need to do a copy-delete in this case,
911        # however, the copy step is not necessary: the srcfile got
912        # renamed into tempdest, but srcfile itself still exists,
913        # delete it.  &File::Copy::copy($file, $tempdest);
914        &removeFiles($file);
915      }
916    }
917  }
918}
919## moveFiles()
920
921## @function openFileHandle()
922#
923sub openFileHandle
924{
925  my $path = shift(@_);
926  my $mode = shift(@_);
927  my $fh_ref = shift(@_);
928  my $encoding = shift(@_);
929  my $mode_symbol;
930  if ($mode eq 'w' || $mode eq '>')
931  {
932    $mode_symbol = '>';
933    $mode = 'writing';
934  }
935  elsif ($mode eq 'a' || $mode eq '>>')
936  {
937    $mode_symbol = '>>';
938    $mode = 'appending';
939  }
940  else
941  {
942    $mode_symbol = '<';
943    $mode = 'reading';
944  }
945  if (defined $encoding)
946  {
947    $mode_symbol .= ':' . $encoding;
948  }
949  return open($$fh_ref, $mode_symbol, $path);
950}
951## openFileHandle()
952
953
954## @function readDirectory()
955#
956sub readDirectory
957{
958  my $path = shift(@_);
959  my @files;
960  if (opendir(DH, $path))
961  {
962    @files = readdir(DH);
963    close(DH);
964  }
965  else
966  {
967    die("Error! Failed to open directory to list files: " . $path . "\n");
968  }
969  return \@files;
970}
971## readDirectory()
972
973## @function readUTF8File()
974#
975# read contents from a file containing UTF8.
976#
977# Parameter filename, the filepath to read from
978#
979sub readUTF8File
980{
981    my $filename = shift(@_);
982
983    print STDERR "@@@ Warning FileUtils::readFile() not yet implemented for parallel processing. Using regular version...\n";
984   
985    open(FIN,"<$filename") or die "FileUtils::readFile: Unable to open $filename for reading...ERROR: $!\n";
986
987    # decode the bytes in the file with UTF8 enc,
988    # to get unicode aware strings that represent utf8 chars
989    binmode(FIN,":utf8");
990   
991    my $contents;
992    # Read in the entire contents of the file in one hit
993    sysread(FIN, $contents, -s FIN);
994    close(FIN);
995    return $contents;   
996}
997## readUTF8File()
998
999## @function writeUTF8File()
1000#
1001# write UTF8 contents to a file.
1002#
1003# Parameter filename, the filepath to write to
1004# Parameter contentRef, a *reference* to the contents to write out
1005#
1006sub writeUTF8File
1007{
1008    my ($filename, $contentRef) = @_;
1009
1010    print STDERR "@@@ Warning FileUtils::writeFile() not yet implemented for parallel processing. Using regular version...\n";
1011
1012    open(FOUT, ">$filename") or die "FileUtils::writeFile: Unable to open $filename for writing out contents...ERROR: $!\n";
1013    # encode the unicode aware characters in the string as utf8
1014    # before writing out the resulting bytes
1015    binmode(FOUT,":utf8");
1016   
1017    print FOUT $$contentRef;
1018    close(FOUT);
1019}
1020## writeUTF8File()
1021
1022## @function removeFiles()
1023#
1024# removes files (but not directories)
1025#
1026sub removeFiles
1027{
1028  my (@files) = @_;
1029  my @filefiles = ();
1030
1031  # make sure the files we want to delete exist
1032  # and are regular files
1033  foreach my $file (@files)
1034  {
1035    if (!-e $file)
1036    {
1037      print STDERR "FileUtils::removeFiles() $file does not exist\n";
1038    }
1039    elsif ((!-f $file) && (!-l $file))
1040    {
1041      print STDERR "FileUtils::removeFiles() $file is not a regular (or symbolic) file\n";
1042    }
1043    else
1044    {
1045      push (@filefiles, $file);
1046    }
1047  }
1048
1049  # remove the files
1050  my $numremoved = unlink @filefiles;
1051
1052  # check to make sure all of them were removed
1053  if ($numremoved != scalar(@filefiles))
1054  {
1055    print STDERR "FileUtils::removeFiles() Not all files were removed\n";
1056  }
1057}
1058## removeFiles()
1059
1060## @function removeFilesDebug()
1061#
1062# removes files (but not directories) - can rename this to the default
1063# "rm" subroutine when debugging the deletion of individual files.
1064# Unused?
1065#
1066sub removeFilesDebug
1067{
1068  my (@files) = @_;
1069  my @filefiles = ();
1070
1071  # make sure the files we want to delete exist
1072  # and are regular files
1073  foreach my $file (@files)
1074  {
1075    if (!-e $file)
1076    {
1077      print STDERR "FileUtils::removeFilesDebug() " . $file . " does not exist\n";
1078    }
1079    elsif ((!-f $file) && (!-l $file))
1080    {
1081      print STDERR "FileUtils::removeFilesDebug() " . $file . " is not a regular (or symbolic) file\n";
1082    }
1083    # debug message
1084    else
1085    {
1086      unlink($file) or warn "Could not delete file " . $file . ": " . $! . "\n";
1087    }
1088  }
1089}
1090## removeFilesDebug()
1091
1092## @function removeFilesFiltered()
1093#
1094sub removeFilesFiltered
1095{
1096  my ($files,$file_accept_re,$file_reject_re) = @_;
1097
1098  #   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
1099  #   my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
1100  #   print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
1101
1102  my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
1103
1104  # recursively remove the files
1105  foreach my $file (@files_array)
1106  {
1107    $file =~ s/[\/\\]+$//; # remove trailing slashes
1108
1109    if (!-e $file)
1110    {
1111      print STDERR "FileUtils::removeFilesFiltered() $file does not exist\n";
1112    }
1113    # don't recurse down symbolic link
1114    elsif ((-d $file) && (!-l $file))
1115    {
1116      # get the contents of this directory
1117      if (!opendir (INDIR, $file))
1118      {
1119        print STDERR "FileUtils::removeFilesFiltered() could not open directory $file\n";
1120      }
1121      else
1122      {
1123        my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
1124        closedir (INDIR);
1125
1126        # remove all the files in this directory
1127        map {$_="$file/$_";} @filedir;
1128        &removeFilesFiltered(\@filedir,$file_accept_re,$file_reject_re);
1129
1130        if (!defined $file_accept_re && !defined $file_reject_re)
1131        {
1132          # remove this directory
1133          if (!rmdir $file)
1134          {
1135            print STDERR "FileUtils::removeFilesFiltered() couldn't remove directory $file\n";
1136          }
1137        }
1138      }
1139    }
1140    else
1141    {
1142      next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
1143
1144      if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/))
1145      {
1146        # remove this file
1147        &removeFiles($file);
1148      }
1149    }
1150  }
1151}
1152## removeFilesFiltered()
1153
1154## @function removeFilesRecursive()
1155#
1156# The equivalent of "rm -rf" with all the dangers therein
1157#
1158sub removeFilesRecursive
1159{
1160  my (@files) = @_;
1161
1162  # use the more general (but reterospectively written) function
1163  # filtered_rm_r function() with no accept or reject expressions
1164  &removeFilesFiltered(\@files,undef,undef);
1165}
1166## removeFilesRecursive()
1167
1168## @function sanitizePath()
1169#
1170sub sanitizePath
1171{
1172  my ($path) = @_;
1173
1174  # fortunately filename concatenate will perform all the double slash
1175  # removal and end slash removal we need, and in a protocol aware
1176  # fashion
1177  return &filenameConcatenate($path);
1178}
1179## sanitizePath()
1180
1181## @function softLink()
1182#
1183# make soft link to file if supported by OS, otherwise copy file
1184#
1185sub softLink
1186{
1187  my ($src, $dest, $ensure_paths_absolute) = @_;
1188
1189  # remove trailing slashes from source and destination files
1190  $src =~ s/[\\\/]+$//;
1191  $dest =~ s/[\\\/]+$//;
1192
1193  # Ensure file paths are absolute IF requested to do so
1194  # Soft_linking didn't work for relative paths
1195  if(defined $ensure_paths_absolute && $ensure_paths_absolute)
1196  {
1197    # We need to ensure that the src file is the absolute path
1198    # See http://perldoc.perl.org/File/Spec.html
1199    # it's relative
1200    if(!File::Spec->file_name_is_absolute( $src ))
1201    {
1202      $src = File::Spec->rel2abs($src); # make absolute
1203    }
1204    # Might as well ensure that the destination file's absolute path is used
1205    if(!File::Spec->file_name_is_absolute( $dest ))
1206    {
1207      $dest = File::Spec->rel2abs($dest); # make absolute
1208    }
1209  }
1210
1211  # a few sanity checks
1212  if (!-e $src)
1213  {
1214    print STDERR "FileUtils::softLink() source file $src does not exist\n";
1215    return 0;
1216  }
1217
1218  my $dest_dir = &File::Basename::dirname($dest);
1219  if (!-e $dest_dir)
1220  {
1221    &makeAllDirectories($dest_dir);
1222  }
1223
1224  if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin"))
1225  {
1226    # symlink not supported on windows
1227    &File::Copy::copy ($src, $dest);
1228  }
1229  elsif (!eval {symlink($src, $dest)})
1230  {
1231    print STDERR "FileUtils::softLink(): unable to create soft link.\n";
1232    return 0;
1233  }
1234  return 1;
1235}
1236## softLink()
1237
1238## @function synchronizeDirectory()
1239#
1240# updates a copy of a directory in some other part of the filesystem
1241# verbosity settings are: 0=low, 1=normal, 2=high
1242# both $fromdir and $todir should be absolute paths
1243#
1244sub synchronizeDirectory
1245{
1246  my ($fromdir, $todir, $verbosity) = @_;
1247  $verbosity = 1 unless defined $verbosity;
1248
1249  # use / for the directory separator, remove duplicate and
1250  # trailing slashes
1251  $fromdir=~s/[\\\/]+/\//g;
1252  $fromdir=~s/[\\\/]+$//;
1253  $todir=~s/[\\\/]+/\//g;
1254  $todir=~s/[\\\/]+$//;
1255
1256  &makeAllDirectories($todir);
1257
1258  # get the directories in ascending order
1259  if (!opendir (FROMDIR, $fromdir))
1260  {
1261    print STDERR "FileUtils::synchronizeDirectory() could not read directory $fromdir\n";
1262    return;
1263  }
1264  my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
1265  closedir (FROMDIR);
1266
1267  if (!opendir (TODIR, $todir))
1268  {
1269    print STDERR "FileUtils::synchronizeDirectory() could not read directory $todir\n";
1270    return;
1271  }
1272  my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
1273  closedir (TODIR);
1274
1275  my $fromi = 0;
1276  my $toi = 0;
1277
1278  while ($fromi < scalar(@fromdir) || $toi < scalar(@todir))
1279  {
1280    #   print "fromi: $fromi toi: $toi\n";
1281
1282    # see if we should delete a file/directory
1283    # this should happen if the file/directory
1284    # is not in the from list or if its a different
1285    # size, or has an older timestamp
1286    if ($toi < scalar(@todir))
1287    {
1288      if (($fromi >= scalar(@fromdir)) || ($todir[$toi] lt $fromdir[$fromi] || ($todir[$toi] eq $fromdir[$fromi] && &differentFiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]", $verbosity))))
1289      {
1290
1291        # the files are different
1292        &removeFilesRecursive("$todir/$todir[$toi]");
1293        splice(@todir, $toi, 1); # $toi stays the same
1294
1295      }
1296      elsif ($todir[$toi] eq $fromdir[$fromi])
1297      {
1298        # the files are the same
1299        # if it is a directory, check its contents
1300        if (-d "$todir/$todir[$toi]")
1301        {
1302          &synchronizeDirectory("$fromdir/$fromdir[$fromi]", "$todir/$todir[$toi]", $verbosity);
1303        }
1304
1305        $toi++;
1306        $fromi++;
1307        next;
1308      }
1309    }
1310
1311    # see if we should insert a file/directory
1312    # we should insert a file/directory if there
1313    # is no tofiles left or if the tofile does not exist
1314    if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) || $todir[$toi] gt $fromdir[$fromi]))
1315    {
1316      &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
1317      splice (@todir, $toi, 0, $fromdir[$fromi]);
1318
1319      $toi++;
1320      $fromi++;
1321    }
1322  }
1323}
1324## synchronizeDirectory()
1325
13261;
Note: See TracBrowser for help on using the browser.