root/main/trunk/greenstone2/perllib/util.pm @ 25577

Revision 25577, 51.5 KB (checked in by ak19, 7 years ago)

Moved some more general methods from activate.pl to util.pm and in the recently added sub mv_dir_contents, use rmdir to remove empty dirs rather than using rm_r.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
38
39# removes files (but not directories)
40sub rm {
41    my (@files) = @_;
42
43    my @filefiles = ();
44
45    # make sure the files we want to delete exist
46    # and are regular files
47    foreach my $file (@files) {
48    if (!-e $file) {
49        print STDERR "util::rm $file does not exist\n";
50    } elsif ((!-f $file) && (!-l $file)) {
51        print STDERR "util::rm $file is not a regular (or symbolic) file\n";
52    } else {
53        push (@filefiles, $file);
54    }
55    }
56   
57    # remove the files
58    my $numremoved = unlink @filefiles;
59
60    # check to make sure all of them were removed
61    if ($numremoved != scalar(@filefiles)) {
62    print STDERR "util::rm Not all files were removed\n";
63    }
64}
65
66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine  when debugging the deletion of individual files.
68sub rm_debug {
69    my (@files) = @_;
70    my @filefiles = ();
71
72    # make sure the files we want to delete exist
73    # and are regular files
74    foreach my $file (@files) {
75        if (!-e $file) {
76            print STDERR "util::rm $file does not exist\n";
77        } elsif ((!-f $file) && (!-l $file)) {
78            print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79        } else { # debug message
80            unlink($file) or warn "Could not delete file $file: $!\n";
81        }
82    }
83}
84
85
86# recursive removal
87sub filtered_rm_r {
88    my ($files,$file_accept_re,$file_reject_re) = @_;
89
90#   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91#   my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92#   print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93   
94    my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
96    # recursively remove the files
97    foreach my $file (@files_array) {
98    $file =~ s/[\/\\]+$//; # remove trailing slashes
99   
100    if (!-e $file) {
101        print STDERR "util::filtered_rm_r $file does not exist\n";
102
103    } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
104        # get the contents of this directory
105        if (!opendir (INDIR, $file)) {
106        print STDERR "util::filtered_rm_r could not open directory $file\n";
107        } else {
108        my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109        closedir (INDIR);
110               
111        # remove all the files in this directory
112        map {$_="$file/$_";} @filedir;
113        &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
114
115        if (!defined $file_accept_re && !defined $file_reject_re) {
116            # remove this directory
117            if (!rmdir $file) {
118            print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119            }
120        }
121        }
122    } else {
123        next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
124
125        if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126        # remove this file 
127        &rm ($file);
128        }
129    }
130    }
131}
132
133
134# recursive removal
135sub rm_r {
136    my (@files) = @_;
137   
138    # use the more general (but reterospectively written function
139    # filtered_rm_r function()
140
141    filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
147# moves a file or a group of files
148sub mv {
149    my $dest = pop (@_);
150    my (@srcfiles) = @_;
151
152    # remove trailing slashes from source and destination files
153    $dest =~ s/[\\\/]+$//;
154    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156    # a few sanity checks
157    if (scalar (@srcfiles) == 0) {
158    print STDERR "util::mv no destination directory given\n";
159    return;
160    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161    print STDERR "util::mv if multiple source files are given the ".
162        "destination must be a directory\n";
163    return;
164    }
165
166    # move the files
167    foreach my $file (@srcfiles) {
168    my $tempdest = $dest;
169    if (-d $tempdest) {
170        my ($filename) = $file =~ /([^\\\/]+)$/;
171        $tempdest .= "/$filename";
172    }
173    if (!-e $file) {
174        print STDERR "util::mv $file does not exist\n";
175    } else {
176        rename ($file, $tempdest);
177    }
178    }
179}
180
181# Move the contents of source directory into target directory
182# (as opposed to merely replacing target dir with the src dir)
183# This can overwrite any files with duplicate names in the target
184# but other files and folders in the target will continue to exist
185sub mv_dir_contents {
186    my ($src_dir, $dest_dir) = @_;
187   
188    # Obtain listing of all files within src_dir
189    # Note that readdir lists relative paths, as well as . and ..
190    opendir(DIR, "$src_dir");
191    my @files= readdir(DIR);
192    close(DIR);
193   
194    # construct absolute paths
195    my @full_path_files = ();
196    foreach my $file (@files) {
197        # process all except . and ..
198        unless($file eq "." || $file eq "..") {
199            my $tailname = $file;
200            $file = &filename_cat($src_dir, $file);
201               
202            if(-d $file) { # recursion on directories
203                #print STDERR "**** $file is a directory, to be copied to $dest_dir\\$tailname\n";
204                &mv_dir_contents($file, &filename_cat($dest_dir, $tailname));
205               
206                # now all content is moved across, delete empty dir in source folder
207                if(&is_dir_empty($file)) {
208                    if (!rmdir $file) {
209                        print STDERR "util::mv_dir_contents couldn't remove directory $file\n";
210                    }
211                } else { # error
212                    print STDERR "ERROR. util::mv_dir_contents: subfolder $file still non-empty after moving contents to $dest_dir\n";
213                }
214            } else { # process files with a simple move
215                push(@full_path_files, $file);
216            }           
217        }
218    }
219   
220    if(!&dir_exists($dest_dir)) { # create target toplevel folder or subfolders if they don't exist
221        &mk_dir($dest_dir);
222    }
223   
224    #print STDERR "@@@@@ Copying files to: $dest_dir\n";
225    if(@full_path_files) { # if non-empty, there's something to copy across
226        &mv(@full_path_files, $dest_dir);
227    }
228}
229
230
231# copies a file or a group of files
232sub cp {
233    my $dest = pop (@_);
234    my (@srcfiles) = @_;
235
236    # remove trailing slashes from source and destination files
237    $dest =~ s/[\\\/]+$//;
238    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
239
240    # a few sanity checks
241    if (scalar (@srcfiles) == 0) {
242    print STDERR "util::cp no destination directory given\n";
243    return;
244    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
245    print STDERR "util::cp if multiple source files are given the ".
246        "destination must be a directory\n";
247    return;
248    }
249
250    # copy the files
251    foreach my $file (@srcfiles) {
252    my $tempdest = $dest;
253    if (-d $tempdest) {
254        my ($filename) = $file =~ /([^\\\/]+)$/;
255        $tempdest .= "/$filename";
256    }
257    if (!-e $file) {
258        print STDERR "util::cp $file does not exist\n";
259    } elsif (!-f $file) {
260        print STDERR "util::cp $file is not a plain file\n";
261    } else {
262        &File::Copy::copy ($file, $tempdest);
263    }
264    }
265}
266
267
268
269# recursively copies a file or group of files
270# syntax: cp_r (sourcefiles, destination directory)
271# destination must be a directory - to copy one file to
272# another use cp instead
273sub cp_r {
274    my $dest = pop (@_);
275    my (@srcfiles) = @_;
276
277    # a few sanity checks
278    if (scalar (@srcfiles) == 0) {
279    print STDERR "util::cp_r no destination directory given\n";
280    return;
281    } elsif (-f $dest) {
282    print STDERR "util::cp_r destination must be a directory\n";
283    return;
284    }
285   
286    # create destination directory if it doesn't exist already
287    if (! -d $dest) {
288    my $store_umask = umask(0002);
289    mkdir ($dest, 0777);
290    umask($store_umask);
291    }
292
293    # copy the files
294    foreach my $file (@srcfiles) {
295
296    if (!-e $file) {
297        print STDERR "util::cp_r $file does not exist\n";
298
299    } elsif (-d $file) {
300        # make the new directory
301        my ($filename) = $file =~ /([^\\\/]*)$/;
302        $dest = &util::filename_cat ($dest, $filename);
303        my $store_umask = umask(0002);
304        mkdir ($dest, 0777);
305        umask($store_umask);
306
307        # get the contents of this directory
308        if (!opendir (INDIR, $file)) {
309        print STDERR "util::cp_r could not open directory $file\n";
310        } else {
311        my @filedir = readdir (INDIR);
312        closedir (INDIR);
313        foreach my $f (@filedir) {
314            next if $f =~ /^\.\.?$/;
315            # copy all the files in this directory
316            my $ff = &util::filename_cat ($file, $f);
317            &cp_r ($ff, $dest);
318        }
319        }
320
321    } else {
322        &cp($file, $dest);
323    }
324    }
325}
326# recursively copies a file or group of files
327# syntax: cp_r (sourcefiles, destination directory)
328# destination must be a directory - to copy one file to
329# another use cp instead
330sub cp_r_nosvn {
331    my $dest = pop (@_);
332    my (@srcfiles) = @_;
333
334    # a few sanity checks
335    if (scalar (@srcfiles) == 0) {
336    print STDERR "util::cp_r no destination directory given\n";
337    return;
338    } elsif (-f $dest) {
339    print STDERR "util::cp_r destination must be a directory\n";
340    return;
341    }
342   
343    # create destination directory if it doesn't exist already
344    if (! -d $dest) {
345    my $store_umask = umask(0002);
346    mkdir ($dest, 0777);
347    umask($store_umask);
348    }
349
350    # copy the files
351    foreach my $file (@srcfiles) {
352
353    if (!-e $file) {
354        print STDERR "util::cp_r $file does not exist\n";
355
356    } elsif (-d $file) {
357        # make the new directory
358        my ($filename) = $file =~ /([^\\\/]*)$/;
359        $dest = &util::filename_cat ($dest, $filename);
360        my $store_umask = umask(0002);
361        mkdir ($dest, 0777);
362        umask($store_umask);
363
364        # get the contents of this directory
365        if (!opendir (INDIR, $file)) {
366        print STDERR "util::cp_r could not open directory $file\n";
367        } else {
368        my @filedir = readdir (INDIR);
369        closedir (INDIR);
370        foreach my $f (@filedir) {
371            next if $f =~ /^\.\.?$/;
372            next if $f =~ /^\.svn$/;
373            # copy all the files in this directory
374            my $ff = &util::filename_cat ($file, $f);
375            &cp_r ($ff, $dest);
376        }
377        }
378
379    } else {
380        &cp($file, $dest);
381    }
382    }
383}
384
385# copies a directory and its contents, excluding subdirectories, into a new directory
386sub cp_r_toplevel {
387    my $dest = pop (@_);
388    my (@srcfiles) = @_;
389
390    # a few sanity checks
391    if (scalar (@srcfiles) == 0) {
392    print STDERR "util::cp_r no destination directory given\n";
393    return;
394    } elsif (-f $dest) {
395    print STDERR "util::cp_r destination must be a directory\n";
396    return;
397    }
398   
399    # create destination directory if it doesn't exist already
400    if (! -d $dest) {
401    my $store_umask = umask(0002);
402    mkdir ($dest, 0777);
403    umask($store_umask);
404    }
405
406    # copy the files
407    foreach my $file (@srcfiles) {
408
409    if (!-e $file) {
410        print STDERR "util::cp_r $file does not exist\n";
411
412    } elsif (-d $file) {
413        # make the new directory
414        my ($filename) = $file =~ /([^\\\/]*)$/;
415        $dest = &util::filename_cat ($dest, $filename);
416        my $store_umask = umask(0002);
417        mkdir ($dest, 0777);
418        umask($store_umask);
419
420        # get the contents of this directory
421        if (!opendir (INDIR, $file)) {
422        print STDERR "util::cp_r could not open directory $file\n";
423        } else {
424        my @filedir = readdir (INDIR);
425        closedir (INDIR);
426        foreach my $f (@filedir) {
427            next if $f =~ /^\.\.?$/;
428           
429            # copy all the files in this directory, but not directories
430            my $ff = &util::filename_cat ($file, $f);
431            if (-f $ff) {
432            &cp($ff, $dest);
433            #&cp_r ($ff, $dest);
434            }
435        }
436        }
437
438    } else {
439        &cp($file, $dest);
440    }
441    }
442}
443
444sub mk_dir {
445    my ($dir) = @_;
446
447    my $store_umask = umask(0002);
448    my $mkdir_ok = mkdir ($dir, 0777);
449    umask($store_umask);
450   
451    if (!$mkdir_ok)
452    {
453    print STDERR "util::mk_dir could not create directory $dir\n";
454    return;
455    }
456}
457
458# in case anyone cares - I did some testing (using perls Benchmark module)
459# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
460# slightly faster (surprisingly) - Stefan.
461sub mk_all_dir {
462    my ($dir) = @_;
463
464    # use / for the directory separator, remove duplicate and
465    # trailing slashes
466    $dir=~s/[\\\/]+/\//g;
467    $dir=~s/[\\\/]+$//;
468
469    # make sure the cache directory exists
470    my $dirsofar = "";
471    my $first = 1;
472    foreach my $dirname (split ("/", $dir)) {
473    $dirsofar .= "/" unless $first;
474    $first = 0;
475
476    $dirsofar .= $dirname;
477
478    next if $dirname =~ /^(|[a-z]:)$/i;
479    if (!-e $dirsofar)
480        {
481        my $store_umask = umask(0002);
482        my $mkdir_ok = mkdir ($dirsofar, 0777);
483        umask($store_umask);
484        if (!$mkdir_ok)
485        {
486            print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
487            return;
488        }
489        }
490    }
491}
492
493# make hard link to file if supported by OS, otherwise copy the file
494sub hard_link {
495    my ($src, $dest, $verbosity) = @_;
496
497    # remove trailing slashes from source and destination files
498    $src =~ s/[\\\/]+$//;
499    $dest =~ s/[\\\/]+$//;
500
501##    print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
502    # a few sanity checks
503    if (-e $dest) {
504    # destination file already exists
505    return;
506    }
507    elsif (!-e $src) {
508    print STDERR "util::hard_link source file \"$src\" does not exist\n";
509    return 1;
510    }
511    elsif (-d $src) {
512    print STDERR "util::hard_link source \"$src\" is a directory\n";
513    return 1;
514    }
515
516    my $dest_dir = &File::Basename::dirname($dest);
517    mk_all_dir($dest_dir) if (!-e $dest_dir);
518
519
520    if (!link($src, $dest)) {
521    if ((!defined $verbosity) || ($verbosity>2)) {
522        print STDERR "util::hard_link: unable to create hard link. ";
523        print STDERR " Copying file: $src -> $dest\n";
524    }
525    &File::Copy::copy ($src, $dest);
526    }
527    return 0;
528}
529
530# make soft link to file if supported by OS, otherwise copy file
531sub soft_link {
532    my ($src, $dest, $ensure_paths_absolute) = @_;
533
534    # remove trailing slashes from source and destination files
535    $src =~ s/[\\\/]+$//;
536    $dest =~ s/[\\\/]+$//;
537
538    # Ensure file paths are absolute IF requested to do so
539    # Soft_linking didn't work for relative paths
540    if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
541    # We need to ensure that the src file is the absolute path
542    # See http://perldoc.perl.org/File/Spec.html
543    if(!File::Spec->file_name_is_absolute( $src ))  { # it's relative
544        $src = File::Spec->rel2abs($src); # make absolute
545    }
546    # Might as well ensure that the destination file's absolute path is used
547    if(!File::Spec->file_name_is_absolute( $dest )) {
548        $dest = File::Spec->rel2abs($dest); # make absolute
549    }
550    }
551
552    # a few sanity checks
553    if (!-e $src) {
554    print STDERR "util::soft_link source file $src does not exist\n";
555    return 0;
556    }
557
558    my $dest_dir = &File::Basename::dirname($dest);
559    mk_all_dir($dest_dir) if (!-e $dest_dir);
560
561    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
562       
563    # symlink not supported on windows
564    &File::Copy::copy ($src, $dest);
565
566    } elsif (!eval {symlink($src, $dest)}) {
567    print STDERR "util::soft_link: unable to create soft link.\n";
568    return 0;
569    }
570
571    return 1;
572}
573
574# Primarily for filenames generated by processing
575# content of HTML files (which are mapped to UTF-8 internally)
576#
577# To turn this into an octet string that really exists on the file
578# system:
579# 1. don't need to do anything special for Unix-based systems
580#   (as underlying file system is byte-code)
581# 2. need to map to short DOS filenames for Windows
582
583sub utf8_to_real_filename
584{
585    my ($utf8_filename) = @_;
586
587    my $real_filename;
588
589    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
590    require Win32;
591
592    print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
593
594    my $unicode_filename = decode("utf8",$utf8_filename);
595    $real_filename = Win32::GetShortPathName($unicode_filename);
596    }
597    else {
598    $real_filename = $utf8_filename;
599    }
600
601    return $real_filename;
602}
603
604
605sub fd_exists
606{
607    my $filename_full_path = shift @_;
608    my $test_op = shift @_ || "-e";
609
610    # By default tests for existance of file or directory (-e)
611    # Can be made more specific by providing second parameter (e.g. -f or -d)
612
613    my $exists = 0;
614
615    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
616    require Win32;
617    my $filename_short_path = Win32::GetShortPathName($filename_full_path);
618    if (!defined $filename_short_path) {
619        # Was probably still in UTF8 form (not what is needed on Windows)
620        my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
621        if (defined $unicode_filename_full_path) {
622        $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
623        }
624    }
625    $filename_full_path = $filename_short_path;
626    }
627
628    if (defined $filename_full_path) {
629    $exists = eval "($test_op \$filename_full_path)";
630    }
631
632    return $exists;
633}
634
635sub file_exists
636{
637    my ($filename_full_path) = @_;
638
639    return fd_exists($filename_full_path,"-f");
640}
641
642sub dir_exists
643{
644    my ($filename_full_path) = @_;
645
646    return fd_exists($filename_full_path,"-d");
647}
648
649
650
651# updates a copy of a directory in some other part of the filesystem
652# verbosity settings are: 0=low, 1=normal, 2=high
653# both $fromdir and $todir should be absolute paths
654sub cachedir {
655    my ($fromdir, $todir, $verbosity) = @_;
656    $verbosity = 1 unless defined $verbosity;
657
658    # use / for the directory separator, remove duplicate and
659    # trailing slashes
660    $fromdir=~s/[\\\/]+/\//g;
661    $fromdir=~s/[\\\/]+$//;
662    $todir=~s/[\\\/]+/\//g;
663    $todir=~s/[\\\/]+$//;
664
665    &mk_all_dir ($todir);
666
667    # get the directories in ascending order
668    if (!opendir (FROMDIR, $fromdir)) {
669    print STDERR "util::cachedir could not read directory $fromdir\n";
670    return;
671    }
672    my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
673    closedir (FROMDIR);
674
675    if (!opendir (TODIR, $todir)) {
676    print STDERR "util::cacedir could not read directory $todir\n";
677    return;
678    }
679    my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
680    closedir (TODIR);
681
682    my $fromi = 0;
683    my $toi = 0;
684           
685    while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
686#   print "fromi: $fromi toi: $toi\n";
687
688    # see if we should delete a file/directory
689    # this should happen if the file/directory
690    # is not in the from list or if its a different
691    # size, or has an older timestamp
692    if ($toi < scalar(@todir)) {
693        if (($fromi >= scalar(@fromdir)) ||
694        ($todir[$toi] lt $fromdir[$fromi] ||
695         ($todir[$toi] eq $fromdir[$fromi] &&
696          &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
697                  $verbosity)))) {
698
699        # the files are different
700        &rm_r("$todir/$todir[$toi]");
701        splice(@todir, $toi, 1); # $toi stays the same
702
703        } elsif ($todir[$toi] eq $fromdir[$fromi]) {
704        # the files are the same
705        # if it is a directory, check its contents
706        if (-d "$todir/$todir[$toi]") {
707            &cachedir ("$fromdir/$fromdir[$fromi]",
708                   "$todir/$todir[$toi]", $verbosity);
709        }
710
711        $toi++;
712        $fromi++;
713        next;
714        }
715    }
716 
717    # see if we should insert a file/directory
718    # we should insert a file/directory if there
719    # is no tofiles left or if the tofile does not exist
720    if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
721                      $todir[$toi] gt $fromdir[$fromi])) {
722        &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
723        splice (@todir, $toi, 0, $fromdir[$fromi]);
724
725        $toi++;
726        $fromi++;
727    }
728    }
729}
730
731# this function returns -1 if either file is not found
732# assumes that $file1 and $file2 are absolute file names or
733# in the current directory
734# $file2 is allowed to be newer than $file1
735sub differentfiles {
736    my ($file1, $file2, $verbosity) = @_;
737    $verbosity = 1 unless defined $verbosity;
738
739    $file1 =~ s/\/+$//;
740    $file2 =~ s/\/+$//;
741   
742    my ($file1name) = $file1 =~ /\/([^\/]*)$/;
743    my ($file2name) = $file2 =~ /\/([^\/]*)$/;
744
745    return -1 unless (-e $file1 && -e $file2);
746    if ($file1name ne $file2name) {
747    print STDERR "filenames are not the same\n" if ($verbosity >= 2);
748    return 1;
749    }
750
751    my @file1stat = stat ($file1);
752    my @file2stat = stat ($file2);
753
754    if (-d $file1) {
755    if (! -d $file2) {
756        print STDERR "one file is a directory\n" if ($verbosity >= 2);
757        return 1;
758    }
759    return 0;
760    }
761
762    # both must be regular files
763    unless (-f $file1 && -f $file2) {
764    print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
765    return 1;
766    }
767
768    # the size of the files must be the same
769    if ($file1stat[7] != $file2stat[7]) {
770    print STDERR "different sized files\n" if ($verbosity >= 2);
771    return 1;
772    }
773
774    # the second file cannot be older than the first
775    if ($file1stat[9] > $file2stat[9]) {
776    print STDERR "file is older\n" if ($verbosity >= 2);
777    return 1;
778    }
779
780    return 0;
781}
782
783
784sub get_tmp_filename
785{
786    my $file_ext = shift(@_) || undef;
787
788    my $opt_dot_file_ext = "";
789    if (defined $file_ext) {
790    if ($file_ext !~ m/\./) {
791        # no dot, so needs one added in at start
792        $opt_dot_file_ext = ".$file_ext"
793    }
794    else {
795        # allow for "extensions" such as _metadata.txt to be handled
796        # gracefully
797        $opt_dot_file_ext = $file_ext;
798    }
799    }
800
801    my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
802    &mk_all_dir ($tmpdir) unless -e $tmpdir;
803
804    my $count = 1000;
805    my $rand = int(rand $count);
806    my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
807
808    while (-e $full_tmp_filename) {
809    $rand = int(rand $count);
810    $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
811    $count++;
812    }
813   
814    return $full_tmp_filename;
815}
816
817sub get_timestamped_tmp_folder
818{
819
820    my $tmp_dirname;
821    if(defined $ENV{'GSDLCOLLECTDIR'}) {
822    $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
823    } elsif(defined $ENV{'GSDLHOME'}) {
824    $tmp_dirname = $ENV{'GSDLHOME'};
825    } else {
826    return undef;
827    }
828
829    $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
830    &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
831
832    # add the timestamp into the path otherwise we can run into problems
833    # if documents have the same name
834    my $timestamp = time;
835    my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
836    $tmp_dirname = $time_tmp_dirname;
837    my $i = 1;
838    while (-e $tmp_dirname) {
839    $tmp_dirname = "$time_tmp_dirname$i";
840    $i++;
841    }
842    &util::mk_dir($tmp_dirname);
843 
844    return $tmp_dirname;
845}
846
847sub get_timestamped_tmp_filename_in_collection
848{
849
850    my ($input_filename, $output_ext) = @_;
851    # derive tmp filename from input filename
852    my ($tailname, $dirname, $suffix)
853    = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
854
855    # softlink to collection tmp dir
856    my $tmp_dirname = &util::get_timestamped_tmp_folder();
857    $tmp_dirname = $dirname unless defined $tmp_dirname;
858
859    # following two steps copied from ConvertBinaryFile
860    # do we need them?? can't use them as is, as they use plugin methods.
861
862    #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
863
864    # URLEncode this since htmls with images where the html filename is utf8 don't seem
865    # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
866    # files on the filesystem.
867    #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
868    if (defined $output_ext) {
869    $output_ext = ".$output_ext"; # add the dot
870    } else {
871    $output_ext = $suffix;
872    }
873    $output_ext= lc($output_ext);
874    my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
875   
876    return $tmp_filename;
877}
878
879sub get_toplevel_tmp_dir
880{
881    return filename_cat($ENV{'GSDLHOME'}, "tmp");
882}
883
884
885sub filename_to_regex {
886    my $filename = shift (@_);
887
888    # need to make single backslashes double so that regex works
889    $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);   
890   
891    # note that the first part of a substitution is a regex, so RE chars need to be escaped,
892    # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
893    $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
894    $filename =~ s@\(@\\(@g; # escape brackets
895    $filename =~ s@\)@\\)@g; # escape brackets
896    $filename =~ s@\[@\\[@g; # escape brackets
897    $filename =~ s@\]@\\]@g; # escape brackets
898   
899    return $filename;
900}
901
902sub unregex_filename {
903    my $filename = shift (@_);
904
905    # need to put doubled backslashes for regex back to single
906    $filename =~ s/\\\./\./g; # remove RE syntax for .
907    $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
908    $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
909    $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
910    $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
911   
912    # \\ goes to \
913    # This is the last step in reverse mirroring the order of steps in filename_to_regex()
914    $filename =~ s/\\\\/\\/g; # remove RE syntax for \   
915    return $filename;
916}
917
918sub filename_cat {
919    my $first_file = shift(@_);
920    my (@filenames) = @_;
921
922#   Useful for debugging
923#     -- might make sense to call caller(0) rather than (1)??
924#   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
925#   print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
926   
927    # If first_file is not null or empty, then add it back into the list
928    if (defined $first_file && $first_file =~ /\S/) {
929    unshift(@filenames, $first_file);
930    }
931
932    my $filename = join("/", @filenames);
933
934    # remove duplicate slashes and remove the last slash
935    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
936    $filename =~ s/[\\\/]+/\\/g;
937    } else {
938    $filename =~ s/[\/]+/\//g;
939    # DB: want a filename abc\de.html to remain like this
940    }
941    $filename =~ s/[\\\/]$//;
942
943    return $filename;
944}
945
946
947sub pathname_cat {
948    my $first_path = shift(@_);
949    my (@pathnames) = @_;
950
951    # If first_path is not null or empty, then add it back into the list
952    if (defined $first_path && $first_path =~ /\S/) {
953    unshift(@pathnames, $first_path);
954    }
955
956    my $join_char;
957    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
958    $join_char = ";";
959    } else {
960    $join_char = ":";
961    }
962
963    my $pathname = join($join_char, @pathnames);
964
965    # remove duplicate slashes
966    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
967    $pathname =~ s/[\\\/]+/\\/g;
968    } else {
969    $pathname =~ s/[\/]+/\//g;
970    # DB: want a pathname abc\de.html to remain like this
971    }
972
973    return $pathname;
974}
975
976
977sub tidy_up_oid {
978    my ($OID) = @_;
979    if ($OID =~ /\./) {
980    print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
981    $OID =~ s/\.//g; #remove any periods
982    }
983    if ($OID =~ /^\s.*\s$/) {
984    print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
985    # remove starting and trailing whitespace
986    $OID =~ s/^\s+//;
987    $OID =~ s/\s+$//;
988    }
989    if ($OID =~ /^[\d]*$/) {
990    print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
991    $OID = "D" . $OID;
992    }       
993   
994    return $OID;
995}
996sub envvar_prepend {
997    my ($var,$val) = @_;
998
999    # do not prepend any value/path that's already in the environment variable
1000   
1001    my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1002    if (!defined($ENV{$var})) {
1003    $ENV{$var} = "$val";
1004    }
1005    elsif($ENV{$var} !~ m/$escaped_val/) {
1006    $ENV{$var} = "$val;".$ENV{$var};
1007    }
1008}
1009
1010sub envvar_append {
1011    my ($var,$val) = @_;
1012   
1013    # do not append any value/path that's already in the environment variable
1014   
1015    my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
1016    if (!defined($ENV{$var})) {
1017    $ENV{$var} = "$val";
1018    }
1019    elsif($ENV{$var} !~ m/$escaped_val/) {
1020    $ENV{$var} .= ";$val";
1021    }
1022}
1023
1024
1025# splits a filename into a prefix and a tail extension using the tail_re, or
1026# if that fails, splits on the file_extension . (dot)
1027sub get_prefix_and_tail_by_regex {
1028
1029    my ($filename,$tail_re) = @_;
1030   
1031    my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
1032    if ((!defined $file_prefix) || (!defined $file_ext)) {
1033    ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
1034    }
1035
1036    return ($file_prefix,$file_ext);
1037}
1038
1039# get full path and file only path from a base_dir (which may be empty) and
1040# file (which may contain directories)
1041sub get_full_filenames {
1042    my ($base_dir, $file) = @_;
1043   
1044    my $filename_full_path = $file;
1045    # add on directory if present
1046    $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
1047   
1048    my $filename_no_path = $file;
1049
1050    # remove directory if present
1051    $filename_no_path =~ s/^.*[\/\\]//;
1052    return ($filename_full_path, $filename_no_path);
1053}
1054
1055# returns the path of a file without the filename -- ie. the directory the file is in
1056sub filename_head {
1057    my $filename = shift(@_);
1058
1059    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1060    $filename =~ s/[^\\\\]*$//;
1061    }
1062    else {
1063    $filename =~ s/[^\\\/]*$//;
1064    }
1065
1066    return $filename;
1067}
1068
1069
1070
1071# returns 1 if filename1 and filename2 point to the same
1072# file or directory
1073sub filenames_equal {
1074    my ($filename1, $filename2) = @_;
1075
1076    # use filename_cat to clean up trailing slashes and
1077    # multiple slashes
1078    $filename1 = filename_cat ($filename1);
1079    $filename2 = filename_cat ($filename2);
1080
1081    # filenames not case sensitive on windows
1082    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1083    $filename1 =~ tr/[A-Z]/[a-z]/;
1084    $filename2 =~ tr/[A-Z]/[a-z]/;
1085    }
1086    return 1 if $filename1 eq $filename2;
1087    return 0;
1088}
1089
1090# If filename is relative to within_dir, returns the relative path of filename to that directory
1091# with slashes in the filename returned as they were in the original (absolute) filename.
1092sub filename_within_directory
1093{
1094    my ($filename,$within_dir) = @_;
1095   
1096    if ($within_dir !~ m/[\/\\]$/) {
1097    my $dirsep = &util::get_dirsep();
1098    $within_dir .= $dirsep;
1099    }
1100   
1101    $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets   
1102    if ($filename =~ m/^$within_dir(.*)$/) {
1103    $filename = $1;
1104    }
1105   
1106    return $filename;
1107}
1108
1109# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
1110# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
1111# The subpath returned will also be a URL type filename.
1112sub filename_within_directory_url_format
1113{
1114    my ($filename,$within_dir) = @_;
1115   
1116    # convert parameters only to / slashes if Windows
1117   
1118    my $filename_urlformat = &filepath_to_url_format($filename);
1119    my $within_dir_urlformat = &filepath_to_url_format($within_dir);
1120
1121    #if ($within_dir_urlformat !~ m/\/$/) {
1122        # make sure directory ends with a slash
1123        #$within_dir_urlformat .= "/";
1124    #}
1125   
1126    my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
1127   
1128    #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
1129   
1130    # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
1131    if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
1132        $filename_urlformat = $1;
1133    }
1134   
1135    return $filename_urlformat;
1136}
1137
1138# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
1139# since on Linux it doesn't represent a file separator but an escape char).
1140sub filepath_to_url_format
1141{
1142    my ($filepath) = @_;
1143    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1144        # Only need to worry about Windows, as Unix style directories already in url-format
1145        # Convert Windows style \ => /
1146        $filepath =~ s@\\@/@g;     
1147    }
1148    return $filepath;
1149}
1150
1151# regex filepaths on windows may include \\ as path separator. Convert \\ to /
1152sub filepath_regex_to_url_format
1153{
1154    my ($filepath) = @_;
1155    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1156    # Only need to worry about Windows, as Unix style directories already in url-format
1157    # Convert Windows style \\ => /
1158    $filepath =~ s@\\\\@/@g;       
1159    }
1160    return $filepath;
1161   
1162}
1163
1164# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
1165# and ignores trailing /
1166# returns (file, dirs) dirs will be empty if no subdirs
1167sub url_fileparse
1168{
1169    my ($filepath) = @_;
1170    # remove trailing /
1171    $filepath =~ s@/$@@;
1172    if ($filepath !~ m@/@) {
1173    return ($filepath, "");
1174    }
1175    my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
1176    return ($file, $dirs);
1177   
1178}
1179
1180
1181sub filename_within_collection
1182{
1183    my ($filename) = @_;
1184
1185    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1186   
1187    if (defined $collect_dir) {
1188
1189    # if from within GSDLCOLLECTDIR, then remove directory prefix
1190    # so source_filename is realative to it.  This is done to aid
1191    # portability, i.e. the collection can be moved to somewhere
1192    # else on the file system and the archives directory will still
1193    # work.  This is needed, for example in the applet version of
1194    # GLI where GSDLHOME/collect on the server will be different to
1195    # the collect directory of the remove user.  Of course,
1196    # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1197    # it back into a full pathname.
1198
1199    $filename = filename_within_directory($filename,$collect_dir);
1200    }
1201   
1202    return $filename;
1203}
1204
1205sub prettyprint_file
1206{
1207    my ($base_dir,$file,$gli) = @_;
1208
1209    my $filename_full_path = &util::filename_cat($base_dir,$file);
1210
1211    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1212    require Win32;
1213
1214    # For some reason base_dir in the form c:/a/b/c
1215    # This leads to confusion later on, so turn it back into
1216    # the more usual Windows form
1217    $base_dir =~ s/\//\\/g;
1218    my $long_base_dir = Win32::GetLongPathName($base_dir);
1219    my $long_full_path = Win32::GetLongPathName($filename_full_path);
1220
1221    $file = filename_within_directory($long_full_path,$long_base_dir);
1222    $file = encode("utf8",$file) if ($gli);
1223    }
1224
1225    return $file;
1226}
1227
1228
1229sub upgrade_if_dos_filename
1230{
1231    my ($filename_full_path,$and_encode) = @_;
1232
1233    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1234    # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1235    # to its long (Windows) version
1236    my $long_filename = Win32::GetLongPathName($filename_full_path);
1237    if (defined $long_filename) {
1238        $filename_full_path = $long_filename;
1239    }
1240    # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1241    $filename_full_path =~ s/^(.):/\u$1:/;
1242    if ((defined $and_encode) && ($and_encode)) {
1243        $filename_full_path = encode("utf8",$filename_full_path);
1244    }
1245    }
1246
1247    return $filename_full_path;
1248}
1249
1250
1251sub downgrade_if_dos_filename
1252{
1253    my ($filename_full_path) = @_;
1254
1255    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1256    require Win32;
1257
1258    # Ensure the given long Windows filename is in a form that can
1259    # be opened by Perl => convert it to a short DOS-like filename
1260
1261    my $short_filename = Win32::GetShortPathName($filename_full_path);
1262    if (defined $short_filename) {
1263        $filename_full_path = $short_filename;
1264    }
1265    # Make sure initial drive letter is lower-case (to fit in
1266    # with rest of Greenstone)
1267    $filename_full_path =~ s/^(.):/\u$1:/;
1268    }
1269
1270    return $filename_full_path;
1271}
1272
1273sub block_filename
1274{
1275    my ($block_hash,$filename) = @_;
1276   
1277    if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1278   
1279    # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1280    my $lower_filename = lc($filename);
1281    $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1282#   my $lower_drive = $filename;
1283#   $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1284   
1285#   my $upper_drive = $filename;
1286#   $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1287#   
1288#   $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1289#   $block_hash->{'file_blocks'}->{$upper_drive} = 1;       
1290    }
1291    else {
1292    $block_hash->{'file_blocks'}->{$filename} = 1;
1293    }
1294}
1295
1296
1297sub filename_is_absolute
1298{
1299    my ($filename) = @_;
1300
1301    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1302    return ($filename =~ m/^(\w:)?\\/);
1303    }
1304    else {
1305    return ($filename =~ m/^\//);
1306    }
1307}
1308
1309
1310## @method make_absolute()
1311#
1312#  Ensure the given file path is absolute in respect to the given base path.
1313#
1314#  @param  $base_dir A string denoting the base path the given dir must be
1315#                    absolute to.
1316#  @param  $dir The directory to be made absolute as a string. Note that the
1317#               dir may already be absolute, in which case it will remain
1318#               unchanged.
1319#  @return The now absolute form of the directory as a string.
1320#
1321#  @author John Thompson, DL Consulting Ltd.
1322#  @copy 2006 DL Consulting Ltd.
1323#
1324#used in buildcol.pl, doesn't work for all cases --kjdon
1325sub make_absolute {
1326   
1327    my ($base_dir, $dir) = @_;
1328###    print STDERR "dir = $dir\n";
1329    $dir =~ s/[\\\/]+/\//g;
1330    $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1331    $dir =~ s|^/tmp_mnt||;
1332    1 while($dir =~ s|/[^/]*/\.\./|/|g);
1333    $dir =~ s|/[.][.]?/|/|g;
1334    $dir =~ tr|/|/|s;
1335###    print STDERR "dir = $dir\n";
1336   
1337    return $dir;
1338}
1339## make_absolute() ##
1340
1341sub get_dirsep {
1342
1343    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1344    return "\\";
1345    } else {
1346    return "\/";
1347    }
1348}
1349
1350sub get_os_dirsep {
1351
1352    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1353    return "\\\\";
1354    } else {
1355    return "\\\/";
1356    }
1357}
1358
1359sub get_re_dirsep {
1360
1361    return "\\\\|\\\/";
1362}
1363
1364
1365sub get_dirsep_tail {
1366    my ($filename) = @_;
1367   
1368    # returns last part of directory or filename
1369    # On unix e.g. a/b.d => b.d
1370    #              a/b/c => c
1371
1372    my $dirsep = get_re_dirsep();
1373    my @dirs = split (/$dirsep/, $filename);
1374    my $tail = pop @dirs;
1375
1376    # - caused problems under windows
1377    #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1378
1379    return $tail;
1380}
1381
1382
1383# if this is running on windows we want binaries to end in
1384# .exe, otherwise they don't have to end in any extension
1385sub get_os_exe {
1386    return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1387    return "";
1388}
1389
1390
1391# test to see whether this is a big or little endian machine
1392sub is_little_endian
1393{
1394    # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1395    # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1396    # Otherwise, it's little endian
1397
1398    #return 0 if $^O =~ /^darwin$/i;
1399    #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1400   
1401    # Going back to stating exactly whether the machine is little endian
1402    # or big endian, without any special case for Macs. Since for rata it comes
1403    # back with little endian and for shuttle with bigendian.
1404    return (ord(substr(pack("s",1), 0, 1)) == 1);
1405}
1406
1407
1408# will return the collection name if successful, "" otherwise
1409sub use_collection {
1410    my ($collection, $collectdir) = @_;
1411
1412    if (!defined $collectdir || $collectdir eq "") {
1413    $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1414    }
1415
1416    # get and check the collection
1417    if (!defined($collection) || $collection eq "") {
1418    if (defined $ENV{'GSDLCOLLECTION'}) {
1419        $collection = $ENV{'GSDLCOLLECTION'};
1420    } else {
1421        print STDOUT "No collection specified\n";
1422        return "";
1423    }
1424    }
1425   
1426    if ($collection eq "modelcol") {
1427    print STDOUT "You can't use modelcol.\n";
1428    return "";
1429    }
1430
1431    # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1432    # are defined
1433    $ENV{'GSDLCOLLECTION'} = $collection;
1434    $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1435
1436    # make sure this collection exists
1437    if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1438    print STDOUT "Invalid collection ($collection).\n";
1439    return "";
1440    }
1441
1442    # everything is ready to go
1443    return $collection;
1444}
1445
1446sub get_current_collection_name {
1447    return $ENV{'GSDLCOLLECTION'};
1448}
1449
1450
1451# will return the collection name if successful, "" otherwise. 
1452# Like use_collection (above) but for greenstone 3 (taking account of site level)
1453
1454sub use_site_collection {
1455    my ($site, $collection, $collectdir) = @_;
1456
1457    if (!defined $collectdir || $collectdir eq "") {
1458    die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1459    $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1460    }
1461
1462    # collectdir explicitly set by this point (using $site variable if required).
1463    # Can call "old" gsdl2 use_collection now.
1464
1465    return use_collection($collection,$collectdir);
1466}
1467
1468
1469
1470sub locate_config_file
1471{
1472    my ($file) = @_;
1473
1474    my $locations = locate_config_files($file);
1475
1476    return shift @$locations; # returns undef if 'locations' is empty
1477}
1478
1479
1480sub locate_config_files
1481{
1482    my ($file) = @_;
1483
1484    my @locations = ();
1485
1486    if (-e $file) {
1487    # Clearly specified (most likely full filename)
1488    # No need to hunt in 'etc' directories, return value unchanged
1489    push(@locations,$file);
1490    }
1491    else {
1492    # Check for collection specific one before looking in global GSDL 'etc'
1493    if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1494        my $test_collect_etc_filename
1495        = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1496       
1497        if (-e $test_collect_etc_filename) {
1498        push(@locations,$test_collect_etc_filename);
1499        }
1500    }
1501    my $test_main_etc_filename
1502        = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1503    if (-e $test_main_etc_filename) {
1504        push(@locations,$test_main_etc_filename);
1505    }
1506    }
1507
1508    return \@locations;
1509}
1510
1511
1512sub hyperlink_text
1513{
1514    my ($text) = @_;
1515   
1516    $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1517    $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1518
1519    return $text;
1520}
1521
1522
1523# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1524# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1525sub is_dir_empty
1526{
1527    my ($path) = @_;
1528    opendir DIR, $path;
1529    while(my $entry = readdir DIR) {
1530        next if($entry =~ /^\.\.?$/);
1531        closedir DIR;
1532        return 0;
1533    }
1534    closedir DIR;
1535    return 1;
1536}
1537
1538# Returns the given filename converted using either URL encoding or base64
1539# encoding, as specified by $rename_method. If the given filename has no suffix
1540# (if it is just the tailname), then $no_suffix should be some defined value.
1541# rename_method can be url, none, base64
1542sub rename_file {
1543    my ($filename, $rename_method, $no_suffix)  = @_;
1544
1545    if(!$filename) { # undefined or empty string
1546    return $filename;
1547    }
1548
1549    if (!$rename_method) {
1550    print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1551    # Debugging information
1552    # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1553    # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1554    $rename_method = "url";
1555    } elsif($rename_method eq "none") {
1556    return $filename; # would have already been renamed
1557    }
1558
1559    # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1560    ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1561    ###$filename =~ s/ /_/g;
1562
1563    my ($tailname,$dirname,$suffix);
1564    if($no_suffix) { # given a tailname, no suffix
1565    ($tailname,$dirname) = File::Basename::fileparse($filename);
1566    }
1567    else {
1568    ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1569    }
1570    if (!$suffix) {
1571    $suffix = "";
1572    }
1573    else {
1574    $suffix = lc($suffix);
1575    }
1576
1577    if ($rename_method eq "url") {
1578    $tailname = &unicode::url_encode($tailname);
1579    }
1580    elsif ($rename_method eq "base64") {
1581    $tailname = &unicode::base64_encode($tailname);
1582    $tailname =~ s/\s*//sg;      # for some reason it adds spaces not just at end but also in middle
1583    }
1584
1585    $filename = "$tailname$suffix";
1586    $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1587
1588    return $filename;
1589}
1590
1591
1592# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1593sub rename_ldb_or_bdb_file {
1594    my ($filename_no_ext) = @_;
1595
1596    my $new_filename = "$filename_no_ext.gdb";
1597    return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1598    # try ldb
1599    my $old_filename = "$filename_no_ext.ldb";
1600   
1601    if (-f $old_filename) {
1602    print STDERR "Renaming $old_filename to $new_filename\n";
1603    rename ($old_filename, $new_filename)
1604        || print STDERR "Rename failed: $!\n";
1605    return;
1606    }
1607    # try bdb
1608    $old_filename = "$filename_no_ext.bdb";
1609    if (-f $old_filename) {
1610    print STDERR "Renaming $old_filename to $new_filename\n";   
1611    rename ($old_filename, $new_filename)
1612        || print STDERR "Rename failed: $!\n";
1613    return;
1614    }
1615}
1616
1617sub os_dir() {
1618   
1619    my $gsdlarch = "";
1620    if(defined $ENV{'GSDLARCH'}) {
1621    $gsdlarch = $ENV{'GSDLARCH'};
1622    }
1623    return $ENV{'GSDLOS'}.$gsdlarch;
1624}
1625
1626# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1627# By default, /greenstone3 for GS3 or /greenstone for GS2.
1628sub get_greenstone_url_prefix() {
1629    # if already set on a previous occasion, just return that
1630    # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1631    return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1632
1633    my ($configfile, $urlprefix, $defaultUrlprefix);
1634    my @propertynames = ();
1635
1636    if($ENV{'GSDL3SRCHOME'}) {
1637    $defaultUrlprefix = "/greenstone3";
1638    $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1639    push(@propertynames, qw/path\s*\=/);
1640    } else {
1641    $defaultUrlprefix = "/greenstone";
1642    $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
1643    push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1644    }
1645
1646    $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1647
1648    if(!$urlprefix) { # no values found for URL prefix, use default values
1649    $urlprefix = $defaultUrlprefix;
1650    } else {
1651    #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1652    $urlprefix =~ s/^\///; # remove the starting slash
1653    my @dirs = split(/(\\|\/)/, $urlprefix);
1654    $urlprefix = shift(@dirs);
1655
1656    if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1657        $urlprefix = "/$urlprefix";
1658    }
1659    }
1660
1661    # set for the future
1662    $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1663#    print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1664    return $urlprefix;
1665}
1666
1667
1668# Given a config file (xml or java properties file) and a list/array of regular expressions
1669# that represent property names to match on, this function will return the value for the 1st
1670# matching property name. If the return value is undefined, no matching property was found.
1671sub extract_propvalue_from_file() {
1672    my ($configfile, $propertynames) = @_;
1673
1674    my $value;
1675    unless(open(FIN, "<$configfile")) {
1676    print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1677    return $value; # not initialised
1678    }
1679
1680    # Read the entire file at once, as one single line, then close it
1681    my $filecontents;
1682    {
1683    local $/ = undef;       
1684    $filecontents = <FIN>;
1685    }
1686    close(FIN);
1687
1688    foreach my $regex (@$propertynames) {
1689        ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1690    if($value) {
1691            $value =~ s/^\"//;     # remove any startquotes
1692        $value =~ s/\".*$//;   # remove the 1st endquotes (if any) followed by any xml
1693        last;              # found value for a matching property, break from loop
1694    }
1695    }
1696
1697    return $value;
1698}
1699
1700# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1701# given that perllib is in @INC in order to invoke this subroutine.
1702# Call as follows -- after setting up INC to include perllib and
1703# after setting up GSDLHOME and GSDLOS:
1704#
1705# require util;
1706# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1707#
1708sub setup_greenstone_env() {
1709    my ($GSDLHOME, $GSDLOS) = @_;
1710
1711    #my %env_map = ();
1712    # Get the localised ENV settings of running a localised source setup.bash
1713    # and put it into the ENV here. Need to clear GSDLHOME before running setup
1714    #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1715    my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";     
1716    if($GSDLOS =~ m/windows/i) {
1717        #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1718        $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1719    }
1720    if (!open(PIN, "$perl_command |")) {
1721        print STDERR ("Unable to execute command: $perl_command. $!\n");
1722    }
1723
1724    while (defined (my $perl_output_line = <PIN>)) {
1725        my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1726        #$env_map{$key}=$value;     
1727        $ENV{$key}=$value;
1728    }
1729    close (PIN);
1730
1731    # If any keys in $ENV don't occur in Greenstone's localised env
1732    # (stored in $env_map), delete those entries from $ENV
1733    #foreach $key (keys %ENV) {
1734    #   if(!defined $env_map{$key}) {
1735    #       print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1736    #       delete $ENV{$key}; # del $ENV(key, value) pair
1737    #   }
1738    #}
1739    #undef %env_map;
1740}
1741
1742sub get_perl_exec() {   
1743    my $perl_exec = $^X; # may return just "perl"
1744   
1745    if($ENV{'PERLPATH'}) {
1746        # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1747        if($ENV{'GSDLOS'} =~ m/windows/) {
1748            $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1749        } else {
1750            $perl_exec = "$ENV{'PERLPATH'}/perl";
1751        }
1752    } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1753        # containing the full path to the current perl executable we're using
1754        $perl_exec = $Config{perlpath}; # configured path for perl
1755        if (!-e $perl_exec) { # may not point to location on this machine
1756            $perl_exec = $^X; # may return just "perl"
1757            if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1758                print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";             
1759            }
1760        }
1761    }
1762   
1763    return $perl_exec;
1764}
1765
1766# returns the path to the java command in the JRE included with GS (if any),
1767# quoted to safeguard any spaces in this path, otherwise a simple java
1768# command is returned which assumes and will try for a system java.
1769sub get_java_command {
1770    my $java = "java";
1771    if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1772                               # after running setup.bat or from GLI which also runs setup.bat
1773    my $java_bin = &util::filename_cat($ENV{'GSDLHOME'},"packages","jre","bin");
1774    if(-d $java_bin) {
1775        $java = &util::filename_cat($java_bin,"java");
1776        $java = "\"".$java."\""; # quoted to preserve spaces in path
1777    }
1778    }
1779    return $java;
1780}
1781
1782
1783# Given the qualified collection name (colgroup/collection),
1784# returns the collection and colgroup parts
1785sub get_collection_parts {
1786    # http://perldoc.perl.org/File/Basename.html
1787    # my($filename, $directories, $suffix) = fileparse($path);
1788    # "$directories contains everything up to and including the last directory separator in the $path
1789    # including the volume (if applicable). The remainder of the $path is the $filename."
1790    #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);   
1791
1792    my $qualified_collection = shift(@_);
1793
1794    # Since activate.pl can be launched from the command-line, including by a user,
1795    # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1796    # Also allow for the accidental inclusion of multiple slashes
1797    my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1798   
1799    if(!defined $collection) {
1800        $collection = $colgroup;
1801        $colgroup = "";
1802    }
1803    return ($collection, $colgroup);
1804}
1805
1806# work out the "collectdir/collection" location
1807sub resolve_collection_dir {
1808    my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1809   
1810    my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);   
1811   
1812    if (defined $collect_dir) {
1813        return &util::filename_cat($collect_dir,$colgroup, $collection);
1814    }
1815    else {
1816        if (defined $site) {
1817            return &util::filename_cat($ENV{'GSDL3HOME'},"sites",$site,"collect",$colgroup, $collection);
1818        }
1819        else {
1820            return &util::filename_cat($ENV{'GSDLHOME'},"collect",$colgroup, $collection);
1821        }
1822    }
1823}
1824
18251;
Note: See TracBrowser for help on using the browser.