root/main/trunk/greenstone2/perllib/util.pm @ 24829

Revision 24829, 45.0 KB (checked in by ak19, 8 years ago)

Changes to bat files and perl code to deal with brackets in (Windows) filepath. Also checked winmake.bat files to see if changes were needed there. These changes go together with the commits 24826 to 24828 for gems.bat, and commit 24820 on makegs2.bat.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
38
39# removes files (but not directories)
40sub rm {
41    my (@files) = @_;
42
43    my @filefiles = ();
44
45    # make sure the files we want to delete exist
46    # and are regular files
47    foreach my $file (@files) {
48    if (!-e $file) {
49        print STDERR "util::rm $file does not exist\n";
50    } elsif ((!-f $file) && (!-l $file)) {
51        print STDERR "util::rm $file is not a regular (or symbolic) file\n";
52    } else {
53        push (@filefiles, $file);
54    }
55    }
56   
57    # remove the files
58    my $numremoved = unlink @filefiles;
59
60    # check to make sure all of them were removed
61    if ($numremoved != scalar(@filefiles)) {
62    print STDERR "util::rm Not all files were removed\n";
63    }
64}
65
66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine  when debugging the deletion of individual files.
68sub rm_debug {
69    my (@files) = @_;
70    my @filefiles = ();
71
72    # make sure the files we want to delete exist
73    # and are regular files
74    foreach my $file (@files) {
75        if (!-e $file) {
76            print STDERR "util::rm $file does not exist\n";
77        } elsif ((!-f $file) && (!-l $file)) {
78            print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79        } else { # debug message
80            unlink($file) or warn "Could not delete file $file: $!\n";
81        }
82    }
83}
84
85
86# recursive removal
87sub filtered_rm_r {
88    my ($files,$file_accept_re,$file_reject_re) = @_;
89
90#   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91#   my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92#   print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93   
94    my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
96    # recursively remove the files
97    foreach my $file (@files_array) {
98    $file =~ s/[\/\\]+$//; # remove trailing slashes
99   
100    if (!-e $file) {
101        print STDERR "util::filtered_rm_r $file does not exist\n";
102
103    } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
104        # get the contents of this directory
105        if (!opendir (INDIR, $file)) {
106        print STDERR "util::filtered_rm_r could not open directory $file\n";
107        } else {
108        my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109        closedir (INDIR);
110               
111        # remove all the files in this directory
112        map {$_="$file/$_";} @filedir;
113        &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
114
115        if (!defined $file_accept_re && !defined $file_reject_re) {
116            # remove this directory
117            if (!rmdir $file) {
118            print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119            }
120        }
121        }
122    } else {
123        next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
124
125        if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126        # remove this file 
127        &rm ($file);
128        }
129    }
130    }
131}
132
133
134# recursive removal
135sub rm_r {
136    my (@files) = @_;
137   
138    # use the more general (but reterospectively written function
139    # filtered_rm_r function()
140
141    filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
147# moves a file or a group of files
148sub mv {
149    my $dest = pop (@_);
150    my (@srcfiles) = @_;
151
152    # remove trailing slashes from source and destination files
153    $dest =~ s/[\\\/]+$//;
154    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156    # a few sanity checks
157    if (scalar (@srcfiles) == 0) {
158    print STDERR "util::mv no destination directory given\n";
159    return;
160    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161    print STDERR "util::mv if multiple source files are given the ".
162        "destination must be a directory\n";
163    return;
164    }
165
166    # move the files
167    foreach my $file (@srcfiles) {
168    my $tempdest = $dest;
169    if (-d $tempdest) {
170        my ($filename) = $file =~ /([^\\\/]+)$/;
171        $tempdest .= "/$filename";
172    }
173    if (!-e $file) {
174        print STDERR "util::mv $file does not exist\n";
175    } else {
176        rename ($file, $tempdest);
177    }
178    }
179}
180
181
182# copies a file or a group of files
183sub cp {
184    my $dest = pop (@_);
185    my (@srcfiles) = @_;
186
187    # remove trailing slashes from source and destination files
188    $dest =~ s/[\\\/]+$//;
189    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
190
191    # a few sanity checks
192    if (scalar (@srcfiles) == 0) {
193    print STDERR "util::cp no destination directory given\n";
194    return;
195    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
196    print STDERR "util::cp if multiple source files are given the ".
197        "destination must be a directory\n";
198    return;
199    }
200
201    # copy the files
202    foreach my $file (@srcfiles) {
203    my $tempdest = $dest;
204    if (-d $tempdest) {
205        my ($filename) = $file =~ /([^\\\/]+)$/;
206        $tempdest .= "/$filename";
207    }
208    if (!-e $file) {
209        print STDERR "util::cp $file does not exist\n";
210    } elsif (!-f $file) {
211        print STDERR "util::cp $file is not a plain file\n";
212    } else {
213        &File::Copy::copy ($file, $tempdest);
214    }
215    }
216}
217
218
219
220# recursively copies a file or group of files
221# syntax: cp_r (sourcefiles, destination directory)
222# destination must be a directory - to copy one file to
223# another use cp instead
224sub cp_r {
225    my $dest = pop (@_);
226    my (@srcfiles) = @_;
227
228    # a few sanity checks
229    if (scalar (@srcfiles) == 0) {
230    print STDERR "util::cp_r no destination directory given\n";
231    return;
232    } elsif (-f $dest) {
233    print STDERR "util::cp_r destination must be a directory\n";
234    return;
235    }
236   
237    # create destination directory if it doesn't exist already
238    if (! -d $dest) {
239    my $store_umask = umask(0002);
240    mkdir ($dest, 0777);
241    umask($store_umask);
242    }
243
244    # copy the files
245    foreach my $file (@srcfiles) {
246
247    if (!-e $file) {
248        print STDERR "util::cp_r $file does not exist\n";
249
250    } elsif (-d $file) {
251        # make the new directory
252        my ($filename) = $file =~ /([^\\\/]*)$/;
253        $dest = &util::filename_cat ($dest, $filename);
254        my $store_umask = umask(0002);
255        mkdir ($dest, 0777);
256        umask($store_umask);
257
258        # get the contents of this directory
259        if (!opendir (INDIR, $file)) {
260        print STDERR "util::cp_r could not open directory $file\n";
261        } else {
262        my @filedir = readdir (INDIR);
263        closedir (INDIR);
264        foreach my $f (@filedir) {
265            next if $f =~ /^\.\.?$/;
266            # copy all the files in this directory
267            my $ff = &util::filename_cat ($file, $f);
268            &cp_r ($ff, $dest);
269        }
270        }
271
272    } else {
273        &cp($file, $dest);
274    }
275    }
276}
277# recursively copies a file or group of files
278# syntax: cp_r (sourcefiles, destination directory)
279# destination must be a directory - to copy one file to
280# another use cp instead
281sub cp_r_nosvn {
282    my $dest = pop (@_);
283    my (@srcfiles) = @_;
284
285    # a few sanity checks
286    if (scalar (@srcfiles) == 0) {
287    print STDERR "util::cp_r no destination directory given\n";
288    return;
289    } elsif (-f $dest) {
290    print STDERR "util::cp_r destination must be a directory\n";
291    return;
292    }
293   
294    # create destination directory if it doesn't exist already
295    if (! -d $dest) {
296    my $store_umask = umask(0002);
297    mkdir ($dest, 0777);
298    umask($store_umask);
299    }
300
301    # copy the files
302    foreach my $file (@srcfiles) {
303
304    if (!-e $file) {
305        print STDERR "util::cp_r $file does not exist\n";
306
307    } elsif (-d $file) {
308        # make the new directory
309        my ($filename) = $file =~ /([^\\\/]*)$/;
310        $dest = &util::filename_cat ($dest, $filename);
311        my $store_umask = umask(0002);
312        mkdir ($dest, 0777);
313        umask($store_umask);
314
315        # get the contents of this directory
316        if (!opendir (INDIR, $file)) {
317        print STDERR "util::cp_r could not open directory $file\n";
318        } else {
319        my @filedir = readdir (INDIR);
320        closedir (INDIR);
321        foreach my $f (@filedir) {
322            next if $f =~ /^\.\.?$/;
323            next if $f =~ /^\.svn$/;
324            # copy all the files in this directory
325            my $ff = &util::filename_cat ($file, $f);
326            &cp_r ($ff, $dest);
327        }
328        }
329
330    } else {
331        &cp($file, $dest);
332    }
333    }
334}
335
336# copies a directory and its contents, excluding subdirectories, into a new directory
337sub cp_r_toplevel {
338    my $dest = pop (@_);
339    my (@srcfiles) = @_;
340
341    # a few sanity checks
342    if (scalar (@srcfiles) == 0) {
343    print STDERR "util::cp_r no destination directory given\n";
344    return;
345    } elsif (-f $dest) {
346    print STDERR "util::cp_r destination must be a directory\n";
347    return;
348    }
349   
350    # create destination directory if it doesn't exist already
351    if (! -d $dest) {
352    my $store_umask = umask(0002);
353    mkdir ($dest, 0777);
354    umask($store_umask);
355    }
356
357    # copy the files
358    foreach my $file (@srcfiles) {
359
360    if (!-e $file) {
361        print STDERR "util::cp_r $file does not exist\n";
362
363    } elsif (-d $file) {
364        # make the new directory
365        my ($filename) = $file =~ /([^\\\/]*)$/;
366        $dest = &util::filename_cat ($dest, $filename);
367        my $store_umask = umask(0002);
368        mkdir ($dest, 0777);
369        umask($store_umask);
370
371        # get the contents of this directory
372        if (!opendir (INDIR, $file)) {
373        print STDERR "util::cp_r could not open directory $file\n";
374        } else {
375        my @filedir = readdir (INDIR);
376        closedir (INDIR);
377        foreach my $f (@filedir) {
378            next if $f =~ /^\.\.?$/;
379           
380            # copy all the files in this directory, but not directories
381            my $ff = &util::filename_cat ($file, $f);
382            if (-f $ff) {
383            &cp($ff, $dest);
384            #&cp_r ($ff, $dest);
385            }
386        }
387        }
388
389    } else {
390        &cp($file, $dest);
391    }
392    }
393}
394
395sub mk_dir {
396    my ($dir) = @_;
397
398    my $store_umask = umask(0002);
399    my $mkdir_ok = mkdir ($dir, 0777);
400    umask($store_umask);
401   
402    if (!$mkdir_ok)
403    {
404    print STDERR "util::mk_dir could not create directory $dir\n";
405    return;
406    }
407}
408
409# in case anyone cares - I did some testing (using perls Benchmark module)
410# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
411# slightly faster (surprisingly) - Stefan.
412sub mk_all_dir {
413    my ($dir) = @_;
414
415    # use / for the directory separator, remove duplicate and
416    # trailing slashes
417    $dir=~s/[\\\/]+/\//g;
418    $dir=~s/[\\\/]+$//;
419
420    # make sure the cache directory exists
421    my $dirsofar = "";
422    my $first = 1;
423    foreach my $dirname (split ("/", $dir)) {
424    $dirsofar .= "/" unless $first;
425    $first = 0;
426
427    $dirsofar .= $dirname;
428
429    next if $dirname =~ /^(|[a-z]:)$/i;
430    if (!-e $dirsofar)
431        {
432        my $store_umask = umask(0002);
433        my $mkdir_ok = mkdir ($dirsofar, 0777);
434        umask($store_umask);
435        if (!$mkdir_ok)
436        {
437            print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
438            return;
439        }
440        }
441    }
442}
443
444# make hard link to file if supported by OS, otherwise copy the file
445sub hard_link {
446    my ($src, $dest, $verbosity) = @_;
447
448    # remove trailing slashes from source and destination files
449    $src =~ s/[\\\/]+$//;
450    $dest =~ s/[\\\/]+$//;
451
452##    print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
453    # a few sanity checks
454    if (-e $dest) {
455    # destination file already exists
456    return;
457    }
458    elsif (!-e $src) {
459    print STDERR "util::hard_link source file \"$src\" does not exist\n";
460    return 1;
461    }
462    elsif (-d $src) {
463    print STDERR "util::hard_link source \"$src\" is a directory\n";
464    return 1;
465    }
466
467    my $dest_dir = &File::Basename::dirname($dest);
468    mk_all_dir($dest_dir) if (!-e $dest_dir);
469
470
471    if (!link($src, $dest)) {
472    if ((!defined $verbosity) || ($verbosity>2)) {
473        print STDERR "util::hard_link: unable to create hard link. ";
474        print STDERR " Copying file: $src -> $dest\n";
475    }
476    &File::Copy::copy ($src, $dest);
477    }
478    return 0;
479}
480
481# make soft link to file if supported by OS, otherwise copy file
482sub soft_link {
483    my ($src, $dest, $ensure_paths_absolute) = @_;
484
485    # remove trailing slashes from source and destination files
486    $src =~ s/[\\\/]+$//;
487    $dest =~ s/[\\\/]+$//;
488
489    # Ensure file paths are absolute IF requested to do so
490    # Soft_linking didn't work for relative paths
491    if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
492    # We need to ensure that the src file is the absolute path
493    # See http://perldoc.perl.org/File/Spec.html
494    if(!File::Spec->file_name_is_absolute( $src ))  { # it's relative
495        $src = File::Spec->rel2abs($src); # make absolute
496    }
497    # Might as well ensure that the destination file's absolute path is used
498    if(!File::Spec->file_name_is_absolute( $dest )) {
499        $dest = File::Spec->rel2abs($dest); # make absolute
500    }
501    }
502
503    # a few sanity checks
504    if (!-e $src) {
505    print STDERR "util::soft_link source file $src does not exist\n";
506    return 0;
507    }
508
509    my $dest_dir = &File::Basename::dirname($dest);
510    mk_all_dir($dest_dir) if (!-e $dest_dir);
511
512    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
513       
514    # symlink not supported on windows
515    &File::Copy::copy ($src, $dest);
516
517    } elsif (!eval {symlink($src, $dest)}) {
518    print STDERR "util::soft_link: unable to create soft link.\n";
519    return 0;
520    }
521
522    return 1;
523}
524
525# Primarily for filenames generated by processing
526# content of HTML files (which are mapped to UTF-8 internally)
527#
528# To turn this into an octet string that really exists on the file
529# system:
530# 1. don't need to do anything special for Unix-based systems
531#   (as underlying file system is byte-code)
532# 2. need to map to short DOS filenames for Windows
533
534sub utf8_to_real_filename
535{
536    my ($utf8_filename) = @_;
537
538    my $real_filename;
539
540    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
541    require Win32;
542
543    print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
544
545    my $unicode_filename = decode("utf8",$utf8_filename);
546    $real_filename = Win32::GetShortPathName($unicode_filename);
547    }
548    else {
549    $real_filename = $utf8_filename;
550    }
551
552    return $real_filename;
553}
554
555
556sub fd_exists
557{
558    my $filename_full_path = shift @_;
559    my $test_op = shift @_ || "-e";
560
561    # By default tests for existance of file or directory (-e)
562    # Can be made more specific by providing second parameter (e.g. -f or -d)
563
564    my $exists = 0;
565
566    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
567    require Win32;
568    my $filename_short_path = Win32::GetShortPathName($filename_full_path);
569    if (!defined $filename_short_path) {
570        # Was probably still in UTF8 form (not what is needed on Windows)
571        my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
572        if (defined $unicode_filename_full_path) {
573        $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
574        }
575    }
576    $filename_full_path = $filename_short_path;
577    }
578
579    if (defined $filename_full_path) {
580    $exists = eval "($test_op \$filename_full_path)";
581    }
582
583    return $exists;
584}
585
586sub file_exists
587{
588    my ($filename_full_path) = @_;
589
590    return fd_exists($filename_full_path,"-f");
591}
592
593sub dir_exists
594{
595    my ($filename_full_path) = @_;
596
597    return fd_exists($filename_full_path,"-d");
598}
599
600
601
602# updates a copy of a directory in some other part of the filesystem
603# verbosity settings are: 0=low, 1=normal, 2=high
604# both $fromdir and $todir should be absolute paths
605sub cachedir {
606    my ($fromdir, $todir, $verbosity) = @_;
607    $verbosity = 1 unless defined $verbosity;
608
609    # use / for the directory separator, remove duplicate and
610    # trailing slashes
611    $fromdir=~s/[\\\/]+/\//g;
612    $fromdir=~s/[\\\/]+$//;
613    $todir=~s/[\\\/]+/\//g;
614    $todir=~s/[\\\/]+$//;
615
616    &mk_all_dir ($todir);
617
618    # get the directories in ascending order
619    if (!opendir (FROMDIR, $fromdir)) {
620    print STDERR "util::cachedir could not read directory $fromdir\n";
621    return;
622    }
623    my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
624    closedir (FROMDIR);
625
626    if (!opendir (TODIR, $todir)) {
627    print STDERR "util::cacedir could not read directory $todir\n";
628    return;
629    }
630    my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
631    closedir (TODIR);
632
633    my $fromi = 0;
634    my $toi = 0;
635           
636    while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
637#   print "fromi: $fromi toi: $toi\n";
638
639    # see if we should delete a file/directory
640    # this should happen if the file/directory
641    # is not in the from list or if its a different
642    # size, or has an older timestamp
643    if ($toi < scalar(@todir)) {
644        if (($fromi >= scalar(@fromdir)) ||
645        ($todir[$toi] lt $fromdir[$fromi] ||
646         ($todir[$toi] eq $fromdir[$fromi] &&
647          &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
648                  $verbosity)))) {
649
650        # the files are different
651        &rm_r("$todir/$todir[$toi]");
652        splice(@todir, $toi, 1); # $toi stays the same
653
654        } elsif ($todir[$toi] eq $fromdir[$fromi]) {
655        # the files are the same
656        # if it is a directory, check its contents
657        if (-d "$todir/$todir[$toi]") {
658            &cachedir ("$fromdir/$fromdir[$fromi]",
659                   "$todir/$todir[$toi]", $verbosity);
660        }
661
662        $toi++;
663        $fromi++;
664        next;
665        }
666    }
667 
668    # see if we should insert a file/directory
669    # we should insert a file/directory if there
670    # is no tofiles left or if the tofile does not exist
671    if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
672                      $todir[$toi] gt $fromdir[$fromi])) {
673        &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
674        splice (@todir, $toi, 0, $fromdir[$fromi]);
675
676        $toi++;
677        $fromi++;
678    }
679    }
680}
681
682# this function returns -1 if either file is not found
683# assumes that $file1 and $file2 are absolute file names or
684# in the current directory
685# $file2 is allowed to be newer than $file1
686sub differentfiles {
687    my ($file1, $file2, $verbosity) = @_;
688    $verbosity = 1 unless defined $verbosity;
689
690    $file1 =~ s/\/+$//;
691    $file2 =~ s/\/+$//;
692   
693    my ($file1name) = $file1 =~ /\/([^\/]*)$/;
694    my ($file2name) = $file2 =~ /\/([^\/]*)$/;
695
696    return -1 unless (-e $file1 && -e $file2);
697    if ($file1name ne $file2name) {
698    print STDERR "filenames are not the same\n" if ($verbosity >= 2);
699    return 1;
700    }
701
702    my @file1stat = stat ($file1);
703    my @file2stat = stat ($file2);
704
705    if (-d $file1) {
706    if (! -d $file2) {
707        print STDERR "one file is a directory\n" if ($verbosity >= 2);
708        return 1;
709    }
710    return 0;
711    }
712
713    # both must be regular files
714    unless (-f $file1 && -f $file2) {
715    print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
716    return 1;
717    }
718
719    # the size of the files must be the same
720    if ($file1stat[7] != $file2stat[7]) {
721    print STDERR "different sized files\n" if ($verbosity >= 2);
722    return 1;
723    }
724
725    # the second file cannot be older than the first
726    if ($file1stat[9] > $file2stat[9]) {
727    print STDERR "file is older\n" if ($verbosity >= 2);
728    return 1;
729    }
730
731    return 0;
732}
733
734
735sub get_tmp_filename
736{
737    my $file_ext = shift(@_) || undef;
738
739    my $opt_dot_file_ext = "";
740    if (defined $file_ext) {
741    if ($file_ext !~ m/\./) {
742        # no dot, so needs one added in at start
743        $opt_dot_file_ext = ".$file_ext"
744    }
745    else {
746        # allow for "extensions" such as _metadata.txt to be handled
747        # gracefully
748        $opt_dot_file_ext = $file_ext;
749    }
750    }
751
752    my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
753    &mk_all_dir ($tmpdir) unless -e $tmpdir;
754
755    my $count = 1000;
756    my $rand = int(rand $count);
757    my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
758
759    while (-e $full_tmp_filename) {
760    $rand = int(rand $count);
761    $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
762    $count++;
763    }
764   
765    return $full_tmp_filename;
766}
767
768sub get_timestamped_tmp_folder
769{
770
771    my $tmp_dirname;
772    if(defined $ENV{'GSDLCOLLECTDIR'}) {
773    $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
774    } elsif(defined $ENV{'GSDLHOME'}) {
775    $tmp_dirname = $ENV{'GSDLHOME'};
776    } else {
777    return undef;
778    }
779
780    $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
781    &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
782
783    # add the timestamp into the path otherwise we can run into problems
784    # if documents have the same name
785    my $timestamp = time;
786    my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
787    $tmp_dirname = $time_tmp_dirname;
788    my $i = 1;
789    while (-e $tmp_dirname) {
790    $tmp_dirname = "$time_tmp_dirname$i";
791    $i++;
792    }
793    &util::mk_dir($tmp_dirname);
794 
795    return $tmp_dirname;
796}
797
798sub get_timestamped_tmp_filename_in_collection
799{
800
801    my ($input_filename, $output_ext) = @_;
802    # derive tmp filename from input filename
803    my ($tailname, $dirname, $suffix)
804    = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
805
806    # softlink to collection tmp dir
807    my $tmp_dirname = &util::get_timestamped_tmp_folder();
808    $tmp_dirname = $dirname unless defined $tmp_dirname;
809
810    # following two steps copied from ConvertBinaryFile
811    # do we need them?? can't use them as is, as they use plugin methods.
812
813    #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
814
815    # URLEncode this since htmls with images where the html filename is utf8 don't seem
816    # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
817    # files on the filesystem.
818    #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
819    if (defined $output_ext) {
820    $output_ext = ".$output_ext"; # add the dot
821    } else {
822    $output_ext = $suffix;
823    }
824    $output_ext= lc($output_ext);
825    my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
826   
827    return $tmp_filename;
828}
829
830sub get_toplevel_tmp_dir
831{
832    return filename_cat($ENV{'GSDLHOME'}, "tmp");
833}
834
835
836sub filename_to_regex {
837    my $filename = shift (@_);
838
839    # need to put single backslash back to double so that regex works
840    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
841        $filename =~ s/\\/\\\\/g;
842    }
843   
844    # note that the first part of a substitution is a regex, so RE chars need to be escaped,
845    # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
846    $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
847    $filename =~ s@\(@\\(@g; # escape brackets
848    $filename =~ s@\)@\\)@g; # escape brackets
849   
850    return $filename;
851}
852
853sub unregex_filename {
854    my $filename = shift (@_);
855
856    # need to put doubled backslashes for regex back to single
857    $filename =~ s/\\\\/\\/g; # remove RE syntax for \   
858    $filename =~ s/\\\./\./g; # remove RE syntax for .
859    $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
860    $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
861    return $filename;
862}
863
864sub filename_cat {
865    my $first_file = shift(@_);
866    my (@filenames) = @_;
867
868#   Useful for debugging
869#     -- might make sense to call caller(0) rather than (1)??
870#   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
871#   print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
872   
873    # If first_file is not null or empty, then add it back into the list
874    if (defined $first_file && $first_file =~ /\S/) {
875    unshift(@filenames, $first_file);
876    }
877
878    my $filename = join("/", @filenames);
879
880    # remove duplicate slashes and remove the last slash
881    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
882    $filename =~ s/[\\\/]+/\\/g;
883    } else {
884    $filename =~ s/[\/]+/\//g;
885    # DB: want a filename abc\de.html to remain like this
886    }
887    $filename =~ s/[\\\/]$//;
888
889    return $filename;
890}
891
892
893sub pathname_cat {
894    my $first_path = shift(@_);
895    my (@pathnames) = @_;
896
897    # If first_path is not null or empty, then add it back into the list
898    if (defined $first_path && $first_path =~ /\S/) {
899    unshift(@pathnames, $first_path);
900    }
901
902    my $join_char;
903    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
904    $join_char = ";";
905    } else {
906    $join_char = ":";
907    }
908
909    my $pathname = join($join_char, @pathnames);
910
911    # remove duplicate slashes
912    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
913    $pathname =~ s/[\\\/]+/\\/g;
914    } else {
915    $pathname =~ s/[\/]+/\//g;
916    # DB: want a pathname abc\de.html to remain like this
917    }
918
919    return $pathname;
920}
921
922
923sub tidy_up_oid {
924    my ($OID) = @_;
925    if ($OID =~ /\./) {
926    print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
927    $OID =~ s/\.//g; #remove any periods
928    }
929    if ($OID =~ /^\s.*\s$/) {
930    print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
931    # remove starting and trailing whitespace
932    $OID =~ s/^\s+//;
933    $OID =~ s/\s+$//;
934    }
935    if ($OID =~ /^[\d]*$/) {
936    print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
937    $OID = "D" . $OID;
938    }       
939   
940    return $OID;
941}
942sub envvar_prepend {
943    my ($var,$val) = @_;
944
945    # do not prepend any value/path that's already in the environment variable
946    if ($ENV{'GSDLOS'} =~ /^windows$/i)
947    {
948    my $escaped_val = &filename_to_regex($val); # escape any Windows backslashes and brackets for upcoming regex
949    if (!defined($ENV{$var})) {
950        $ENV{$var} = "$val";
951    }
952    elsif($ENV{$var} !~ m/$escaped_val/) {
953        $ENV{$var} = "$val;".$ENV{$var};
954    }
955    }
956    else {
957    if (!defined($ENV{$var})) {
958        $ENV{$var} = "$val";
959    }
960    elsif($ENV{$var} !~ m/$val/) {
961        $ENV{$var} = "$val:".$ENV{$var};
962    }
963    }
964}
965
966sub envvar_append {
967    my ($var,$val) = @_;
968
969    # do not append any value/path that's already in the environment variable
970    if ($ENV{'GSDLOS'} =~ /^windows$/i)
971    {
972    my $escaped_val = &filename_to_regex($val); # escape any Windows backslashes and brackets for upcoming regex
973    if (!defined($ENV{$var})) {
974        $ENV{$var} = "$val";
975    }
976    elsif($ENV{$var} !~ m/$escaped_val/) {
977        $ENV{$var} .= ";$val";
978    }
979    }
980    else {
981    if (!defined($ENV{$var})) {
982        $ENV{$var} = "$val";
983    }
984    elsif($ENV{$var} !~ m/$val/) {
985        $ENV{$var} .= ":$val";
986    }
987    }   
988}
989
990
991# splits a filename into a prefix and a tail extension using the tail_re, or
992# if that fails, splits on the file_extension . (dot)
993sub get_prefix_and_tail_by_regex {
994
995    my ($filename,$tail_re) = @_;
996   
997    my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
998    if ((!defined $file_prefix) || (!defined $file_ext)) {
999    ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
1000    }
1001
1002    return ($file_prefix,$file_ext);
1003}
1004
1005# get full path and file only path from a base_dir (which may be empty) and
1006# file (which may contain directories)
1007sub get_full_filenames {
1008    my ($base_dir, $file) = @_;
1009   
1010    my $filename_full_path = $file;
1011    # add on directory if present
1012    $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
1013   
1014    my $filename_no_path = $file;
1015
1016    # remove directory if present
1017    $filename_no_path =~ s/^.*[\/\\]//;
1018    return ($filename_full_path, $filename_no_path);
1019}
1020
1021# returns the path of a file without the filename -- ie. the directory the file is in
1022sub filename_head {
1023    my $filename = shift(@_);
1024
1025    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1026    $filename =~ s/[^\\\\]*$//;
1027    }
1028    else {
1029    $filename =~ s/[^\\\/]*$//;
1030    }
1031
1032    return $filename;
1033}
1034
1035
1036
1037# returns 1 if filename1 and filename2 point to the same
1038# file or directory
1039sub filenames_equal {
1040    my ($filename1, $filename2) = @_;
1041
1042    # use filename_cat to clean up trailing slashes and
1043    # multiple slashes
1044    $filename1 = filename_cat ($filename1);
1045    $filename2 = filename_cat ($filename2);
1046
1047    # filenames not case sensitive on windows
1048    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1049    $filename1 =~ tr/[A-Z]/[a-z]/;
1050    $filename2 =~ tr/[A-Z]/[a-z]/;
1051    }
1052    return 1 if $filename1 eq $filename2;
1053    return 0;
1054}
1055
1056
1057sub filename_within_directory
1058{
1059    my ($filename,$within_dir) = @_;
1060   
1061    if ($within_dir !~ m/[\/\\]$/) {
1062    my $dirsep = &util::get_dirsep();
1063    $within_dir .= $dirsep;
1064    }
1065   
1066    $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets   
1067    if ($filename =~ m/^$within_dir(.*)$/) {
1068    $filename = $1;
1069    }
1070   
1071    return $filename;
1072}
1073
1074sub filename_within_collection
1075{
1076    my ($filename) = @_;
1077
1078    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1079   
1080    if (defined $collect_dir) {
1081
1082    # if from within GSDLCOLLECTDIR, then remove directory prefix
1083    # so source_filename is realative to it.  This is done to aid
1084    # portability, i.e. the collection can be moved to somewhere
1085    # else on the file system and the archives directory will still
1086    # work.  This is needed, for example in the applet version of
1087    # GLI where GSDLHOME/collect on the server will be different to
1088    # the collect directory of the remove user.  Of course,
1089    # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1090    # it back into a full pathname.
1091
1092    $filename = filename_within_directory($filename,$collect_dir);
1093    }
1094   
1095    return $filename;
1096}
1097
1098sub prettyprint_file
1099{
1100    my ($base_dir,$file,$gli) = @_;
1101
1102    my $filename_full_path = &util::filename_cat($base_dir,$file);
1103
1104    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1105    require Win32;
1106
1107    # For some reason base_dir in the form c:/a/b/c
1108    # This leads to confusion later on, so turn it back into
1109    # the more usual Windows form
1110    $base_dir =~ s/\//\\/g;
1111    my $long_base_dir = Win32::GetLongPathName($base_dir);
1112    my $long_full_path = Win32::GetLongPathName($filename_full_path);
1113
1114    $file = filename_within_directory($long_full_path,$long_base_dir);
1115    $file = encode("utf8",$file) if ($gli);
1116    }
1117
1118    return $file;
1119}
1120
1121
1122sub upgrade_if_dos_filename
1123{
1124    my ($filename_full_path,$and_encode) = @_;
1125
1126    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1127    # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1128    # to its long (Windows) version
1129    my $long_filename = Win32::GetLongPathName($filename_full_path);
1130    if (defined $long_filename) {
1131        $filename_full_path = $long_filename;
1132    }
1133    # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1134    $filename_full_path =~ s/^(.):/\u$1:/;
1135    if ((defined $and_encode) && ($and_encode)) {
1136        $filename_full_path = encode("utf8",$filename_full_path);
1137    }
1138    }
1139
1140    return $filename_full_path;
1141}
1142
1143
1144sub downgrade_if_dos_filename
1145{
1146    my ($filename_full_path) = @_;
1147
1148    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1149    require Win32;
1150
1151    # Ensure the given long Windows filename is in a form that can
1152    # be opened by Perl => convert it to a short DOS-like filename
1153
1154    my $short_filename = Win32::GetShortPathName($filename_full_path);
1155    if (defined $short_filename) {
1156        $filename_full_path = $short_filename;
1157    }
1158    # Make sure initial drive letter is lower-case (to fit in
1159    # with rest of Greenstone)
1160    $filename_full_path =~ s/^(.):/\u$1:/;
1161    }
1162
1163    return $filename_full_path;
1164}
1165
1166sub block_filename
1167{
1168    my ($block_hash,$filename) = @_;
1169   
1170    if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1171   
1172    # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1173    my $lower_filename = lc($filename);
1174    $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1175#   my $lower_drive = $filename;
1176#   $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1177   
1178#   my $upper_drive = $filename;
1179#   $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1180#   
1181#   $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1182#   $block_hash->{'file_blocks'}->{$upper_drive} = 1;       
1183    }
1184    else {
1185    $block_hash->{'file_blocks'}->{$filename} = 1;
1186    }
1187}
1188
1189
1190sub filename_is_absolute
1191{
1192    my ($filename) = @_;
1193
1194    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1195    return ($filename =~ m/^(\w:)?\\/);
1196    }
1197    else {
1198    return ($filename =~ m/^\//);
1199    }
1200}
1201
1202
1203## @method make_absolute()
1204#
1205#  Ensure the given file path is absolute in respect to the given base path.
1206#
1207#  @param  $base_dir A string denoting the base path the given dir must be
1208#                    absolute to.
1209#  @param  $dir The directory to be made absolute as a string. Note that the
1210#               dir may already be absolute, in which case it will remain
1211#               unchanged.
1212#  @return The now absolute form of the directory as a string.
1213#
1214#  @author John Thompson, DL Consulting Ltd.
1215#  @copy 2006 DL Consulting Ltd.
1216#
1217#used in buildcol.pl, doesn't work for all cases --kjdon
1218sub make_absolute {
1219   
1220    my ($base_dir, $dir) = @_;
1221###    print STDERR "dir = $dir\n";
1222    $dir =~ s/[\\\/]+/\//g;
1223    $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1224    $dir =~ s|^/tmp_mnt||;
1225    1 while($dir =~ s|/[^/]*/\.\./|/|g);
1226    $dir =~ s|/[.][.]?/|/|g;
1227    $dir =~ tr|/|/|s;
1228###    print STDERR "dir = $dir\n";
1229   
1230    return $dir;
1231}
1232## make_absolute() ##
1233
1234sub get_dirsep {
1235
1236    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1237    return "\\";
1238    } else {
1239    return "\/";
1240    }
1241}
1242
1243sub get_os_dirsep {
1244
1245    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1246    return "\\\\";
1247    } else {
1248    return "\\\/";
1249    }
1250}
1251
1252sub get_re_dirsep {
1253
1254    return "\\\\|\\\/";
1255}
1256
1257
1258sub get_dirsep_tail {
1259    my ($filename) = @_;
1260   
1261    # returns last part of directory or filename
1262    # On unix e.g. a/b.d => b.d
1263    #              a/b/c => c
1264
1265    my $dirsep = get_re_dirsep();
1266    my @dirs = split (/$dirsep/, $filename);
1267    my $tail = pop @dirs;
1268
1269    # - caused problems under windows
1270    #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1271
1272    return $tail;
1273}
1274
1275
1276# if this is running on windows we want binaries to end in
1277# .exe, otherwise they don't have to end in any extension
1278sub get_os_exe {
1279    return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1280    return "";
1281}
1282
1283
1284# test to see whether this is a big or little endian machine
1285sub is_little_endian
1286{
1287    # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1288    # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1289    # Otherwise, it's little endian
1290
1291    #return 0 if $^O =~ /^darwin$/i;
1292    #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1293   
1294    # Going back to stating exactly whether the machine is little endian
1295    # or big endian, without any special case for Macs. Since for rata it comes
1296    # back with little endian and for shuttle with bigendian.
1297    return (ord(substr(pack("s",1), 0, 1)) == 1);
1298}
1299
1300
1301# will return the collection name if successful, "" otherwise
1302sub use_collection {
1303    my ($collection, $collectdir) = @_;
1304
1305    if (!defined $collectdir || $collectdir eq "") {
1306    $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1307    }
1308
1309    # get and check the collection
1310    if (!defined($collection) || $collection eq "") {
1311    if (defined $ENV{'GSDLCOLLECTION'}) {
1312        $collection = $ENV{'GSDLCOLLECTION'};
1313    } else {
1314        print STDOUT "No collection specified\n";
1315        return "";
1316    }
1317    }
1318   
1319    if ($collection eq "modelcol") {
1320    print STDOUT "You can't use modelcol.\n";
1321    return "";
1322    }
1323
1324    # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1325    # are defined
1326    $ENV{'GSDLCOLLECTION'} = $collection;
1327    $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1328
1329    # make sure this collection exists
1330    if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1331    print STDOUT "Invalid collection ($collection).\n";
1332    return "";
1333    }
1334
1335    # everything is ready to go
1336    return $collection;
1337}
1338
1339sub get_current_collection_name {
1340    return $ENV{'GSDLCOLLECTION'};
1341}
1342
1343
1344# will return the collection name if successful, "" otherwise. 
1345# Like use_collection (above) but for greenstone 3 (taking account of site level)
1346
1347sub use_site_collection {
1348    my ($site, $collection, $collectdir) = @_;
1349
1350    if (!defined $collectdir || $collectdir eq "") {
1351    die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1352    $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1353    }
1354
1355    # collectdir explicitly set by this point (using $site variable if required).
1356    # Can call "old" gsdl2 use_collection now.
1357
1358    return use_collection($collection,$collectdir);
1359}
1360
1361
1362
1363sub locate_config_file
1364{
1365    my ($file) = @_;
1366
1367    my $locations = locate_config_files($file);
1368
1369    return shift @$locations; # returns undef if 'locations' is empty
1370}
1371
1372
1373sub locate_config_files
1374{
1375    my ($file) = @_;
1376
1377    my @locations = ();
1378
1379    if (-e $file) {
1380    # Clearly specified (most likely full filename)
1381    # No need to hunt in 'etc' directories, return value unchanged
1382    push(@locations,$file);
1383    }
1384    else {
1385    # Check for collection specific one before looking in global GSDL 'etc'
1386    if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1387        my $test_collect_etc_filename
1388        = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1389       
1390        if (-e $test_collect_etc_filename) {
1391        push(@locations,$test_collect_etc_filename);
1392        }
1393    }
1394    my $test_main_etc_filename
1395        = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1396    if (-e $test_main_etc_filename) {
1397        push(@locations,$test_main_etc_filename);
1398    }
1399    }
1400
1401    return \@locations;
1402}
1403
1404
1405sub hyperlink_text
1406{
1407    my ($text) = @_;
1408   
1409    $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1410    $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1411
1412    return $text;
1413}
1414
1415
1416# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1417# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1418sub is_dir_empty
1419{
1420    my ($path) = @_;
1421    opendir DIR, $path;
1422    while(my $entry = readdir DIR) {
1423        next if($entry =~ /^\.\.?$/);
1424        closedir DIR;
1425        return 0;
1426    }
1427    closedir DIR;
1428    return 1;
1429}
1430
1431# Returns the given filename converted using either URL encoding or base64
1432# encoding, as specified by $rename_method. If the given filename has no suffix
1433# (if it is just the tailname), then $no_suffix should be some defined value.
1434# rename_method can be url, none, base64
1435sub rename_file {
1436    my ($filename, $rename_method, $no_suffix)  = @_;
1437
1438    if(!$filename) { # undefined or empty string
1439    return $filename;
1440    }
1441
1442    if (!$rename_method) {
1443    print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1444    # Debugging information
1445    # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1446    # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1447    $rename_method = "url";
1448    } elsif($rename_method eq "none") {
1449    return $filename; # would have already been renamed
1450    }
1451
1452    # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1453    ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1454    ###$filename =~ s/ /_/g;
1455
1456    my ($tailname,$dirname,$suffix);
1457    if($no_suffix) { # given a tailname, no suffix
1458    ($tailname,$dirname) = File::Basename::fileparse($filename);
1459    }
1460    else {
1461    ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1462    }
1463    if (!$suffix) {
1464    $suffix = "";
1465    }
1466    else {
1467    $suffix = lc($suffix);
1468    }
1469
1470    if ($rename_method eq "url") {
1471    $tailname = &unicode::url_encode($tailname);
1472    }
1473    elsif ($rename_method eq "base64") {
1474    $tailname = &unicode::base64_encode($tailname);
1475    $tailname =~ s/\s*//sg;      # for some reason it adds spaces not just at end but also in middle
1476    }
1477
1478    $filename = "$tailname$suffix";
1479    $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1480
1481    return $filename;
1482}
1483
1484
1485# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1486sub rename_ldb_or_bdb_file {
1487    my ($filename_no_ext) = @_;
1488
1489    my $new_filename = "$filename_no_ext.gdb";
1490    return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1491    # try ldb
1492    my $old_filename = "$filename_no_ext.ldb";
1493   
1494    if (-f $old_filename) {
1495    print STDERR "Renaming $old_filename to $new_filename\n";
1496    rename ($old_filename, $new_filename)
1497        || print STDERR "Rename failed: $!\n";
1498    return;
1499    }
1500    # try bdb
1501    $old_filename = "$filename_no_ext.bdb";
1502    if (-f $old_filename) {
1503    print STDERR "Renaming $old_filename to $new_filename\n";   
1504    rename ($old_filename, $new_filename)
1505        || print STDERR "Rename failed: $!\n";
1506    return;
1507    }
1508}
1509
1510
1511# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1512# By default, /greenstone3 for GS3 or /greenstone for GS2.
1513sub get_greenstone_url_prefix() {
1514    # if already set on a previous occasion, just return that
1515    # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1516    return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1517
1518    my ($configfile, $urlprefix, $defaultUrlprefix);
1519    my @propertynames = ();
1520
1521    if($ENV{'GSDL3SRCHOME'}) {
1522    $defaultUrlprefix = "/greenstone3";
1523    $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1524    push(@propertynames, qw/path\s*\=/);
1525    } else {
1526    $defaultUrlprefix = "/greenstone";
1527    $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1528    push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1529    }
1530
1531    $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1532
1533    if(!$urlprefix) { # no values found for URL prefix, use default values
1534    $urlprefix = $defaultUrlprefix;
1535    } else {
1536    #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1537    $urlprefix =~ s/^\///; # remove the starting slash
1538    my @dirs = split(/(\\|\/)/, $urlprefix);
1539    $urlprefix = shift(@dirs);
1540
1541    if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1542        $urlprefix = "/$urlprefix";
1543    }
1544    }
1545
1546    # set for the future
1547    $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1548#    print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1549    return $urlprefix;
1550}
1551
1552
1553# Given a config file (xml or java properties file) and a list/array of regular expressions
1554# that represent property names to match on, this function will return the value for the 1st
1555# matching property name. If the return value is undefined, no matching property was found.
1556sub extract_propvalue_from_file() {
1557    my ($configfile, $propertynames) = @_;
1558
1559    my $value;
1560    unless(open(FIN, "<$configfile")) {
1561    print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1562    return $value; # not initialised
1563    }
1564
1565    # Read the entire file at once, as one single line, then close it
1566    my $filecontents;
1567    {
1568    local $/ = undef;       
1569    $filecontents = <FIN>;
1570    }
1571    close(FIN);
1572
1573    foreach my $regex (@$propertynames) {
1574        ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1575    if($value) {
1576            $value =~ s/^\"//;     # remove any startquotes
1577        $value =~ s/\".*$//;   # remove the 1st endquotes (if any) followed by any xml
1578        last;              # found value for a matching property, break from loop
1579    }
1580    }
1581
1582    return $value;
1583}
1584
1585# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1586# given that perllib is in @INC in order to invoke this subroutine.
1587# Call as follows -- after setting up INC to include perllib and
1588# after setting up GSDLHOME and GSDLOS:
1589#
1590# require util;
1591# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1592#
1593sub setup_greenstone_env() {
1594    my ($GSDLHOME, $GSDLOS) = @_;
1595
1596    #my %env_map = ();
1597    # Get the localised ENV settings of running a localised source setup.bash
1598    # and put it into the ENV here. Need to clear GSDLHOME before running setup
1599    #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1600    my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";     
1601    if($GSDLOS =~ m/windows/i) {
1602        #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1603        $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1604    }
1605    if (!open(PIN, "$perl_command |")) {
1606        print STDERR ("Unable to execute command: $perl_command. $!\n");
1607    }
1608
1609    while (defined (my $perl_output_line = <PIN>)) {
1610        my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1611        #$env_map{$key}=$value;     
1612        $ENV{$key}=$value;
1613    }
1614    close (PIN);
1615
1616    # If any keys in $ENV don't occur in Greenstone's localised env
1617    # (stored in $env_map), delete those entries from $ENV
1618    #foreach $key (keys %ENV) {
1619    #   if(!defined $env_map{$key}) {
1620    #       print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1621    #       delete $ENV{$key}; # del $ENV(key, value) pair
1622    #   }
1623    #}
1624    #undef %env_map;
1625}
1626
1627sub get_perl_exec() {   
1628    my $perl_exec = $^X; # may return just "perl"
1629   
1630    if($ENV{'PERLPATH'}) {
1631        # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1632        if($ENV{'GSDLOS'} =~ m/windows/) {
1633            $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1634        } else {
1635            $perl_exec = "$ENV{'PERLPATH'}/perl";
1636        }
1637    } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1638        # containing the full path to the current perl executable we're using
1639        $perl_exec = $Config{perlpath}; # configured path for perl
1640        if (!-e $perl_exec) { # may not point to location on this machine
1641            $perl_exec = $^X; # may return just "perl"
1642            if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1643                print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";             
1644            }
1645        }
1646    }
1647   
1648    return $perl_exec;
1649}
1650
1651
16521;
Note: See TracBrowser for help on using the browser.