root/main/trunk/greenstone2/perllib/util.pm @ 24563

Revision 24563, 44.2 KB (checked in by ak19, 8 years ago)

1. On Windows, Depositor needs env var OS for the call to util::setup_greenstone_env() from bin\script\build to work (it will try to run setup.bat; if OS is set, it will be Windows_NT and if it's not set, it will default to Win95 and try to run a COMMAND statement that conflicts with the Depositor's building process. The fix is to let Apache have access to the env var OS, by using PassEnv?. 2. Fixed oversight of not closing filehandle in util::setup_greenstone_env().

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
38
39# removes files (but not directories)
40sub rm {
41    my (@files) = @_;
42
43    my @filefiles = ();
44
45    # make sure the files we want to delete exist
46    # and are regular files
47    foreach my $file (@files) {
48    if (!-e $file) {
49        print STDERR "util::rm $file does not exist\n";
50    } elsif ((!-f $file) && (!-l $file)) {
51        print STDERR "util::rm $file is not a regular (or symbolic) file\n";
52    } else {
53        push (@filefiles, $file);
54    }
55    }
56   
57    # remove the files
58    my $numremoved = unlink @filefiles;
59
60    # check to make sure all of them were removed
61    if ($numremoved != scalar(@filefiles)) {
62    print STDERR "util::rm Not all files were removed\n";
63    }
64}
65
66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine  when debugging the deletion of individual files.
68sub rm_debug {
69    my (@files) = @_;
70    my @filefiles = ();
71
72    # make sure the files we want to delete exist
73    # and are regular files
74    foreach my $file (@files) {
75        if (!-e $file) {
76            print STDERR "util::rm $file does not exist\n";
77        } elsif ((!-f $file) && (!-l $file)) {
78            print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79        } else { # debug message
80            unlink($file) or warn "Could not delete file $file: $!\n";
81        }
82    }
83}
84
85
86# recursive removal
87sub filtered_rm_r {
88    my ($files,$file_accept_re,$file_reject_re) = @_;
89
90#   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91#   my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92#   print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93   
94    my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
96    # recursively remove the files
97    foreach my $file (@files_array) {
98    $file =~ s/[\/\\]+$//; # remove trailing slashes
99   
100    if (!-e $file) {
101        print STDERR "util::filtered_rm_r $file does not exist\n";
102
103    } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
104        # get the contents of this directory
105        if (!opendir (INDIR, $file)) {
106        print STDERR "util::filtered_rm_r could not open directory $file\n";
107        } else {
108        my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109        closedir (INDIR);
110               
111        # remove all the files in this directory
112        map {$_="$file/$_";} @filedir;
113        &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
114
115        if (!defined $file_accept_re && !defined $file_reject_re) {
116            # remove this directory
117            if (!rmdir $file) {
118            print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119            }
120        }
121        }
122    } else {
123        next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
124
125        if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126        # remove this file 
127        &rm ($file);
128        }
129    }
130    }
131}
132
133
134# recursive removal
135sub rm_r {
136    my (@files) = @_;
137   
138    # use the more general (but reterospectively written function
139    # filtered_rm_r function()
140
141    filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
147# moves a file or a group of files
148sub mv {
149    my $dest = pop (@_);
150    my (@srcfiles) = @_;
151
152    # remove trailing slashes from source and destination files
153    $dest =~ s/[\\\/]+$//;
154    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156    # a few sanity checks
157    if (scalar (@srcfiles) == 0) {
158    print STDERR "util::mv no destination directory given\n";
159    return;
160    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161    print STDERR "util::mv if multiple source files are given the ".
162        "destination must be a directory\n";
163    return;
164    }
165
166    # move the files
167    foreach my $file (@srcfiles) {
168    my $tempdest = $dest;
169    if (-d $tempdest) {
170        my ($filename) = $file =~ /([^\\\/]+)$/;
171        $tempdest .= "/$filename";
172    }
173    if (!-e $file) {
174        print STDERR "util::mv $file does not exist\n";
175    } else {
176        rename ($file, $tempdest);
177    }
178    }
179}
180
181
182# copies a file or a group of files
183sub cp {
184    my $dest = pop (@_);
185    my (@srcfiles) = @_;
186
187    # remove trailing slashes from source and destination files
188    $dest =~ s/[\\\/]+$//;
189    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
190
191    # a few sanity checks
192    if (scalar (@srcfiles) == 0) {
193    print STDERR "util::cp no destination directory given\n";
194    return;
195    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
196    print STDERR "util::cp if multiple source files are given the ".
197        "destination must be a directory\n";
198    return;
199    }
200
201    # copy the files
202    foreach my $file (@srcfiles) {
203    my $tempdest = $dest;
204    if (-d $tempdest) {
205        my ($filename) = $file =~ /([^\\\/]+)$/;
206        $tempdest .= "/$filename";
207    }
208    if (!-e $file) {
209        print STDERR "util::cp $file does not exist\n";
210    } elsif (!-f $file) {
211        print STDERR "util::cp $file is not a plain file\n";
212    } else {
213        &File::Copy::copy ($file, $tempdest);
214    }
215    }
216}
217
218
219
220# recursively copies a file or group of files
221# syntax: cp_r (sourcefiles, destination directory)
222# destination must be a directory - to copy one file to
223# another use cp instead
224sub cp_r {
225    my $dest = pop (@_);
226    my (@srcfiles) = @_;
227
228    # a few sanity checks
229    if (scalar (@srcfiles) == 0) {
230    print STDERR "util::cp_r no destination directory given\n";
231    return;
232    } elsif (-f $dest) {
233    print STDERR "util::cp_r destination must be a directory\n";
234    return;
235    }
236   
237    # create destination directory if it doesn't exist already
238    if (! -d $dest) {
239    my $store_umask = umask(0002);
240    mkdir ($dest, 0777);
241    umask($store_umask);
242    }
243
244    # copy the files
245    foreach my $file (@srcfiles) {
246
247    if (!-e $file) {
248        print STDERR "util::cp_r $file does not exist\n";
249
250    } elsif (-d $file) {
251        # make the new directory
252        my ($filename) = $file =~ /([^\\\/]*)$/;
253        $dest = &util::filename_cat ($dest, $filename);
254        my $store_umask = umask(0002);
255        mkdir ($dest, 0777);
256        umask($store_umask);
257
258        # get the contents of this directory
259        if (!opendir (INDIR, $file)) {
260        print STDERR "util::cp_r could not open directory $file\n";
261        } else {
262        my @filedir = readdir (INDIR);
263        closedir (INDIR);
264        foreach my $f (@filedir) {
265            next if $f =~ /^\.\.?$/;
266            # copy all the files in this directory
267            my $ff = &util::filename_cat ($file, $f);
268            &cp_r ($ff, $dest);
269        }
270        }
271
272    } else {
273        &cp($file, $dest);
274    }
275    }
276}
277# recursively copies a file or group of files
278# syntax: cp_r (sourcefiles, destination directory)
279# destination must be a directory - to copy one file to
280# another use cp instead
281sub cp_r_nosvn {
282    my $dest = pop (@_);
283    my (@srcfiles) = @_;
284
285    # a few sanity checks
286    if (scalar (@srcfiles) == 0) {
287    print STDERR "util::cp_r no destination directory given\n";
288    return;
289    } elsif (-f $dest) {
290    print STDERR "util::cp_r destination must be a directory\n";
291    return;
292    }
293   
294    # create destination directory if it doesn't exist already
295    if (! -d $dest) {
296    my $store_umask = umask(0002);
297    mkdir ($dest, 0777);
298    umask($store_umask);
299    }
300
301    # copy the files
302    foreach my $file (@srcfiles) {
303
304    if (!-e $file) {
305        print STDERR "util::cp_r $file does not exist\n";
306
307    } elsif (-d $file) {
308        # make the new directory
309        my ($filename) = $file =~ /([^\\\/]*)$/;
310        $dest = &util::filename_cat ($dest, $filename);
311        my $store_umask = umask(0002);
312        mkdir ($dest, 0777);
313        umask($store_umask);
314
315        # get the contents of this directory
316        if (!opendir (INDIR, $file)) {
317        print STDERR "util::cp_r could not open directory $file\n";
318        } else {
319        my @filedir = readdir (INDIR);
320        closedir (INDIR);
321        foreach my $f (@filedir) {
322            next if $f =~ /^\.\.?$/;
323            next if $f =~ /^\.svn$/;
324            # copy all the files in this directory
325            my $ff = &util::filename_cat ($file, $f);
326            &cp_r ($ff, $dest);
327        }
328        }
329
330    } else {
331        &cp($file, $dest);
332    }
333    }
334}
335
336# copies a directory and its contents, excluding subdirectories, into a new directory
337sub cp_r_toplevel {
338    my $dest = pop (@_);
339    my (@srcfiles) = @_;
340
341    # a few sanity checks
342    if (scalar (@srcfiles) == 0) {
343    print STDERR "util::cp_r no destination directory given\n";
344    return;
345    } elsif (-f $dest) {
346    print STDERR "util::cp_r destination must be a directory\n";
347    return;
348    }
349   
350    # create destination directory if it doesn't exist already
351    if (! -d $dest) {
352    my $store_umask = umask(0002);
353    mkdir ($dest, 0777);
354    umask($store_umask);
355    }
356
357    # copy the files
358    foreach my $file (@srcfiles) {
359
360    if (!-e $file) {
361        print STDERR "util::cp_r $file does not exist\n";
362
363    } elsif (-d $file) {
364        # make the new directory
365        my ($filename) = $file =~ /([^\\\/]*)$/;
366        $dest = &util::filename_cat ($dest, $filename);
367        my $store_umask = umask(0002);
368        mkdir ($dest, 0777);
369        umask($store_umask);
370
371        # get the contents of this directory
372        if (!opendir (INDIR, $file)) {
373        print STDERR "util::cp_r could not open directory $file\n";
374        } else {
375        my @filedir = readdir (INDIR);
376        closedir (INDIR);
377        foreach my $f (@filedir) {
378            next if $f =~ /^\.\.?$/;
379           
380            # copy all the files in this directory, but not directories
381            my $ff = &util::filename_cat ($file, $f);
382            if (-f $ff) {
383            &cp($ff, $dest);
384            #&cp_r ($ff, $dest);
385            }
386        }
387        }
388
389    } else {
390        &cp($file, $dest);
391    }
392    }
393}
394
395sub mk_dir {
396    my ($dir) = @_;
397
398    my $store_umask = umask(0002);
399    my $mkdir_ok = mkdir ($dir, 0777);
400    umask($store_umask);
401   
402    if (!$mkdir_ok)
403    {
404    print STDERR "util::mk_dir could not create directory $dir\n";
405    return;
406    }
407}
408
409# in case anyone cares - I did some testing (using perls Benchmark module)
410# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
411# slightly faster (surprisingly) - Stefan.
412sub mk_all_dir {
413    my ($dir) = @_;
414
415    # use / for the directory separator, remove duplicate and
416    # trailing slashes
417    $dir=~s/[\\\/]+/\//g;
418    $dir=~s/[\\\/]+$//;
419
420    # make sure the cache directory exists
421    my $dirsofar = "";
422    my $first = 1;
423    foreach my $dirname (split ("/", $dir)) {
424    $dirsofar .= "/" unless $first;
425    $first = 0;
426
427    $dirsofar .= $dirname;
428
429    next if $dirname =~ /^(|[a-z]:)$/i;
430    if (!-e $dirsofar)
431        {
432        my $store_umask = umask(0002);
433        my $mkdir_ok = mkdir ($dirsofar, 0777);
434        umask($store_umask);
435        if (!$mkdir_ok)
436        {
437            print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
438            return;
439        }
440        }
441    }
442}
443
444# make hard link to file if supported by OS, otherwise copy the file
445sub hard_link {
446    my ($src, $dest, $verbosity) = @_;
447
448    # remove trailing slashes from source and destination files
449    $src =~ s/[\\\/]+$//;
450    $dest =~ s/[\\\/]+$//;
451
452##    print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
453    # a few sanity checks
454    if (-e $dest) {
455    # destination file already exists
456    return;
457    }
458    elsif (!-e $src) {
459    print STDERR "util::hard_link source file \"$src\" does not exist\n";
460    return 1;
461    }
462    elsif (-d $src) {
463    print STDERR "util::hard_link source \"$src\" is a directory\n";
464    return 1;
465    }
466
467    my $dest_dir = &File::Basename::dirname($dest);
468    mk_all_dir($dest_dir) if (!-e $dest_dir);
469
470
471    if (!link($src, $dest)) {
472    if ((!defined $verbosity) || ($verbosity>2)) {
473        print STDERR "util::hard_link: unable to create hard link. ";
474        print STDERR " Copying file: $src -> $dest\n";
475    }
476    &File::Copy::copy ($src, $dest);
477    }
478    return 0;
479}
480
481# make soft link to file if supported by OS, otherwise copy file
482sub soft_link {
483    my ($src, $dest, $ensure_paths_absolute) = @_;
484
485    # remove trailing slashes from source and destination files
486    $src =~ s/[\\\/]+$//;
487    $dest =~ s/[\\\/]+$//;
488
489    # Ensure file paths are absolute IF requested to do so
490    # Soft_linking didn't work for relative paths
491    if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
492    # We need to ensure that the src file is the absolute path
493    # See http://perldoc.perl.org/File/Spec.html
494    if(!File::Spec->file_name_is_absolute( $src ))  { # it's relative
495        $src = File::Spec->rel2abs($src); # make absolute
496    }
497    # Might as well ensure that the destination file's absolute path is used
498    if(!File::Spec->file_name_is_absolute( $dest )) {
499        $dest = File::Spec->rel2abs($dest); # make absolute
500    }
501    }
502
503    # a few sanity checks
504    if (!-e $src) {
505    print STDERR "util::soft_link source file $src does not exist\n";
506    return 0;
507    }
508
509    my $dest_dir = &File::Basename::dirname($dest);
510    mk_all_dir($dest_dir) if (!-e $dest_dir);
511
512    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
513       
514    # symlink not supported on windows
515    &File::Copy::copy ($src, $dest);
516
517    } elsif (!eval {symlink($src, $dest)}) {
518    print STDERR "util::soft_link: unable to create soft link.\n";
519    return 0;
520    }
521
522    return 1;
523}
524
525# Primarily for filenames generated by processing
526# content of HTML files (which are mapped to UTF-8 internally)
527#
528# To turn this into an octet string that really exists on the file
529# system:
530# 1. don't need to do anything special for Unix-based systems
531#   (as underlying file system is byte-code)
532# 2. need to map to short DOS filenames for Windows
533
534sub utf8_to_real_filename
535{
536    my ($utf8_filename) = @_;
537
538    my $real_filename;
539
540    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
541    require Win32;
542
543    print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
544
545    my $unicode_filename = decode("utf8",$utf8_filename);
546    $real_filename = Win32::GetShortPathName($unicode_filename);
547    }
548    else {
549    $real_filename = $utf8_filename;
550    }
551
552    return $real_filename;
553}
554
555
556sub fd_exists
557{
558    my $filename_full_path = shift @_;
559    my $test_op = shift @_ || "-e";
560
561    # By default tests for existance of file or directory (-e)
562    # Can be made more specific by providing second parameter (e.g. -f or -d)
563
564    my $exists = 0;
565
566    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
567    require Win32;
568    my $filename_short_path = Win32::GetShortPathName($filename_full_path);
569    if (!defined $filename_short_path) {
570        # Was probably still in UTF8 form (not what is needed on Windows)
571        my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
572        if (defined $unicode_filename_full_path) {
573        $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
574        }
575    }
576    $filename_full_path = $filename_short_path;
577    }
578
579    if (defined $filename_full_path) {
580    $exists = eval "($test_op \$filename_full_path)";
581    }
582
583    return $exists;
584}
585
586sub file_exists
587{
588    my ($filename_full_path) = @_;
589
590    return fd_exists($filename_full_path,"-f");
591}
592
593sub dir_exists
594{
595    my ($filename_full_path) = @_;
596
597    return fd_exists($filename_full_path,"-d");
598}
599
600
601
602# updates a copy of a directory in some other part of the filesystem
603# verbosity settings are: 0=low, 1=normal, 2=high
604# both $fromdir and $todir should be absolute paths
605sub cachedir {
606    my ($fromdir, $todir, $verbosity) = @_;
607    $verbosity = 1 unless defined $verbosity;
608
609    # use / for the directory separator, remove duplicate and
610    # trailing slashes
611    $fromdir=~s/[\\\/]+/\//g;
612    $fromdir=~s/[\\\/]+$//;
613    $todir=~s/[\\\/]+/\//g;
614    $todir=~s/[\\\/]+$//;
615
616    &mk_all_dir ($todir);
617
618    # get the directories in ascending order
619    if (!opendir (FROMDIR, $fromdir)) {
620    print STDERR "util::cachedir could not read directory $fromdir\n";
621    return;
622    }
623    my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
624    closedir (FROMDIR);
625
626    if (!opendir (TODIR, $todir)) {
627    print STDERR "util::cacedir could not read directory $todir\n";
628    return;
629    }
630    my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
631    closedir (TODIR);
632
633    my $fromi = 0;
634    my $toi = 0;
635           
636    while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
637#   print "fromi: $fromi toi: $toi\n";
638
639    # see if we should delete a file/directory
640    # this should happen if the file/directory
641    # is not in the from list or if its a different
642    # size, or has an older timestamp
643    if ($toi < scalar(@todir)) {
644        if (($fromi >= scalar(@fromdir)) ||
645        ($todir[$toi] lt $fromdir[$fromi] ||
646         ($todir[$toi] eq $fromdir[$fromi] &&
647          &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
648                  $verbosity)))) {
649
650        # the files are different
651        &rm_r("$todir/$todir[$toi]");
652        splice(@todir, $toi, 1); # $toi stays the same
653
654        } elsif ($todir[$toi] eq $fromdir[$fromi]) {
655        # the files are the same
656        # if it is a directory, check its contents
657        if (-d "$todir/$todir[$toi]") {
658            &cachedir ("$fromdir/$fromdir[$fromi]",
659                   "$todir/$todir[$toi]", $verbosity);
660        }
661
662        $toi++;
663        $fromi++;
664        next;
665        }
666    }
667 
668    # see if we should insert a file/directory
669    # we should insert a file/directory if there
670    # is no tofiles left or if the tofile does not exist
671    if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
672                      $todir[$toi] gt $fromdir[$fromi])) {
673        &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
674        splice (@todir, $toi, 0, $fromdir[$fromi]);
675
676        $toi++;
677        $fromi++;
678    }
679    }
680}
681
682# this function returns -1 if either file is not found
683# assumes that $file1 and $file2 are absolute file names or
684# in the current directory
685# $file2 is allowed to be newer than $file1
686sub differentfiles {
687    my ($file1, $file2, $verbosity) = @_;
688    $verbosity = 1 unless defined $verbosity;
689
690    $file1 =~ s/\/+$//;
691    $file2 =~ s/\/+$//;
692   
693    my ($file1name) = $file1 =~ /\/([^\/]*)$/;
694    my ($file2name) = $file2 =~ /\/([^\/]*)$/;
695
696    return -1 unless (-e $file1 && -e $file2);
697    if ($file1name ne $file2name) {
698    print STDERR "filenames are not the same\n" if ($verbosity >= 2);
699    return 1;
700    }
701
702    my @file1stat = stat ($file1);
703    my @file2stat = stat ($file2);
704
705    if (-d $file1) {
706    if (! -d $file2) {
707        print STDERR "one file is a directory\n" if ($verbosity >= 2);
708        return 1;
709    }
710    return 0;
711    }
712
713    # both must be regular files
714    unless (-f $file1 && -f $file2) {
715    print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
716    return 1;
717    }
718
719    # the size of the files must be the same
720    if ($file1stat[7] != $file2stat[7]) {
721    print STDERR "different sized files\n" if ($verbosity >= 2);
722    return 1;
723    }
724
725    # the second file cannot be older than the first
726    if ($file1stat[9] > $file2stat[9]) {
727    print STDERR "file is older\n" if ($verbosity >= 2);
728    return 1;
729    }
730
731    return 0;
732}
733
734
735sub get_tmp_filename
736{
737    my $file_ext = shift(@_) || undef;
738
739    my $opt_dot_file_ext = "";
740    if (defined $file_ext) {
741    if ($file_ext !~ m/\./) {
742        # no dot, so needs one added in at start
743        $opt_dot_file_ext = ".$file_ext"
744    }
745    else {
746        # allow for "extensions" such as _metadata.txt to be handled
747        # gracefully
748        $opt_dot_file_ext = $file_ext;
749    }
750    }
751
752    my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
753    &mk_all_dir ($tmpdir) unless -e $tmpdir;
754
755    my $count = 1000;
756    my $rand = int(rand $count);
757    my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
758
759    while (-e $full_tmp_filename) {
760    $rand = int(rand $count);
761    $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
762    $count++;
763    }
764   
765    return $full_tmp_filename;
766}
767
768sub get_timestamped_tmp_folder
769{
770
771    my $tmp_dirname;
772    if(defined $ENV{'GSDLCOLLECTDIR'}) {
773    $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
774    } elsif(defined $ENV{'GSDLHOME'}) {
775    $tmp_dirname = $ENV{'GSDLHOME'};
776    } else {
777    return undef;
778    }
779
780    $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
781    &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
782
783    # add the timestamp into the path otherwise we can run into problems
784    # if documents have the same name
785    my $timestamp = time;
786    my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
787    $tmp_dirname = $time_tmp_dirname;
788    my $i = 1;
789    while (-e $tmp_dirname) {
790    $tmp_dirname = "$time_tmp_dirname$i";
791    $i++;
792    }
793    &util::mk_dir($tmp_dirname);
794 
795    return $tmp_dirname;
796}
797
798sub get_timestamped_tmp_filename_in_collection
799{
800
801    my ($input_filename, $output_ext) = @_;
802    # derive tmp filename from input filename
803    my ($tailname, $dirname, $suffix)
804    = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
805
806    # softlink to collection tmp dir
807    my $tmp_dirname = &util::get_timestamped_tmp_folder();
808    $tmp_dirname = $dirname unless defined $tmp_dirname;
809
810    # following two steps copied from ConvertBinaryFile
811    # do we need them?? can't use them as is, as they use plugin methods.
812
813    #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
814
815    # URLEncode this since htmls with images where the html filename is utf8 don't seem
816    # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
817    # files on the filesystem.
818    #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
819    if (defined $output_ext) {
820    $output_ext = ".$output_ext"; # add the dot
821    } else {
822    $output_ext = $suffix;
823    }
824    $output_ext= lc($output_ext);
825    my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
826   
827    return $tmp_filename;
828}
829
830sub get_toplevel_tmp_dir
831{
832    return filename_cat($ENV{'GSDLHOME'}, "tmp");
833}
834
835
836sub filename_to_regex {
837    my $filename = shift (@_);
838
839    # need to put single backslash back to double so that regex works
840    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
841    $filename =~ s/\\/\\\\/g;
842    }
843    return $filename;
844}
845
846sub filename_cat {
847    my $first_file = shift(@_);
848    my (@filenames) = @_;
849
850#   Useful for debugging
851#     -- might make sense to call caller(0) rather than (1)??
852#   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
853#   print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
854   
855    # If first_file is not null or empty, then add it back into the list
856    if (defined $first_file && $first_file =~ /\S/) {
857    unshift(@filenames, $first_file);
858    }
859
860    my $filename = join("/", @filenames);
861
862    # remove duplicate slashes and remove the last slash
863    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
864    $filename =~ s/[\\\/]+/\\/g;
865    } else {
866    $filename =~ s/[\/]+/\//g;
867    # DB: want a filename abc\de.html to remain like this
868    }
869    $filename =~ s/[\\\/]$//;
870
871    return $filename;
872}
873
874
875sub pathname_cat {
876    my $first_path = shift(@_);
877    my (@pathnames) = @_;
878
879    # If first_path is not null or empty, then add it back into the list
880    if (defined $first_path && $first_path =~ /\S/) {
881    unshift(@pathnames, $first_path);
882    }
883
884    my $join_char;
885    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
886    $join_char = ";";
887    } else {
888    $join_char = ":";
889    }
890
891    my $pathname = join($join_char, @pathnames);
892
893    # remove duplicate slashes
894    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
895    $pathname =~ s/[\\\/]+/\\/g;
896    } else {
897    $pathname =~ s/[\/]+/\//g;
898    # DB: want a pathname abc\de.html to remain like this
899    }
900
901    return $pathname;
902}
903
904
905sub tidy_up_oid {
906    my ($OID) = @_;
907    if ($OID =~ /\./) {
908    print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
909    $OID =~ s/\.//g; #remove any periods
910    }
911    if ($OID =~ /^\s.*\s$/) {
912    print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
913    # remove starting and trailing whitespace
914    $OID =~ s/^\s+//;
915    $OID =~ s/\s+$//;
916    }
917    if ($OID =~ /^[\d]*$/) {
918    print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
919    $OID = "D" . $OID;
920    }       
921   
922    return $OID;
923}
924sub envvar_prepend {
925    my ($var,$val) = @_;
926
927    # do not prepend any value/path that's already in the environment variable
928    if ($ENV{'GSDLOS'} =~ /^windows$/i)
929    {
930    my $escaped_val = $val;
931    $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
932    if (!defined($ENV{$var})) {
933        $ENV{$var} = "$val";
934    }
935    elsif($ENV{$var} !~ m/$escaped_val/) {
936        $ENV{$var} = "$val;".$ENV{$var};
937    }
938    }
939    else {
940    if (!defined($ENV{$var})) {
941        $ENV{$var} = "$val";
942    }
943    elsif($ENV{$var} !~ m/$val/) {
944        $ENV{$var} = "$val:".$ENV{$var};
945    }
946    }
947}
948
949sub envvar_append {
950    my ($var,$val) = @_;
951
952    # do not append any value/path that's already in the environment variable
953    if ($ENV{'GSDLOS'} =~ /^windows$/i)
954    {
955    my $escaped_val = $val;
956    $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
957    if (!defined($ENV{$var})) {
958        $ENV{$var} = "$val";
959    }
960    elsif($ENV{$var} !~ m/$escaped_val/) {
961        $ENV{$var} .= ";$val";
962    }
963    }
964    else {
965    if (!defined($ENV{$var})) {
966        $ENV{$var} = "$val";
967    }
968    elsif($ENV{$var} !~ m/$val/) {
969        $ENV{$var} .= ":$val";
970    }
971    }   
972}
973
974
975# splits a filename into a prefix and a tail extension using the tail_re, or
976# if that fails, splits on the file_extension . (dot)
977sub get_prefix_and_tail_by_regex {
978
979    my ($filename,$tail_re) = @_;
980   
981    my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
982    if ((!defined $file_prefix) || (!defined $file_ext)) {
983    ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
984    }
985
986    return ($file_prefix,$file_ext);
987}
988
989# get full path and file only path from a base_dir (which may be empty) and
990# file (which may contain directories)
991sub get_full_filenames {
992    my ($base_dir, $file) = @_;
993   
994    my $filename_full_path = $file;
995    # add on directory if present
996    $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
997   
998    my $filename_no_path = $file;
999
1000    # remove directory if present
1001    $filename_no_path =~ s/^.*[\/\\]//;
1002    return ($filename_full_path, $filename_no_path);
1003}
1004
1005# returns the path of a file without the filename -- ie. the directory the file is in
1006sub filename_head {
1007    my $filename = shift(@_);
1008
1009    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1010    $filename =~ s/[^\\\\]*$//;
1011    }
1012    else {
1013    $filename =~ s/[^\\\/]*$//;
1014    }
1015
1016    return $filename;
1017}
1018
1019
1020
1021# returns 1 if filename1 and filename2 point to the same
1022# file or directory
1023sub filenames_equal {
1024    my ($filename1, $filename2) = @_;
1025
1026    # use filename_cat to clean up trailing slashes and
1027    # multiple slashes
1028    $filename1 = filename_cat ($filename1);
1029    $filename2 = filename_cat ($filename2);
1030
1031    # filenames not case sensitive on windows
1032    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1033    $filename1 =~ tr/[A-Z]/[a-z]/;
1034    $filename2 =~ tr/[A-Z]/[a-z]/;
1035    }
1036    return 1 if $filename1 eq $filename2;
1037    return 0;
1038}
1039
1040
1041sub filename_within_directory
1042{
1043    my ($filename,$within_dir) = @_;
1044   
1045    if ($within_dir !~ m/[\/\\]$/) {
1046    my $dirsep = &util::get_dirsep();
1047    $within_dir .= $dirsep;
1048    }
1049   
1050    $within_dir =~ s/\\/\\\\/g; # escape DOS style file separator
1051
1052    if ($filename =~ m/^$within_dir(.*)$/) {
1053    $filename = $1;
1054    }
1055   
1056    return $filename;
1057}
1058
1059sub filename_within_collection
1060{
1061    my ($filename) = @_;
1062
1063    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1064   
1065    if (defined $collect_dir) {
1066
1067    # if from within GSDLCOLLECTDIR, then remove directory prefix
1068    # so source_filename is realative to it.  This is done to aid
1069    # portability, i.e. the collection can be moved to somewhere
1070    # else on the file system and the archives directory will still
1071    # work.  This is needed, for example in the applet version of
1072    # GLI where GSDLHOME/collect on the server will be different to
1073    # the collect directory of the remove user.  Of course,
1074    # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1075    # it back into a full pathname.
1076
1077    $filename = filename_within_directory($filename,$collect_dir);
1078    }
1079   
1080    return $filename;
1081}
1082
1083sub prettyprint_file
1084{
1085    my ($base_dir,$file,$gli) = @_;
1086
1087    my $filename_full_path = &util::filename_cat($base_dir,$file);
1088
1089    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1090    require Win32;
1091
1092    # For some reason base_dir in the form c:/a/b/c
1093    # This leads to confusion later on, so turn it back into
1094    # the more usual Windows form
1095    $base_dir =~ s/\//\\/g;
1096    my $long_base_dir = Win32::GetLongPathName($base_dir);
1097    my $long_full_path = Win32::GetLongPathName($filename_full_path);
1098
1099    $file = filename_within_directory($long_full_path,$long_base_dir);
1100    $file = encode("utf8",$file) if ($gli);
1101    }
1102
1103    return $file;
1104}
1105
1106
1107sub upgrade_if_dos_filename
1108{
1109    my ($filename_full_path,$and_encode) = @_;
1110
1111    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1112    # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1113    # to its long (Windows) version
1114    my $long_filename = Win32::GetLongPathName($filename_full_path);
1115    if (defined $long_filename) {
1116        $filename_full_path = $long_filename;
1117    }
1118    # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1119    $filename_full_path =~ s/^(.):/\u$1:/;
1120    if ((defined $and_encode) && ($and_encode)) {
1121        $filename_full_path = encode("utf8",$filename_full_path);
1122    }
1123    }
1124
1125    return $filename_full_path;
1126}
1127
1128
1129sub downgrade_if_dos_filename
1130{
1131    my ($filename_full_path) = @_;
1132
1133    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1134    require Win32;
1135
1136    # Ensure the given long Windows filename is in a form that can
1137    # be opened by Perl => convert it to a short DOS-like filename
1138
1139    my $short_filename = Win32::GetShortPathName($filename_full_path);
1140    if (defined $short_filename) {
1141        $filename_full_path = $short_filename;
1142    }
1143    # Make sure initial drive letter is lower-case (to fit in
1144    # with rest of Greenstone)
1145    $filename_full_path =~ s/^(.):/\u$1:/;
1146    }
1147
1148    return $filename_full_path;
1149}
1150
1151sub block_filename
1152{
1153    my ($block_hash,$filename) = @_;
1154   
1155    if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1156   
1157    # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1158    my $lower_filename = lc($filename);
1159    $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1160#   my $lower_drive = $filename;
1161#   $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1162   
1163#   my $upper_drive = $filename;
1164#   $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1165#   
1166#   $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1167#   $block_hash->{'file_blocks'}->{$upper_drive} = 1;       
1168    }
1169    else {
1170    $block_hash->{'file_blocks'}->{$filename} = 1;
1171    }
1172}
1173
1174
1175sub filename_is_absolute
1176{
1177    my ($filename) = @_;
1178
1179    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1180    return ($filename =~ m/^(\w:)?\\/);
1181    }
1182    else {
1183    return ($filename =~ m/^\//);
1184    }
1185}
1186
1187
1188## @method make_absolute()
1189#
1190#  Ensure the given file path is absolute in respect to the given base path.
1191#
1192#  @param  $base_dir A string denoting the base path the given dir must be
1193#                    absolute to.
1194#  @param  $dir The directory to be made absolute as a string. Note that the
1195#               dir may already be absolute, in which case it will remain
1196#               unchanged.
1197#  @return The now absolute form of the directory as a string.
1198#
1199#  @author John Thompson, DL Consulting Ltd.
1200#  @copy 2006 DL Consulting Ltd.
1201#
1202#used in buildcol.pl, doesn't work for all cases --kjdon
1203sub make_absolute {
1204   
1205    my ($base_dir, $dir) = @_;
1206###    print STDERR "dir = $dir\n";
1207    $dir =~ s/[\\\/]+/\//g;
1208    $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1209    $dir =~ s|^/tmp_mnt||;
1210    1 while($dir =~ s|/[^/]*/\.\./|/|g);
1211    $dir =~ s|/[.][.]?/|/|g;
1212    $dir =~ tr|/|/|s;
1213###    print STDERR "dir = $dir\n";
1214   
1215    return $dir;
1216}
1217## make_absolute() ##
1218
1219sub get_dirsep {
1220
1221    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1222    return "\\";
1223    } else {
1224    return "\/";
1225    }
1226}
1227
1228sub get_os_dirsep {
1229
1230    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1231    return "\\\\";
1232    } else {
1233    return "\\\/";
1234    }
1235}
1236
1237sub get_re_dirsep {
1238
1239    return "\\\\|\\\/";
1240}
1241
1242
1243sub get_dirsep_tail {
1244    my ($filename) = @_;
1245   
1246    # returns last part of directory or filename
1247    # On unix e.g. a/b.d => b.d
1248    #              a/b/c => c
1249
1250    my $dirsep = get_re_dirsep();
1251    my @dirs = split (/$dirsep/, $filename);
1252    my $tail = pop @dirs;
1253
1254    # - caused problems under windows
1255    #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1256
1257    return $tail;
1258}
1259
1260
1261# if this is running on windows we want binaries to end in
1262# .exe, otherwise they don't have to end in any extension
1263sub get_os_exe {
1264    return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1265    return "";
1266}
1267
1268
1269# test to see whether this is a big or little endian machine
1270sub is_little_endian
1271{
1272    # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1273    # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1274    # Otherwise, it's little endian
1275
1276    #return 0 if $^O =~ /^darwin$/i;
1277    #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1278   
1279    # Going back to stating exactly whether the machine is little endian
1280    # or big endian, without any special case for Macs. Since for rata it comes
1281    # back with little endian and for shuttle with bigendian.
1282    return (ord(substr(pack("s",1), 0, 1)) == 1);
1283}
1284
1285
1286# will return the collection name if successful, "" otherwise
1287sub use_collection {
1288    my ($collection, $collectdir) = @_;
1289
1290    if (!defined $collectdir || $collectdir eq "") {
1291    $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1292    }
1293
1294    # get and check the collection
1295    if (!defined($collection) || $collection eq "") {
1296    if (defined $ENV{'GSDLCOLLECTION'}) {
1297        $collection = $ENV{'GSDLCOLLECTION'};
1298    } else {
1299        print STDOUT "No collection specified\n";
1300        return "";
1301    }
1302    }
1303   
1304    if ($collection eq "modelcol") {
1305    print STDOUT "You can't use modelcol.\n";
1306    return "";
1307    }
1308
1309    # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1310    # are defined
1311    $ENV{'GSDLCOLLECTION'} = $collection;
1312    $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1313
1314    # make sure this collection exists
1315    if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1316    print STDOUT "Invalid collection ($collection).\n";
1317    return "";
1318    }
1319
1320    # everything is ready to go
1321    return $collection;
1322}
1323
1324sub get_current_collection_name {
1325    return $ENV{'GSDLCOLLECTION'};
1326}
1327
1328
1329# will return the collection name if successful, "" otherwise. 
1330# Like use_collection (above) but for greenstone 3 (taking account of site level)
1331
1332sub use_site_collection {
1333    my ($site, $collection, $collectdir) = @_;
1334
1335    if (!defined $collectdir || $collectdir eq "") {
1336    die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1337    $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1338    }
1339
1340    # collectdir explicitly set by this point (using $site variable if required).
1341    # Can call "old" gsdl2 use_collection now.
1342
1343    return use_collection($collection,$collectdir);
1344}
1345
1346
1347
1348sub locate_config_file
1349{
1350    my ($file) = @_;
1351
1352    my $locations = locate_config_files($file);
1353
1354    return shift @$locations; # returns undef if 'locations' is empty
1355}
1356
1357
1358sub locate_config_files
1359{
1360    my ($file) = @_;
1361
1362    my @locations = ();
1363
1364    if (-e $file) {
1365    # Clearly specified (most likely full filename)
1366    # No need to hunt in 'etc' directories, return value unchanged
1367    push(@locations,$file);
1368    }
1369    else {
1370    # Check for collection specific one before looking in global GSDL 'etc'
1371    if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1372        my $test_collect_etc_filename
1373        = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1374       
1375        if (-e $test_collect_etc_filename) {
1376        push(@locations,$test_collect_etc_filename);
1377        }
1378    }
1379    my $test_main_etc_filename
1380        = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1381    if (-e $test_main_etc_filename) {
1382        push(@locations,$test_main_etc_filename);
1383    }
1384    }
1385
1386    return \@locations;
1387}
1388
1389
1390sub hyperlink_text
1391{
1392    my ($text) = @_;
1393   
1394    $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1395    $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1396
1397    return $text;
1398}
1399
1400
1401# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1402# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1403sub is_dir_empty
1404{
1405    my ($path) = @_;
1406    opendir DIR, $path;
1407    while(my $entry = readdir DIR) {
1408        next if($entry =~ /^\.\.?$/);
1409        closedir DIR;
1410        return 0;
1411    }
1412    closedir DIR;
1413    return 1;
1414}
1415
1416# Returns the given filename converted using either URL encoding or base64
1417# encoding, as specified by $rename_method. If the given filename has no suffix
1418# (if it is just the tailname), then $no_suffix should be some defined value.
1419# rename_method can be url, none, base64
1420sub rename_file {
1421    my ($filename, $rename_method, $no_suffix)  = @_;
1422
1423    if(!$filename) { # undefined or empty string
1424    return $filename;
1425    }
1426
1427    if (!$rename_method) {
1428    print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1429    # Debugging information
1430    # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1431    # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1432    $rename_method = "url";
1433    } elsif($rename_method eq "none") {
1434    return $filename; # would have already been renamed
1435    }
1436
1437    # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1438    ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1439    ###$filename =~ s/ /_/g;
1440
1441    my ($tailname,$dirname,$suffix);
1442    if($no_suffix) { # given a tailname, no suffix
1443    ($tailname,$dirname) = File::Basename::fileparse($filename);
1444    }
1445    else {
1446    ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1447    }
1448    if (!$suffix) {
1449    $suffix = "";
1450    }
1451    else {
1452    $suffix = lc($suffix);
1453    }
1454
1455    if ($rename_method eq "url") {
1456    $tailname = &unicode::url_encode($tailname);
1457    }
1458    elsif ($rename_method eq "base64") {
1459    $tailname = &unicode::base64_encode($tailname);
1460    $tailname =~ s/\s*//sg;      # for some reason it adds spaces not just at end but also in middle
1461    }
1462
1463    $filename = "$tailname$suffix";
1464    $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1465
1466    return $filename;
1467}
1468
1469
1470# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1471sub rename_ldb_or_bdb_file {
1472    my ($filename_no_ext) = @_;
1473
1474    my $new_filename = "$filename_no_ext.gdb";
1475    return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1476    # try ldb
1477    my $old_filename = "$filename_no_ext.ldb";
1478   
1479    if (-f $old_filename) {
1480    print STDERR "Renaming $old_filename to $new_filename\n";
1481    rename ($old_filename, $new_filename)
1482        || print STDERR "Rename failed: $!\n";
1483    return;
1484    }
1485    # try bdb
1486    $old_filename = "$filename_no_ext.bdb";
1487    if (-f $old_filename) {
1488    print STDERR "Renaming $old_filename to $new_filename\n";   
1489    rename ($old_filename, $new_filename)
1490        || print STDERR "Rename failed: $!\n";
1491    return;
1492    }
1493}
1494
1495
1496# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1497# By default, /greenstone3 for GS3 or /greenstone for GS2.
1498sub get_greenstone_url_prefix() {
1499    # if already set on a previous occasion, just return that
1500    # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1501    return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1502
1503    my ($configfile, $urlprefix, $defaultUrlprefix);
1504    my @propertynames = ();
1505
1506    if($ENV{'GSDL3SRCHOME'}) {
1507    $defaultUrlprefix = "/greenstone3";
1508    $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1509    push(@propertynames, qw/path\s*\=/);
1510    } else {
1511    $defaultUrlprefix = "/greenstone";
1512    $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1513    push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1514    }
1515
1516    $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1517
1518    if(!$urlprefix) { # no values found for URL prefix, use default values
1519    $urlprefix = $defaultUrlprefix;
1520    } else {
1521    #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1522    $urlprefix =~ s/^\///; # remove the starting slash
1523    my @dirs = split(/(\\|\/)/, $urlprefix);
1524    $urlprefix = shift(@dirs);
1525
1526    if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1527        $urlprefix = "/$urlprefix";
1528    }
1529    }
1530
1531    # set for the future
1532    $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1533#    print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1534    return $urlprefix;
1535}
1536
1537
1538# Given a config file (xml or java properties file) and a list/array of regular expressions
1539# that represent property names to match on, this function will return the value for the 1st
1540# matching property name. If the return value is undefined, no matching property was found.
1541sub extract_propvalue_from_file() {
1542    my ($configfile, $propertynames) = @_;
1543
1544    my $value;
1545    unless(open(FIN, "<$configfile")) {
1546    print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1547    return $value; # not initialised
1548    }
1549
1550    # Read the entire file at once, as one single line, then close it
1551    my $filecontents;
1552    {
1553    local $/ = undef;       
1554    $filecontents = <FIN>;
1555    }
1556    close(FIN);
1557
1558    foreach my $regex (@$propertynames) {
1559        ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1560    if($value) {
1561            $value =~ s/^\"//;     # remove any startquotes
1562        $value =~ s/\".*$//;   # remove the 1st endquotes (if any) followed by any xml
1563        last;              # found value for a matching property, break from loop
1564    }
1565    }
1566
1567    return $value;
1568}
1569
1570# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1571# given that perllib is in @INC in order to invoke this subroutine.
1572# Call as follows -- after setting up INC to include perllib and
1573# after setting up GSDLHOME and GSDLOS:
1574#
1575# require util;
1576# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1577#
1578sub setup_greenstone_env() {
1579    my ($GSDLHOME, $GSDLOS) = @_;
1580
1581    #my %env_map = ();
1582    # Get the localised ENV settings of running a localised source setup.bash
1583    # and put it into the ENV here. Need to clear GSDLHOME before running setup
1584    #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1585    my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";     
1586    if($GSDLOS =~ m/windows/i) {
1587        #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1588        $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1589    }
1590    if (!open(PIN, "$perl_command |")) {
1591        print STDERR ("Unable to execute command: $perl_command. $!\n");
1592    }
1593
1594    while (defined (my $perl_output_line = <PIN>)) {
1595        my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1596        #$env_map{$key}=$value;     
1597        $ENV{$key}=$value;
1598    }
1599    close (PIN);
1600
1601    # If any keys in $ENV don't occur in Greenstone's localised env
1602    # (stored in $env_map), delete those entries from $ENV
1603    #foreach $key (keys %ENV) {
1604    #   if(!defined $env_map{$key}) {
1605    #       print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1606    #       delete $ENV{$key}; # del $ENV(key, value) pair
1607    #   }
1608    #}
1609    #undef %env_map;
1610}
1611
1612sub get_perl_exec() {   
1613    my $perl_exec = $^X; # may return just "perl"
1614   
1615    if($ENV{'PERLPATH'}) {
1616        # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1617        if($ENV{'GSDLOS'} =~ m/windows/) {
1618            $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1619        } else {
1620            $perl_exec = "$ENV{'PERLPATH'}/perl";
1621        }
1622    } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1623        # containing the full path to the current perl executable we're using
1624        $perl_exec = $Config{perlpath}; # configured path for perl
1625        if (!-e $perl_exec) { # may not point to location on this machine
1626            $perl_exec = $^X; # may return just "perl"
1627            if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1628                print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";             
1629            }
1630        }
1631    }
1632   
1633    return $perl_exec;
1634}
1635
1636
16371;
Note: See TracBrowser for help on using the browser.