root/main/trunk/greenstone2/perllib/util.pm @ 23362

Revision 23362, 41.4 KB (checked in by davidb, 10 years ago)

Additional routines (and few upgraded) to help support Greenstone working with filenames under Windows when then go beyond Latin-1 and start turning up in their DOS abbreviated form (e.g. Test~1.txt)

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33
34# removes files (but not directories)
35sub rm {
36    my (@files) = @_;
37
38    my @filefiles = ();
39
40    # make sure the files we want to delete exist
41    # and are regular files
42    foreach my $file (@files) {
43    if (!-e $file) {
44        print STDERR "util::rm $file does not exist\n";
45    } elsif ((!-f $file) && (!-l $file)) {
46        print STDERR "util::rm $file is not a regular (or symbolic) file\n";
47    } else {
48        push (@filefiles, $file);
49    }
50    }
51   
52    # remove the files
53    my $numremoved = unlink @filefiles;
54
55    # check to make sure all of them were removed
56    if ($numremoved != scalar(@filefiles)) {
57    print STDERR "util::rm Not all files were removed\n";
58    }
59}
60
61# removes files (but not directories) - can rename this to the default
62# "rm" subroutine  when debugging the deletion of individual files.
63sub rm_debug {
64    my (@files) = @_;
65    my @filefiles = ();
66
67    # make sure the files we want to delete exist
68    # and are regular files
69    foreach my $file (@files) {
70        if (!-e $file) {
71            print STDERR "util::rm $file does not exist\n";
72        } elsif ((!-f $file) && (!-l $file)) {
73            print STDERR "util::rm $file is not a regular (or symbolic) file\n";
74        } else { # debug message
75            unlink($file) or warn "Could not delete file $file: $!\n";
76        }
77    }
78}
79
80
81# recursive removal
82sub filtered_rm_r {
83    my ($files,$file_accept_re,$file_reject_re) = @_;
84
85    my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
86
87    # recursively remove the files
88    foreach my $file (@files_array) {
89    $file =~ s/[\/\\]+$//; # remove trailing slashes
90   
91    if (!-e $file) {
92        print STDERR "util::filtered_rm_r $file does not exist\n";
93
94    } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
95        # get the contents of this directory
96        if (!opendir (INDIR, $file)) {
97        print STDERR "util::filtered_rm_r could not open directory $file\n";
98        } else {
99        my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
100        closedir (INDIR);
101               
102        # remove all the files in this directory
103        map {$_="$file/$_";} @filedir;
104        &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
105
106        if (!defined $file_accept_re && !defined $file_reject_re) {
107            # remove this directory
108            if (!rmdir $file) {
109            print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
110            }
111        }
112        }
113    } else {
114        next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
115
116        if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
117        # remove this file 
118        &rm ($file);
119        }
120    }
121    }
122}
123
124
125# recursive removal
126sub rm_r {
127    my (@files) = @_;
128   
129    # use the more general (but reterospectively written function
130    # filtered_rm_r function()
131
132    filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
133}
134
135
136
137
138# moves a file or a group of files
139sub mv {
140    my $dest = pop (@_);
141    my (@srcfiles) = @_;
142
143    # remove trailing slashes from source and destination files
144    $dest =~ s/[\\\/]+$//;
145    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
146
147    # a few sanity checks
148    if (scalar (@srcfiles) == 0) {
149    print STDERR "util::mv no destination directory given\n";
150    return;
151    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
152    print STDERR "util::mv if multiple source files are given the ".
153        "destination must be a directory\n";
154    return;
155    }
156
157    # move the files
158    foreach my $file (@srcfiles) {
159    my $tempdest = $dest;
160    if (-d $tempdest) {
161        my ($filename) = $file =~ /([^\\\/]+)$/;
162        $tempdest .= "/$filename";
163    }
164    if (!-e $file) {
165        print STDERR "util::mv $file does not exist\n";
166    } else {
167        rename ($file, $tempdest);
168    }
169    }
170}
171
172
173# copies a file or a group of files
174sub cp {
175    my $dest = pop (@_);
176    my (@srcfiles) = @_;
177
178    # remove trailing slashes from source and destination files
179    $dest =~ s/[\\\/]+$//;
180    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
181
182    # a few sanity checks
183    if (scalar (@srcfiles) == 0) {
184    print STDERR "util::cp no destination directory given\n";
185    return;
186    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
187    print STDERR "util::cp if multiple source files are given the ".
188        "destination must be a directory\n";
189    return;
190    }
191
192    # copy the files
193    foreach my $file (@srcfiles) {
194    my $tempdest = $dest;
195    if (-d $tempdest) {
196        my ($filename) = $file =~ /([^\\\/]+)$/;
197        $tempdest .= "/$filename";
198    }
199    if (!-e $file) {
200        print STDERR "util::cp $file does not exist\n";
201    } elsif (!-f $file) {
202        print STDERR "util::cp $file is not a plain file\n";
203    } else {
204        &File::Copy::copy ($file, $tempdest);
205    }
206    }
207}
208
209
210
211# recursively copies a file or group of files
212# syntax: cp_r (sourcefiles, destination directory)
213# destination must be a directory - to copy one file to
214# another use cp instead
215sub cp_r {
216    my $dest = pop (@_);
217    my (@srcfiles) = @_;
218
219    # a few sanity checks
220    if (scalar (@srcfiles) == 0) {
221    print STDERR "util::cp_r no destination directory given\n";
222    return;
223    } elsif (-f $dest) {
224    print STDERR "util::cp_r destination must be a directory\n";
225    return;
226    }
227   
228    # create destination directory if it doesn't exist already
229    if (! -d $dest) {
230    my $store_umask = umask(0002);
231    mkdir ($dest, 0777);
232    umask($store_umask);
233    }
234
235    # copy the files
236    foreach my $file (@srcfiles) {
237
238    if (!-e $file) {
239        print STDERR "util::cp_r $file does not exist\n";
240
241    } elsif (-d $file) {
242        # make the new directory
243        my ($filename) = $file =~ /([^\\\/]*)$/;
244        $dest = &util::filename_cat ($dest, $filename);
245        my $store_umask = umask(0002);
246        mkdir ($dest, 0777);
247        umask($store_umask);
248
249        # get the contents of this directory
250        if (!opendir (INDIR, $file)) {
251        print STDERR "util::cp_r could not open directory $file\n";
252        } else {
253        my @filedir = readdir (INDIR);
254        closedir (INDIR);
255        foreach my $f (@filedir) {
256            next if $f =~ /^\.\.?$/;
257            # copy all the files in this directory
258            my $ff = &util::filename_cat ($file, $f);
259            &cp_r ($ff, $dest);
260        }
261        }
262
263    } else {
264        &cp($file, $dest);
265    }
266    }
267}
268# recursively copies a file or group of files
269# syntax: cp_r (sourcefiles, destination directory)
270# destination must be a directory - to copy one file to
271# another use cp instead
272sub cp_r_nosvn {
273    my $dest = pop (@_);
274    my (@srcfiles) = @_;
275
276    # a few sanity checks
277    if (scalar (@srcfiles) == 0) {
278    print STDERR "util::cp_r no destination directory given\n";
279    return;
280    } elsif (-f $dest) {
281    print STDERR "util::cp_r destination must be a directory\n";
282    return;
283    }
284   
285    # create destination directory if it doesn't exist already
286    if (! -d $dest) {
287    my $store_umask = umask(0002);
288    mkdir ($dest, 0777);
289    umask($store_umask);
290    }
291
292    # copy the files
293    foreach my $file (@srcfiles) {
294
295    if (!-e $file) {
296        print STDERR "util::cp_r $file does not exist\n";
297
298    } elsif (-d $file) {
299        # make the new directory
300        my ($filename) = $file =~ /([^\\\/]*)$/;
301        $dest = &util::filename_cat ($dest, $filename);
302        my $store_umask = umask(0002);
303        mkdir ($dest, 0777);
304        umask($store_umask);
305
306        # get the contents of this directory
307        if (!opendir (INDIR, $file)) {
308        print STDERR "util::cp_r could not open directory $file\n";
309        } else {
310        my @filedir = readdir (INDIR);
311        closedir (INDIR);
312        foreach my $f (@filedir) {
313            next if $f =~ /^\.\.?$/;
314            next if $f =~ /^\.svn$/;
315            # copy all the files in this directory
316            my $ff = &util::filename_cat ($file, $f);
317            &cp_r ($ff, $dest);
318        }
319        }
320
321    } else {
322        &cp($file, $dest);
323    }
324    }
325}
326
327# copies a directory and its contents, excluding subdirectories, into a new directory
328sub cp_r_toplevel {
329    my $dest = pop (@_);
330    my (@srcfiles) = @_;
331
332    # a few sanity checks
333    if (scalar (@srcfiles) == 0) {
334    print STDERR "util::cp_r no destination directory given\n";
335    return;
336    } elsif (-f $dest) {
337    print STDERR "util::cp_r destination must be a directory\n";
338    return;
339    }
340   
341    # create destination directory if it doesn't exist already
342    if (! -d $dest) {
343    my $store_umask = umask(0002);
344    mkdir ($dest, 0777);
345    umask($store_umask);
346    }
347
348    # copy the files
349    foreach my $file (@srcfiles) {
350
351    if (!-e $file) {
352        print STDERR "util::cp_r $file does not exist\n";
353
354    } elsif (-d $file) {
355        # make the new directory
356        my ($filename) = $file =~ /([^\\\/]*)$/;
357        $dest = &util::filename_cat ($dest, $filename);
358        my $store_umask = umask(0002);
359        mkdir ($dest, 0777);
360        umask($store_umask);
361
362        # get the contents of this directory
363        if (!opendir (INDIR, $file)) {
364        print STDERR "util::cp_r could not open directory $file\n";
365        } else {
366        my @filedir = readdir (INDIR);
367        closedir (INDIR);
368        foreach my $f (@filedir) {
369            next if $f =~ /^\.\.?$/;
370           
371            # copy all the files in this directory, but not directories
372            my $ff = &util::filename_cat ($file, $f);
373            if (-f $ff) {
374            &cp($ff, $dest);
375            #&cp_r ($ff, $dest);
376            }
377        }
378        }
379
380    } else {
381        &cp($file, $dest);
382    }
383    }
384}
385
386sub mk_dir {
387    my ($dir) = @_;
388
389    my $store_umask = umask(0002);
390    my $mkdir_ok = mkdir ($dir, 0777);
391    umask($store_umask);
392   
393    if (!$mkdir_ok)
394    {
395    print STDERR "util::mk_dir could not create directory $dir\n";
396    return;
397    }
398}
399
400# in case anyone cares - I did some testing (using perls Benchmark module)
401# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
402# slightly faster (surprisingly) - Stefan.
403sub mk_all_dir {
404    my ($dir) = @_;
405
406    # use / for the directory separator, remove duplicate and
407    # trailing slashes
408    $dir=~s/[\\\/]+/\//g;
409    $dir=~s/[\\\/]+$//;
410
411    # make sure the cache directory exists
412    my $dirsofar = "";
413    my $first = 1;
414    foreach my $dirname (split ("/", $dir)) {
415    $dirsofar .= "/" unless $first;
416    $first = 0;
417
418    $dirsofar .= $dirname;
419
420    next if $dirname =~ /^(|[a-z]:)$/i;
421    if (!-e $dirsofar)
422        {
423        my $store_umask = umask(0002);
424        my $mkdir_ok = mkdir ($dirsofar, 0777);
425        umask($store_umask);
426        if (!$mkdir_ok)
427        {
428            print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
429            return;
430        }
431        }
432    }
433}
434
435# make hard link to file if supported by OS, otherwise copy the file
436sub hard_link {
437    my ($src, $dest, $verbosity) = @_;
438
439    # remove trailing slashes from source and destination files
440    $src =~ s/[\\\/]+$//;
441    $dest =~ s/[\\\/]+$//;
442
443##    print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
444    # a few sanity checks
445    if (-e $dest) {
446    # destination file already exists
447    return;
448    }
449    elsif (!-e $src) {
450    print STDERR "util::hard_link source file \"$src\" does not exist\n";
451    return 1;
452    }
453    elsif (-d $src) {
454    print STDERR "util::hard_link source \"$src\" is a directory\n";
455    return 1;
456    }
457
458    my $dest_dir = &File::Basename::dirname($dest);
459    mk_all_dir($dest_dir) if (!-e $dest_dir);
460
461
462    if (!link($src, $dest)) {
463    if ((!defined $verbosity) || ($verbosity>2)) {
464        print STDERR "util::hard_link: unable to create hard link. ";
465        print STDERR " Copying file: $src -> $dest\n";
466    }
467    &File::Copy::copy ($src, $dest);
468    }
469    return 0;
470}
471
472# make soft link to file if supported by OS, otherwise copy file
473sub soft_link {
474    my ($src, $dest, $ensure_paths_absolute) = @_;
475
476    # remove trailing slashes from source and destination files
477    $src =~ s/[\\\/]+$//;
478    $dest =~ s/[\\\/]+$//;
479
480    # Ensure file paths are absolute IF requested to do so
481    # Soft_linking didn't work for relative paths
482    if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
483    # We need to ensure that the src file is the absolute path
484    # See http://perldoc.perl.org/File/Spec.html
485    if(!File::Spec->file_name_is_absolute( $src ))  { # it's relative
486        $src = File::Spec->rel2abs($src); # make absolute
487    }
488    # Might as well ensure that the destination file's absolute path is used
489    if(!File::Spec->file_name_is_absolute( $dest )) {
490        $dest = File::Spec->rel2abs($dest); # make absolute
491    }
492    }
493
494    # a few sanity checks
495    if (!-e $src) {
496    print STDERR "util::soft_link source file $src does not exist\n";
497    return 0;
498    }
499
500    my $dest_dir = &File::Basename::dirname($dest);
501    mk_all_dir($dest_dir) if (!-e $dest_dir);
502
503    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
504    # symlink not supported on windows
505    &File::Copy::copy ($src, $dest);
506
507    } elsif (!eval {symlink($src, $dest)}) {
508    print STDERR "util::soft_link: unable to create soft link.\n";
509    return 0;
510    }
511
512    return 1;
513}
514
515# Primarily for filenames generated by processing
516# content of HTML files (which are mapped to UTF-8 internally)
517#
518# To turn this into an octet string that really exists on the file
519# system:
520# 1. don't need to do anything special for Unix-based systems
521#   (as underlying file system is byte-code)
522# 2. need to map to short DOS filenames for Windows
523
524sub utf8_to_real_filename
525{
526    my ($utf8_filename) = @_;
527
528    my $real_filename;
529
530    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
531    require Win32;
532    my $unicode_filename = decode("utf8",$utf8_filename);
533    $real_filename = Win32::GetShortPathName($unicode_filename);
534    }
535    else {
536    $real_filename = $utf8_filename;
537    }
538
539    return $real_filename;
540}
541
542
543sub fd_exists
544{
545    my $filename_full_path = shift @_;
546    my $test_op = shift @_ || "-e";
547
548    # By default tests for existance of file or directory (-e)
549    # Can be made more specific by providing second parameter (e.g. -f or -d)
550
551    my $exists = 0;
552
553    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
554    require Win32;
555    my $filename_short_path = Win32::GetShortPathName($filename_full_path);
556    if (!defined $filename_short_path) {
557        # Was probably still in UTF8 form (not what is needed on Windows)
558        my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
559        if (defined $unicode_filename_full_path) {
560        $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
561        }
562    }
563    $filename_full_path = $filename_short_path;
564    }
565
566    if (defined $filename_full_path) {
567    $exists = eval "($test_op \$filename_full_path)";
568    }
569
570    return $exists;
571}
572
573sub file_exists
574{
575    my ($filename_full_path) = @_;
576
577    return fd_exists($filename_full_path,"-f");
578}
579
580sub dir_exists
581{
582    my ($filename_full_path) = @_;
583
584    return fd_exists($filename_full_path,"-d");
585}
586
587
588
589# updates a copy of a directory in some other part of the filesystem
590# verbosity settings are: 0=low, 1=normal, 2=high
591# both $fromdir and $todir should be absolute paths
592sub cachedir {
593    my ($fromdir, $todir, $verbosity) = @_;
594    $verbosity = 1 unless defined $verbosity;
595
596    # use / for the directory separator, remove duplicate and
597    # trailing slashes
598    $fromdir=~s/[\\\/]+/\//g;
599    $fromdir=~s/[\\\/]+$//;
600    $todir=~s/[\\\/]+/\//g;
601    $todir=~s/[\\\/]+$//;
602
603    &mk_all_dir ($todir);
604
605    # get the directories in ascending order
606    if (!opendir (FROMDIR, $fromdir)) {
607    print STDERR "util::cachedir could not read directory $fromdir\n";
608    return;
609    }
610    my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
611    closedir (FROMDIR);
612
613    if (!opendir (TODIR, $todir)) {
614    print STDERR "util::cacedir could not read directory $todir\n";
615    return;
616    }
617    my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
618    closedir (TODIR);
619
620    my $fromi = 0;
621    my $toi = 0;
622           
623    while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
624#   print "fromi: $fromi toi: $toi\n";
625
626    # see if we should delete a file/directory
627    # this should happen if the file/directory
628    # is not in the from list or if its a different
629    # size, or has an older timestamp
630    if ($toi < scalar(@todir)) {
631        if (($fromi >= scalar(@fromdir)) ||
632        ($todir[$toi] lt $fromdir[$fromi] ||
633         ($todir[$toi] eq $fromdir[$fromi] &&
634          &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
635                  $verbosity)))) {
636
637        # the files are different
638        &rm_r("$todir/$todir[$toi]");
639        splice(@todir, $toi, 1); # $toi stays the same
640
641        } elsif ($todir[$toi] eq $fromdir[$fromi]) {
642        # the files are the same
643        # if it is a directory, check its contents
644        if (-d "$todir/$todir[$toi]") {
645            &cachedir ("$fromdir/$fromdir[$fromi]",
646                   "$todir/$todir[$toi]", $verbosity);
647        }
648
649        $toi++;
650        $fromi++;
651        next;
652        }
653    }
654 
655    # see if we should insert a file/directory
656    # we should insert a file/directory if there
657    # is no tofiles left or if the tofile does not exist
658    if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
659                      $todir[$toi] gt $fromdir[$fromi])) {
660        &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
661        splice (@todir, $toi, 0, $fromdir[$fromi]);
662
663        $toi++;
664        $fromi++;
665    }
666    }
667}
668
669# this function returns -1 if either file is not found
670# assumes that $file1 and $file2 are absolute file names or
671# in the current directory
672# $file2 is allowed to be newer than $file1
673sub differentfiles {
674    my ($file1, $file2, $verbosity) = @_;
675    $verbosity = 1 unless defined $verbosity;
676
677    $file1 =~ s/\/+$//;
678    $file2 =~ s/\/+$//;
679   
680    my ($file1name) = $file1 =~ /\/([^\/]*)$/;
681    my ($file2name) = $file2 =~ /\/([^\/]*)$/;
682
683    return -1 unless (-e $file1 && -e $file2);
684    if ($file1name ne $file2name) {
685    print STDERR "filenames are not the same\n" if ($verbosity >= 2);
686    return 1;
687    }
688
689    my @file1stat = stat ($file1);
690    my @file2stat = stat ($file2);
691
692    if (-d $file1) {
693    if (! -d $file2) {
694        print STDERR "one file is a directory\n" if ($verbosity >= 2);
695        return 1;
696    }
697    return 0;
698    }
699
700    # both must be regular files
701    unless (-f $file1 && -f $file2) {
702    print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
703    return 1;
704    }
705
706    # the size of the files must be the same
707    if ($file1stat[7] != $file2stat[7]) {
708    print STDERR "different sized files\n" if ($verbosity >= 2);
709    return 1;
710    }
711
712    # the second file cannot be older than the first
713    if ($file1stat[9] > $file2stat[9]) {
714    print STDERR "file is older\n" if ($verbosity >= 2);
715    return 1;
716    }
717
718    return 0;
719}
720
721
722sub get_tmp_filename
723{
724    my $file_ext = shift(@_) || undef;
725
726    my $opt_dot_file_ext = "";
727    if (defined $file_ext) {
728    if ($file_ext !~ m/\./) {
729        # no dot, so needs one added in at start
730        $opt_dot_file_ext = ".$file_ext"
731    }
732    else {
733        # allow for "extensions" such as _metadata.txt to be handled
734        # gracefully
735        $opt_dot_file_ext = $file_ext;
736    }
737    }
738
739    my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
740    &mk_all_dir ($tmpdir) unless -e $tmpdir;
741
742    my $count = 1000;
743    my $rand = int(rand $count);
744    my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
745
746    while (-e $full_tmp_filename) {
747    $rand = int(rand $count);
748    $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
749    $count++;
750    }
751   
752    return $full_tmp_filename;
753}
754
755sub get_timestamped_tmp_folder
756{
757
758    my $tmp_dirname;
759    if(defined $ENV{'GSDLCOLLECTDIR'}) {
760    $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
761    } elsif(defined $ENV{'GSDLHOME'}) {
762    $tmp_dirname = $ENV{'GSDLHOME'};
763    } else {
764    return undef;
765    }
766
767    $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
768    &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
769
770    # add the timestamp into the path otherwise we can run into problems
771    # if documents have the same name
772    my $timestamp = time;
773    my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
774    $tmp_dirname = $time_tmp_dirname;
775    my $i = 1;
776    while (-e $tmp_dirname) {
777    $tmp_dirname = "$time_tmp_dirname$i";
778    $i++;
779    }
780    &util::mk_dir($tmp_dirname);
781 
782    return $tmp_dirname;
783}
784
785sub get_timestamped_tmp_filename_in_collection
786{
787
788    my ($input_filename, $output_ext) = @_;
789    # derive tmp filename from input filename
790    my ($tailname, $dirname, $suffix)
791    = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
792
793    # softlink to collection tmp dir
794    my $tmp_dirname = &util::get_timestamped_tmp_folder();
795    $tmp_dirname = $dirname unless defined $tmp_dirname;
796
797    # following two steps copied from ConvertBinaryFile
798    # do we need them?? can't use them as is, as they use plugin methods.
799
800    #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
801
802    # URLEncode this since htmls with images where the html filename is utf8 don't seem
803    # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
804    # files on the filesystem.
805    #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
806    if (defined $output_ext) {
807    $output_ext = ".$output_ext"; # add the dot
808    } else {
809    $output_ext = $suffix;
810    }
811    $output_ext= lc($output_ext);
812    my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
813   
814    return $tmp_filename;
815}
816
817sub get_toplevel_tmp_dir
818{
819    return filename_cat($ENV{'GSDLHOME'}, "tmp");
820}
821
822
823sub filename_to_regex {
824    my $filename = shift (@_);
825
826    # need to put single backslash back to double so that regex works
827    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
828    $filename =~ s/\\/\\\\/g;
829    }
830    return $filename;
831}
832
833sub filename_cat {
834    my $first_file = shift(@_);
835    my (@filenames) = @_;
836
837#   Useful for debugging
838#     -- might make sense to call caller(0) rather than (1)??
839#   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
840#   print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
841   
842    # If first_file is not null or empty, then add it back into the list
843    if (defined $first_file && $first_file =~ /\S/) {
844    unshift(@filenames, $first_file);
845    }
846
847    my $filename = join("/", @filenames);
848
849    # remove duplicate slashes and remove the last slash
850    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
851    $filename =~ s/[\\\/]+/\\/g;
852    } else {
853    $filename =~ s/[\/]+/\//g;
854    # DB: want a filename abc\de.html to remain like this
855    }
856    $filename =~ s/[\\\/]$//;
857
858    return $filename;
859}
860
861
862sub pathname_cat {
863    my $first_path = shift(@_);
864    my (@pathnames) = @_;
865
866    # If first_path is not null or empty, then add it back into the list
867    if (defined $first_path && $first_path =~ /\S/) {
868    unshift(@pathnames, $first_path);
869    }
870
871    my $join_char;
872    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
873    $join_char = ";";
874    } else {
875    $join_char = ":";
876    }
877
878    my $pathname = join($join_char, @pathnames);
879
880    # remove duplicate slashes
881    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
882    $pathname =~ s/[\\\/]+/\\/g;
883    } else {
884    $pathname =~ s/[\/]+/\//g;
885    # DB: want a pathname abc\de.html to remain like this
886    }
887
888    return $pathname;
889}
890
891
892sub tidy_up_oid {
893    my ($OID) = @_;
894    if ($OID =~ /\./) {
895    print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
896    $OID =~ s/\.//g; #remove any periods
897    }
898    if ($OID =~ /^\s.*\s$/) {
899    print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
900    # remove starting and trailing whitespace
901    $OID =~ s/^\s+//;
902    $OID =~ s/\s+$//;
903    }
904    if ($OID =~ /^[\d]*$/) {
905    print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
906    $OID = "D" . $OID;
907    }       
908   
909    return $OID;
910}
911sub envvar_prepend {
912    my ($var,$val) = @_;
913
914    # do not prepend any value/path that's already in the environment variable
915    if ($ENV{'GSDLOS'} =~ /^windows$/i)
916    {
917    my $escaped_val = $val;
918    $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
919    if (!defined($ENV{$var})) {
920        $ENV{$var} = "$val";
921    }
922    elsif($ENV{$var} !~ m/$escaped_val/) {
923        $ENV{$var} = "$val;".$ENV{$var};
924    }
925    }
926    else {
927    if (!defined($ENV{$var})) {
928        $ENV{$var} = "$val";
929    }
930    elsif($ENV{$var} !~ m/$val/) {
931        $ENV{$var} = "$val:".$ENV{$var};
932    }
933    }
934}
935
936sub envvar_append {
937    my ($var,$val) = @_;
938
939    # do not append any value/path that's already in the environment variable
940    if ($ENV{'GSDLOS'} =~ /^windows$/i)
941    {
942    my $escaped_val = $val;
943    $escaped_val =~ s/\\/\\\\/g; # escape any Windows backslashes for upcoming regex
944    if (!defined($ENV{$var})) {
945        $ENV{$var} = "$val";
946    }
947    elsif($ENV{$var} !~ m/$escaped_val/) {
948        $ENV{$var} .= ";$val";
949    }
950    }
951    else {
952    if (!defined($ENV{$var})) {
953        $ENV{$var} = "$val";
954    }
955    elsif($ENV{$var} !~ m/$val/) {
956        $ENV{$var} .= ":$val";
957    }
958    }   
959}
960
961
962# splits a filename into a prefix and a tail extension using the tail_re, or
963# if that fails, splits on the file_extension . (dot)
964sub get_prefix_and_tail_by_regex {
965
966    my ($filename,$tail_re) = @_;
967   
968    my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
969    if ((!defined $file_prefix) || (!defined $file_ext)) {
970    ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
971    }
972
973    return ($file_prefix,$file_ext);
974}
975
976# get full path and file only path from a base_dir (which may be empty) and
977# file (which may contain directories)
978sub get_full_filenames {
979    my ($base_dir, $file) = @_;
980   
981    my $filename_full_path = $file;
982    # add on directory if present
983    $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
984   
985    my $filename_no_path = $file;
986
987    # remove directory if present
988    $filename_no_path =~ s/^.*[\/\\]//;
989    return ($filename_full_path, $filename_no_path);
990}
991
992# returns the path of a file without the filename -- ie. the directory the file is in
993sub filename_head {
994    my $filename = shift(@_);
995
996    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
997    $filename =~ s/[^\\\\]*$//;
998    }
999    else {
1000    $filename =~ s/[^\\\/]*$//;
1001    }
1002
1003    return $filename;
1004}
1005
1006
1007
1008# returns 1 if filename1 and filename2 point to the same
1009# file or directory
1010sub filenames_equal {
1011    my ($filename1, $filename2) = @_;
1012
1013    # use filename_cat to clean up trailing slashes and
1014    # multiple slashes
1015    $filename1 = filename_cat ($filename1);
1016    $filename2 = filename_cat ($filename2);
1017
1018    # filenames not case sensitive on windows
1019    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1020    $filename1 =~ tr/[A-Z]/[a-z]/;
1021    $filename2 =~ tr/[A-Z]/[a-z]/;
1022    }
1023    return 1 if $filename1 eq $filename2;
1024    return 0;
1025}
1026
1027
1028sub filename_within_directory
1029{
1030    my ($filename,$within_dir) = @_;
1031   
1032    my $dirsep = &util::get_dirsep();
1033    if ($within_dir !~ m/$dirsep$/) {
1034    $within_dir .= $dirsep;
1035    }
1036   
1037    $within_dir =~ s/\\/\\\\/g; # escape DOS style file separator
1038   
1039    if ($filename =~ m/^$within_dir(.*)$/) {
1040    $filename = $1;
1041    }
1042   
1043    return $filename;
1044}
1045
1046sub filename_within_collection
1047{
1048    my ($filename) = @_;
1049
1050    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1051   
1052    if (defined $collect_dir) {
1053
1054    # if from within GSDLCOLLECTDIR, then remove directory prefix
1055    # so source_filename is realative to it.  This is done to aid
1056    # portability, i.e. the collection can be moved to somewhere
1057    # else on the file system and the archives directory will still
1058    # work.  This is needed, for example in the applet version of
1059    # GLI where GSDLHOME/collect on the server will be different to
1060    # the collect directory of the remove user.  Of course,
1061    # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1062    # it back into a full pathname.
1063
1064    $filename = filename_within_directory($filename,$collect_dir);
1065    }
1066   
1067    return $filename;
1068}
1069
1070sub prettyprint_file
1071{
1072    my ($base_dir,$file) = @_;
1073
1074    my $filename_full_path = &util::filename_cat($base_dir,$file);
1075
1076    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1077    require Win32;
1078
1079    # For some reason base_dir in the form c:/a/b/c
1080    # This leads to confusion later on, so turn it back into
1081    # the more usual Windows form
1082    $base_dir =~ s/\//\\/g;
1083    my $long_base_dir = Win32::GetLongPathName($base_dir);
1084    my $long_full_path = Win32::GetLongPathName($filename_full_path);
1085
1086    $file = filename_within_directory($long_full_path,$long_base_dir);
1087    }
1088
1089    return $file;
1090}
1091
1092
1093sub upgrade_if_dos_filename
1094{
1095    my ($filename_full_path) = @_;
1096
1097    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1098    require Win32;
1099    # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1100    # to its long (Windows) version
1101    $filename_full_path = Win32::GetLongPathName($filename_full_path);
1102    # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1103    $filename_full_path =~ s/^(.)/\l$1/;
1104    }
1105
1106    return $filename_full_path;
1107}
1108
1109
1110sub filename_is_absolute
1111{
1112    my ($filename) = @_;
1113
1114    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1115    return ($filename =~ m/^(\w:)?\\/);
1116    }
1117    else {
1118    return ($filename =~ m/^\//);
1119    }
1120}
1121
1122
1123## @method make_absolute()
1124#
1125#  Ensure the given file path is absolute in respect to the given base path.
1126#
1127#  @param  $base_dir A string denoting the base path the given dir must be
1128#                    absolute to.
1129#  @param  $dir The directory to be made absolute as a string. Note that the
1130#               dir may already be absolute, in which case it will remain
1131#               unchanged.
1132#  @return The now absolute form of the directory as a string.
1133#
1134#  @author John Thompson, DL Consulting Ltd.
1135#  @copy 2006 DL Consulting Ltd.
1136#
1137#used in buildcol.pl, doesn't work for all cases --kjdon
1138sub make_absolute {
1139   
1140    my ($base_dir, $dir) = @_;
1141###    print STDERR "dir = $dir\n";
1142    $dir =~ s/[\\\/]+/\//g;
1143    $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1144    $dir =~ s|^/tmp_mnt||;
1145    1 while($dir =~ s|/[^/]*/\.\./|/|g);
1146    $dir =~ s|/[.][.]?/|/|g;
1147    $dir =~ tr|/|/|s;
1148###    print STDERR "dir = $dir\n";
1149   
1150    return $dir;
1151}
1152## make_absolute() ##
1153
1154sub get_dirsep {
1155
1156    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1157    return "\\";
1158    } else {
1159    return "\/";
1160    }
1161}
1162
1163sub get_os_dirsep {
1164
1165    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1166    return "\\\\";
1167    } else {
1168    return "\\\/";
1169    }
1170}
1171
1172sub get_re_dirsep {
1173
1174    return "\\\\|\\\/";
1175}
1176
1177
1178sub get_dirsep_tail {
1179    my ($filename) = @_;
1180   
1181    # returns last part of directory or filename
1182    # On unix e.g. a/b.d => b.d
1183    #              a/b/c => c
1184
1185    my $dirsep = get_re_dirsep();
1186    my @dirs = split (/$dirsep/, $filename);
1187    my $tail = pop @dirs;
1188
1189    # - caused problems under windows
1190    #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1191
1192    return $tail;
1193}
1194
1195
1196# if this is running on windows we want binaries to end in
1197# .exe, otherwise they don't have to end in any extension
1198sub get_os_exe {
1199    return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1200    return "";
1201}
1202
1203
1204# test to see whether this is a big or little endian machine
1205sub is_little_endian
1206{
1207    # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1208    # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1209    # Otherwise, it's little endian
1210
1211    #return 0 if $^O =~ /^darwin$/i;
1212    #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1213   
1214    # Going back to stating exactly whether the machine is little endian
1215    # or big endian, without any special case for Macs. Since for rata it comes
1216    # back with little endian and for shuttle with bigendian.
1217    return (ord(substr(pack("s",1), 0, 1)) == 1);
1218}
1219
1220
1221# will return the collection name if successful, "" otherwise
1222sub use_collection {
1223    my ($collection, $collectdir) = @_;
1224
1225    if (!defined $collectdir || $collectdir eq "") {
1226    $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1227    }
1228
1229    # get and check the collection
1230    if (!defined($collection) || $collection eq "") {
1231    if (defined $ENV{'GSDLCOLLECTION'}) {
1232        $collection = $ENV{'GSDLCOLLECTION'};
1233    } else {
1234        print STDOUT "No collection specified\n";
1235        return "";
1236    }
1237    }
1238   
1239    if ($collection eq "modelcol") {
1240    print STDOUT "You can't use modelcol.\n";
1241    return "";
1242    }
1243
1244    # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1245    # are defined
1246    $ENV{'GSDLCOLLECTION'} = $collection;
1247    $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1248
1249    # make sure this collection exists
1250    if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1251    print STDOUT "Invalid collection ($collection).\n";
1252    return "";
1253    }
1254
1255    # everything is ready to go
1256    return $collection;
1257}
1258
1259sub get_current_collection_name {
1260    return $ENV{'GSDLCOLLECTION'};
1261}
1262
1263
1264# will return the collection name if successful, "" otherwise. 
1265# Like use_collection (above) but for greenstone 3 (taking account of site level)
1266
1267sub use_site_collection {
1268    my ($site, $collection, $collectdir) = @_;
1269
1270    if (!defined $collectdir || $collectdir eq "") {
1271    die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1272    $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1273    }
1274
1275    # collectdir explicitly set by this point (using $site variable if required).
1276    # Can call "old" gsdl2 use_collection now.
1277
1278    return use_collection($collection,$collectdir);
1279}
1280
1281
1282
1283sub locate_config_file
1284{
1285    my ($file) = @_;
1286
1287    my $locations = locate_config_files($file);
1288
1289    return shift @$locations; # returns undef if 'locations' is empty
1290}
1291
1292
1293sub locate_config_files
1294{
1295    my ($file) = @_;
1296
1297    my @locations = ();
1298
1299    if (-e $file) {
1300    # Clearly specified (most likely full filename)
1301    # No need to hunt in 'etc' directories, return value unchanged
1302    push(@locations,$file);
1303    }
1304    else {
1305    # Check for collection specific one before looking in global GSDL 'etc'
1306    if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1307        my $test_collect_etc_filename
1308        = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1309       
1310        if (-e $test_collect_etc_filename) {
1311        push(@locations,$test_collect_etc_filename);
1312        }
1313    }
1314    my $test_main_etc_filename
1315        = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1316    if (-e $test_main_etc_filename) {
1317        push(@locations,$test_main_etc_filename);
1318    }
1319    }
1320
1321    return \@locations;
1322}
1323
1324
1325sub hyperlink_text
1326{
1327    my ($text) = @_;
1328   
1329    $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1330    $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1331
1332    return $text;
1333}
1334
1335
1336# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1337# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1338sub is_dir_empty
1339{
1340    my ($path) = @_;
1341    opendir DIR, $path;
1342    while(my $entry = readdir DIR) {
1343        next if($entry =~ /^\.\.?$/);
1344        closedir DIR;
1345        return 0;
1346    }
1347    closedir DIR;
1348    return 1;
1349}
1350
1351# Returns the given filename converted using either URL encoding or base64
1352# encoding, as specified by $rename_method. If the given filename has no suffix
1353# (if it is just the tailname), then $no_suffix should be some defined value.
1354# rename_method can be url, none, base64
1355sub rename_file {
1356    my ($filename, $rename_method, $no_suffix)  = @_;
1357
1358    if(!$filename) { # undefined or empty string
1359    return $filename;
1360    }
1361
1362    if (!$rename_method) {
1363    print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1364    # Debugging information
1365    # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1366    # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1367    $rename_method = "url";
1368    } elsif($rename_method eq "none") {
1369    return $filename; # would have already been renamed
1370    }
1371
1372    # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1373    ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1374    ###$filename =~ s/ /_/g;
1375
1376    my ($tailname,$dirname,$suffix);
1377    if($no_suffix) { # given a tailname, no suffix
1378    ($tailname,$dirname) = File::Basename::fileparse($filename);
1379    }
1380    else {
1381    ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1382    }
1383    $suffix = "" if !$suffix;
1384
1385    if ($rename_method eq "url") {
1386    $tailname = &unicode::url_encode($tailname);
1387    }
1388    elsif ($rename_method eq "base64") {
1389    $tailname = &unicode::base64_encode($tailname);
1390    $tailname =~ s/\s*//sg;      # for some reason it adds spaces not just at end but also in middle
1391    }
1392
1393    $filename = "$tailname$suffix";
1394    $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1395
1396    return $filename;
1397}
1398
1399
1400# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1401sub rename_ldb_or_bdb_file {
1402    my ($filename_no_ext) = @_;
1403
1404    my $new_filename = "$filename_no_ext.gdb";
1405    return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1406    # try ldb
1407    my $old_filename = "$filename_no_ext.ldb";
1408   
1409    if (-f $old_filename) {
1410    print STDERR "Renaming $old_filename to $new_filename\n";
1411    rename ($old_filename, $new_filename)
1412        || print STDERR "Rename failed: $!\n";
1413    return;
1414    }
1415    # try bdb
1416    $old_filename = "$filename_no_ext.bdb";
1417    if (-f $old_filename) {
1418    print STDERR "Renaming $old_filename to $new_filename\n";   
1419    rename ($old_filename, $new_filename)
1420        || print STDERR "Rename failed: $!\n";
1421    return;
1422    }
1423}
1424
1425
1426# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1427# By default, /greenstone3 for GS3 or /greenstone for GS2.
1428sub get_greenstone_url_prefix() {
1429    # if already set on a previous occasion, just return that
1430    # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1431    return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1432
1433    my ($configfile, $urlprefix, $defaultUrlprefix);
1434    my @propertynames = ();
1435
1436    if($ENV{'GSDL3SRCHOME'}) {
1437    $defaultUrlprefix = "/greenstone3";
1438    $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1439    push(@propertynames, qw/path\s*\=/);
1440    } else {
1441    $defaultUrlprefix = "/greenstone";
1442    $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", "gsdlsite.cfg");
1443    push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1444    }
1445
1446    $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1447
1448    if(!$urlprefix) { # no values found for URL prefix, use default values
1449    $urlprefix = $defaultUrlprefix;
1450    } else {
1451    #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1452    $urlprefix =~ s/^\///; # remove the starting slash
1453    my @dirs = split(/(\\|\/)/, $urlprefix);
1454    $urlprefix = shift(@dirs);
1455
1456    if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1457        $urlprefix = "/$urlprefix";
1458    }
1459    }
1460
1461    # set for the future
1462    $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1463#    print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1464    return $urlprefix;
1465}
1466
1467
1468# Given a config file (xml or java properties file) and a list/array of regular expressions
1469# that represent property names to match on, this function will return the value for the 1st
1470# matching property name. If the return value is undefined, no matching property was found.
1471sub extract_propvalue_from_file() {
1472    my ($configfile, $propertynames) = @_;
1473
1474    my $value;
1475    unless(open(FIN, "<$configfile")) {
1476    print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1477    return $value; # not initialised
1478    }
1479
1480    # Read the entire file at once, as one single line, then close it
1481    my $filecontents;
1482    {
1483    local $/ = undef;       
1484    $filecontents = <FIN>;
1485    }
1486    close(FIN);
1487
1488    foreach my $regex (@$propertynames) {
1489        ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1490    if($value) {
1491            $value =~ s/^\"//;     # remove any startquotes
1492        $value =~ s/\".*$//;   # remove the 1st endquotes (if any) followed by any xml
1493        last;              # found value for a matching property, break from loop
1494    }
1495    }
1496
1497    return $value;
1498}
1499
1500# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1501# given that perllib is in @INC in order to invoke this subroutine.
1502# Call as follows -- after setting up INC to include perllib and
1503# after setting up GSDLHOME and GSDLOS:
1504#
1505# require util;
1506# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1507#
1508sub setup_greenstone_env() {
1509    my ($GSDLHOME, $GSDLOS) = @_;
1510
1511    #my %env_map = ();
1512    # Get the localised ENV settings of running a localised source setup.bash
1513    # and put it into the ENV here. Need to clear GSDLHOME before running setup
1514    #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1515    my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";     
1516    if($GSDLOS =~ m/windows/i) {
1517        #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1518        $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1519    }
1520    if (!open(PIN, "$perl_command |")) {
1521        print STDERR ("Unable to execute command: $perl_command. $!\n");
1522    }
1523
1524    while (defined (my $perl_output_line = <PIN>)) {
1525        my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1526        #$env_map{$key}=$value;     
1527        $ENV{$key}=$value;
1528    }
1529   
1530    # If any keys in $ENV don't occur in Greenstone's localised env
1531    # (stored in $env_map), delete those entries from $ENV
1532    #foreach $key (keys %ENV) {
1533    #   if(!defined $env_map{$key}) {
1534    #       print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1535    #       delete $ENV{$key}; # del $ENV(key, value) pair
1536    #   }
1537    #}
1538    #undef %env_map;
1539}
1540
15411;
Note: See TracBrowser for help on using the browser.