root/main/trunk/greenstone2/perllib/util.pm @ 24932

Revision 24932, 46.5 KB (checked in by ak19, 9 years ago)

Diego noticed how the metadata in a toplevel metadata.xml, which specifies metadata for files in import's subfolders, does not get attached to the files on Windows, while this works on Linux. It had to do with the difference between the file slashes used on the OS versus the URL-type fileslashes used in the metadata.xml Diego had constructed. This has now been fixed and Dr Bainbridge came up with a tidier solution of a new method in util.pm that would handle the details.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29
30use Encode;
31use File::Copy;
32use File::Basename;
33# Config for getting the perlpath in the recommended way, though it uses paths that are
34# hard-coded into the Config file that's generated upon configuring and compiling perl.
35# $^X works better in some cases to return the path to perl used to launch the script,
36# but if launched with plain "perl" (no full-path), that will be just what it returns.
37use Config;
38
39# removes files (but not directories)
40sub rm {
41    my (@files) = @_;
42
43    my @filefiles = ();
44
45    # make sure the files we want to delete exist
46    # and are regular files
47    foreach my $file (@files) {
48    if (!-e $file) {
49        print STDERR "util::rm $file does not exist\n";
50    } elsif ((!-f $file) && (!-l $file)) {
51        print STDERR "util::rm $file is not a regular (or symbolic) file\n";
52    } else {
53        push (@filefiles, $file);
54    }
55    }
56   
57    # remove the files
58    my $numremoved = unlink @filefiles;
59
60    # check to make sure all of them were removed
61    if ($numremoved != scalar(@filefiles)) {
62    print STDERR "util::rm Not all files were removed\n";
63    }
64}
65
66# removes files (but not directories) - can rename this to the default
67# "rm" subroutine  when debugging the deletion of individual files.
68sub rm_debug {
69    my (@files) = @_;
70    my @filefiles = ();
71
72    # make sure the files we want to delete exist
73    # and are regular files
74    foreach my $file (@files) {
75        if (!-e $file) {
76            print STDERR "util::rm $file does not exist\n";
77        } elsif ((!-f $file) && (!-l $file)) {
78            print STDERR "util::rm $file is not a regular (or symbolic) file\n";
79        } else { # debug message
80            unlink($file) or warn "Could not delete file $file: $!\n";
81        }
82    }
83}
84
85
86# recursive removal
87sub filtered_rm_r {
88    my ($files,$file_accept_re,$file_reject_re) = @_;
89
90#   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(2);
91#   my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
92#   print STDERR "** Calling method (2): $lcfilename:$cline $cpackage->$csubr\n";
93   
94    my @files_array = (ref $files eq "ARRAY") ? @$files : ($files);
95
96    # recursively remove the files
97    foreach my $file (@files_array) {
98    $file =~ s/[\/\\]+$//; # remove trailing slashes
99   
100    if (!-e $file) {
101        print STDERR "util::filtered_rm_r $file does not exist\n";
102
103    } elsif ((-d $file) && (!-l $file)) { # don't recurse down symbolic link
104        # get the contents of this directory
105        if (!opendir (INDIR, $file)) {
106        print STDERR "util::filtered_rm_r could not open directory $file\n";
107        } else {
108        my @filedir = grep (!/^\.\.?$/, readdir (INDIR));
109        closedir (INDIR);
110               
111        # remove all the files in this directory
112        map {$_="$file/$_";} @filedir;
113        &filtered_rm_r (\@filedir,$file_accept_re,$file_reject_re);
114
115        if (!defined $file_accept_re && !defined $file_reject_re) {
116            # remove this directory
117            if (!rmdir $file) {
118            print STDERR "util::filtered_rm_r couldn't remove directory $file\n";
119            }
120        }
121        }
122    } else {
123        next if (defined $file_reject_re && ($file =~ m/$file_reject_re/));
124
125        if ((!defined $file_accept_re) || ($file =~ m/$file_accept_re/)) {
126        # remove this file 
127        &rm ($file);
128        }
129    }
130    }
131}
132
133
134# recursive removal
135sub rm_r {
136    my (@files) = @_;
137   
138    # use the more general (but reterospectively written function
139    # filtered_rm_r function()
140
141    filtered_rm_r(\@files,undef,undef); # no accept or reject expressions
142}
143
144
145
146
147# moves a file or a group of files
148sub mv {
149    my $dest = pop (@_);
150    my (@srcfiles) = @_;
151
152    # remove trailing slashes from source and destination files
153    $dest =~ s/[\\\/]+$//;
154    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
155
156    # a few sanity checks
157    if (scalar (@srcfiles) == 0) {
158    print STDERR "util::mv no destination directory given\n";
159    return;
160    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
161    print STDERR "util::mv if multiple source files are given the ".
162        "destination must be a directory\n";
163    return;
164    }
165
166    # move the files
167    foreach my $file (@srcfiles) {
168    my $tempdest = $dest;
169    if (-d $tempdest) {
170        my ($filename) = $file =~ /([^\\\/]+)$/;
171        $tempdest .= "/$filename";
172    }
173    if (!-e $file) {
174        print STDERR "util::mv $file does not exist\n";
175    } else {
176        rename ($file, $tempdest);
177    }
178    }
179}
180
181
182# copies a file or a group of files
183sub cp {
184    my $dest = pop (@_);
185    my (@srcfiles) = @_;
186
187    # remove trailing slashes from source and destination files
188    $dest =~ s/[\\\/]+$//;
189    map {$_ =~ s/[\\\/]+$//;} @srcfiles;
190
191    # a few sanity checks
192    if (scalar (@srcfiles) == 0) {
193    print STDERR "util::cp no destination directory given\n";
194    return;
195    } elsif ((scalar (@srcfiles) > 1) && (!-d $dest)) {
196    print STDERR "util::cp if multiple source files are given the ".
197        "destination must be a directory\n";
198    return;
199    }
200
201    # copy the files
202    foreach my $file (@srcfiles) {
203    my $tempdest = $dest;
204    if (-d $tempdest) {
205        my ($filename) = $file =~ /([^\\\/]+)$/;
206        $tempdest .= "/$filename";
207    }
208    if (!-e $file) {
209        print STDERR "util::cp $file does not exist\n";
210    } elsif (!-f $file) {
211        print STDERR "util::cp $file is not a plain file\n";
212    } else {
213        &File::Copy::copy ($file, $tempdest);
214    }
215    }
216}
217
218
219
220# recursively copies a file or group of files
221# syntax: cp_r (sourcefiles, destination directory)
222# destination must be a directory - to copy one file to
223# another use cp instead
224sub cp_r {
225    my $dest = pop (@_);
226    my (@srcfiles) = @_;
227
228    # a few sanity checks
229    if (scalar (@srcfiles) == 0) {
230    print STDERR "util::cp_r no destination directory given\n";
231    return;
232    } elsif (-f $dest) {
233    print STDERR "util::cp_r destination must be a directory\n";
234    return;
235    }
236   
237    # create destination directory if it doesn't exist already
238    if (! -d $dest) {
239    my $store_umask = umask(0002);
240    mkdir ($dest, 0777);
241    umask($store_umask);
242    }
243
244    # copy the files
245    foreach my $file (@srcfiles) {
246
247    if (!-e $file) {
248        print STDERR "util::cp_r $file does not exist\n";
249
250    } elsif (-d $file) {
251        # make the new directory
252        my ($filename) = $file =~ /([^\\\/]*)$/;
253        $dest = &util::filename_cat ($dest, $filename);
254        my $store_umask = umask(0002);
255        mkdir ($dest, 0777);
256        umask($store_umask);
257
258        # get the contents of this directory
259        if (!opendir (INDIR, $file)) {
260        print STDERR "util::cp_r could not open directory $file\n";
261        } else {
262        my @filedir = readdir (INDIR);
263        closedir (INDIR);
264        foreach my $f (@filedir) {
265            next if $f =~ /^\.\.?$/;
266            # copy all the files in this directory
267            my $ff = &util::filename_cat ($file, $f);
268            &cp_r ($ff, $dest);
269        }
270        }
271
272    } else {
273        &cp($file, $dest);
274    }
275    }
276}
277# recursively copies a file or group of files
278# syntax: cp_r (sourcefiles, destination directory)
279# destination must be a directory - to copy one file to
280# another use cp instead
281sub cp_r_nosvn {
282    my $dest = pop (@_);
283    my (@srcfiles) = @_;
284
285    # a few sanity checks
286    if (scalar (@srcfiles) == 0) {
287    print STDERR "util::cp_r no destination directory given\n";
288    return;
289    } elsif (-f $dest) {
290    print STDERR "util::cp_r destination must be a directory\n";
291    return;
292    }
293   
294    # create destination directory if it doesn't exist already
295    if (! -d $dest) {
296    my $store_umask = umask(0002);
297    mkdir ($dest, 0777);
298    umask($store_umask);
299    }
300
301    # copy the files
302    foreach my $file (@srcfiles) {
303
304    if (!-e $file) {
305        print STDERR "util::cp_r $file does not exist\n";
306
307    } elsif (-d $file) {
308        # make the new directory
309        my ($filename) = $file =~ /([^\\\/]*)$/;
310        $dest = &util::filename_cat ($dest, $filename);
311        my $store_umask = umask(0002);
312        mkdir ($dest, 0777);
313        umask($store_umask);
314
315        # get the contents of this directory
316        if (!opendir (INDIR, $file)) {
317        print STDERR "util::cp_r could not open directory $file\n";
318        } else {
319        my @filedir = readdir (INDIR);
320        closedir (INDIR);
321        foreach my $f (@filedir) {
322            next if $f =~ /^\.\.?$/;
323            next if $f =~ /^\.svn$/;
324            # copy all the files in this directory
325            my $ff = &util::filename_cat ($file, $f);
326            &cp_r ($ff, $dest);
327        }
328        }
329
330    } else {
331        &cp($file, $dest);
332    }
333    }
334}
335
336# copies a directory and its contents, excluding subdirectories, into a new directory
337sub cp_r_toplevel {
338    my $dest = pop (@_);
339    my (@srcfiles) = @_;
340
341    # a few sanity checks
342    if (scalar (@srcfiles) == 0) {
343    print STDERR "util::cp_r no destination directory given\n";
344    return;
345    } elsif (-f $dest) {
346    print STDERR "util::cp_r destination must be a directory\n";
347    return;
348    }
349   
350    # create destination directory if it doesn't exist already
351    if (! -d $dest) {
352    my $store_umask = umask(0002);
353    mkdir ($dest, 0777);
354    umask($store_umask);
355    }
356
357    # copy the files
358    foreach my $file (@srcfiles) {
359
360    if (!-e $file) {
361        print STDERR "util::cp_r $file does not exist\n";
362
363    } elsif (-d $file) {
364        # make the new directory
365        my ($filename) = $file =~ /([^\\\/]*)$/;
366        $dest = &util::filename_cat ($dest, $filename);
367        my $store_umask = umask(0002);
368        mkdir ($dest, 0777);
369        umask($store_umask);
370
371        # get the contents of this directory
372        if (!opendir (INDIR, $file)) {
373        print STDERR "util::cp_r could not open directory $file\n";
374        } else {
375        my @filedir = readdir (INDIR);
376        closedir (INDIR);
377        foreach my $f (@filedir) {
378            next if $f =~ /^\.\.?$/;
379           
380            # copy all the files in this directory, but not directories
381            my $ff = &util::filename_cat ($file, $f);
382            if (-f $ff) {
383            &cp($ff, $dest);
384            #&cp_r ($ff, $dest);
385            }
386        }
387        }
388
389    } else {
390        &cp($file, $dest);
391    }
392    }
393}
394
395sub mk_dir {
396    my ($dir) = @_;
397
398    my $store_umask = umask(0002);
399    my $mkdir_ok = mkdir ($dir, 0777);
400    umask($store_umask);
401   
402    if (!$mkdir_ok)
403    {
404    print STDERR "util::mk_dir could not create directory $dir\n";
405    return;
406    }
407}
408
409# in case anyone cares - I did some testing (using perls Benchmark module)
410# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
411# slightly faster (surprisingly) - Stefan.
412sub mk_all_dir {
413    my ($dir) = @_;
414
415    # use / for the directory separator, remove duplicate and
416    # trailing slashes
417    $dir=~s/[\\\/]+/\//g;
418    $dir=~s/[\\\/]+$//;
419
420    # make sure the cache directory exists
421    my $dirsofar = "";
422    my $first = 1;
423    foreach my $dirname (split ("/", $dir)) {
424    $dirsofar .= "/" unless $first;
425    $first = 0;
426
427    $dirsofar .= $dirname;
428
429    next if $dirname =~ /^(|[a-z]:)$/i;
430    if (!-e $dirsofar)
431        {
432        my $store_umask = umask(0002);
433        my $mkdir_ok = mkdir ($dirsofar, 0777);
434        umask($store_umask);
435        if (!$mkdir_ok)
436        {
437            print STDERR "util::mk_all_dir could not create directory $dirsofar\n";
438            return;
439        }
440        }
441    }
442}
443
444# make hard link to file if supported by OS, otherwise copy the file
445sub hard_link {
446    my ($src, $dest, $verbosity) = @_;
447
448    # remove trailing slashes from source and destination files
449    $src =~ s/[\\\/]+$//;
450    $dest =~ s/[\\\/]+$//;
451
452##    print STDERR "**** src = ", unicode::debug_unicode_string($src),"\n";
453    # a few sanity checks
454    if (-e $dest) {
455    # destination file already exists
456    return;
457    }
458    elsif (!-e $src) {
459    print STDERR "util::hard_link source file \"$src\" does not exist\n";
460    return 1;
461    }
462    elsif (-d $src) {
463    print STDERR "util::hard_link source \"$src\" is a directory\n";
464    return 1;
465    }
466
467    my $dest_dir = &File::Basename::dirname($dest);
468    mk_all_dir($dest_dir) if (!-e $dest_dir);
469
470
471    if (!link($src, $dest)) {
472    if ((!defined $verbosity) || ($verbosity>2)) {
473        print STDERR "util::hard_link: unable to create hard link. ";
474        print STDERR " Copying file: $src -> $dest\n";
475    }
476    &File::Copy::copy ($src, $dest);
477    }
478    return 0;
479}
480
481# make soft link to file if supported by OS, otherwise copy file
482sub soft_link {
483    my ($src, $dest, $ensure_paths_absolute) = @_;
484
485    # remove trailing slashes from source and destination files
486    $src =~ s/[\\\/]+$//;
487    $dest =~ s/[\\\/]+$//;
488
489    # Ensure file paths are absolute IF requested to do so
490    # Soft_linking didn't work for relative paths
491    if(defined $ensure_paths_absolute && $ensure_paths_absolute) {
492    # We need to ensure that the src file is the absolute path
493    # See http://perldoc.perl.org/File/Spec.html
494    if(!File::Spec->file_name_is_absolute( $src ))  { # it's relative
495        $src = File::Spec->rel2abs($src); # make absolute
496    }
497    # Might as well ensure that the destination file's absolute path is used
498    if(!File::Spec->file_name_is_absolute( $dest )) {
499        $dest = File::Spec->rel2abs($dest); # make absolute
500    }
501    }
502
503    # a few sanity checks
504    if (!-e $src) {
505    print STDERR "util::soft_link source file $src does not exist\n";
506    return 0;
507    }
508
509    my $dest_dir = &File::Basename::dirname($dest);
510    mk_all_dir($dest_dir) if (!-e $dest_dir);
511
512    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
513       
514    # symlink not supported on windows
515    &File::Copy::copy ($src, $dest);
516
517    } elsif (!eval {symlink($src, $dest)}) {
518    print STDERR "util::soft_link: unable to create soft link.\n";
519    return 0;
520    }
521
522    return 1;
523}
524
525# Primarily for filenames generated by processing
526# content of HTML files (which are mapped to UTF-8 internally)
527#
528# To turn this into an octet string that really exists on the file
529# system:
530# 1. don't need to do anything special for Unix-based systems
531#   (as underlying file system is byte-code)
532# 2. need to map to short DOS filenames for Windows
533
534sub utf8_to_real_filename
535{
536    my ($utf8_filename) = @_;
537
538    my $real_filename;
539
540    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
541    require Win32;
542
543    print STDERR "***** utf8 filename = $utf8_filename\n\n\n";
544
545    my $unicode_filename = decode("utf8",$utf8_filename);
546    $real_filename = Win32::GetShortPathName($unicode_filename);
547    }
548    else {
549    $real_filename = $utf8_filename;
550    }
551
552    return $real_filename;
553}
554
555
556sub fd_exists
557{
558    my $filename_full_path = shift @_;
559    my $test_op = shift @_ || "-e";
560
561    # By default tests for existance of file or directory (-e)
562    # Can be made more specific by providing second parameter (e.g. -f or -d)
563
564    my $exists = 0;
565
566    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
567    require Win32;
568    my $filename_short_path = Win32::GetShortPathName($filename_full_path);
569    if (!defined $filename_short_path) {
570        # Was probably still in UTF8 form (not what is needed on Windows)
571        my $unicode_filename_full_path = eval "decode(\"utf8\",\$filename_full_path)";
572        if (defined $unicode_filename_full_path) {
573        $filename_short_path = Win32::GetShortPathName($unicode_filename_full_path);
574        }
575    }
576    $filename_full_path = $filename_short_path;
577    }
578
579    if (defined $filename_full_path) {
580    $exists = eval "($test_op \$filename_full_path)";
581    }
582
583    return $exists;
584}
585
586sub file_exists
587{
588    my ($filename_full_path) = @_;
589
590    return fd_exists($filename_full_path,"-f");
591}
592
593sub dir_exists
594{
595    my ($filename_full_path) = @_;
596
597    return fd_exists($filename_full_path,"-d");
598}
599
600
601
602# updates a copy of a directory in some other part of the filesystem
603# verbosity settings are: 0=low, 1=normal, 2=high
604# both $fromdir and $todir should be absolute paths
605sub cachedir {
606    my ($fromdir, $todir, $verbosity) = @_;
607    $verbosity = 1 unless defined $verbosity;
608
609    # use / for the directory separator, remove duplicate and
610    # trailing slashes
611    $fromdir=~s/[\\\/]+/\//g;
612    $fromdir=~s/[\\\/]+$//;
613    $todir=~s/[\\\/]+/\//g;
614    $todir=~s/[\\\/]+$//;
615
616    &mk_all_dir ($todir);
617
618    # get the directories in ascending order
619    if (!opendir (FROMDIR, $fromdir)) {
620    print STDERR "util::cachedir could not read directory $fromdir\n";
621    return;
622    }
623    my @fromdir = grep (!/^\.\.?$/, sort(readdir (FROMDIR)));
624    closedir (FROMDIR);
625
626    if (!opendir (TODIR, $todir)) {
627    print STDERR "util::cacedir could not read directory $todir\n";
628    return;
629    }
630    my @todir = grep (!/^\.\.?$/, sort(readdir (TODIR)));
631    closedir (TODIR);
632
633    my $fromi = 0;
634    my $toi = 0;
635           
636    while ($fromi < scalar(@fromdir) || $toi < scalar(@todir)) {
637#   print "fromi: $fromi toi: $toi\n";
638
639    # see if we should delete a file/directory
640    # this should happen if the file/directory
641    # is not in the from list or if its a different
642    # size, or has an older timestamp
643    if ($toi < scalar(@todir)) {
644        if (($fromi >= scalar(@fromdir)) ||
645        ($todir[$toi] lt $fromdir[$fromi] ||
646         ($todir[$toi] eq $fromdir[$fromi] &&
647          &differentfiles("$fromdir/$fromdir[$fromi]","$todir/$todir[$toi]",
648                  $verbosity)))) {
649
650        # the files are different
651        &rm_r("$todir/$todir[$toi]");
652        splice(@todir, $toi, 1); # $toi stays the same
653
654        } elsif ($todir[$toi] eq $fromdir[$fromi]) {
655        # the files are the same
656        # if it is a directory, check its contents
657        if (-d "$todir/$todir[$toi]") {
658            &cachedir ("$fromdir/$fromdir[$fromi]",
659                   "$todir/$todir[$toi]", $verbosity);
660        }
661
662        $toi++;
663        $fromi++;
664        next;
665        }
666    }
667 
668    # see if we should insert a file/directory
669    # we should insert a file/directory if there
670    # is no tofiles left or if the tofile does not exist
671    if ($fromi < scalar(@fromdir) && ($toi >= scalar(@todir) ||
672                      $todir[$toi] gt $fromdir[$fromi])) {
673        &cp_r ("$fromdir/$fromdir[$fromi]", "$todir/$fromdir[$fromi]");
674        splice (@todir, $toi, 0, $fromdir[$fromi]);
675
676        $toi++;
677        $fromi++;
678    }
679    }
680}
681
682# this function returns -1 if either file is not found
683# assumes that $file1 and $file2 are absolute file names or
684# in the current directory
685# $file2 is allowed to be newer than $file1
686sub differentfiles {
687    my ($file1, $file2, $verbosity) = @_;
688    $verbosity = 1 unless defined $verbosity;
689
690    $file1 =~ s/\/+$//;
691    $file2 =~ s/\/+$//;
692   
693    my ($file1name) = $file1 =~ /\/([^\/]*)$/;
694    my ($file2name) = $file2 =~ /\/([^\/]*)$/;
695
696    return -1 unless (-e $file1 && -e $file2);
697    if ($file1name ne $file2name) {
698    print STDERR "filenames are not the same\n" if ($verbosity >= 2);
699    return 1;
700    }
701
702    my @file1stat = stat ($file1);
703    my @file2stat = stat ($file2);
704
705    if (-d $file1) {
706    if (! -d $file2) {
707        print STDERR "one file is a directory\n" if ($verbosity >= 2);
708        return 1;
709    }
710    return 0;
711    }
712
713    # both must be regular files
714    unless (-f $file1 && -f $file2) {
715    print STDERR "one file is not a regular file\n" if ($verbosity >= 2);
716    return 1;
717    }
718
719    # the size of the files must be the same
720    if ($file1stat[7] != $file2stat[7]) {
721    print STDERR "different sized files\n" if ($verbosity >= 2);
722    return 1;
723    }
724
725    # the second file cannot be older than the first
726    if ($file1stat[9] > $file2stat[9]) {
727    print STDERR "file is older\n" if ($verbosity >= 2);
728    return 1;
729    }
730
731    return 0;
732}
733
734
735sub get_tmp_filename
736{
737    my $file_ext = shift(@_) || undef;
738
739    my $opt_dot_file_ext = "";
740    if (defined $file_ext) {
741    if ($file_ext !~ m/\./) {
742        # no dot, so needs one added in at start
743        $opt_dot_file_ext = ".$file_ext"
744    }
745    else {
746        # allow for "extensions" such as _metadata.txt to be handled
747        # gracefully
748        $opt_dot_file_ext = $file_ext;
749    }
750    }
751
752    my $tmpdir = filename_cat($ENV{'GSDLHOME'}, "tmp");
753    &mk_all_dir ($tmpdir) unless -e $tmpdir;
754
755    my $count = 1000;
756    my $rand = int(rand $count);
757    my $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
758
759    while (-e $full_tmp_filename) {
760    $rand = int(rand $count);
761    $full_tmp_filename = &filename_cat($tmpdir, "F$rand$opt_dot_file_ext");
762    $count++;
763    }
764   
765    return $full_tmp_filename;
766}
767
768sub get_timestamped_tmp_folder
769{
770
771    my $tmp_dirname;
772    if(defined $ENV{'GSDLCOLLECTDIR'}) {
773    $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
774    } elsif(defined $ENV{'GSDLHOME'}) {
775    $tmp_dirname = $ENV{'GSDLHOME'};
776    } else {
777    return undef;
778    }
779
780    $tmp_dirname = &util::filename_cat($tmp_dirname, "tmp");
781    &util::mk_dir($tmp_dirname) if (!-e $tmp_dirname);
782
783    # add the timestamp into the path otherwise we can run into problems
784    # if documents have the same name
785    my $timestamp = time;
786    my $time_tmp_dirname = &util::filename_cat($tmp_dirname, $timestamp);
787    $tmp_dirname = $time_tmp_dirname;
788    my $i = 1;
789    while (-e $tmp_dirname) {
790    $tmp_dirname = "$time_tmp_dirname$i";
791    $i++;
792    }
793    &util::mk_dir($tmp_dirname);
794 
795    return $tmp_dirname;
796}
797
798sub get_timestamped_tmp_filename_in_collection
799{
800
801    my ($input_filename, $output_ext) = @_;
802    # derive tmp filename from input filename
803    my ($tailname, $dirname, $suffix)
804    = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
805
806    # softlink to collection tmp dir
807    my $tmp_dirname = &util::get_timestamped_tmp_folder();
808    $tmp_dirname = $dirname unless defined $tmp_dirname;
809
810    # following two steps copied from ConvertBinaryFile
811    # do we need them?? can't use them as is, as they use plugin methods.
812
813    #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
814
815    # URLEncode this since htmls with images where the html filename is utf8 don't seem
816    # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
817    # files on the filesystem.
818    #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
819    if (defined $output_ext) {
820    $output_ext = ".$output_ext"; # add the dot
821    } else {
822    $output_ext = $suffix;
823    }
824    $output_ext= lc($output_ext);
825    my $tmp_filename = &util::filename_cat($tmp_dirname, "$tailname$output_ext");
826   
827    return $tmp_filename;
828}
829
830sub get_toplevel_tmp_dir
831{
832    return filename_cat($ENV{'GSDLHOME'}, "tmp");
833}
834
835
836sub filename_to_regex {
837    my $filename = shift (@_);
838
839    # need to put single backslash back to double so that regex works
840    $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);   
841   
842    # note that the first part of a substitution is a regex, so RE chars need to be escaped,
843    # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
844    $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
845    $filename =~ s@\(@\\(@g; # escape brackets
846    $filename =~ s@\)@\\)@g; # escape brackets
847    $filename =~ s@\[@\\[@g; # escape brackets
848    $filename =~ s@\]@\\]@g; # escape brackets
849   
850    return $filename;
851}
852
853sub unregex_filename {
854    my $filename = shift (@_);
855
856    # need to put doubled backslashes for regex back to single
857    $filename =~ s/\\\\/\\/g; # remove RE syntax for \   
858    $filename =~ s/\\\./\./g; # remove RE syntax for .
859    $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
860    $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
861    $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
862    $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
863    return $filename;
864}
865
866sub filename_cat {
867    my $first_file = shift(@_);
868    my (@filenames) = @_;
869
870#   Useful for debugging
871#     -- might make sense to call caller(0) rather than (1)??
872#   my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
873#   print STDERR "Calling method: $cfilename:$cline $cpackage->$csubr\n";
874   
875    # If first_file is not null or empty, then add it back into the list
876    if (defined $first_file && $first_file =~ /\S/) {
877    unshift(@filenames, $first_file);
878    }
879
880    my $filename = join("/", @filenames);
881
882    # remove duplicate slashes and remove the last slash
883    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
884    $filename =~ s/[\\\/]+/\\/g;
885    } else {
886    $filename =~ s/[\/]+/\//g;
887    # DB: want a filename abc\de.html to remain like this
888    }
889    $filename =~ s/[\\\/]$//;
890
891    return $filename;
892}
893
894
895sub pathname_cat {
896    my $first_path = shift(@_);
897    my (@pathnames) = @_;
898
899    # If first_path is not null or empty, then add it back into the list
900    if (defined $first_path && $first_path =~ /\S/) {
901    unshift(@pathnames, $first_path);
902    }
903
904    my $join_char;
905    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
906    $join_char = ";";
907    } else {
908    $join_char = ":";
909    }
910
911    my $pathname = join($join_char, @pathnames);
912
913    # remove duplicate slashes
914    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
915    $pathname =~ s/[\\\/]+/\\/g;
916    } else {
917    $pathname =~ s/[\/]+/\//g;
918    # DB: want a pathname abc\de.html to remain like this
919    }
920
921    return $pathname;
922}
923
924
925sub tidy_up_oid {
926    my ($OID) = @_;
927    if ($OID =~ /\./) {
928    print STDERR "Warning, identifier $OID contains periods (.), removing them\n";
929    $OID =~ s/\.//g; #remove any periods
930    }
931    if ($OID =~ /^\s.*\s$/) {
932    print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
933    # remove starting and trailing whitespace
934    $OID =~ s/^\s+//;
935    $OID =~ s/\s+$//;
936    }
937    if ($OID =~ /^[\d]*$/) {
938    print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
939    $OID = "D" . $OID;
940    }       
941   
942    return $OID;
943}
944sub envvar_prepend {
945    my ($var,$val) = @_;
946
947    # do not prepend any value/path that's already in the environment variable
948   
949    my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
950    if (!defined($ENV{$var})) {
951    $ENV{$var} = "$val";
952    }
953    elsif($ENV{$var} !~ m/$escaped_val/) {
954    $ENV{$var} = "$val;".$ENV{$var};
955    }
956}
957
958sub envvar_append {
959    my ($var,$val) = @_;
960   
961    # do not append any value/path that's already in the environment variable
962   
963    my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
964    if (!defined($ENV{$var})) {
965    $ENV{$var} = "$val";
966    }
967    elsif($ENV{$var} !~ m/$escaped_val/) {
968    $ENV{$var} .= ";$val";
969    }
970}
971
972
973# splits a filename into a prefix and a tail extension using the tail_re, or
974# if that fails, splits on the file_extension . (dot)
975sub get_prefix_and_tail_by_regex {
976
977    my ($filename,$tail_re) = @_;
978   
979    my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
980    if ((!defined $file_prefix) || (!defined $file_ext)) {
981    ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
982    }
983
984    return ($file_prefix,$file_ext);
985}
986
987# get full path and file only path from a base_dir (which may be empty) and
988# file (which may contain directories)
989sub get_full_filenames {
990    my ($base_dir, $file) = @_;
991   
992    my $filename_full_path = $file;
993    # add on directory if present
994    $filename_full_path = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\S/;
995   
996    my $filename_no_path = $file;
997
998    # remove directory if present
999    $filename_no_path =~ s/^.*[\/\\]//;
1000    return ($filename_full_path, $filename_no_path);
1001}
1002
1003# returns the path of a file without the filename -- ie. the directory the file is in
1004sub filename_head {
1005    my $filename = shift(@_);
1006
1007    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1008    $filename =~ s/[^\\\\]*$//;
1009    }
1010    else {
1011    $filename =~ s/[^\\\/]*$//;
1012    }
1013
1014    return $filename;
1015}
1016
1017
1018
1019# returns 1 if filename1 and filename2 point to the same
1020# file or directory
1021sub filenames_equal {
1022    my ($filename1, $filename2) = @_;
1023
1024    # use filename_cat to clean up trailing slashes and
1025    # multiple slashes
1026    $filename1 = filename_cat ($filename1);
1027    $filename2 = filename_cat ($filename2);
1028
1029    # filenames not case sensitive on windows
1030    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1031    $filename1 =~ tr/[A-Z]/[a-z]/;
1032    $filename2 =~ tr/[A-Z]/[a-z]/;
1033    }
1034    return 1 if $filename1 eq $filename2;
1035    return 0;
1036}
1037
1038# If filename is relative to within_dir, returns the relative path of filename to that directory
1039# with slashes in the filename returned as they were in the original (absolute) filename.
1040sub filename_within_directory
1041{
1042    my ($filename,$within_dir) = @_;
1043   
1044    if ($within_dir !~ m/[\/\\]$/) {
1045    my $dirsep = &util::get_dirsep();
1046    $within_dir .= $dirsep;
1047    }
1048   
1049    $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets   
1050    if ($filename =~ m/^$within_dir(.*)$/) {
1051    $filename = $1;
1052    }
1053   
1054    return $filename;
1055}
1056
1057# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
1058# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
1059# The subpath returned will also be a URL type filename.
1060sub filename_within_directory_url_format
1061{
1062    my ($filename,$within_dir) = @_;
1063   
1064    # convert parameters only to / slashes if Windows
1065   
1066    my $filename_urlformat = $filename;
1067    my $within_dir_urlformat = $within_dir;
1068   
1069    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1070        # Only need to worry about Windows, as Unix style directories already in url-format
1071        # Convert Windows style \ => /
1072        $filename_urlformat =~ s@\\@/@g;
1073        $within_dir_urlformat =~ s@\\@/@g;
1074    }
1075   
1076    #if ($within_dir_urlformat !~ m/\/$/) {
1077        # make sure directory ends with a slash
1078        #$within_dir_urlformat .= "/";
1079    #}
1080   
1081    my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
1082   
1083    #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
1084   
1085    # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
1086    if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
1087        $filename_urlformat = $1;
1088    }
1089   
1090    return $filename_urlformat;
1091}
1092
1093
1094sub filename_within_collection
1095{
1096    my ($filename) = @_;
1097
1098    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
1099   
1100    if (defined $collect_dir) {
1101
1102    # if from within GSDLCOLLECTDIR, then remove directory prefix
1103    # so source_filename is realative to it.  This is done to aid
1104    # portability, i.e. the collection can be moved to somewhere
1105    # else on the file system and the archives directory will still
1106    # work.  This is needed, for example in the applet version of
1107    # GLI where GSDLHOME/collect on the server will be different to
1108    # the collect directory of the remove user.  Of course,
1109    # GSDLCOLLECTDIR subsequently needs to be put back on to turn
1110    # it back into a full pathname.
1111
1112    $filename = filename_within_directory($filename,$collect_dir);
1113    }
1114   
1115    return $filename;
1116}
1117
1118sub prettyprint_file
1119{
1120    my ($base_dir,$file,$gli) = @_;
1121
1122    my $filename_full_path = &util::filename_cat($base_dir,$file);
1123
1124    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1125    require Win32;
1126
1127    # For some reason base_dir in the form c:/a/b/c
1128    # This leads to confusion later on, so turn it back into
1129    # the more usual Windows form
1130    $base_dir =~ s/\//\\/g;
1131    my $long_base_dir = Win32::GetLongPathName($base_dir);
1132    my $long_full_path = Win32::GetLongPathName($filename_full_path);
1133
1134    $file = filename_within_directory($long_full_path,$long_base_dir);
1135    $file = encode("utf8",$file) if ($gli);
1136    }
1137
1138    return $file;
1139}
1140
1141
1142sub upgrade_if_dos_filename
1143{
1144    my ($filename_full_path,$and_encode) = @_;
1145
1146    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1147    # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
1148    # to its long (Windows) version
1149    my $long_filename = Win32::GetLongPathName($filename_full_path);
1150    if (defined $long_filename) {
1151        $filename_full_path = $long_filename;
1152    }
1153    # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
1154    $filename_full_path =~ s/^(.):/\u$1:/;
1155    if ((defined $and_encode) && ($and_encode)) {
1156        $filename_full_path = encode("utf8",$filename_full_path);
1157    }
1158    }
1159
1160    return $filename_full_path;
1161}
1162
1163
1164sub downgrade_if_dos_filename
1165{
1166    my ($filename_full_path) = @_;
1167
1168    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1169    require Win32;
1170
1171    # Ensure the given long Windows filename is in a form that can
1172    # be opened by Perl => convert it to a short DOS-like filename
1173
1174    my $short_filename = Win32::GetShortPathName($filename_full_path);
1175    if (defined $short_filename) {
1176        $filename_full_path = $short_filename;
1177    }
1178    # Make sure initial drive letter is lower-case (to fit in
1179    # with rest of Greenstone)
1180    $filename_full_path =~ s/^(.):/\u$1:/;
1181    }
1182
1183    return $filename_full_path;
1184}
1185
1186sub block_filename
1187{
1188    my ($block_hash,$filename) = @_;
1189   
1190    if ($ENV{'GSDLOS'} =~ m/^windows$/) {
1191   
1192    # lower case the entire thing, eg for cover.jpg when its actually cover.JPG
1193    my $lower_filename = lc($filename);
1194    $block_hash->{'file_blocks'}->{$lower_filename} = 1;
1195#   my $lower_drive = $filename;
1196#   $lower_drive =~ s/^([A-Z]):/\l$1:/i;
1197   
1198#   my $upper_drive = $filename;
1199#   $upper_drive =~ s/^([A-Z]):/\u$1:/i;
1200#   
1201#   $block_hash->{'file_blocks'}->{$lower_drive} = 1;
1202#   $block_hash->{'file_blocks'}->{$upper_drive} = 1;       
1203    }
1204    else {
1205    $block_hash->{'file_blocks'}->{$filename} = 1;
1206    }
1207}
1208
1209
1210sub filename_is_absolute
1211{
1212    my ($filename) = @_;
1213
1214    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1215    return ($filename =~ m/^(\w:)?\\/);
1216    }
1217    else {
1218    return ($filename =~ m/^\//);
1219    }
1220}
1221
1222
1223## @method make_absolute()
1224#
1225#  Ensure the given file path is absolute in respect to the given base path.
1226#
1227#  @param  $base_dir A string denoting the base path the given dir must be
1228#                    absolute to.
1229#  @param  $dir The directory to be made absolute as a string. Note that the
1230#               dir may already be absolute, in which case it will remain
1231#               unchanged.
1232#  @return The now absolute form of the directory as a string.
1233#
1234#  @author John Thompson, DL Consulting Ltd.
1235#  @copy 2006 DL Consulting Ltd.
1236#
1237#used in buildcol.pl, doesn't work for all cases --kjdon
1238sub make_absolute {
1239   
1240    my ($base_dir, $dir) = @_;
1241###    print STDERR "dir = $dir\n";
1242    $dir =~ s/[\\\/]+/\//g;
1243    $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
1244    $dir =~ s|^/tmp_mnt||;
1245    1 while($dir =~ s|/[^/]*/\.\./|/|g);
1246    $dir =~ s|/[.][.]?/|/|g;
1247    $dir =~ tr|/|/|s;
1248###    print STDERR "dir = $dir\n";
1249   
1250    return $dir;
1251}
1252## make_absolute() ##
1253
1254sub get_dirsep {
1255
1256    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1257    return "\\";
1258    } else {
1259    return "\/";
1260    }
1261}
1262
1263sub get_os_dirsep {
1264
1265    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
1266    return "\\\\";
1267    } else {
1268    return "\\\/";
1269    }
1270}
1271
1272sub get_re_dirsep {
1273
1274    return "\\\\|\\\/";
1275}
1276
1277
1278sub get_dirsep_tail {
1279    my ($filename) = @_;
1280   
1281    # returns last part of directory or filename
1282    # On unix e.g. a/b.d => b.d
1283    #              a/b/c => c
1284
1285    my $dirsep = get_re_dirsep();
1286    my @dirs = split (/$dirsep/, $filename);
1287    my $tail = pop @dirs;
1288
1289    # - caused problems under windows
1290    #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
1291
1292    return $tail;
1293}
1294
1295
1296# if this is running on windows we want binaries to end in
1297# .exe, otherwise they don't have to end in any extension
1298sub get_os_exe {
1299    return ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i;
1300    return "";
1301}
1302
1303
1304# test to see whether this is a big or little endian machine
1305sub is_little_endian
1306{
1307    # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
1308    # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
1309    # Otherwise, it's little endian
1310
1311    #return 0 if $^O =~ /^darwin$/i;
1312    #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
1313   
1314    # Going back to stating exactly whether the machine is little endian
1315    # or big endian, without any special case for Macs. Since for rata it comes
1316    # back with little endian and for shuttle with bigendian.
1317    return (ord(substr(pack("s",1), 0, 1)) == 1);
1318}
1319
1320
1321# will return the collection name if successful, "" otherwise
1322sub use_collection {
1323    my ($collection, $collectdir) = @_;
1324
1325    if (!defined $collectdir || $collectdir eq "") {
1326    $collectdir = &filename_cat ($ENV{'GSDLHOME'}, "collect");
1327    }
1328
1329    # get and check the collection
1330    if (!defined($collection) || $collection eq "") {
1331    if (defined $ENV{'GSDLCOLLECTION'}) {
1332        $collection = $ENV{'GSDLCOLLECTION'};
1333    } else {
1334        print STDOUT "No collection specified\n";
1335        return "";
1336    }
1337    }
1338   
1339    if ($collection eq "modelcol") {
1340    print STDOUT "You can't use modelcol.\n";
1341    return "";
1342    }
1343
1344    # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1345    # are defined
1346    $ENV{'GSDLCOLLECTION'} = $collection;
1347    $ENV{'GSDLCOLLECTDIR'} = &filename_cat ($collectdir, $collection);
1348
1349    # make sure this collection exists
1350    if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1351    print STDOUT "Invalid collection ($collection).\n";
1352    return "";
1353    }
1354
1355    # everything is ready to go
1356    return $collection;
1357}
1358
1359sub get_current_collection_name {
1360    return $ENV{'GSDLCOLLECTION'};
1361}
1362
1363
1364# will return the collection name if successful, "" otherwise. 
1365# Like use_collection (above) but for greenstone 3 (taking account of site level)
1366
1367sub use_site_collection {
1368    my ($site, $collection, $collectdir) = @_;
1369
1370    if (!defined $collectdir || $collectdir eq "") {
1371    die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1372    $collectdir = &filename_cat ($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1373    }
1374
1375    # collectdir explicitly set by this point (using $site variable if required).
1376    # Can call "old" gsdl2 use_collection now.
1377
1378    return use_collection($collection,$collectdir);
1379}
1380
1381
1382
1383sub locate_config_file
1384{
1385    my ($file) = @_;
1386
1387    my $locations = locate_config_files($file);
1388
1389    return shift @$locations; # returns undef if 'locations' is empty
1390}
1391
1392
1393sub locate_config_files
1394{
1395    my ($file) = @_;
1396
1397    my @locations = ();
1398
1399    if (-e $file) {
1400    # Clearly specified (most likely full filename)
1401    # No need to hunt in 'etc' directories, return value unchanged
1402    push(@locations,$file);
1403    }
1404    else {
1405    # Check for collection specific one before looking in global GSDL 'etc'
1406    if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1407        my $test_collect_etc_filename
1408        = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1409       
1410        if (-e $test_collect_etc_filename) {
1411        push(@locations,$test_collect_etc_filename);
1412        }
1413    }
1414    my $test_main_etc_filename
1415        = &util::filename_cat($ENV{'GSDLHOME'},"etc", $file);
1416    if (-e $test_main_etc_filename) {
1417        push(@locations,$test_main_etc_filename);
1418    }
1419    }
1420
1421    return \@locations;
1422}
1423
1424
1425sub hyperlink_text
1426{
1427    my ($text) = @_;
1428   
1429    $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1430    $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1431
1432    return $text;
1433}
1434
1435
1436# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1437# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1438sub is_dir_empty
1439{
1440    my ($path) = @_;
1441    opendir DIR, $path;
1442    while(my $entry = readdir DIR) {
1443        next if($entry =~ /^\.\.?$/);
1444        closedir DIR;
1445        return 0;
1446    }
1447    closedir DIR;
1448    return 1;
1449}
1450
1451# Returns the given filename converted using either URL encoding or base64
1452# encoding, as specified by $rename_method. If the given filename has no suffix
1453# (if it is just the tailname), then $no_suffix should be some defined value.
1454# rename_method can be url, none, base64
1455sub rename_file {
1456    my ($filename, $rename_method, $no_suffix)  = @_;
1457
1458    if(!$filename) { # undefined or empty string
1459    return $filename;
1460    }
1461
1462    if (!$rename_method) {
1463    print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1464    # Debugging information
1465    # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1466    # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1467    $rename_method = "url";
1468    } elsif($rename_method eq "none") {
1469    return $filename; # would have already been renamed
1470    }
1471
1472    # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1473    ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1474    ###$filename =~ s/ /_/g;
1475
1476    my ($tailname,$dirname,$suffix);
1477    if($no_suffix) { # given a tailname, no suffix
1478    ($tailname,$dirname) = File::Basename::fileparse($filename);
1479    }
1480    else {
1481    ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1482    }
1483    if (!$suffix) {
1484    $suffix = "";
1485    }
1486    else {
1487    $suffix = lc($suffix);
1488    }
1489
1490    if ($rename_method eq "url") {
1491    $tailname = &unicode::url_encode($tailname);
1492    }
1493    elsif ($rename_method eq "base64") {
1494    $tailname = &unicode::base64_encode($tailname);
1495    $tailname =~ s/\s*//sg;      # for some reason it adds spaces not just at end but also in middle
1496    }
1497
1498    $filename = "$tailname$suffix";
1499    $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1500
1501    return $filename;
1502}
1503
1504
1505# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1506sub rename_ldb_or_bdb_file {
1507    my ($filename_no_ext) = @_;
1508
1509    my $new_filename = "$filename_no_ext.gdb";
1510    return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1511    # try ldb
1512    my $old_filename = "$filename_no_ext.ldb";
1513   
1514    if (-f $old_filename) {
1515    print STDERR "Renaming $old_filename to $new_filename\n";
1516    rename ($old_filename, $new_filename)
1517        || print STDERR "Rename failed: $!\n";
1518    return;
1519    }
1520    # try bdb
1521    $old_filename = "$filename_no_ext.bdb";
1522    if (-f $old_filename) {
1523    print STDERR "Renaming $old_filename to $new_filename\n";   
1524    rename ($old_filename, $new_filename)
1525        || print STDERR "Rename failed: $!\n";
1526    return;
1527    }
1528}
1529
1530sub os_dir() {
1531   
1532    my $gsdlarch = "";
1533    if(defined $ENV{'GSDLARCH'}) {
1534    $gsdlarch = $ENV{'GSDLARCH'};
1535    }
1536    return $ENV{'GSDLOS'}.$gsdlarch;
1537}
1538
1539# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1540# By default, /greenstone3 for GS3 or /greenstone for GS2.
1541sub get_greenstone_url_prefix() {
1542    # if already set on a previous occasion, just return that
1543    # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1544    return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1545
1546    my ($configfile, $urlprefix, $defaultUrlprefix);
1547    my @propertynames = ();
1548
1549    if($ENV{'GSDL3SRCHOME'}) {
1550    $defaultUrlprefix = "/greenstone3";
1551    $configfile = &util::filename_cat($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1552    push(@propertynames, qw/path\s*\=/);
1553    } else {
1554    $defaultUrlprefix = "/greenstone";
1555    $configfile = &util::filename_cat($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
1556    push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1557    }
1558
1559    $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1560
1561    if(!$urlprefix) { # no values found for URL prefix, use default values
1562    $urlprefix = $defaultUrlprefix;
1563    } else {
1564    #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1565    $urlprefix =~ s/^\///; # remove the starting slash
1566    my @dirs = split(/(\\|\/)/, $urlprefix);
1567    $urlprefix = shift(@dirs);
1568
1569    if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1570        $urlprefix = "/$urlprefix";
1571    }
1572    }
1573
1574    # set for the future
1575    $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1576#    print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1577    return $urlprefix;
1578}
1579
1580
1581# Given a config file (xml or java properties file) and a list/array of regular expressions
1582# that represent property names to match on, this function will return the value for the 1st
1583# matching property name. If the return value is undefined, no matching property was found.
1584sub extract_propvalue_from_file() {
1585    my ($configfile, $propertynames) = @_;
1586
1587    my $value;
1588    unless(open(FIN, "<$configfile")) {
1589    print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1590    return $value; # not initialised
1591    }
1592
1593    # Read the entire file at once, as one single line, then close it
1594    my $filecontents;
1595    {
1596    local $/ = undef;       
1597    $filecontents = <FIN>;
1598    }
1599    close(FIN);
1600
1601    foreach my $regex (@$propertynames) {
1602        ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1603    if($value) {
1604            $value =~ s/^\"//;     # remove any startquotes
1605        $value =~ s/\".*$//;   # remove the 1st endquotes (if any) followed by any xml
1606        last;              # found value for a matching property, break from loop
1607    }
1608    }
1609
1610    return $value;
1611}
1612
1613# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1614# given that perllib is in @INC in order to invoke this subroutine.
1615# Call as follows -- after setting up INC to include perllib and
1616# after setting up GSDLHOME and GSDLOS:
1617#
1618# require util;
1619# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1620#
1621sub setup_greenstone_env() {
1622    my ($GSDLHOME, $GSDLOS) = @_;
1623
1624    #my %env_map = ();
1625    # Get the localised ENV settings of running a localised source setup.bash
1626    # and put it into the ENV here. Need to clear GSDLHOME before running setup
1627    #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1628    my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";     
1629    if($GSDLOS =~ m/windows/i) {
1630        #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1631        $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1632    }
1633    if (!open(PIN, "$perl_command |")) {
1634        print STDERR ("Unable to execute command: $perl_command. $!\n");
1635    }
1636
1637    while (defined (my $perl_output_line = <PIN>)) {
1638        my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1639        #$env_map{$key}=$value;     
1640        $ENV{$key}=$value;
1641    }
1642    close (PIN);
1643
1644    # If any keys in $ENV don't occur in Greenstone's localised env
1645    # (stored in $env_map), delete those entries from $ENV
1646    #foreach $key (keys %ENV) {
1647    #   if(!defined $env_map{$key}) {
1648    #       print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{'$key'}\n";
1649    #       delete $ENV{$key}; # del $ENV(key, value) pair
1650    #   }
1651    #}
1652    #undef %env_map;
1653}
1654
1655sub get_perl_exec() {   
1656    my $perl_exec = $^X; # may return just "perl"
1657   
1658    if($ENV{'PERLPATH'}) {
1659        # OR: # $perl_exec = &util::filename_cat($ENV{'PERLPATH'},"perl");
1660        if($ENV{'GSDLOS'} =~ m/windows/) {
1661            $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1662        } else {
1663            $perl_exec = "$ENV{'PERLPATH'}/perl";
1664        }
1665    } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1666        # containing the full path to the current perl executable we're using
1667        $perl_exec = $Config{perlpath}; # configured path for perl
1668        if (!-e $perl_exec) { # may not point to location on this machine
1669            $perl_exec = $^X; # may return just "perl"
1670            if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1671                print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";             
1672            }
1673        }
1674    }
1675   
1676    return $perl_exec;
1677}
1678
1679
16801;
Note: See TracBrowser for help on using the browser.