root/main/trunk/greenstone2/perllib/util.pm @ 31862

Revision 31862, 62.6 KB (checked in by ak19, 3 years ago)

Adding a new subroutine print_env() that may be useful for debugging env vars. It will print the values of all environment variables to STDERR if no specific ones are requested. If specific env vars are requested, it will just print their values to STDERR.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# util.pm -- various useful utilities
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package util;
27
28use strict;
29use FileUtils;
30
31use Encode;
32use Unicode::Normalize 'normalize';
33
34use File::Copy;
35use File::Basename;
36# Config for getting the perlpath in the recommended way, though it uses paths that are
37# hard-coded into the Config file that's generated upon configuring and compiling perl.
38# $^X works better in some cases to return the path to perl used to launch the script,
39# but if launched with plain "perl" (no full-path), that will be just what it returns.
40use Config;
41# New module for file related utility functions - intended as a
42# placeholder for an extension that allows a variety of different
43# filesystems (FTP, HTTP, SAMBA, WEBDav, HDFS etc)
44use FileUtils;
45
46if ($ENV{'GSDLOS'} =~ /^windows$/i) {
47    require Win32; # for working out Windows Long Filenames from Win 8.3 short filenames
48}
49
50# removes files (but not directories)
51sub rm {
52  warnings::warnif("deprecated", "util::rm() is deprecated, using FileUtils::removeFiles() instead");
53  return &FileUtils::removeFiles(@_);
54}
55
56# recursive removal
57sub filtered_rm_r {
58  warnings::warnif("deprecated", "util::filtered_rm_r() is deprecated, using FileUtils::removeFilesFiltered() instead");
59  return &FileUtils::removeFilesFiltered(@_);
60}
61
62# recursive removal
63sub rm_r {
64  warnings::warnif("deprecated", "util::rm_r() is deprecated, using FileUtils::removeFilesRecursive() instead");
65  return &FileUtils::removeFilesRecursive(@_);
66}
67
68# moves a file or a group of files
69sub mv {
70  warnings::warnif("deprecated", "util::mv() is deprecated, using FileUtils::moveFiles() instead");
71  return &FileUtils::moveFiles(@_);
72}
73
74# Move the contents of source directory into target directory
75# (as opposed to merely replacing target dir with the src dir)
76# This can overwrite any files with duplicate names in the target
77# but other files and folders in the target will continue to exist
78sub mv_dir_contents {
79  warnings::warnif("deprecated", "util::mv_dir_contents() is deprecated, using FileUtils::moveDirectoryContents() instead");
80  return &FileUtils::moveDirectoryContents(@_);
81}
82
83# copies a file or a group of files
84sub cp {
85  warnings::warnif("deprecated", "util::cp() is deprecated, using FileUtils::copyFiles() instead");
86  return &FileUtils::copyFiles(@_);
87}
88
89# recursively copies a file or group of files
90# syntax: cp_r (sourcefiles, destination directory)
91# destination must be a directory - to copy one file to
92# another use cp instead
93sub cp_r {
94  warnings::warnif("deprecated", "util::cp_r() is deprecated, using FileUtils::copyFilesrecursive() instead");
95  return &FileUtils::copyFilesRecursive(@_);
96}
97
98# recursively copies a file or group of files
99# syntax: cp_r (sourcefiles, destination directory)
100# destination must be a directory - to copy one file to
101# another use cp instead
102sub cp_r_nosvn {
103  warnings::warnif("deprecated", "util::cp_r_nosvn() is deprecated, using FileUtils::copyFilesRecursiveNoSVN() instead");
104  return &FileUtils::copyFilesRecursiveNoSVN(@_);
105}
106
107# copies a directory and its contents, excluding subdirectories, into a new directory
108sub cp_r_toplevel {
109  warnings::warnif("deprecated", "util::cp_r_toplevel() is deprecated, using FileUtils::recursiveCopyTopLevel() instead");
110  return &FileUtils::recursiveCopyTopLevel(@_);
111}
112
113sub mk_dir {
114  warnings::warnif("deprecated", "util::mk_dir() is deprecated, using FileUtils::makeDirectory() instead");
115  return &FileUtils::makeDirectory(@_);
116}
117
118# in case anyone cares - I did some testing (using perls Benchmark module)
119# on this subroutine against File::Path::mkpath (). mk_all_dir() is apparently
120# slightly faster (surprisingly) - Stefan.
121sub mk_all_dir {
122  warnings::warnif("deprecated", "util::mk_all_dir() is deprecated, using FileUtils::makeAllDirectories() instead");
123  return &FileUtils::makeAllDirectories(@_);
124}
125
126# make hard link to file if supported by OS, otherwise copy the file
127sub hard_link {
128  warnings::warnif("deprecated", "util::hard_link() is deprecated, using FileUtils::hardLink() instead");
129  return &FileUtils::hardLink(@_);
130}
131
132# make soft link to file if supported by OS, otherwise copy file
133sub soft_link {
134  warnings::warnif("deprecated", "util::soft_link() is deprecated, using FileUtils::softLink() instead");
135  return &FileUtils::softLink(@_);
136}
137
138# Primarily for filenames generated by processing
139# content of HTML files (which are mapped to UTF-8 internally)
140#
141# To turn this into an octet string that really exists on the file
142# system:
143# 1. don't need to do anything special for Unix-based systems
144#   (as underlying file system is byte-code)
145# 2. need to map to short DOS filenames for Windows
146
147sub utf8_to_real_filename
148{
149    my ($utf8_filename) = @_;
150
151    my $real_filename;
152
153    if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
154    require Win32;
155
156    my $unicode_filename = decode("utf8",$utf8_filename);
157    $real_filename = Win32::GetShortPathName($unicode_filename);
158    }
159    else {
160    $real_filename = $utf8_filename;
161    }
162
163    return $real_filename;
164}
165
166sub raw_filename_to_unicode
167{
168    my ($directory, $raw_file, $filename_encoding ) = @_;
169       
170    my $unicode_filename = $raw_file;
171    if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
172        # Try turning a short version to the long version
173        # If there are "funny" characters in the file name, that can't be represented in the ANSI code, then we will have a short weird version, eg E74~1.txt
174        $unicode_filename = &util::get_dirsep_tail(&util::upgrade_if_dos_filename(&FileUtils::filenameConcatenate($directory, $raw_file), 0));
175       
176       
177        if ($unicode_filename eq $raw_file) {
178        # This means the original filename *was* able to be encoded in the local ANSI file encoding (eg windows_1252), so now we turn it back to perl's unicode
179       
180        $unicode_filename = &Encode::decode(locale_fs => $unicode_filename);
181        }
182        # else This means we did have one of the funny filenames. the getLongPathName (used in upgrade_if_dos_filename) will return unicode, so we don't need to do anything more.
183       
184                   
185    } else {
186        # we had a utf-8 string, turn it into perl internal unicode
187        $unicode_filename = &Encode::decode("utf-8", $unicode_filename);
188   
189       
190    }
191    #Does the filename have url encoded chars in it?
192    if (&unicode::is_url_encoded($unicode_filename)) {
193        $unicode_filename = &unicode::url_decode($unicode_filename);
194    }
195   
196    # Normalise the filename to canonical composition - on mac, filenames use decopmposed form for accented chars
197    if ($ENV{'GSDLOS'} =~ m/^darwin$/i) {
198        $unicode_filename = normalize('C', $unicode_filename); # Composed form 'C'
199    }
200    return $unicode_filename;
201
202}
203sub fd_exists {
204  warnings::warnif("deprecated", "util::fd_exists() is deprecated, using FileUtils::fileTest() instead");
205  return &FileUtils::fileTest(@_);
206}
207
208sub file_exists {
209  warnings::warnif("deprecated", "util::file_exists() is deprecated, using FileUtils::fileExists() instead");
210  return &FileUtils::fileExists(@_);
211}
212
213sub dir_exists {
214  warnings::warnif("deprecated", "util::dir_exists() is deprecated, using FileUtils::directoryExists() instead");
215  return &FileUtils::directoryExists(@_);
216}
217
218# updates a copy of a directory in some other part of the filesystem
219# verbosity settings are: 0=low, 1=normal, 2=high
220# both $fromdir and $todir should be absolute paths
221sub cachedir {
222  warnings::warnif("deprecated", "util::cachedir() is deprecated, using FileUtils::synchronizeDirectories() instead");
223  return &FileUtils::synchronizeDirectories(@_);
224}
225
226# this function returns -1 if either file is not found
227# assumes that $file1 and $file2 are absolute file names or
228# in the current directory
229# $file2 is allowed to be newer than $file1
230sub differentfiles {
231  warnings::warnif("deprecated", "util::differentfiles() is deprecated, using FileUtils::differentFiles() instead");
232  return &FileUtils::differentFiles(@_);
233}
234
235
236# works out the temporary directory, including in the case where Greenstone is not writable
237# In that case, gs3-setup.bat would already have set the GS_TMP_OUTPUT_DIR temp variable
238sub determine_tmp_dir
239{
240    my $try_collect_dir = shift(@_) || 0;
241
242    my $tmp_dirname;
243    if(defined $ENV{'GS_TMP_OUTPUT_DIR'}) {
244        $tmp_dirname = $ENV{'GS_TMP_OUTPUT_DIR'};
245    } elsif($try_collect_dir && defined $ENV{'GSDLCOLLECTDIR'}) {
246        $tmp_dirname = $ENV{'GSDLCOLLECTDIR'};
247    } elsif(defined $ENV{'GSDLHOME'}) {
248        $tmp_dirname = $ENV{'GSDLHOME'};
249    } else {
250        return undef;
251    }
252   
253    if(!defined $ENV{'GS_TMP_OUTPUT_DIR'}) {
254        # test the tmp_dirname folder is writable, by trying to write out a file
255        # Unfortunately, cound not get if(-w $dirname) to work on directories on Windows
256            ## http://alvinalexander.com/blog/post/perl/perl-file-test-operators-reference-cheat-sheet (test file/dir writable)
257            ## http://www.makelinux.net/alp/083 (real and effective user IDs)
258       
259        my $tmp_test_file = &FileUtils::filenameConcatenate($tmp_dirname, "writability_test.tmp");
260        if (open (FOUT, ">$tmp_test_file")) {
261            close(FOUT);
262            &FileUtils::removeFiles($tmp_test_file);
263        } else { # location not writable, use TMP location
264        if (defined $ENV{'TMP'}) {
265            $tmp_dirname = $ENV{'TMP'};
266        } else {
267            $tmp_dirname = "/tmp";
268        }
269        $tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, "greenstone");
270            $ENV{'GS_TMP_OUTPUT_DIR'} = $tmp_dirname; # store for next time
271        }
272    }
273   
274    $tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, "tmp");
275    &FileUtils::makeAllDirectories ($tmp_dirname) unless -e $tmp_dirname;
276
277    return $tmp_dirname;
278}
279
280sub get_tmp_filename
281{
282    my $file_ext = shift(@_) || undef;
283
284    my $opt_dot_file_ext = "";
285    if (defined $file_ext) {
286    if ($file_ext !~ m/\./) {
287        # no dot, so needs one added in at start
288        $opt_dot_file_ext = ".$file_ext"
289    }
290    else {
291        # allow for "extensions" such as _metadata.txt to be handled
292        # gracefully
293        $opt_dot_file_ext = $file_ext;
294    }
295    }
296
297    my $tmpdir = &util::determine_tmp_dir(0);
298
299    my $count = 1000;
300    my $rand = int(rand $count);
301    my $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
302
303    while (-e $full_tmp_filename) {
304    $rand = int(rand $count);
305    $full_tmp_filename = &FileUtils::filenameConcatenate($tmpdir, "F$rand$opt_dot_file_ext");
306    $count++;
307    }
308   
309    return $full_tmp_filename;
310}
311
312# These 2 are "static" variables used by the get_timestamped_tmp_folder() subroutine below and
313# belong with that function. They help ensure the timestamped tmp folders generated are unique.
314my $previous_timestamp = undef;
315my $previous_timestamp_f = 0; # frequency
316
317sub get_timestamped_tmp_folder
318{
319    my $tmp_dirname = &util::determine_tmp_dir(1);
320   
321    # add the timestamp into the path otherwise we can run into problems
322    # if documents have the same name
323    my $timestamp = time;   
324   
325    if (!defined $previous_timestamp || ($timestamp > $previous_timestamp)) {
326        $previous_timestamp_f = 0;
327        $previous_timestamp = $timestamp;
328    } else {
329        $previous_timestamp_f++;
330    }
331   
332    my $time_tmp_dirname = &FileUtils::filenameConcatenate($tmp_dirname, $timestamp);
333    $tmp_dirname = $time_tmp_dirname;   
334    my $i = $previous_timestamp_f;
335   
336    if($previous_timestamp_f > 0) {
337        $tmp_dirname = $time_tmp_dirname."_".$i;
338        $i++;
339    }
340    while (-e $tmp_dirname) {
341    $tmp_dirname = $time_tmp_dirname."_".$i;
342    $i++;
343    }
344    &FileUtils::makeDirectory($tmp_dirname);
345
346    return $tmp_dirname;
347}
348
349sub get_timestamped_tmp_filename_in_collection
350{
351
352    my ($input_filename, $output_ext) = @_;
353    # derive tmp filename from input filename
354    my ($tailname, $dirname, $suffix)
355    = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
356
357    # softlink to collection tmp dir
358    my $tmp_dirname = &util::get_timestamped_tmp_folder();
359    $tmp_dirname = $dirname unless defined $tmp_dirname;
360
361    # following two steps copied from ConvertBinaryFile
362    # do we need them?? can't use them as is, as they use plugin methods.
363
364    #$tailname = $self->SUPER::filepath_to_utf8($tailname) unless &unicode::check_is_utf8($tailname);
365
366    # URLEncode this since htmls with images where the html filename is utf8 don't seem
367    # to work on Windows (IE or Firefox), as browsers are looking for filesystem-encoded
368    # files on the filesystem.
369    #$tailname = &util::rename_file($tailname, $self->{'file_rename_method'}, "without_suffix");
370    if (defined $output_ext) {
371    $output_ext = ".$output_ext"; # add the dot
372    } else {
373    $output_ext = $suffix;
374    }
375    $output_ext= lc($output_ext);
376    my $tmp_filename = &FileUtils::filenameConcatenate($tmp_dirname, "$tailname$output_ext");
377   
378    return $tmp_filename;
379}
380
381sub get_toplevel_tmp_dir
382{
383    return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "tmp");
384}
385
386
387sub get_collectlevel_tmp_dir
388{
389    my $tmp_dirname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "tmp");
390    &FileUtils::makeDirectory($tmp_dirname) if (!-e $tmp_dirname);
391
392    return $tmp_dirname;
393}
394
395sub get_parent_folder
396{
397    my ($path) = @_;
398    my ($tailname, $dirname, $suffix)
399    = &File::Basename::fileparse($path, "\\.[^\\.]+\$");
400
401    return &FileUtils::sanitizePath($dirname);
402}
403
404sub filename_to_regex {
405    my $filename = shift (@_);
406
407    # need to make single backslashes double so that regex works
408    $filename =~ s/\\/\\\\/g; # if ($ENV{'GSDLOS'} =~ /^windows$/i);   
409   
410    # note that the first part of a substitution is a regex, so RE chars need to be escaped,
411    # the second part of a substitution is not a regex, so for e.g. full-stop can be specified literally
412    $filename =~ s/\./\\./g; # in case there are extensions/other full stops, escape them
413    $filename =~ s@\(@\\(@g; # escape brackets
414    $filename =~ s@\)@\\)@g; # escape brackets
415    $filename =~ s@\[@\\[@g; # escape brackets
416    $filename =~ s@\]@\\]@g; # escape brackets
417   
418    return $filename;
419}
420
421sub unregex_filename {
422    my $filename = shift (@_);
423
424    # need to put doubled backslashes for regex back to single
425    $filename =~ s/\\\./\./g; # remove RE syntax for .
426    $filename =~ s@\\\(@(@g; # remove RE syntax for ( => "\(" turns into "("
427    $filename =~ s@\\\)@)@g; # remove RE syntax for ) => "\)" turns into ")"
428    $filename =~ s@\\\[@[@g; # remove RE syntax for [ => "\[" turns into "["
429    $filename =~ s@\\\]@]@g; # remove RE syntax for ] => "\]" turns into "]"
430   
431    # \\ goes to \
432    # This is the last step in reverse mirroring the order of steps in filename_to_regex()
433    $filename =~ s/\\\\/\\/g; # remove RE syntax for \   
434    return $filename;
435}
436
437sub filename_cat {
438  # I've disabled this warning for now, as every Greenstone perl
439  # script seems to make use of this function and so you drown in a
440  # sea of deprecated warnings [jmt12]
441#  warnings::warnif("deprecated", "util::filename_cat() is deprecated, using FileUtils::filenameConcatenate() instead");
442  return &FileUtils::filenameConcatenate(@_);
443}
444
445
446sub _pathname_cat {
447    my $join_char  = shift(@_);
448    my $first_path = shift(@_);
449    my (@pathnames) = @_;
450
451    # If first_path is not null or empty, then add it back into the list
452    if (defined $first_path && $first_path =~ /\S/) {
453    unshift(@pathnames, $first_path);
454    }
455
456    my $pathname = join($join_char, @pathnames);
457
458    # remove duplicate slashes
459    if ($join_char eq ";") {
460    $pathname =~ s/[\\\/]+/\\/g;
461    if ($^O eq "cygwin") {
462        # Once we've collapsed muliple (potentialy > 2) slashes
463        # For cygwin, actually want things double-backslahed
464        $pathname =~ s/\\/\\\\/g;
465    }
466
467    } else {
468    $pathname =~ s/[\/]+/\//g;
469    # DB: want a pathname abc\de.html to remain like this
470    }
471
472    return $pathname;
473}
474
475
476sub pathname_cat {
477    my (@pathnames) = @_;
478
479    my $join_char;
480    if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
481    $join_char = ";";
482    } else {
483    $join_char = ":";
484    }
485    return _pathname_cat($join_char,@pathnames);
486}
487
488
489sub javapathname_cat {
490    my (@pathnames) = @_;
491
492    my $join_char;
493
494    # Unlike pathname_cat() above, not interested if running in a Cygwin environment
495    # This is because the java we run is actually a native Windows executable
496
497    if (($ENV{'GSDLOS'} =~ /^windows$/i)) {
498    $join_char = ";";
499    } else {
500    $join_char = ":";
501    }
502    return _pathname_cat($join_char,@pathnames);
503}
504
505
506sub makeFilenameJavaCygwinCompatible
507{
508    my ($java_filename) = @_;
509
510    if ($^O eq "cygwin") {
511    # To be used with a Java program, but under Cygwin
512    # Because the java binary that is native to Windows, need to
513    # convert the Cygwin paths (i.e. Unix style) to be Windows
514    # compatible
515   
516    $java_filename = `cygpath -wp "$java_filename"`;
517    chomp($java_filename);
518    $java_filename =~ s%\\%\\\\%g;
519    }
520
521    return $java_filename;
522}
523
524sub tidy_up_oid {
525    my ($OID) = @_;
526    if ($OID =~ /[\.\/\\]/) {
527    print STDERR "Warning, identifier $OID contains periods or slashes(.\\\/), replacing them with _\n";
528    $OID =~ s/[\.\\\/]/_/g; #remove any periods
529    }
530    if ($OID =~ /^\s.*\s$/) {
531    print STDERR "Warning, identifier $OID starts or ends with whitespace. Removing it\n";
532    # remove starting and trailing whitespace
533    $OID =~ s/^\s+//;
534    $OID =~ s/\s+$//;
535    }
536    if ($OID =~ /^[\d]*$/) {
537    print STDERR "Warning, identifier $OID contains only digits. Prepending 'D'.\n";
538    $OID = "D" . $OID;
539    }       
540   
541    return $OID;
542}
543
544sub envvar_prepend {
545    my ($var,$val) = @_;
546
547    # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
548##    my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
549
550    # Rewritten above to make ":" the default (Windows is the special
551    # case, anything else 'unusual' such as Solaris etc is Unix)
552    my $pathsep = (defined $ENV{'GSDLOS'} && (($ENV{'GSDLOS'} =~ m/windows/) && ($^O ne "cygwin"))) ? ";" : ":";
553
554    # do not prepend any value/path that's already in the environment variable
555   
556    my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
557    if (!defined($ENV{$var})) {
558    $ENV{$var} = "$val";
559    }
560    elsif($ENV{$var} !~ m/$escaped_val/) {
561    $ENV{$var} = "$val".$pathsep.$ENV{$var};
562    }
563}
564
565sub envvar_append {
566    my ($var,$val) = @_;
567
568    # 64 bit linux can't handle ";" as path separator, so make sure to set this to the right one for the OS
569    my $pathsep = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} !~ m/windows/) ? ":" : ";";
570   
571    # do not append any value/path that's already in the environment variable
572
573    my $escaped_val = &filename_to_regex($val); # escape any backslashes and brackets for upcoming regex
574    if (!defined($ENV{$var})) {
575    $ENV{$var} = "$val";
576    }
577    elsif($ENV{$var} !~ m/$escaped_val/) {
578    $ENV{$var} = $ENV{$var}.$pathsep."$val";
579    }
580}
581
582# debug aid
583sub print_env {
584    my @envvars = @_;
585
586    if (scalar(@envvars) == 0) {
587    print STDERR "@@@ No env vars requested\n";
588   
589   
590    my $output;
591    if (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/windows/) {
592        $output = `set`;
593    }
594    else {
595        $output = `env`;
596    }
597   
598    print STDERR "@@@ Environment was:\n********\n$output\n*******\n";
599    } else {
600    print STDERR "@@@ Environment was:\n********\n";
601    foreach my $envvar (@envvars) {
602        if(defined $ENV{$envvar}) {
603        print STDERR "\t$envvar = ".$ENV{$envvar}."\n";
604        } else {
605        $envvar = uc($envvar);
606        if(defined $ENV{$envvar}) {
607            print STDERR "\t$envvar = ".$ENV{$envvar}."\n";
608        } else {
609            print STDERR "Env var $envvar was not set\n";
610        }
611        }
612    }
613    print STDERR "@@@*******\n";
614    }
615}
616
617
618# splits a filename into a prefix and a tail extension using the tail_re, or
619# if that fails, splits on the file_extension . (dot)
620sub get_prefix_and_tail_by_regex {
621
622    my ($filename,$tail_re) = @_;
623   
624    my ($file_prefix,$file_ext) = ($filename =~ m/^(.*?)($tail_re)$/);
625    if ((!defined $file_prefix) || (!defined $file_ext)) {
626    ($file_prefix,$file_ext) = ($filename =~ m/^(.*)(\..*?)$/);
627    }
628
629    return ($file_prefix,$file_ext);
630}
631
632# get full path and file only path from a base_dir (which may be empty) and
633# file (which may contain directories)
634sub get_full_filenames {
635    my ($base_dir, $file) = @_;
636   
637#    my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0);
638#    my ($lcfilename) = ($cfilename =~ m/([^\\\/]*)$/);
639#    print STDERR "** Calling method: $lcfilename:$cline $cpackage->$csubr\n";
640
641
642    my $filename_full_path = $file;
643    # add on directory if present
644    $filename_full_path = &FileUtils::filenameConcatenate($base_dir, $file) if $base_dir =~ /\S/;
645   
646    my $filename_no_path = $file;
647
648    # remove directory if present
649    $filename_no_path =~ s/^.*[\/\\]//;
650    return ($filename_full_path, $filename_no_path);
651}
652
653# returns the path of a file without the filename -- ie. the directory the file is in
654sub filename_head {
655    my $filename = shift(@_);
656
657    if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
658    $filename =~ s/[^\\\\]*$//;
659    }
660    else {
661    $filename =~ s/[^\\\/]*$//;
662    }
663
664    return $filename;
665}
666
667
668
669# returns 1 if filename1 and filename2 point to the same
670# file or directory
671sub filenames_equal {
672    my ($filename1, $filename2) = @_;
673
674    # use filename_cat to clean up trailing slashes and
675    # multiple slashes
676    $filename1 = &FileUtils::filenameConcatenate($filename1);
677    $filename2 = &FileUtils::filenameConcatenate($filename2);
678
679    # filenames not case sensitive on windows
680    if ($ENV{'GSDLOS'} =~ /^windows$/i) {
681    $filename1 =~ tr/[A-Z]/[a-z]/;
682    $filename2 =~ tr/[A-Z]/[a-z]/;
683    }
684    return 1 if $filename1 eq $filename2;
685    return 0;
686}
687
688# If filename is relative to within_dir, returns the relative path of filename to that directory
689# with slashes in the filename returned as they were in the original (absolute) filename.
690sub filename_within_directory
691{
692    my ($filename,$within_dir) = @_;
693   
694    if ($within_dir !~ m/[\/\\]$/) {
695    my $dirsep = &util::get_dirsep();
696    $within_dir .= $dirsep;
697    }
698   
699    $within_dir = &filename_to_regex($within_dir); # escape DOS style file separator and brackets   
700    if ($filename =~ m/^$within_dir(.*)$/) {
701    $filename = $1;
702    }
703   
704    return $filename;
705}
706
707# If filename is relative to within_dir, returns the relative path of filename to that directory in URL format.
708# Filename and within_dir can be any type of slashes, but will be compared as URLs (i.e. unix-style slashes).
709# The subpath returned will also be a URL type filename.
710sub filename_within_directory_url_format
711{
712    my ($filename,$within_dir) = @_;
713   
714    # convert parameters only to / slashes if Windows
715   
716    my $filename_urlformat = &filepath_to_url_format($filename);
717    my $within_dir_urlformat = &filepath_to_url_format($within_dir);
718
719    #if ($within_dir_urlformat !~ m/\/$/) {
720        # make sure directory ends with a slash
721        #$within_dir_urlformat .= "/";
722    #}
723   
724    my $within_dir_urlformat_re = &filename_to_regex($within_dir_urlformat); # escape any special RE characters, such as brackets
725   
726    #print STDERR "@@@@@ $filename_urlformat =~ $within_dir_urlformat_re\n";
727   
728    # dir prefix may or may not end with a slash (this is discarded when extracting the sub-filepath)
729    if ($filename_urlformat =~ m/^$within_dir_urlformat_re(?:\/)*(.*)$/) {
730        $filename_urlformat = $1;
731    }
732   
733    return $filename_urlformat;
734}
735
736# Convert parameter to use / slashes if Windows (if on Linux leave any \ as is,
737# since on Linux it doesn't represent a file separator but an escape char).
738sub filepath_to_url_format
739{
740    my ($filepath) = @_;
741    if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
742        # Only need to worry about Windows, as Unix style directories already in url-format
743        # Convert Windows style \ => /
744        $filepath =~ s@\\@/@g;     
745    }
746    return $filepath;
747}
748
749# regex filepaths on windows may include \\ as path separator. Convert \\ to /
750sub filepath_regex_to_url_format
751{
752    my ($filepath) = @_;
753    if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
754    # Only need to worry about Windows, as Unix style directories already in url-format
755    # Convert Windows style \\ => /
756    $filepath =~ s@\\\\@/@g;       
757    }
758    return $filepath;
759   
760}
761
762# Like File::Basename::fileparse, but expects filepath in url format (ie only / slash for dirsep)
763# and ignores trailing /
764# returns (file, dirs) dirs will be empty if no subdirs
765sub url_fileparse
766{
767    my ($filepath) = @_;
768    # remove trailing /
769    $filepath =~ s@/$@@;
770    if ($filepath !~ m@/@) {
771    return ($filepath, "");
772    }
773    my ($dirs, $file) = $filepath =~ m@(.+/)([^/]+)@;
774    return ($file, $dirs);
775   
776}
777
778
779sub filename_within_collection
780{
781    my ($filename) = @_;
782
783    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
784   
785    if (defined $collect_dir) {
786
787    # if from within GSDLCOLLECTDIR, then remove directory prefix
788    # so source_filename is realative to it.  This is done to aid
789    # portability, i.e. the collection can be moved to somewhere
790    # else on the file system and the archives directory will still
791    # work.  This is needed, for example in the applet version of
792    # GLI where GSDLHOME/collect on the server will be different to
793    # the collect directory of the remove user.  Of course,
794    # GSDLCOLLECTDIR subsequently needs to be put back on to turn
795    # it back into a full pathname.
796
797    $filename = filename_within_directory($filename,$collect_dir);
798    }
799   
800    return $filename;
801}
802
803sub prettyprint_file
804{
805    my ($base_dir,$file,$gli) = @_;
806
807    my $filename_full_path = &FileUtils::filenameConcatenate($base_dir,$file);
808
809    if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
810    require Win32;
811
812    # For some reason base_dir in the form c:/a/b/c
813    # This leads to confusion later on, so turn it back into
814    # the more usual Windows form
815    $base_dir =~ s/\//\\/g;
816    my $long_base_dir = Win32::GetLongPathName($base_dir);
817    my $long_full_path = Win32::GetLongPathName($filename_full_path);
818
819    $file = filename_within_directory($long_full_path,$long_base_dir);
820    $file = encode("utf8",$file) if ($gli);
821    }
822
823    return $file;
824}
825
826
827sub upgrade_if_dos_filename
828{
829    my ($filename_full_path,$and_encode) = @_;
830
831    if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
832    # Ensure any DOS-like filename, such as test~1.txt, has been upgraded
833    # to its long (Windows) version
834    my $long_filename = Win32::GetLongPathName($filename_full_path);
835    if (defined $long_filename) {
836       
837        $filename_full_path = $long_filename;
838    }
839    # Make sure initial drive letter is lower-case (to fit in with rest of Greenstone)
840    $filename_full_path =~ s/^(.):/\u$1:/;
841   
842    if ((defined $and_encode) && ($and_encode)) {
843        $filename_full_path = encode("utf8",$filename_full_path);
844    }
845    }
846
847    return $filename_full_path;
848}
849
850
851sub downgrade_if_dos_filename
852{
853    my ($filename_full_path) = @_;
854
855    if (($ENV{'GSDLOS'} =~ m/^windows$/i) && ($^O ne "cygwin")) {
856    require Win32;
857
858    # Ensure the given long Windows filename is in a form that can
859    # be opened by Perl => convert it to a short DOS-like filename
860
861    my $short_filename = Win32::GetShortPathName($filename_full_path);
862    if (defined $short_filename) {
863        $filename_full_path = $short_filename;
864    }
865    # Make sure initial drive letter is lower-case (to fit in
866    # with rest of Greenstone)
867    $filename_full_path =~ s/^(.):/\u$1:/;
868    }
869
870    return $filename_full_path;
871}
872
873
874sub filename_is_absolute
875{
876  warnings::warnif("deprecated", "util::filename_is_absolute() is deprecated, using FileUtils::isFilenameAbsolute() instead");
877  return &FileUtils::isFilenameAbsolute(@_);
878}
879
880
881## @method make_absolute()
882#
883#  Ensure the given file path is absolute in respect to the given base path.
884#
885#  @param  $base_dir A string denoting the base path the given dir must be
886#                    absolute to.
887#  @param  $dir The directory to be made absolute as a string. Note that the
888#               dir may already be absolute, in which case it will remain
889#               unchanged.
890#  @return The now absolute form of the directory as a string.
891#
892#  @author John Thompson, DL Consulting Ltd.
893#  @copy 2006 DL Consulting Ltd.
894#
895#used in buildcol.pl, doesn't work for all cases --kjdon
896sub make_absolute {
897   
898    my ($base_dir, $dir) = @_;
899###    print STDERR "dir = $dir\n";
900    $dir =~ s/[\\\/]+/\//g;
901    $dir = $base_dir . "/$dir" unless ($dir =~ m|^(\w:)?/|);
902    $dir =~ s|^/tmp_mnt||;
903    1 while($dir =~ s|/[^/]*/\.\./|/|g);
904    $dir =~ s|/[.][.]?/|/|g;
905    $dir =~ tr|/|/|s;
906###    print STDERR "dir = $dir\n";
907   
908    return $dir;
909}
910## make_absolute() ##
911
912sub get_dirsep {
913
914    if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
915    return "\\";
916    } else {
917    return "\/";
918    }
919}
920
921sub get_os_dirsep {
922
923    if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin")) {
924    return "\\\\";
925    } else {
926    return "\\\/";
927    }
928}
929
930sub get_re_dirsep {
931
932    return "\\\\|\\\/";
933}
934
935
936sub get_dirsep_tail {
937    my ($filename) = @_;
938   
939    # returns last part of directory or filename
940    # On unix e.g. a/b.d => b.d
941    #              a/b/c => c
942
943    my $dirsep = get_re_dirsep();
944    my @dirs = split (/$dirsep/, $filename);
945    my $tail = pop @dirs;
946
947    # - caused problems under windows
948    #my ($tail) = ($filename =~ m/^(?:.*?$dirsep)?(.*?)$/);
949
950    return $tail;
951}
952
953
954# if this is running on windows we want binaries to end in
955# .exe, otherwise they don't have to end in any extension
956sub get_os_exe {
957    return ".exe" if (($ENV{'GSDLOS'} =~ /^windows$/i) && ($^O ne "cygwin"));
958    return "";
959}
960
961
962# test to see whether this is a big or little endian machine
963sub is_little_endian
964{
965    # To determine the name of the operating system, the variable $^O is a cheap alternative to pulling it out of the Config module;
966    # If it is a Macintosh machine (i.e. the Darwin operating system), regardless if it's running on the IBM power-pc cpu or the x86 Intel-based chip with a power-pc emulator running on top of it, it's big-endian
967    # Otherwise, it's little endian
968
969    #return 0 if $^O =~ /^darwin$/i;
970    #return 0 if $ENV{'GSDLOS'} =~ /^darwin$/i;
971   
972    # Going back to stating exactly whether the machine is little endian
973    # or big endian, without any special case for Macs. Since for rata it comes
974    # back with little endian and for shuttle with bigendian.
975    return (ord(substr(pack("s",1), 0, 1)) == 1);
976}
977
978
979# will return the collection name if successful, "" otherwise
980sub use_collection {
981    my ($collection, $collectdir) = @_;
982
983    if (!defined $collectdir || $collectdir eq "") {
984    $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "collect");
985    }
986
987    if (!defined $ENV{'GREENSTONEHOME'}) { # for GS3, would have been defined in use_site_collection, to GSDL3HOME
988     $ENV{'GREENSTONEHOME'} = $ENV{'GSDLHOME'};
989    }
990
991    # get and check the collection
992    if (!defined($collection) || $collection eq "") {
993    if (defined $ENV{'GSDLCOLLECTION'}) {
994        $collection = $ENV{'GSDLCOLLECTION'};
995    } else {
996        print STDOUT "No collection specified\n";
997        return "";
998    }
999    }
1000   
1001    if ($collection eq "modelcol") {
1002    print STDOUT "You can't use modelcol.\n";
1003    return "";
1004    }
1005
1006    # make sure the environment variables GSDLCOLLECTION and GSDLCOLLECTDIR
1007    # are defined
1008    $ENV{'GSDLCOLLECTION'} = $collection;
1009    $ENV{'GSDLCOLLECTHOME'} = $collectdir;
1010    $ENV{'GSDLCOLLECTDIR'} = &FileUtils::filenameConcatenate($collectdir, $collection);
1011
1012    # make sure this collection exists
1013    if (!-e $ENV{'GSDLCOLLECTDIR'}) {
1014    print STDOUT "Invalid collection ($collection).\n";
1015    return "";
1016    }
1017
1018    # everything is ready to go
1019    return $collection;
1020}
1021
1022sub get_current_collection_name {
1023    return $ENV{'GSDLCOLLECTION'};
1024}
1025
1026
1027# will return the collection name if successful, "" otherwise. 
1028# Like use_collection (above) but for greenstone 3 (taking account of site level)
1029
1030sub use_site_collection {
1031    my ($site, $collection, $collectdir) = @_;
1032
1033    if (!defined $collectdir || $collectdir eq "") {
1034    die "GSDL3HOME not set.\n" unless defined $ENV{'GSDL3HOME'};
1035    $collectdir = &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'}, "sites", $site, "collect");
1036    }
1037
1038    if (defined $ENV{'GSDL3HOME'}) {
1039    $ENV{'GREENSTONEHOME'} = $ENV{'GSDL3HOME'};     
1040    $ENV{'SITEHOME'} = &FileUtils::filenameConcatenate($ENV{'GREENSTONEHOME'}, "sites", $site);
1041    } elsif (defined $ENV{'GSDL3SRCHOME'}) {
1042    $ENV{'GREENSTONEHOME'} = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "web"); 
1043    $ENV{'SITEHOME'} = &FileUtils::filenameConcatenate($ENV{'GREENSTONEHOME'}, "sites", $site);
1044    } else {
1045    print STDERR "*** util::use_site_collection(). Warning: Neither GSDL3HOME nor GSDL3SRCHOME set.\n";
1046    }
1047
1048    # collectdir explicitly set by this point (using $site variable if required).
1049    # Can call "old" gsdl2 use_collection now.
1050
1051    return use_collection($collection,$collectdir);
1052}
1053
1054
1055
1056sub locate_config_file
1057{
1058    my ($file) = @_;
1059
1060    my $locations = locate_config_files($file);
1061
1062    return shift @$locations; # returns undef if 'locations' is empty
1063}
1064
1065
1066sub locate_config_files
1067{
1068    my ($file) = @_;
1069
1070    my @locations = ();
1071
1072    if (-e $file) {
1073    # Clearly specified (most likely full filename)
1074    # No need to hunt in 'etc' directories, return value unchanged
1075    push(@locations,$file);
1076    }
1077    else {
1078    # Check for collection specific one before looking in global GSDL 'etc'
1079    if (defined $ENV{'GSDLCOLLECTDIR'} && $ENV{'GSDLCOLLECTDIR'} ne "") {
1080        my $test_collect_etc_filename
1081        = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"etc", $file);
1082       
1083        if (-e $test_collect_etc_filename) {
1084        push(@locations,$test_collect_etc_filename);
1085        }
1086    }
1087    my $test_main_etc_filename
1088        = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"etc", $file);
1089    if (-e $test_main_etc_filename) {
1090        push(@locations,$test_main_etc_filename);
1091    }
1092    }
1093
1094    return \@locations;
1095}
1096
1097
1098sub hyperlink_text
1099{
1100    my ($text) = @_;
1101   
1102    $text =~ s/(http:\/\/[^\s]+)/<a href=\"$1\">$1<\/a>/mg;
1103    $text =~ s/(^|\s+)(www\.(\w|\.)+)/<a href=\"http:\/\/$2\">$2<\/a>/mg;
1104
1105    return $text;
1106}
1107
1108
1109# A method to check if a directory is empty (note that an empty directory still has non-zero size!!!)
1110# Code is from http://episteme.arstechnica.com/eve/forums/a/tpc/f/6330927813/m/436007700831
1111sub is_dir_empty {
1112  warnings::warnif("deprecated", "util::is_dir_empty() is deprecated, using FileUtils::isDirectoryEmpty() instead");
1113  return &FileUtils::isDirectoryEmpty(@_);
1114}
1115
1116# Returns the given filename converted using either URL encoding or base64
1117# encoding, as specified by $rename_method. If the given filename has no suffix
1118# (if it is just the tailname), then $no_suffix should be some defined value.
1119# rename_method can be url, none, base64
1120sub rename_file {
1121    my ($filename, $rename_method, $no_suffix)  = @_;
1122
1123    if(!$filename) { # undefined or empty string
1124    return $filename;
1125    }
1126
1127    if (!$rename_method) {
1128    print STDERR "WARNING: no file renaming method specified. Defaulting to using URL encoding...\n";
1129    # Debugging information
1130    # my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(1);
1131    # print STDERR "Called from method: $cfilename:$cline $cpackage->$csubr\n";
1132    $rename_method = "url";
1133    } elsif($rename_method eq "none") {
1134    return $filename; # would have already been renamed
1135    }
1136
1137    # No longer replace spaces with underscores, since underscores mess with incremental rebuild
1138    ### Replace spaces with underscore. Do this first else it can go wrong below when getting tailname
1139    ###$filename =~ s/ /_/g;
1140
1141    my ($tailname,$dirname,$suffix);
1142    if($no_suffix) { # given a tailname, no suffix
1143    ($tailname,$dirname) = File::Basename::fileparse($filename);
1144    }
1145    else {
1146    ($tailname,$dirname,$suffix) = File::Basename::fileparse($filename, "\\.(?:[^\\.]+?)\$");
1147    }
1148    if (!$suffix) {
1149    $suffix = "";
1150    }
1151    # This breaks GLI matching extracted metadata to files in Enrich panel, as
1152    # original is eg .JPG while gsdlsourcefilename ends up .jpg
1153    # Not sure why it was done in first place...
1154    #else {
1155    #$suffix = lc($suffix);
1156    #}
1157
1158    if ($rename_method eq "url") {
1159    $tailname = &unicode::url_encode($tailname);
1160    }
1161    elsif ($rename_method eq "base64") {
1162    $tailname = &unicode::base64_encode($tailname);
1163    $tailname =~ s/\s*//sg;      # for some reason it adds spaces not just at end but also in middle
1164    }
1165
1166    $filename = "$tailname$suffix";
1167    $filename = "$dirname$filename" if ($dirname ne "./" && $dirname ne ".\\");
1168
1169    return $filename;
1170}
1171
1172
1173# BACKWARDS COMPATIBILITY: Just in case there are old .ldb/.bdb files
1174sub rename_ldb_or_bdb_file {
1175    my ($filename_no_ext) = @_;
1176
1177    my $new_filename = "$filename_no_ext.gdb";
1178    return if (-f $new_filename); # if the file has the right extension, don't need to do anything
1179    # try ldb
1180    my $old_filename = "$filename_no_ext.ldb";
1181   
1182    if (-f $old_filename) {
1183    print STDERR "Renaming $old_filename to $new_filename\n";
1184    rename ($old_filename, $new_filename)
1185        || print STDERR "Rename failed: $!\n";
1186    return;
1187    }
1188    # try bdb
1189    $old_filename = "$filename_no_ext.bdb";
1190    if (-f $old_filename) {
1191    print STDERR "Renaming $old_filename to $new_filename\n";   
1192    rename ($old_filename, $new_filename)
1193        || print STDERR "Rename failed: $!\n";
1194    return;
1195    }
1196}
1197
1198sub os_dir() {
1199   
1200    my $gsdlarch = "";
1201    if(defined $ENV{'GSDLARCH'}) {
1202    $gsdlarch = $ENV{'GSDLARCH'};
1203    }
1204    return $ENV{'GSDLOS'}.$gsdlarch;
1205}
1206
1207# returns 1 if this (GS server) is a GS3 installation, returns 0 if it's GS2.
1208sub is_gs3() {
1209    if($ENV{'GSDL3SRCHOME'}) {
1210    return 1;
1211    } else {
1212    return 0;
1213    }
1214}
1215
1216# Returns the greenstone URL prefix extracted from the appropriate GS2/GS3 config file.
1217# By default, /greenstone3 for GS3 or /greenstone for GS2.
1218sub get_greenstone_url_prefix() {
1219    # if already set on a previous occasion, just return that
1220    # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1221    return $ENV{'GREENSTONE_URL_PREFIX'} if($ENV{'GREENSTONE_URL_PREFIX'});
1222
1223    my ($configfile, $urlprefix, $defaultUrlprefix);
1224    my @propertynames = ();
1225
1226    if($ENV{'GSDL3SRCHOME'}) {
1227    $defaultUrlprefix = "/greenstone3";
1228    $configfile = &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'}, "packages", "tomcat", "conf", "Catalina", "localhost", "greenstone3.xml");
1229    push(@propertynames, qw/path\s*\=/);
1230    } else {
1231    $defaultUrlprefix = "/greenstone";
1232    $configfile = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "cgi-bin", &os_dir(), "gsdlsite.cfg");
1233    push(@propertynames, (qw/\nhttpprefix/, qw/\ngwcgi/)); # inspect one property then the other
1234    }
1235
1236    $urlprefix = &extract_propvalue_from_file($configfile, \@propertynames);
1237
1238    if(!$urlprefix) { # no values found for URL prefix, use default values
1239    $urlprefix = $defaultUrlprefix;
1240    } else {
1241    #gwcgi can contain more than the wanted prefix, we split on / to get the first "directory" level
1242    $urlprefix =~ s/^\///; # remove the starting slash
1243    my @dirs = split(/(\\|\/)/, $urlprefix);
1244    $urlprefix = shift(@dirs);
1245
1246    if($urlprefix !~ m/^\//) { # in all cases: ensure the required forward slash is at the front
1247        $urlprefix = "/$urlprefix";
1248    }
1249    }
1250
1251    # set for the future
1252    $ENV{'GREENSTONE_URL_PREFIX'} = $urlprefix;
1253#    print STDERR "*** in get_greenstone_url_prefix(): $urlprefix\n\n";
1254    return $urlprefix;
1255}
1256
1257
1258
1259#
1260# The following comes from activate.pl
1261#
1262# Designed to work with a server included with GS.
1263#  - For GS3, we ask ant for the library URL.
1264#  - For GS2, we derive the URL from the llssite.cfg file.
1265
1266sub get_full_greenstone_url_prefix
1267{   
1268    my ($gs_mode, $lib_name) = @_;
1269   
1270    # if already set on a previous occasion, just return that
1271    # (Don't want to keep repeating this: cost of re-opening and scanning files.)
1272    return $ENV{'FULL_GREENSTONE_URL_PREFIX'} if($ENV{'FULL_GREENSTONE_URL_PREFIX'});
1273
1274    # set gs_mode if it was not passed in (servercontrol.pm would pass it in, any other callers won't)
1275    $gs_mode = ($ENV{'GSDL3SRCHOME'}) ? "gs3" : "gs2" unless defined $gs_mode;
1276   
1277    my $url = undef;   
1278   
1279    if($gs_mode eq "gs2") {     
1280    my $llssite_cfg = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "llssite.cfg");
1281   
1282    if(-f $llssite_cfg) {
1283        # check llssite.cfg for line with url property
1284        # for server.exe also need to use portnumber and enterlib properties           
1285       
1286        # Read in the entire contents of the file in one hit
1287        if (!open (FIN, $llssite_cfg)) {
1288        print STDERR "util::get_full_greenstone_url_prefix() failed to open $llssite_cfg ($!)\n";
1289        return undef;
1290        }
1291       
1292        my $contents;
1293        sysread(FIN, $contents, -s FIN);           
1294        close(FIN);
1295       
1296        my @lines = split(/[\n\r]+/, $contents); # split on carriage-returns and/or linefeeds
1297        my $enterlib = "";
1298        my $portnumber = "8282"; # will remain empty (implicit port 80) unless it's specifically been assigned
1299       
1300        foreach my $line (@lines) {             
1301        if($line =~ m/^url=(.*)$/) {
1302            $url = $1;                 
1303        } elsif($line =~ m/^enterlib=(.*)$/) {
1304            $enterlib = $1;                 
1305        } elsif($line =~ m/^portnumber=(.*)$/) {
1306            $portnumber = $1;                   
1307        }   
1308        }
1309       
1310        if(!$url) {
1311        return undef;
1312        }
1313        elsif($url eq "URL_pending") { # library is not running
1314        # do not process url=URL_pending in the file, since for server.exe
1315        # this just means the Enter Library button hasn't been pressed yet             
1316        $url = undef;
1317        }
1318        else {
1319        # In the case of server.exe, need to do extra work to get the proper URL
1320        # But first, need to know whether we're indeed dealing with server.exe:
1321       
1322        # compare the URL's domain to the full URL
1323        # E.g. for http://localhost:8383/greenstone3/cgi-bin, the domain is localhost:8383
1324        my $uri = URI->new( $url );
1325        my $host = $uri->host;
1326        #print STDERR "@@@@@ host: $host\n";
1327        if($url =~ m/https?:\/\/$host(\/)?$/) {
1328            #if($url !~ m/https?:\/\/$host:$portnumber(\/)?/ || $url =~ m/https?:\/\/$host(\/)?$/) {
1329            # (if the URL does not contain the portnumber, OR if the port is implicitly 80 and)                 
1330            # If the domain with http:// prefix is completely the same as the URL, assume server.exe
1331            # then the actual URL is the result of suffixing the port and enterlib properties in llssite.cfg
1332            $url = $url.":".$portnumber.$enterlib;         
1333        } # else, apache web server         
1334       
1335        }           
1336    }
1337    } elsif($gs_mode eq "gs3") {
1338    # Either check build.properties for tomcat.server, tomcat.port and app.name (and default servlet name).
1339    # app.name is stored in app.path by build.xml. Need to move app.name in build.properties from build.xml
1340   
1341    # Or, run the new target get-default-servlet-url
1342    # the output can look like:
1343    #
1344    # Buildfile: build.xml
1345    #   [echo] os.name: Windows Vista
1346    #
1347    # get-default-servlet-url:
1348    #   [echo] http://localhost:8383/greenstone3/library
1349    # BUILD SUCCESSFUL
1350    # Total time: 0 seconds
1351   
1352    #my $output = qx/ant get-default-servlet-url/; # backtick operator, to get STDOUT (else 2>&1)
1353    # - see http://stackoverflow.com/questions/799968/whats-the-difference-between-perls-backticks-system-and-exec
1354   
1355    # The get-default-servlet-url ant target can be run from anywhere by specifying the
1356    # location of GS3's ant build.xml buildfile. Activate.pl can be run from anywhere for GS3
1357    # GSDL3SRCHOME will be set for GS3 by gs3-setup.sh, a step that would have been necessary
1358    # to run the activate.pl script in the first place
1359   
1360    my $full_build_xml = &FileUtils::javaFilenameConcatenate($ENV{'GSDL3SRCHOME'},"build.xml");
1361
1362    my $perl_command = "ant -buildfile \"$full_build_xml\" get-default-servlet-url";
1363   
1364    if (open(PIN, "$perl_command |")) {
1365        while (defined (my $perl_output_line = <PIN>)) {
1366
1367        if($perl_output_line =~ m@(https?):\/\/(\S*)@) { # grab all the non-whitespace chars
1368            $url="$1://".$2; # preserve the http protocol #$url="http://".$1;
1369        }
1370        }
1371        close(PIN);
1372       
1373        if (defined $lib_name) { # url won't be undef now
1374            # replace the servlet_name portion of the url found, with the given library_name
1375            $url =~ s@/[^/]*$@/$lib_name@;
1376        }
1377    } else {
1378        print STDERR "util::get_full_greenstone_url_prefix() failed to run $perl_command to work out library URL for $gs_mode\n";
1379    }
1380    }
1381   
1382    # either the url is still undef or it is now set
1383    #print STDERR "\n@@@@@ final URL:|$url|\n" if $url;     
1384    #print STDERR "\n@@@@@ URL still undef\n" if !$url;
1385
1386    $ENV{'FULL_GREENSTONE_URL_PREFIX'} = $url;
1387
1388    return $url;
1389}
1390
1391
1392# Given a config file (xml or java properties file) and a list/array of regular expressions
1393# that represent property names to match on, this function will return the value for the 1st
1394# matching property name. If the return value is undefined, no matching property was found.
1395sub extract_propvalue_from_file() {
1396    my ($configfile, $propertynames) = @_;
1397
1398    my $value;
1399    unless(open(FIN, "<$configfile")) {
1400    print STDERR "extract_propvalue_from_file(): Unable to open $configfile. $!\n";
1401    return $value; # not initialised
1402    }
1403
1404    # Read the entire file at once, as one single line, then close it
1405    my $filecontents;
1406    {
1407    local $/ = undef;       
1408    $filecontents = <FIN>;
1409    }
1410    close(FIN);
1411
1412    foreach my $regex (@$propertynames) {
1413        ($value) = $filecontents=~ m/$regex\s*(\S*)/s; # read value of the property given by regex up to the 1st space
1414    if($value) {
1415            $value =~ s/^\"//;     # remove any startquotes
1416        $value =~ s/\".*$//;   # remove the 1st endquotes (if any) followed by any xml
1417        last;              # found value for a matching property, break from loop
1418    }
1419    }
1420
1421    return $value;
1422}
1423
1424# Subroutine that sources setup.bash, given GSDLHOME and GSDLOS and
1425# given that perllib is in @INC in order to invoke this subroutine.
1426# Call as follows -- after setting up INC to include perllib and
1427# after setting up GSDLHOME and GSDLOS:
1428#
1429# require util;
1430# &util::setup_greenstone_env($ENV{'GSDLHOME'}, $ENV{'GSDLOS'});
1431#
1432sub setup_greenstone_env() {
1433    my ($GSDLHOME, $GSDLOS) = @_;
1434
1435    #my %env_map = ();
1436    # Get the localised ENV settings of running a localised source setup.bash
1437    # and put it into the ENV here. Need to clear GSDLHOME before running setup
1438    #my $perl_command = "(cd $GSDLHOME; export GSDLHOME=; . ./setup.bash > /dev/null; env)";
1439    my $perl_command = "(cd $GSDLHOME; /bin/bash -c \"export GSDLHOME=; source setup.bash > /dev/null; env\")";     
1440    if (($GSDLOS =~ m/windows/i) && ($^O ne "cygwin"))  {
1441        #$perl_command = "cmd /C \"cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set\"";
1442        $perl_command = "(cd $GSDLHOME&& set GSDLHOME=&& setup.bat > nul&& set)";
1443    }
1444    if (!open(PIN, "$perl_command |")) {
1445        print STDERR ("Unable to execute command: $perl_command. $!\n");
1446    }
1447
1448    my $lastkey;
1449    while (defined (my $perl_output_line = <PIN>)) {
1450        my($key,$value) = ($perl_output_line =~ m/^([^=]*)[=](.*)$/);
1451        if(defined $key) {
1452            #$env_map{$key}=$value;     
1453            $ENV{$key}=$value;
1454            $lastkey = $key;
1455        } elsif($lastkey && $perl_output_line !~ m/^\s*$/) {
1456            # there was no equals sign in $perl_output_line, so this
1457            # $perl_output_line may be a spillover from the previous
1458            $ENV{$lastkey} = $ENV{$lastkey}."\n".$perl_output_line;
1459        }
1460    }
1461    close (PIN);
1462
1463    # If any keys in $ENV don't occur in Greenstone's localised env
1464    # (stored in $env_map), delete those entries from $ENV
1465    #foreach $key (keys %ENV) {
1466    #   if(!defined $env_map{$key}) {
1467    #       print STDOUT "**** DELETING ENV KEY: $key\tVALUE: $ENV{$key}\n";
1468    #       delete $ENV{$key}; # del $ENV(key, value) pair
1469    #   }
1470    #}
1471    #undef %env_map;
1472}
1473
1474sub get_perl_exec() {   
1475    my $perl_exec = $^X; # may return just "perl"
1476   
1477    if($ENV{'PERLPATH'}) {
1478        # OR: # $perl_exec = &FileUtils::filenameConcatenate($ENV{'PERLPATH'},"perl");
1479        if (($ENV{'GSDLOS'} =~ m/windows/) && ($^O ne "cygwin")) {
1480            $perl_exec = "$ENV{'PERLPATH'}\\Perl.exe";
1481        } else {
1482            $perl_exec = "$ENV{'PERLPATH'}/perl";
1483        }
1484    } else { # no PERLPATH, use Config{perlpath} else $^X: special variables
1485        # containing the full path to the current perl executable we're using
1486        $perl_exec = $Config{perlpath}; # configured path for perl
1487        if (!-e $perl_exec) { # may not point to location on this machine
1488            $perl_exec = $^X; # may return just "perl"
1489            if($perl_exec =~ m/^perl/i) { # warn if just perl or Perl.exe
1490                print STDERR "**** WARNING: Perl exec found contains no path: $perl_exec";             
1491            }
1492        }
1493    }
1494   
1495    return $perl_exec;
1496}
1497
1498# returns the path to the java command in the JRE included with GS (if any),
1499# quoted to safeguard any spaces in this path, otherwise a simple java
1500# command is returned which assumes and will try for a system java.
1501sub get_java_command {
1502    my $java = "java";
1503    if(defined $ENV{'GSDLHOME'}) { # should be, as this script would be launched from the cmd line
1504                               # after running setup.bat or from GLI which also runs setup.bat
1505    my $java_bin = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"packages","jre","bin");
1506    if(-d $java_bin) {
1507        $java = &FileUtils::filenameConcatenate($java_bin,"java");
1508        $java = "\"".$java."\""; # quoted to preserve spaces in path
1509    }
1510    }
1511    return $java;
1512}
1513
1514
1515# Given the qualified collection name (colgroup/collection),
1516# returns the collection and colgroup parts
1517sub get_collection_parts {
1518    # http://perldoc.perl.org/File/Basename.html
1519    # my($filename, $directories, $suffix) = fileparse($path);
1520    # "$directories contains everything up to and including the last directory separator in the $path
1521    # including the volume (if applicable). The remainder of the $path is the $filename."
1522    #my ($collection, $colgroup) = &File::Basename::fileparse($qualified_collection);   
1523
1524    my $qualified_collection = shift(@_);
1525
1526    # Since activate.pl can be launched from the command-line, including by a user,
1527    # best not to assume colgroup uses URL-style slashes as would be the case with GLI
1528    # Also allow for the accidental inclusion of multiple slashes
1529    my ($colgroup, $collection) = split(/[\/\\]+/, $qualified_collection); #split('/', $qualified_collection);
1530   
1531    if(!defined $collection) {
1532        $collection = $colgroup;
1533        $colgroup = "";
1534    }
1535    return ($collection, $colgroup);
1536}
1537
1538# work out the "collectdir/collection" location
1539sub resolve_collection_dir {
1540    my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode
1541   
1542    if (defined $ENV{'GSDLCOLLECTDIR'}) { # a predefined collection dir exists
1543        return $ENV{'GSDLCOLLECTDIR'};
1544    }
1545
1546    my ($colgroup, $collection) = &util::get_collection_parts($qualified_collection);   
1547   
1548    if (!defined $collect_dir || !$collect_dir) { # if undefined or empty string
1549        $collect_dir = &util::get_working_collect_dir($site);
1550    }
1551
1552    return &FileUtils::filenameConcatenate($collect_dir,$colgroup,$collection);
1553}
1554
1555# work out the full path to "collect" of this greenstone 2/3 installation
1556sub get_working_collect_dir {
1557    my ($site) = @_;   
1558
1559    if (defined $ENV{'GSDLCOLLECTHOME'}) { # a predefined collect dir exists
1560    return $ENV{'GSDLCOLLECTHOME'};
1561    }
1562
1563    if (defined $site && $site) { # site non-empty, so get default collect dir for GS3
1564   
1565    if (defined $ENV{'GSDL3HOME'}) {
1566        return &FileUtils::filenameConcatenate($ENV{'GSDL3HOME'},"sites",$site,"collect"); # web folder
1567    }
1568    elsif (defined $ENV{'GSDL3SRCHOME'}) {
1569        return &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"web","sites",$site,"collect");
1570    }
1571    }
1572
1573    elsif (defined $ENV{'SITEHOME'}) {
1574    return &FileUtils::filenameConcatenate($ENV{'SITEHOME'},"collect");
1575    }
1576   
1577    else { # get default collect dir for GS2
1578    return &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"collect");
1579    }
1580}
1581
1582sub is_abs_path_any_os {
1583    my ($path) = @_;
1584
1585    # We can have filenames in our DBs that were produced on other OS, so this method exists
1586    # to help identify absolute paths in such cases.
1587
1588    return 1 if($path =~ m@^/@); # full paths begin with forward slash on linux/mac
1589    return 1 if($path =~ m@^([a-zA-Z]\:|\\)@); # full paths begin with drive letter colon for Win or \ for volume, http://stackoverflow.com/questions/13011013/get-only-volume-name-from-filepath
1590
1591    return 0;
1592}
1593
1594
1595# This subroutine is for improving portability of Greenstone collections from one OS to another,
1596# to be used to convert absolute paths going into db files into paths with placeholders instead.
1597# This sub works with util::get_common_gs_paths and takes a path to a greenstone file and, if it's
1598# an absolute path, then it will replace the longest matching greenstone-path prefix of the given
1599# path with a placeholder to match.
1600# The Greenstone-path prefixes that can be matched are the following common Greenstone paths:
1601# the path to the current (specific) collection, the path to the general GS collect directory,
1602# the path to the site directory if GS3, else the path to the GSDLHOME/GSDL3HOME folder.
1603# The longest matching prefix will be replaced with the equivalent placeholder:
1604# @THISCOLLECTPATH@, else @COLLECTHOME@, else @SITEHOME@, else @GSDLHOME@.
1605sub abspath_to_placeholders {
1606    my $path = shift(@_); # path to convert from absolute to one with placeholders
1607    my $opt_long_or_short_winfilenames = shift(@_) || "short"; # whether we want to force use of long file names even on windows, default uses short
1608
1609    return $path unless is_abs_path_any_os($path); # path is relative
1610
1611    if ($opt_long_or_short_winfilenames eq "long") {
1612    $path = &util::upgrade_if_dos_filename($path); # will only do something on windows
1613    }
1614   
1615    # now we know we're dealing with absolute paths and have to replace gs prefixes with placeholders
1616    my @gs_paths = ($ENV{'GSDLCOLLECTDIR'}, $ENV{'GSDLCOLLECTHOME'}, $ENV{'SITEHOME'}, $ENV{'GREENSTONEHOME'}); # list in this order: from longest to shortest path
1617
1618    my %placeholder_map = ($ENV{'GREENSTONEHOME'} => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
1619               $ENV{'GSDLCOLLECTHOME'} => '@COLLECTHOME@',
1620               $ENV{'GSDLCOLLECTDIR'} => '@THISCOLLECTPATH@'
1621    );
1622    $placeholder_map{$ENV{'SITEHOME'}} = '@SITEHOME@' if defined $ENV{'SITEHOME'};
1623
1624    $path = &util::_abspath_to_placeholders($path, \@gs_paths, \%placeholder_map);
1625
1626    if ($ENV{'GSDLOS'} =~ /^windows$/i && $opt_long_or_short_winfilenames eq "short") {
1627    # for windows need to look for matches on short file names too
1628    # matched paths are again to be replaced with the usual placeholders
1629
1630    my $gsdlcollectdir = &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTDIR'});
1631    my $gsdlcollecthome = &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTHOME'});
1632    my $sitehome = (defined $ENV{'SITEHOME'}) ? &util::downgrade_if_dos_filename($ENV{'SITEHOME'}) : undef;
1633    my $greenstonehome =  &util::downgrade_if_dos_filename($ENV{'GREENSTONEHOME'});
1634
1635    @gs_paths = ($gsdlcollectdir, $gsdlcollecthome, $sitehome, $greenstonehome); # order matters
1636
1637    %placeholder_map = ($greenstonehome => '@GSDLHOME@', # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
1638                $gsdlcollecthome => '@COLLECTHOME@',
1639                $gsdlcollectdir => '@THISCOLLECTPATH@'
1640        );
1641    $placeholder_map{$sitehome} = '@SITEHOME@' if defined $sitehome;
1642
1643    $path = &util::_abspath_to_placeholders($path, \@gs_paths, \%placeholder_map);
1644    }
1645
1646    return $path;
1647}
1648
1649sub _abspath_to_placeholders {
1650    my ($path, $gs_paths_ref, $placeholder_map_ref) = @_;
1651
1652    # The sequence of elements in @gs_paths matters   
1653    # Need to loop starting from the *longest* matching path (the path to the specific collection)
1654    # to the shortest matching path (the path to gsdlhome/gsdl3home folder):
1655
1656    foreach my $gs_path (@$gs_paths_ref) {
1657    next if(!defined $gs_path); # site undefined for GS2
1658
1659    my $re_path =  &util::filename_to_regex($gs_path); # escape for regex
1660
1661    if($path =~ m/^$re_path/i) { # case sensitive or not for OS?
1662
1663        my $placeholder = $placeholder_map_ref->{$gs_path}; # get the placeholder to replace the matched path with
1664
1665        $path =~ s/^$re_path/$placeholder/; #case sensitive or not?
1666        #$path =~ s/^[\\\/]//; # remove gs_path's trailing separator left behind at the start of the path
1667        # lowercase file extension, This is needed when shortfilenames are used, as case affects alphetical ordering, which affects diffcol     
1668        $path =~ s/\.([A-Z]+)$/".".lc($1)/e;
1669        last; # done
1670    }
1671    }
1672   
1673    return $path;
1674}
1675
1676# Function that does the reverse of the util::abspath_to_placeholders subroutine
1677# Once again, call this with the values returned from util::get_common_gs_paths
1678sub placeholders_to_abspath {
1679    my $path = shift(@_); # path that can contain placeholders to convert to resolved absolute path
1680    my $opt_long_or_short_winfilenames = shift(@_) || "short"; # whether we want to force use of long file names even on windows, default uses short
1681
1682    return $path if($path !~ m/@/); # path contains no placeholders
1683   
1684    # replace placeholders with gs prefixes
1685    my @placeholders = ('@THISCOLLECTPATH@', '@COLLECTHOME@', '@SITEHOME@', '@GSDLHOME@'); # order of paths not crucial in this case,
1686                       # but listed here from longest to shortest once placeholders are have been resolved
1687
1688    # can't use double-quotes around at-sign, else perl tries to evaluate it as referring to an array
1689    my %placeholder_to_gspath_map;
1690    if ($ENV{'GSDLOS'} =~ /^windows$/i && $opt_long_or_short_winfilenames eq "short") {
1691    # always replace placeholders with short file names of the absolute paths on windows?
1692    %placeholder_to_gspath_map = ('@GSDLHOME@' => &util::downgrade_if_dos_filename($ENV{'GREENSTONEHOME'}),
1693                     '@COLLECTHOME@' => &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTHOME'}),
1694                     '@THISCOLLECTPATH@' => &util::downgrade_if_dos_filename($ENV{'GSDLCOLLECTDIR'})
1695    );
1696    $placeholder_to_gspath_map{'@SITEHOME@'} =  &util::downgrade_if_dos_filename($ENV{'SITEHOME'}) if defined $ENV{'SITEHOME'};
1697    } else {
1698    %placeholder_to_gspath_map = ('@GSDLHOME@' => $ENV{'GREENSTONEHOME'},
1699                      '@SITEHOME@' => $ENV{'SITEHOME'}, # can be undef
1700                      '@COLLECTHOME@' => $ENV{'GSDLCOLLECTHOME'},
1701                      '@THISCOLLECTPATH@' => $ENV{'GSDLCOLLECTDIR'}
1702        ); # $placeholder_to_gspath_map{'@SITEHOME@'} = $ENV{'SITEHOME'} if defined $ENV{'SITEHOME'};
1703    }
1704
1705    foreach my $placeholder (@placeholders) {
1706    my $gs_path = $placeholder_to_gspath_map{$placeholder};
1707
1708    next if(!defined $gs_path); # sitehome for GS2 is undefined
1709
1710    if($path =~ m/^$placeholder/) {
1711        $path =~ s/^$placeholder/$gs_path/;
1712        last; # done
1713    }
1714    }
1715   
1716    return $path;
1717}
1718
1719# Used by pdfpstoimg.pl and PDFBoxConverter to create a .item file from
1720# a directory containing sequentially numbered images.
1721sub create_itemfile
1722{
1723    my ($output_dir, $convert_basename, $convert_to) = @_;
1724    my $page_num = "";
1725
1726    opendir(DIR, $output_dir) || die "can't opendir $output_dir: $!";
1727    my @dir_files = grep {-f "$output_dir/$_"} readdir(DIR);
1728    closedir DIR;
1729
1730    # Sort files in the directory by page_num
1731    sub page_number {
1732    my ($dir) = @_;
1733    my ($pagenum) =($dir =~ m/^.*?[-\.]?(\d+)(\.(jpg|gif|png))?$/i);
1734#   my ($pagenum) =($dir =~ m/(\d+)(\.(jpg|gif|png))?$/i); # this works but is not as safe/strict about input filepatterns as the above
1735
1736    $pagenum = 1 unless defined $pagenum;
1737    return $pagenum;
1738    }
1739
1740    # sort the files in the directory in the order of page_num rather than lexically.
1741    @dir_files = sort { page_number($a) <=> page_number($b) } @dir_files;
1742
1743    # work out if the numbering of the now sorted image files starts at 0 or not
1744    # by checking the number of the first _image_ file (skipping item files)
1745    my $starts_at_0 = 0;
1746    my $firstfile = ($dir_files[0] !~ /\.item$/i) ? $dir_files[0] : $dir_files[1];
1747    if(page_number($firstfile) == 0) { # 00 will evaluate to 0 too in this condition
1748    $starts_at_0 = 1;
1749    }
1750
1751    my $item_file = &FileUtils::filenameConcatenate($output_dir, $convert_basename.".item");
1752    my $item_fh;
1753    &FileUtils::openFileHandle($item_file, 'w', \$item_fh);
1754    print $item_fh "<PagedDocument>\n";
1755
1756    foreach my $file (@dir_files){
1757    if ($file !~ /\.item/i){
1758        $page_num = page_number($file);
1759        $page_num++ if $starts_at_0; # image numbers start at 0, so add 1
1760        print $item_fh "   <Page pagenum=\"$page_num\" imgfile=\"$file\" txtfile=\"\"/>\n";
1761    }
1762    }
1763
1764    print $item_fh "</PagedDocument>\n";
1765    &FileUtils::closeFileHandle($item_file, \$item_fh);
1766    return $item_file;
1767}
1768
1769# Sets the gnomelib_env. Based on the logic in wvware.pl which can perhaps be replaced with a call to this function in future
1770sub set_gnomelib_env
1771{
1772    ## SET THE ENVIRONMENT AS DONE IN SETUP.BASH/BAT OF GNOME-LIB
1773    # Though this is only needed for darwin Lion at this point (and android, though that is untested)
1774
1775    my $libext = "so";
1776    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
1777    return;
1778    } elsif ($ENV{'GSDLOS'} =~ m/^darwin$/i) {
1779    $libext = "dylib";
1780    }
1781
1782    if (!defined $ENV{'GEXTGNOME'}) {
1783        ##print STDERR "@@@ Setting GEXTGNOME env\n";
1784
1785    my $gnome_dir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"ext","gnome-lib-minimal");
1786
1787    if(! -d $gnome_dir) {
1788        $gnome_dir = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"ext","gnome-lib");
1789
1790        if(! -d $gnome_dir) {
1791        $gnome_dir = "";
1792        }
1793    }
1794   
1795    # now set other the related env vars,
1796    # IF we've found the gnome-lib dir installed in the ext folder 
1797
1798    if ($gnome_dir ne "" && -f &FileUtils::filenameConcatenate($gnome_dir, $ENV{'GSDLOS'}, "lib", "libiconv.$libext")) {
1799        $ENV{'GEXTGNOME'} = $gnome_dir;
1800        $ENV{'GEXTGNOME_INSTALLED'}=&FileUtils::filenameConcatenate($ENV{'GEXTGNOME'}, $ENV{'GSDLOS'});
1801       
1802        my $gnomelib_bin = &FileUtils::filenameConcatenate($ENV{'GEXTGNOME_INSTALLED'}, "bin");
1803        if(-d $gnomelib_bin) { # no bin subfolder in GS binary's cutdown gnome-lib-minimal folder
1804        &util::envvar_prepend("PATH", $gnomelib_bin);
1805        }
1806
1807        # util's prepend will create LD/DYLD_LIB_PATH if it doesn't exist yet
1808        my $gextlib = &FileUtils::filenameConcatenate($ENV{'GEXTGNOME_INSTALLED'}, "lib");
1809
1810        if($ENV{'GSDLOS'} eq "linux") {
1811        &util::envvar_prepend("LD_LIBRARY_PATH", $gextlib);
1812        }
1813        elsif ($ENV{'GSDLOS'} eq "darwin") {
1814        #&util::envvar_prepend("DYLD_LIBRARY_PATH", $gextlib);
1815        &util::envvar_prepend("DYLD_FALLBACK_LIBRARY_PATH", $gextlib);
1816        }
1817    }
1818   
1819    # Above largely mimics the setup.bash of the gnome-lib-minimal.
1820    # Not doing the devel-srcpack that gnome-lib-minimal's setup.bash used to set
1821    # Not exporting GSDLEXTS variable either
1822    }
1823
1824#    print STDERR "@@@@@ GEXTGNOME: ".$ENV{'GEXTGNOME'}."\n\tINSTALL".$ENV{'GEXTGNOME_INSTALLED'}."\n";
1825#    print STDERR "\tPATH".$ENV{'PATH'}."\n";
1826#    print STDERR "\tLD_LIB_PATH".$ENV{'LD_LIBRARY_PATH'}."\n" if $ENV{'LD_LIBRARY_PATH};
1827#    print STDERR "\tDYLD_FALLBACK_LIB_PATH".$ENV{'DYLD_FALLBACK_LIBRARY_PATH'}."\n" if $ENV{'DYLD_FALLBACK_LIBRARY_PATH};
1828
1829    # if no GEXTGNOME, maybe users didn't need gnome-lib to run gnomelib/libiconv dependent binaries like hashfile, suffix, wget
1830    # (wvware is launched in a gnomelib env from its own script, but could possibly go through this script in future)
1831}
1832
1833
1834
1835## @function augmentINC()
1836#
1837#  Prepend a path (if it exists) onto INC but only if it isn't already in INC
1838#  @param $new_path The path to add
1839#  @author jmt12
1840#
1841sub augmentINC
1842{
1843  my ($new_path) = @_;
1844  my $did_add_path = 0;
1845  # might need to be replaced with FileUtils::directoryExists() call eventually
1846  if (-d $new_path)
1847  {
1848    my $did_find_path = 0;
1849    foreach my $existing_path (@INC)
1850    {
1851      if ($existing_path eq $new_path)
1852      {
1853        $did_find_path = 1;
1854        last;
1855      }
1856    }
1857    if (!$did_find_path)
1858    {
1859      unshift(@INC, $new_path);
1860      $did_add_path = 1;
1861    }
1862  }
1863  return $did_add_path;
1864}
1865## augmentINC()
1866
1867
18681;
Note: See TracBrowser for help on using the browser.